72 cl_uint num_platforms;
73 if (
GPUChkErrI(clGetPlatformIDs(0,
nullptr, &num_platforms))) {
76 if (num_platforms == 0) {
80 GPUInfo(
"%d OpenCL Platforms found", num_platforms);
84 std::unique_ptr<cl_platform_id[]> platforms;
85 platforms.reset(
new cl_platform_id[num_platforms]);
86 if (
GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(),
nullptr))) {
90 auto query = [&](
auto func,
auto obj,
auto var) {
98 std::string platform_profile, platform_version, platform_name, platform_vendor;
99 float platform_version_f;
100 auto queryPlatform = [&](
auto platform) {
101 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
102 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
103 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
104 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
105 sscanf(platform_version.c_str(),
"OpenCL %f", &platform_version_f);
108 std::vector<cl_device_id> devices;
109 std::string device_vendor, device_name, device_il_version;
110 cl_device_type device_type;
111 cl_uint device_freq, device_shaders, device_nbits;
112 cl_bool device_endian;
113 auto queryDevice = [&](
auto device) {
114 platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
115 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
116 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
117 clGetDeviceInfo(device, CL_DEVICE_TYPE,
sizeof(device_type), &device_type,
nullptr);
118 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(device_freq), &device_freq,
nullptr);
119 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(device_shaders), &device_shaders,
nullptr);
120 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
sizeof(device_nbits), &device_nbits,
nullptr);
121 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE,
sizeof(device_endian), &device_endian,
nullptr);
124 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
125 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
132 std::string platformUsageInfo;
133 bool platformCompatible =
false;
134 queryPlatform(platforms[iPlatform]);
135 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0,
nullptr, &deviceCount) != CL_SUCCESS) {
137 GPUErrorReturn(
"No device in requested platform or error obtaining device count");
139 platformUsageInfo +=
" - no devices";
141 if (platform_version_f >= 2.1f) {
142 platformUsageInfo +=
" - OpenCL 2.2 capable";
143 platformCompatible =
true;
148 GPUInfo(
"%s Platform %d: (%s %s) %s %s (Compatible: %s)%s",
mProcessingSettings.oclPlatformNum >= 0 ?
"Enforced" :
"Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ?
"yes" :
"no",
mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() :
"");
152 if (deviceCount > devices.size()) {
153 devices.resize(deviceCount);
155 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(),
nullptr) != CL_SUCCESS) {
162 for (uint32_t
i = 0;
i < deviceCount;
i++) {
169 bool deviceOK =
true;
170 queryDevice(devices[
i]);
171 std::string deviceFailure;
172 if (
mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
174 deviceFailure +=
" - No GPU device";
176 if (device_nbits / 8 !=
sizeof(
void*)) {
178 deviceFailure +=
" - No 64 bit device";
180 if (!device_endian) {
182 deviceFailure +=
" - No Little Endian Mode";
186 while ((
pos = device_il_version.find(
"SPIR-V",
pos)) != std::string::npos) {
188 sscanf(device_il_version.c_str() +
pos,
"SPIR-V_%f", &spirvVersion);
189 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
192 pos += strlen(
"SPIR-V_0.0");
194 if (
pos == std::string::npos) {
196 deviceFailure +=
" - No SPIR-V " +
std::to_string(GPUCA_OCL_SPIRV_VERSION) +
" (" + device_il_version +
")";
200 double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (
double)device_shaders;
202 GPUInfo(
" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ?
" " :
"[",
i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ?
" " :
" ]", deviceOK ?
"" : deviceFailure.c_str());
206 GPUInfo(
"Unsupported device requested on platform %d: (%d)", iPlatform,
mProcessingSettings.deviceNum);
211 if (deviceSpeed > bestDeviceSpeed) {
213 bestPlatform = iPlatform;
214 bestDeviceSpeed = deviceSpeed;
227 if (bestDevice == (cl_uint)-1) {
228 GPUErrorReturn(
"Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
235 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
236 std::string deviceVersion;
237 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
238 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(deviceGlobalMem), &deviceGlobalMem,
nullptr);
239 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(deviceConstantBuffer), &deviceConstantBuffer,
nullptr);
240 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(deviceLocalMem), &deviceLocalMem,
nullptr);
241 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup,
nullptr);
242 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(deviceMaxWorkItems), deviceMaxWorkItems,
nullptr);
243 deviceVersion = query(clGetDeviceInfo,
mInternals->
device, CL_DEVICE_VERSION);
244 int versionMajor, versionMinor;
245 sscanf(deviceVersion.c_str(),
"OpenCL %d.%d", &versionMajor, &versionMinor);
247 GPUInfo(
"Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
248 GPUInfo(
"\tVersion = %s", deviceVersion);
249 GPUInfo(
"\tFrequency = %d", (int32_t)device_freq);
250 GPUInfo(
"\tShaders = %d", (int32_t)device_shaders);
251 GPUInfo(
"\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
252 GPUInfo(
"\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
253 GPUInfo(
"\tLocalMemory = %ld", (int64_t)deviceLocalMem);
254 GPUInfo(
"\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
255 GPUInfo(
"\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
258#ifndef GPUCA_NO_CONSTANT_MEMORY
259 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
260 GPUErrorReturn(
"Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
280 GPUInfo(
"OpenCL program and kernels loaded successfully");
296 if (device_type & CL_DEVICE_TYPE_CPU) {
298 GPUInfo(
"Disabling device timers for CPU device");
304 cl_queue_properties prop = 0;
306 prop |= CL_QUEUE_PROFILING_ENABLE;
310 GPUError(
"GPU device timers not supported by OpenCL platform, disabling");
334 const char* krnlGetPtr =
"__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
335 cl_program
program = clCreateProgramWithSource(
mInternals->
context, 1, (
const char**)&krnlGetPtr,
nullptr, &ocl_error);
341 char build_log[16384];
343 GPUImportant(
"Build Log:\n\n%s\n\n", build_log);
346 cl_kernel kernel = clCreateKernel(
program,
"krnlGetPtr", &ocl_error);
360 GPUInfo(
"Mapping hostmemory");
375 GPUInfo(
"OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)
mDeviceMemorySize, (int64_t)
mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
386 for (uint32_t
i = 0;
i <
mEvents.size();
i++) {