95 cl_uint num_platforms;
96 if (
GPUFailedMsgI(clGetPlatformIDs(0,
nullptr, &num_platforms))) {
99 if (num_platforms == 0) {
103 GPUInfo(
"%d OpenCL Platforms found", num_platforms);
107 std::unique_ptr<cl_platform_id[]> platforms;
108 platforms.reset(
new cl_platform_id[num_platforms]);
109 if (
GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(),
nullptr))) {
113 auto query = [&](
auto func,
auto obj,
auto var) {
121 std::string platform_profile, platform_version, platform_name, platform_vendor;
122 float platform_version_f;
123 auto queryPlatform = [&](
auto platform) {
124 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
125 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
126 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
127 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
128 sscanf(platform_version.c_str(),
"OpenCL %f", &platform_version_f);
131 std::vector<cl_device_id> devices;
132 std::string device_vendor, device_name, device_il_version;
133 cl_device_type device_type;
134 cl_uint device_freq, device_shaders, device_nbits;
135 cl_bool device_endian;
136 auto queryDevice = [&](
auto device) {
137 platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
138 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
139 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
140 clGetDeviceInfo(device, CL_DEVICE_TYPE,
sizeof(device_type), &device_type,
nullptr);
141 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(device_freq), &device_freq,
nullptr);
142 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(device_shaders), &device_shaders,
nullptr);
143 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
sizeof(device_nbits), &device_nbits,
nullptr);
144 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE,
sizeof(device_endian), &device_endian,
nullptr);
147 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
148 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
155 std::string platformUsageInfo;
156 bool platformCompatible =
false;
157 queryPlatform(platforms[iPlatform]);
158 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0,
nullptr, &deviceCount) != CL_SUCCESS) {
160 GPUErrorReturn(
"No device in requested platform or error obtaining device count");
162 platformUsageInfo +=
" - no devices";
164 if (platform_version_f >= 2.1f) {
165 platformUsageInfo +=
" - OpenCL 2.2 capable";
166 platformCompatible =
true;
171 GPUInfo(
"%s Platform %d: (%s %s) %s %s (Compatible: %s)%s",
mProcessingSettings.oclPlatformNum >= 0 ?
"Enforced" :
"Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ?
"yes" :
"no",
mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() :
"");
175 if (deviceCount > devices.size()) {
176 devices.resize(deviceCount);
178 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(),
nullptr) != CL_SUCCESS) {
185 for (uint32_t
i = 0;
i < deviceCount;
i++) {
192 bool deviceOK =
true;
193 queryDevice(devices[
i]);
194 std::string deviceFailure;
195 if (
mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
197 deviceFailure +=
" - No GPU device";
199 if (device_nbits / 8 !=
sizeof(
void*)) {
201 deviceFailure +=
" - No 64 bit device";
203 if (!device_endian) {
205 deviceFailure +=
" - No Little Endian Mode";
209 while ((
pos = device_il_version.find(
"SPIR-V",
pos)) != std::string::npos) {
211 sscanf(device_il_version.c_str() +
pos,
"SPIR-V_%f", &spirvVersion);
212 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
215 pos += strlen(
"SPIR-V_0.0");
217 if (
pos == std::string::npos) {
219 deviceFailure +=
" - No SPIR-V " +
std::to_string(GPUCA_OCL_SPIRV_VERSION) +
" (" + device_il_version +
")";
223 double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (
double)device_shaders;
225 GPUInfo(
" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ?
" " :
"[",
i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ?
" " :
" ]", deviceOK ?
"" : deviceFailure.c_str());
229 GPUInfo(
"Unsupported device requested on platform %d: (%d)", iPlatform,
mProcessingSettings.deviceNum);
234 if (deviceSpeed > bestDeviceSpeed) {
236 bestPlatform = iPlatform;
237 bestDeviceSpeed = deviceSpeed;
250 if (bestDevice == (cl_uint)-1) {
251 GPUErrorReturn(
"Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
258 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
259 std::string deviceVersion;
260 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
261 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(deviceGlobalMem), &deviceGlobalMem,
nullptr);
262 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(deviceConstantBuffer), &deviceConstantBuffer,
nullptr);
263 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(deviceLocalMem), &deviceLocalMem,
nullptr);
264 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup,
nullptr);
265 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(deviceMaxWorkItems), deviceMaxWorkItems,
nullptr);
266 deviceVersion = query(clGetDeviceInfo,
mInternals->
device, CL_DEVICE_VERSION);
267 int versionMajor, versionMinor;
268 sscanf(deviceVersion.c_str(),
"OpenCL %d.%d", &versionMajor, &versionMinor);
270 GPUInfo(
"Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
271 GPUInfo(
"\tVersion = %s", deviceVersion);
272 GPUInfo(
"\tFrequency = %d", (int32_t)device_freq);
273 GPUInfo(
"\tShaders = %d", (int32_t)device_shaders);
274 GPUInfo(
"\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
275 GPUInfo(
"\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
276 GPUInfo(
"\tLocalMemory = %ld", (int64_t)deviceLocalMem);
277 GPUInfo(
"\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
278 GPUInfo(
"\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
281#ifndef GPUCA_NO_CONSTANT_MEMORY
282 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
283 GPUErrorReturn(
"Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
303 GPUInfo(
"OpenCL program and kernels loaded successfully");
319 if (device_type & CL_DEVICE_TYPE_CPU) {
321 GPUInfo(
"Disabling device timers for CPU device");
327 cl_queue_properties prop = 0;
329 prop |= CL_QUEUE_PROFILING_ENABLE;
333 GPUError(
"GPU device timers not supported by OpenCL platform, disabling");
357 const char* krnlGetPtr =
"__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
358 cl_program
program = clCreateProgramWithSource(
mInternals->
context, 1, (
const char**)&krnlGetPtr,
nullptr, &ocl_error);
364 char build_log[16384];
366 GPUImportant(
"Build Log:\n\n%s\n\n", build_log);
369 cl_kernel kernel = clCreateKernel(
program,
"krnlGetPtr", &ocl_error);
383 GPUInfo(
"Mapping hostmemory");
398 GPUInfo(
"OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)
mDeviceMemorySize, (int64_t)
mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
409 for (uint32_t
i = 0;
i <
mEvents.size();
i++) {