68 cl_uint num_platforms;
69 if (
GPUChkErrI(clGetPlatformIDs(0,
nullptr, &num_platforms))) {
72 if (num_platforms == 0) {
76 GPUInfo(
"%d OpenCL Platforms found", num_platforms);
80 std::unique_ptr<cl_platform_id[]> platforms;
81 platforms.reset(
new cl_platform_id[num_platforms]);
82 if (
GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(),
nullptr))) {
86 auto query = [&](
auto func,
auto obj,
auto var) {
94 std::string platform_profile, platform_version, platform_name, platform_vendor;
95 float platform_version_f;
96 auto queryPlatform = [&](
auto platform) {
97 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
98 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
99 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
100 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
101 sscanf(platform_version.c_str(),
"OpenCL %f", &platform_version_f);
104 std::vector<cl_device_id> devices;
105 std::string device_vendor, device_name, device_il_version;
106 cl_device_type device_type;
107 cl_uint device_freq, device_shaders, device_nbits;
108 cl_bool device_endian;
109 auto queryDevice = [&](
auto device) {
110 platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
111 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
112 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
113 clGetDeviceInfo(device, CL_DEVICE_TYPE,
sizeof(device_type), &device_type,
nullptr);
114 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(device_freq), &device_freq,
nullptr);
115 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(device_shaders), &device_shaders,
nullptr);
116 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
sizeof(device_nbits), &device_nbits,
nullptr);
117 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE,
sizeof(device_endian), &device_endian,
nullptr);
120 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
121 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
128 std::string platformUsageInfo;
129 bool platformCompatible =
false;
130 queryPlatform(platforms[iPlatform]);
131 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0,
nullptr, &deviceCount) != CL_SUCCESS) {
133 GPUErrorReturn(
"No device in requested platform or error obtaining device count");
135 platformUsageInfo +=
" - no devices";
137 if (platform_version_f >= 2.1f) {
138 platformUsageInfo +=
" - OpenCL 2.2 capable";
139 platformCompatible =
true;
144 GPUInfo(
"%s Platform %d: (%s %s) %s %s (Compatible: %s)%s",
GetProcessingSettings().oclPlatformNum >= 0 ?
"Enforced" :
"Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ?
"yes" :
"no",
GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() :
"");
148 if (deviceCount > devices.size()) {
149 devices.resize(deviceCount);
151 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(),
nullptr) != CL_SUCCESS) {
158 for (uint32_t
i = 0;
i < deviceCount;
i++) {
165 bool deviceOK =
true;
166 queryDevice(devices[
i]);
167 std::string deviceFailure;
168 if (
GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
170 deviceFailure +=
" - No GPU device";
172 if (device_nbits / 8 !=
sizeof(
void*)) {
174 deviceFailure +=
" - No 64 bit device";
176 if (!device_endian) {
178 deviceFailure +=
" - No Little Endian Mode";
182 while ((
pos = device_il_version.find(
"SPIR-V",
pos)) != std::string::npos) {
184 sscanf(device_il_version.c_str() +
pos,
"SPIR-V_%f", &spirvVersion);
185 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
188 pos += strlen(
"SPIR-V_0.0");
190 if (
pos == std::string::npos) {
192 deviceFailure +=
" - No SPIR-V " +
std::to_string(GPUCA_OCL_SPIRV_VERSION) +
" (" + device_il_version +
")";
196 double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (
double)device_shaders;
198 GPUInfo(
" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ?
" " :
"[",
i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ?
" " :
" ]", deviceOK ?
"" : deviceFailure.c_str());
202 GPUInfo(
"Unsupported device requested on platform %d: (%d)", iPlatform,
GetProcessingSettings().deviceNum);
207 if (deviceSpeed > bestDeviceSpeed) {
209 bestPlatform = iPlatform;
210 bestDeviceSpeed = deviceSpeed;
223 if (bestDevice == (cl_uint)-1) {
224 GPUErrorReturn(
"Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
231 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
232 std::string deviceVersion;
233 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
234 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(deviceGlobalMem), &deviceGlobalMem,
nullptr);
235 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(deviceConstantBuffer), &deviceConstantBuffer,
nullptr);
236 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(deviceLocalMem), &deviceLocalMem,
nullptr);
237 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup,
nullptr);
238 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(deviceMaxWorkItems), deviceMaxWorkItems,
nullptr);
239 deviceVersion = query(clGetDeviceInfo,
mInternals->
device, CL_DEVICE_VERSION);
240 int versionMajor, versionMinor;
241 sscanf(deviceVersion.c_str(),
"OpenCL %d.%d", &versionMajor, &versionMinor);
243 GPUInfo(
"Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
244 GPUInfo(
"\tVersion = %s", deviceVersion);
245 GPUInfo(
"\tFrequency = %d", (int32_t)device_freq);
246 GPUInfo(
"\tShaders = %d", (int32_t)device_shaders);
247 GPUInfo(
"\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
248 GPUInfo(
"\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
249 GPUInfo(
"\tLocalMemory = %ld", (int64_t)deviceLocalMem);
250 GPUInfo(
"\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
251 GPUInfo(
"\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
254#ifndef GPUCA_NO_CONSTANT_MEMORY
255 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
256 GPUErrorReturn(
"Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
276 GPUInfo(
"OpenCL program and kernels loaded successfully");
292 if (device_type & CL_DEVICE_TYPE_CPU) {
294 GPUInfo(
"Disabling device timers for CPU device");
300 cl_queue_properties prop = 0;
302 prop |= CL_QUEUE_PROFILING_ENABLE;
306 GPUError(
"GPU device timers not supported by OpenCL platform, disabling");
330 const char* krnlGetPtr =
"__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
331 cl_program
program = clCreateProgramWithSource(
mInternals->
context, 1, (
const char**)&krnlGetPtr,
nullptr, &ocl_error);
337 char build_log[16384];
339 GPUImportant(
"Build Log:\n\n%s\n\n", build_log);
342 cl_kernel kernel = clCreateKernel(
program,
"krnlGetPtr", &ocl_error);
356 GPUInfo(
"Mapping hostmemory");
371 GPUInfo(
"OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)
mDeviceMemorySize, (int64_t)
mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
382 for (uint32_t
i = 0;
i <
mEvents.size();
i++) {