70 setenv(
"POCL_CPU_MAX_CU_COUNT", nThreadsStr.c_str(), 1);
75 cl_uint num_platforms;
76 if (
GPUChkErrI(clGetPlatformIDs(0,
nullptr, &num_platforms))) {
79 if (num_platforms == 0) {
83 GPUInfo(
"%d OpenCL Platforms found", num_platforms);
87 std::unique_ptr<cl_platform_id[]> platforms;
88 platforms.reset(
new cl_platform_id[num_platforms]);
89 if (
GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(),
nullptr))) {
93 auto query = [&](
auto func,
auto obj,
auto var) {
101 std::string platform_profile, platform_version, platform_name, platform_vendor;
102 float platform_version_f;
103 auto queryPlatform = [&](
auto platform) {
104 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
105 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
106 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
107 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
108 sscanf(platform_version.c_str(),
"OpenCL %f", &platform_version_f);
111 std::vector<cl_device_id> devices;
112 std::string device_vendor, device_name, device_il_version;
113 cl_device_type device_type;
114 cl_uint device_freq, device_shaders, device_nbits;
115 cl_bool device_endian;
116 auto queryDevice = [&](
auto device) {
117 platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
118 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
119 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
120 clGetDeviceInfo(device, CL_DEVICE_TYPE,
sizeof(device_type), &device_type,
nullptr);
121 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(device_freq), &device_freq,
nullptr);
122 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(device_shaders), &device_shaders,
nullptr);
123 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
sizeof(device_nbits), &device_nbits,
nullptr);
124 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE,
sizeof(device_endian), &device_endian,
nullptr);
127 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
128 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
135 std::string platformUsageInfo;
136 bool platformCompatible =
false;
137 queryPlatform(platforms[iPlatform]);
138 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0,
nullptr, &deviceCount) != CL_SUCCESS) {
140 GPUErrorReturn(
"No device in requested platform or error obtaining device count");
142 platformUsageInfo +=
" - no devices";
144 if (platform_version_f >= 2.1f) {
145 platformUsageInfo +=
" - OpenCL 2.2 capable";
146 platformCompatible =
true;
151 GPUInfo(
"%s Platform %d: (%s %s) %s %s (Compatible: %s)%s",
GetProcessingSettings().oclPlatformNum >= 0 ?
"Enforced" :
"Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ?
"yes" :
"no",
GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() :
"");
155 if (deviceCount > devices.size()) {
156 devices.resize(deviceCount);
158 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(),
nullptr) != CL_SUCCESS) {
165 for (uint32_t
i = 0;
i < deviceCount;
i++) {
172 bool deviceOK =
true;
173 queryDevice(devices[
i]);
174 std::string deviceFailure;
175 if (
GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
177 deviceFailure +=
" - No GPU device";
179 if (device_nbits / 8 !=
sizeof(
void*)) {
181 deviceFailure +=
" - No 64 bit device";
183 if (!device_endian) {
185 deviceFailure +=
" - No Little Endian Mode";
189 while ((
pos = device_il_version.find(
"SPIR-V",
pos)) != std::string::npos) {
191 sscanf(device_il_version.c_str() +
pos,
"SPIR-V_%f", &spirvVersion);
192 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
195 pos += strlen(
"SPIR-V_0.0");
197 if (
pos == std::string::npos) {
199 deviceFailure +=
" - No SPIR-V " +
std::to_string(GPUCA_OCL_SPIRV_VERSION) +
" (" + device_il_version +
")";
203 double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (
double)device_shaders;
205 GPUInfo(
" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ?
" " :
"[",
i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ?
" " :
" ]", deviceOK ?
"" : deviceFailure.c_str());
209 GPUInfo(
"Unsupported device requested on platform %d: (%d)", iPlatform,
GetProcessingSettings().deviceNum);
214 if (deviceSpeed > bestDeviceSpeed) {
216 bestPlatform = iPlatform;
217 bestDeviceSpeed = deviceSpeed;
230 if (bestDevice == (cl_uint)-1) {
231 GPUErrorReturn(
"Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
238 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
239 std::string deviceVersion;
240 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
241 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(deviceGlobalMem), &deviceGlobalMem,
nullptr);
242 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(deviceConstantBuffer), &deviceConstantBuffer,
nullptr);
243 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(deviceLocalMem), &deviceLocalMem,
nullptr);
244 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup,
nullptr);
245 clGetDeviceInfo(
mInternals->
device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(deviceMaxWorkItems), deviceMaxWorkItems,
nullptr);
246 deviceVersion = query(clGetDeviceInfo,
mInternals->
device, CL_DEVICE_VERSION);
247 int versionMajor, versionMinor;
248 sscanf(deviceVersion.c_str(),
"OpenCL %d.%d", &versionMajor, &versionMinor);
250 GPUInfo(
"Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
251 GPUInfo(
"\tVersion = %s", deviceVersion);
252 GPUInfo(
"\tFrequency = %d", (int32_t)device_freq);
253 GPUInfo(
"\tShaders = %d", (int32_t)device_shaders);
254 GPUInfo(
"\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
255 GPUInfo(
"\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
256 GPUInfo(
"\tLocalMemory = %ld", (int64_t)deviceLocalMem);
257 GPUInfo(
"\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
258 GPUInfo(
"\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
261#ifndef GPUCA_NO_CONSTANT_MEMORY
262 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
263 GPUErrorReturn(
"Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
283 GPUInfo(
"OpenCL program and kernels loaded successfully");
299 if (device_type & CL_DEVICE_TYPE_CPU) {
301 GPUInfo(
"Disabling device timers for CPU device");
307 cl_queue_properties prop = 0;
309 prop |= CL_QUEUE_PROFILING_ENABLE;
313 GPUError(
"GPU device timers not supported by OpenCL platform, disabling");
337 const char* krnlGetPtr =
"__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
338 cl_program
program = clCreateProgramWithSource(
mInternals->
context, 1, (
const char**)&krnlGetPtr,
nullptr, &ocl_error);
344 char build_log[16384];
346 GPUImportant(
"Build Log:\n\n%s\n\n", build_log);
349 cl_kernel kernel = clCreateKernel(
program,
"krnlGetPtr", &ocl_error);
363 GPUInfo(
"Mapping hostmemory");
378 GPUInfo(
"OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)
mDeviceMemorySize, (int64_t)
mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
389 for (uint32_t
i = 0;
i <
mEvents.size();
i++) {