Project
Loading...
Searching...
No Matches
GPUReconstructionOCL.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUDefParametersLoad.inc"
17#include "GPUConstantMem.h"
18
19#include <map>
20
21static_assert(std::is_convertible_v<cl_event, void*>, "OpenCL event type incompatible to deviceEvent");
22
23#define GPUErrorReturn(...) \
24 { \
25 GPUError(__VA_ARGS__); \
26 return (1); \
27 }
28
30QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
31#ifdef OPENCL_ENABLED_SPIRV
32QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv);
33#endif
34
36
45
47{
48 Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit
49 if (mMaster == nullptr) {
50 delete mInternals;
51 }
52}
53
54static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0);
55int32_t GPUReconstructionOCL::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const
56{
57 // Check for OPENCL Error and in the case of an error display the corresponding error string
58 if (error != CL_SUCCESS) {
59 GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line);
60 }
61 return error != CL_SUCCESS;
62}
63
65{
66 // Propagate processing settings to PoCL runtime.
67 // Won't affect other OpenCL runtimes.
68 if (int nThreads = mProcessingSettings->nHostThreads; nThreads > 0) {
69 auto nThreadsStr = std::to_string(nThreads);
70 setenv("POCL_CPU_MAX_CU_COUNT", nThreadsStr.c_str(), 1);
71 }
72
73 if (mMaster == nullptr) {
74 cl_int ocl_error;
75 cl_uint num_platforms;
76 if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) {
77 GPUErrorReturn("Error getting OpenCL Platform Count");
78 }
79 if (num_platforms == 0) {
80 GPUErrorReturn("No OpenCL Platform found");
81 }
82 if (GetProcessingSettings().debugLevel >= 2) {
83 GPUInfo("%d OpenCL Platforms found", num_platforms);
84 }
85
86 // Query platforms and devices
87 std::unique_ptr<cl_platform_id[]> platforms;
88 platforms.reset(new cl_platform_id[num_platforms]);
89 if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) {
90 GPUErrorReturn("Error getting OpenCL Platforms");
91 }
92
93 auto query = [&](auto func, auto obj, auto var) {
94 size_t size;
95 func(obj, var, 0, nullptr, &size);
96 std::string retVal(size - 1, ' ');
97 func(obj, var, size, retVal.data(), nullptr);
98 return retVal;
99 };
100
101 std::string platform_profile, platform_version, platform_name, platform_vendor;
102 float platform_version_f;
103 auto queryPlatform = [&](auto platform) {
104 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
105 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
106 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
107 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
108 sscanf(platform_version.c_str(), "OpenCL %f", &platform_version_f);
109 };
110
111 std::vector<cl_device_id> devices;
112 std::string device_vendor, device_name, device_il_version;
113 cl_device_type device_type;
114 cl_uint device_freq, device_shaders, device_nbits;
115 cl_bool device_endian;
116 auto queryDevice = [&](auto device) {
117 platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
118 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
119 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
120 clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, nullptr);
121 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(device_freq), &device_freq, nullptr);
122 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_shaders), &device_shaders, nullptr);
123 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(device_nbits), &device_nbits, nullptr);
124 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE, sizeof(device_endian), &device_endian, nullptr);
125 };
126
127 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
128 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
129 if (GetProcessingSettings().oclPlatformNum >= 0) {
130 if (GetProcessingSettings().oclPlatformNum >= (int32_t)num_platforms) {
131 GPUErrorReturn("Invalid platform specified");
132 }
133 iPlatform = GetProcessingSettings().oclPlatformNum;
134 }
135 std::string platformUsageInfo;
136 bool platformCompatible = false;
137 queryPlatform(platforms[iPlatform]);
138 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) {
139 if (GetProcessingSettings().oclPlatformNum >= 0) {
140 GPUErrorReturn("No device in requested platform or error obtaining device count");
141 }
142 platformUsageInfo += " - no devices";
143 } else {
144 if (platform_version_f >= 2.1f) {
145 platformUsageInfo += " - OpenCL 2.2 capable";
146 platformCompatible = true;
147 }
148 }
149
150 if (GetProcessingSettings().oclPlatformNum >= 0 || GetProcessingSettings().debugLevel >= 2) {
151 GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", GetProcessingSettings().oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() : "");
152 }
153
154 if (platformCompatible || GetProcessingSettings().oclPlatformNum >= 0 || (GetProcessingSettings().oclPlatformNum == -2 && deviceCount)) {
155 if (deviceCount > devices.size()) {
156 devices.resize(deviceCount);
157 }
158 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) {
159 if (GetProcessingSettings().oclPlatformNum >= 0) {
160 GPUErrorReturn("Error getting OpenCL devices");
161 }
162 continue;
163 }
164
165 for (uint32_t i = 0; i < deviceCount; i++) {
166 if (GetProcessingSettings().deviceNum >= 0) {
167 if (GetProcessingSettings().deviceNum >= (signed)deviceCount) {
168 GPUErrorReturn("Requested device ID %d does not exist", GetProcessingSettings().deviceNum);
169 }
170 i = GetProcessingSettings().deviceNum;
171 }
172 bool deviceOK = true;
173 queryDevice(devices[i]);
174 std::string deviceFailure;
175 if (GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
176 deviceOK = false;
177 deviceFailure += " - No GPU device";
178 }
179 if (device_nbits / 8 != sizeof(void*)) {
180 deviceOK = false;
181 deviceFailure += " - No 64 bit device";
182 }
183 if (!device_endian) {
184 deviceOK = false;
185 deviceFailure += " - No Little Endian Mode";
186 }
187 if (!GetProcessingSettings().oclCompileFromSources) {
188 size_t pos = 0;
189 while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) {
190 float spirvVersion;
191 sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion);
192 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
193 break;
194 }
195 pos += strlen("SPIR-V_0.0");
196 }
197 if (pos == std::string::npos) {
198 deviceOK = false;
199 deviceFailure += " - No SPIR-V " + std::to_string(GPUCA_OCL_SPIRV_VERSION) + " (" + device_il_version + ")";
200 }
201 }
202
203 double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders;
204 if (GetProcessingSettings().debugLevel >= 2) {
205 GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str());
206 }
207 if (!deviceOK) {
208 if (GetProcessingSettings().deviceNum >= 0) {
209 GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, GetProcessingSettings().deviceNum);
210 break;
211 }
212 continue;
213 }
214 if (deviceSpeed > bestDeviceSpeed) {
215 bestDevice = i;
216 bestPlatform = iPlatform;
217 bestDeviceSpeed = deviceSpeed;
218 mOclVersion = platform_version_f;
219 }
220 if (GetProcessingSettings().deviceNum >= 0) {
221 break;
222 }
223 }
224 }
225 if (GetProcessingSettings().oclPlatformNum >= 0) {
226 break;
227 }
228 }
229
230 if (bestDevice == (cl_uint)-1) {
231 GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
232 }
233 mInternals->platform = platforms[bestPlatform];
234 GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr));
235 mInternals->device = devices[bestDevice];
236 queryDevice(mInternals->device);
237
238 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
239 std::string deviceVersion;
240 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
241 clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(deviceGlobalMem), &deviceGlobalMem, nullptr);
242 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(deviceConstantBuffer), &deviceConstantBuffer, nullptr);
243 clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMem), &deviceLocalMem, nullptr);
244 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup, nullptr);
245 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(deviceMaxWorkItems), deviceMaxWorkItems, nullptr);
246 deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION);
247 int versionMajor, versionMinor;
248 sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor);
249 if (GetProcessingSettings().debugLevel >= 2) {
250 GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
251 GPUInfo("\tVersion = %s", deviceVersion);
252 GPUInfo("\tFrequency = %d", (int32_t)device_freq);
253 GPUInfo("\tShaders = %d", (int32_t)device_shaders);
254 GPUInfo("\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
255 GPUInfo("\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
256 GPUInfo("\tLocalMemory = %ld", (int64_t)deviceLocalMem);
257 GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
258 GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
259 GPUInfo(" ");
260 }
261#ifndef GPUCA_NO_CONSTANT_MEMORY
262 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
263 GPUErrorReturn("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
264 }
265#endif
266
267 mDeviceName = device_name.c_str();
268 mDeviceName += " (OpenCL)";
269 mBlockCount = device_shaders;
270 mWarpSize = 32;
271 mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);
272
273 mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
274 if (GPUChkErrI(ocl_error)) {
275 GPUErrorReturn("Could not create OPENCL Device Context!");
276 }
277
278 if (GetOCLPrograms()) {
279 return 1;
280 }
281
282 if (GetProcessingSettings().debugLevel >= 2) {
283 GPUInfo("OpenCL program and kernels loaded successfully");
284 }
285
286 mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error);
287 if (GPUChkErrI(ocl_error)) {
288 clReleaseContext(mInternals->context);
289 GPUErrorReturn("OPENCL Memory Allocation Error");
290 }
291
292 mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error);
293 if (GPUChkErrI(ocl_error)) {
294 clReleaseMemObject(mInternals->mem_gpu);
295 clReleaseContext(mInternals->context);
296 GPUErrorReturn("OPENCL Constant Memory Allocation Error");
297 }
298
299 if (device_type & CL_DEVICE_TYPE_CPU) {
300 if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel >= 2) {
301 GPUInfo("Disabling device timers for CPU device");
302 }
303 mProcessingSettings->deviceTimers = 0;
304 }
305 for (int32_t i = 0; i < mNStreams; i++) {
306#ifdef CL_VERSION_2_0
307 cl_queue_properties prop = 0;
308 if (versionMajor >= 2 && IsGPU() && GetProcessingSettings().deviceTimers) {
309 prop |= CL_QUEUE_PROFILING_ENABLE;
310 }
311 mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
312 if (GetProcessingSettings().deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) {
313 GPUError("GPU device timers not supported by OpenCL platform, disabling");
314 mProcessingSettings->deviceTimers = 0;
315 prop = 0;
316 mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
317 }
318#else
319 mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error);
320#endif
321 if (GPUChkErrI(ocl_error)) {
322 GPUErrorReturn("Error creating OpenCL command queue");
323 }
324 }
325 if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) {
326 GPUErrorReturn("Error migrating buffer");
327 }
328 if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) {
329 GPUErrorReturn("Error migrating buffer");
330 }
331
332 mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error);
333 if (GPUChkErrI(ocl_error)) {
334 GPUErrorReturn("Error allocating pinned host memory");
335 }
336
337 const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
338 cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error);
339 if (GPUChkErrI(ocl_error)) {
340 GPUErrorReturn("Error creating program object");
341 }
342 ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr);
343 if (GPUChkErrI(ocl_error)) {
344 char build_log[16384];
345 clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr);
346 GPUImportant("Build Log:\n\n%s\n\n", build_log);
347 GPUErrorReturn("Error compiling program");
348 }
349 cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error);
350 if (GPUChkErrI(ocl_error)) {
351 GPUErrorReturn("Error creating kernel");
352 }
353
354 if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) ||
355 GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) ||
356 GPUChkErrI(clFinish(mInternals->command_queue[0])) ||
357 GPUChkErrI(clReleaseKernel(kernel)) ||
358 GPUChkErrI(clReleaseProgram(program))) {
359 GPUErrorReturn("Error obtaining device memory ptr");
360 }
361
362 if (GetProcessingSettings().debugLevel >= 2) {
363 GPUInfo("Mapping hostmemory");
364 }
365 mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error);
366 if (GPUChkErrI(ocl_error)) {
367 GPUErrorReturn("Error allocating Page Locked Host Memory");
368 }
369
370 mDeviceMemoryBase = ((void**)mHostMemoryBase)[0];
372
373 if (GetProcessingSettings().debugLevel >= 1) {
374 GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase);
375 memset(mHostMemoryBase, 0xDD, mHostMemorySize);
376 }
377
378 GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
379 } else {
380 GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
381 mBlockCount = master->mBlockCount;
382 mWarpSize = master->mWarpSize;
384 mDeviceName = master->mDeviceName;
386 mInternals = master->mInternals;
387 }
388
389 for (uint32_t i = 0; i < mEvents.size(); i++) {
390 cl_event* events = (cl_event*)mEvents[i].data();
391 new (events) cl_event[mEvents[i].size()];
392 }
393
394 return (0);
395}
396
398{
399 // Uninitialize OPENCL
401
402 if (mMaster == nullptr) {
403 if (mDeviceMemoryBase) {
404 clReleaseMemObject(mInternals->mem_gpu);
405 clReleaseMemObject(mInternals->mem_constant);
406 for (uint32_t i = 0; i < mInternals->kernels.size(); i++) {
407 clReleaseKernel(mInternals->kernels[i]);
408 }
409 mInternals->kernels.clear();
410 }
411 if (mHostMemoryBase) {
412 clEnqueueUnmapMemObject(mInternals->command_queue[0], mInternals->mem_host, mHostMemoryBase, 0, nullptr, nullptr);
413 for (int32_t i = 0; i < mNStreams; i++) {
414 clReleaseCommandQueue(mInternals->command_queue[i]);
415 }
416 clReleaseMemObject(mInternals->mem_host);
417 }
418
419 clReleaseProgram(mInternals->program);
420 clReleaseContext(mInternals->context);
421 GPUInfo("OPENCL Uninitialized");
422 }
423 mDeviceMemoryBase = nullptr;
424 mHostMemoryBase = nullptr;
425
426 return (0);
427}
428
429size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
430{
431 if (evList == nullptr) {
432 nEvents = 0;
433 }
434 if (GetProcessingSettings().debugLevel >= 3) {
435 stream = -1;
436 }
437 if (stream == -1) {
439 }
440 if (size == 0) {
441 if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0
442 GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
443 }
444 } else if (toGPU == -2) {
445 GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
446 } else if (toGPU) {
447 GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
448 } else {
449 GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
450 }
451 if (GetProcessingSettings().serializeGPU & 2) {
452 GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
453 }
454 return size;
455}
456
457size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev)
458{
459 if (stream == -1) {
461 }
462 GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList<cl_event>()));
463 if (GetProcessingSettings().serializeGPU & 2) {
464 GPUDebug("WriteToConstantMemory", stream, true);
465 }
466 return size;
467}
468
469void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get<cl_event>())); }
470
471void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList<cl_event>())); }
472
474{
475 if (GetProcessingSettings().stuckProtection) {
476 cl_int tmp = 0;
477 for (int32_t i = 0; i <= GetProcessingSettings().stuckProtection / 50; i++) {
478 usleep(50);
479 clGetEventInfo(event.get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr);
480 if (tmp == CL_COMPLETE) {
481 break;
482 }
483 }
484 if (tmp != CL_COMPLETE) {
485 mGPUStuck = 1;
486 GPUErrorReturn("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp);
487 }
488 } else {
489 clFinish(mInternals->command_queue[stream]);
490 }
491 return 0;
492}
493
495{
496 for (int32_t i = 0; i < mNStreams; i++) {
497 GPUChkErr(clFinish(mInternals->command_queue[i]));
498 }
499}
500
502
503void GPUReconstructionOCL::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList<cl_event>())); }
504
506{
507 if (nEvents) {
508 GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList<cl_event>(), nullptr));
509 }
510}
511
513{
514 cl_int eventdone;
515 for (int32_t i = 0; i < nEvents; i++) {
516 GPUChkErr(clGetEventInfo(evList[i].get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr));
517 if (eventdone != CL_COMPLETE) {
518 return false;
519 }
520 }
521 return true;
522}
523
524int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool force)
525{
526 // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode
527 if (!force && GetProcessingSettings().debugLevel <= 0) {
528 return (0);
529 }
530 for (int32_t i = 0; i < mNStreams; i++) {
531 if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) {
532 GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i);
533 }
534 }
535 if (GetProcessingSettings().debugLevel >= 3) {
536 GPUInfo("GPU Sync Done");
537 }
538 return (0);
539}
540
542{
543 cl_int ocl_error;
544
545 const char* oclBuildFlags = GetProcessingSettings().oclOverrideSourceBuildFlags != "" ? GetProcessingSettings().oclOverrideSourceBuildFlags.c_str() : GPUCA_M_STR(GPUCA_OCL_BUILD_FLAGS);
546
547#ifdef OPENCL_ENABLED_SPIRV // clang-format off
548 if (mOclVersion >= 2.1f && !GetProcessingSettings().oclCompileFromSources) {
549 GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", mOclVersion);
550 mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error);
551 oclBuildFlags = "";
552 } else
553#endif // clang-format on
554 {
555 GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", mOclVersion);
556 size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len};
557 char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start};
558 mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error);
559 }
560
561 if (GPUChkErrI(ocl_error)) {
562 GPUError("Error creating OpenCL program from binary");
563 return 1;
564 }
565
566 if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) {
567 cl_build_status status;
568 if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) {
569 size_t log_size;
570 clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
571 std::unique_ptr<char[]> build_log(new char[log_size + 1]);
572 clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr);
573 build_log[log_size] = 0;
574 GPUError("Build Log:\n\n%s\n", build_log.get());
575 }
576 return 1;
577 }
578
579 return AddKernels();
580}
581
582const char* GPUReconstructionOCL::convertErrorToString(int32_t errorcode)
583{
584 static const std::map<cl_int, const char*> error_map = {
585 {CL_SUCCESS, "CL_SUCCESS"},
586 {CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND"},
587 {CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE"},
588 {CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE"},
589 {CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
590 {CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES"},
591 {CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY"},
592 {CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE"},
593 {CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP"},
594 {CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH"},
595 {CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
596 {CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE"},
597 {CL_MAP_FAILURE, "CL_MAP_FAILURE"},
598 {CL_MISALIGNED_SUB_BUFFER_OFFSET, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
599 {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
600 {CL_COMPILE_PROGRAM_FAILURE, "CL_COMPILE_PROGRAM_FAILURE"},
601 {CL_LINKER_NOT_AVAILABLE, "CL_LINKER_NOT_AVAILABLE"},
602 {CL_LINK_PROGRAM_FAILURE, "CL_LINK_PROGRAM_FAILURE"},
603 {CL_DEVICE_PARTITION_FAILED, "CL_DEVICE_PARTITION_FAILED"},
604 {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},
605 {CL_INVALID_VALUE, "CL_INVALID_VALUE"},
606 {CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE"},
607 {CL_INVALID_PLATFORM, "CL_INVALID_PLATFORM"},
608 {CL_INVALID_DEVICE, "CL_INVALID_DEVICE"},
609 {CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT"},
610 {CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES"},
611 {CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE"},
612 {CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR"},
613 {CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT"},
614 {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
615 {CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE"},
616 {CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER"},
617 {CL_INVALID_BINARY, "CL_INVALID_BINARY"},
618 {CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS"},
619 {CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM"},
620 {CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE"},
621 {CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME"},
622 {CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION"},
623 {CL_INVALID_KERNEL, "CL_INVALID_KERNEL"},
624 {CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX"},
625 {CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE"},
626 {CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE"},
627 {CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS"},
628 {CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION"},
629 {CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE"},
630 {CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE"},
631 {CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET"},
632 {CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST"},
633 {CL_INVALID_EVENT, "CL_INVALID_EVENT"},
634 {CL_INVALID_OPERATION, "CL_INVALID_OPERATION"},
635 {CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT"},
636 {CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE"},
637 {CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL"},
638 {CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE"},
639 {CL_INVALID_PROPERTY, "CL_INVALID_PROPERTY"},
640 {CL_INVALID_IMAGE_DESCRIPTOR, "CL_INVALID_IMAGE_DESCRIPTOR"},
641 {CL_INVALID_COMPILER_OPTIONS, "CL_INVALID_COMPILER_OPTIONS"},
642 {CL_INVALID_LINKER_OPTIONS, "CL_INVALID_LINKER_OPTIONS"},
643 {CL_INVALID_DEVICE_PARTITION_COUNT, "CL_INVALID_DEVICE_PARTITION_COUNT"},
644 {CL_INVALID_PIPE_SIZE, "CL_INVALID_PIPE_SIZE"},
645 {CL_INVALID_DEVICE_QUEUE, "CL_INVALID_DEVICE_QUEUE"},
646 {CL_INVALID_SPEC_ID, "CL_INVALID_SPEC_ID"},
647 {CL_MAX_SIZE_RESTRICTION_EXCEEDED, "CL_MAX_SIZE_RESTRICTION_EXCEEDED"}};
648 auto entry = error_map.find(errorcode);
649 return (entry != error_map.end()) ? entry->second : "Unknown Errorcode";
650}
benchmark::State & state
int32_t i
#define GPUChkErrI(x)
#define GPUChkErr(x)
#define GPUCA_M_STR(a)
#define GPUErrorReturn(...)
GPUReconstruction * GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend &cfg)
int32_t retVal
uint16_t pos
Definition RawData.h:3
void ReleaseEvent(deviceEvent ev) override
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1) override
int32_t DoStuckProtection(int32_t stream, deviceEvent event) override
GPUReconstructionOCL(const GPUSettingsDeviceBackend &cfg)
void RecordMarker(deviceEvent *ev, int32_t stream) override
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1) override
size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false) override
void SynchronizeStream(int32_t stream) override
GPUReconstructionOCLInternals * mInternals
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1) override
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const override
std::vector< std::vector< deviceEvent > > mEvents
GPUConstantMem * mDeviceConstantMem
std::unique_ptr< GPUSettingsDeviceBackend > mDeviceBackendSettings
std::unique_ptr< GPUSettingsProcessing > mProcessingSettings
const GPUSettingsProcessing & GetProcessingSettings() const
struct _cl_event * event
Definition glcorearb.h:2982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLuint entry
Definition glcorearb.h:5735
GLsizeiptr size
Definition glcorearb.h:659
GLbitfield GLuint program
Definition glcorearb.h:1905
GLenum GLenum dst
Definition glcorearb.h:1767
GLboolean * data
Definition glcorearb.h:298
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
#define QGET_LD_BINARY_SYMBOLS(filename)
cl_command_queue command_queue[GPUCA_MAX_STREAMS]
const int nEvents
Definition test_Fifo.cxx:27