Project
Loading...
Searching...
No Matches
GPUReconstructionOCL.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#define GPUCA_GPUTYPE_OPENCL
16#define __OPENCL_HOST__
17
21
22using namespace o2::gpu;
23
24#include <cstring>
25#include <unistd.h>
26#include <typeinfo>
27#include <cstdlib>
28
29#define GPUErrorReturn(...) \
30 { \
31 GPUError(__VA_ARGS__); \
32 return (1); \
33 }
34
35#define GPUCA_KRNL(x_class, x_attributes, ...) GPUCA_KRNL_PROP(x_class, x_attributes)
36#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend
37#include "GPUReconstructionKernelList.h"
38#undef GPUCA_KRNL
39
41QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
42#ifdef OPENCL_ENABLED_SPIRV
43QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv);
44#endif
45
47
55
{
  // Tear down the full reconstruction chain before members are destroyed.
  Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit
  if (mMaster == nullptr) {
    // Only the master instance owns mInternals; slave instances borrow the
    // master's pointer (see the slave branch of device init), so only the
    // master deletes it.
    delete mInternals;
  }
}
63
64int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line)
65{
66 // Check for OPENCL Error and in the case of an error display the corresponding error string
67 if (error == CL_SUCCESS) {
68 return (0);
69 }
70 GPUError("OCL Error: %ld / %s (%s:%d)", error, opencl_error_string(error), file, line);
71 return 1;
72}
73
74void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line)
75{
76 if (GPUFailedMsgAI(error, file, line)) {
77 static bool runningCallbacks = false;
78 if (IsInitialized() && runningCallbacks == false) {
79 runningCallbacks = true;
80 CheckErrorCodes(false, true);
81 }
82 throw std::runtime_error("OpenCL Failure");
83 }
84}
85
90
{
  // One-time OpenCL device initialization. Only the master instance performs
  // platform/device discovery, context/buffer/queue creation and kernel
  // loading; slave instances copy the already-initialized state from the
  // master in the else-branch at the bottom.
  if (mMaster == nullptr) {
    cl_int ocl_error;
    cl_uint num_platforms;
    if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) {
      GPUErrorReturn("Error getting OpenCL Platform Count");
    }
    if (num_platforms == 0) {
      GPUErrorReturn("No OpenCL Platform found");
    }
    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("%d OpenCL Platforms found", num_platforms);
    }

    // Query platforms and devices
    std::unique_ptr<cl_platform_id[]> platforms;
    platforms.reset(new cl_platform_id[num_platforms]);
    if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) {
      GPUErrorReturn("Error getting OpenCL Platforms");
    }

    // Generic string-property getter: first call obtains the size (which
    // includes the terminating NUL, hence size - 1), second call fills it.
    auto query = [&](auto func, auto obj, auto var) {
      size_t size;
      func(obj, var, 0, nullptr, &size);
      std::string retVal(size - 1, ' ');
      func(obj, var, size, retVal.data(), nullptr);
      return retVal;
    };

    std::string platform_profile, platform_version, platform_name, platform_vendor;
    float platform_version_f;
    auto queryPlatform = [&](auto platform) {
      platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
      platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
      platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
      platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
      // platform_version has the form "OpenCL <major>.<minor> ..."
      sscanf(platform_version.c_str(), "OpenCL %f", &platform_version_f);
    };

    std::vector<cl_device_id> devices;
    std::string device_vendor, device_name, device_il_version;
    cl_device_type device_type;
    cl_uint device_freq, device_shaders, device_nbits;
    cl_bool device_endian;
    auto queryDevice = [&](auto device) {
      // NOTE(review): CL_DEVICE_NAME is stored into platform_name, not
      // device_name, so device_name stays empty when printed later —
      // looks like a typo; confirm against the full source.
      platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
      device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
      device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
      clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(device_freq), &device_freq, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_shaders), &device_shaders, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(device_nbits), &device_nbits, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE, sizeof(device_endian), &device_endian, nullptr);
    };

    cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
    for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
      // If a platform was requested explicitly, jump straight to it; the
      // matching break at the end of the loop ensures only one iteration.
      if (mProcessingSettings.oclPlatformNum >= 0) {
        if (mProcessingSettings.oclPlatformNum >= (int32_t)num_platforms) {
          GPUErrorReturn("Invalid platform specified");
        }
        iPlatform = mProcessingSettings.oclPlatformNum;
      }
      std::string platformUsageInfo;
      bool platformCompatible = false;
      queryPlatform(platforms[iPlatform]);
      if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) {
        if (mProcessingSettings.oclPlatformNum >= 0) {
          GPUErrorReturn("No device in requested platform or error obtaining device count");
        }
        platformUsageInfo += " - no devices";
      } else {
        // Platforms reporting OpenCL >= 2.1 are considered compatible.
        if (platform_version_f >= 2.1f) {
          platformUsageInfo += " - OpenCL 2.2 capable";
          platformCompatible = true;
        }
      }

      if (mProcessingSettings.oclPlatformNum >= 0 || mProcessingSettings.debugLevel >= 2) {
        GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", mProcessingSettings.oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() : "");
      }

      // oclPlatformNum == -2 means: accept any platform that has devices,
      // even if not flagged compatible above.
      if (platformCompatible || mProcessingSettings.oclPlatformNum >= 0 || (mProcessingSettings.oclPlatformNum == -2 && deviceCount)) {
        if (deviceCount > devices.size()) {
          devices.resize(deviceCount);
        }
        if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) {
          if (mProcessingSettings.oclPlatformNum >= 0) {
            GPUErrorReturn("Error getting OpenCL devices");
          }
          continue;
        }

        // Scan devices on this platform, filtering out unusable ones and
        // tracking the "fastest" usable device (frequency * compute units).
        for (uint32_t i = 0; i < deviceCount; i++) {
          if (mProcessingSettings.deviceNum >= 0) {
            if (mProcessingSettings.deviceNum >= (signed)deviceCount) {
              GPUErrorReturn("Requested device ID %d does not exist", mProcessingSettings.deviceNum);
            }
            i = mProcessingSettings.deviceNum;
          }
          bool deviceOK = true;
          queryDevice(devices[i]);
          std::string deviceFailure;
          if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
            deviceOK = false;
            deviceFailure += " - No GPU device";
          }
          // Pointer width of the device must match the host process.
          if (device_nbits / 8 != sizeof(void*)) {
            deviceOK = false;
            deviceFailure += " - No 64 bit device";
          }
          if (!device_endian) {
            deviceOK = false;
            deviceFailure += " - No Little Endian Mode";
          }
          if (!GetProcessingSettings().oclCompileFromSources) {
            // When loading precompiled SPIR-V, the device must advertise a
            // sufficiently new "SPIR-V_x.y" entry in its IL version string.
            size_t pos = 0;
            while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) {
              float spirvVersion;
              sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion);
              if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
                break;
              }
              pos += strlen("SPIR-V_0.0");
            }
            if (pos == std::string::npos) {
              deviceOK = false;
              deviceFailure += " - No SPIR-V " + std::to_string(GPUCA_OCL_SPIRV_VERSION) + " (" + device_il_version + ")";
            }
          }

          // NOTE(review): bestDeviceSpeed is re-initialized to -1 on every
          // loop iteration, so the comparison below always succeeds and the
          // last acceptable device wins rather than the fastest — confirm
          // whether this is intended or the declaration belongs outside the loop.
          double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders;
          if (mProcessingSettings.debugLevel >= 2) {
            GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str());
          }
          if (!deviceOK) {
            if (mProcessingSettings.deviceNum >= 0) {
              GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, mProcessingSettings.deviceNum);
              break;
            }
            continue;
          }
          if (deviceSpeed > bestDeviceSpeed) {
            bestDevice = i;
            bestPlatform = iPlatform;
            bestDeviceSpeed = deviceSpeed;
            mOclVersion = platform_version_f;
          }
          if (mProcessingSettings.deviceNum >= 0) {
            break;
          }
        }
      }
      if (mProcessingSettings.oclPlatformNum >= 0) {
        break;
      }
    }

    if (bestDevice == (cl_uint)-1) {
      GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
    }
    mInternals->platform = platforms[bestPlatform];
    GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr));
    mInternals->device = devices[bestDevice];
    queryDevice(mInternals->device);

    // Query capacity limits of the chosen device.
    cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
    std::string deviceVersion;
    size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
    clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(deviceGlobalMem), &deviceGlobalMem, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(deviceConstantBuffer), &deviceConstantBuffer, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMem), &deviceLocalMem, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(deviceMaxWorkItems), deviceMaxWorkItems, nullptr);
    deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION);
    int versionMajor, versionMinor;
    sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor);
    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
      // NOTE(review): deviceVersion is a std::string passed to a %s-style
      // format — expected .c_str(); confirm against the full source.
      GPUInfo("\tVersion = %s", deviceVersion);
      GPUInfo("\tFrequency = %d", (int32_t)device_freq);
      GPUInfo("\tShaders = %d", (int32_t)device_shaders);
      GPUInfo("\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
      GPUInfo("\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
      GPUInfo("\tLocalMemory = %ld", (int64_t)deviceLocalMem);
      GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
      GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
      GPUInfo(" ");
    }
#ifndef GPUCA_NO_CONSTANT_MEMORY
    // The device's constant buffer must hold the full GPU constant memory block.
    if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
      GPUErrorReturn("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
    }
#endif

    mDeviceName = device_name.c_str();
    mDeviceName += " (OpenCL)";
    mBlockCount = device_shaders;
    mWarpSize = 32;
    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);

    mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      GPUErrorReturn("Could not create OPENCL Device Context!");
    }

    // Build / load the OpenCL program and create all kernel objects.
    if (GetOCLPrograms()) {
      return 1;
    }

    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("OpenCL program and kernels loaded successfully");
    }

    // Allocate the main device buffer and the constant-memory buffer,
    // releasing already-created objects on failure.
    mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      clReleaseContext(mInternals->context);
      GPUErrorReturn("OPENCL Memory Allocation Error");
    }

    mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      clReleaseMemObject(mInternals->mem_gpu);
      clReleaseContext(mInternals->context);
      GPUErrorReturn("OPENCL Constant Memory Allocation Error");
    }

    if (device_type & CL_DEVICE_TYPE_CPU) {
      if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel >= 2) {
        GPUInfo("Disabling device timers for CPU device");
      }
      mProcessingSettings.deviceTimers = 0;
    }
    // Create one command queue per stream; profiling is enabled only when
    // device timers are requested on a real GPU with OpenCL >= 2.
    for (int32_t i = 0; i < mNStreams; i++) {
#ifdef CL_VERSION_2_0
      cl_queue_properties prop = 0;
      if (versionMajor >= 2 && IsGPU() && mProcessingSettings.deviceTimers) {
        prop |= CL_QUEUE_PROFILING_ENABLE;
      }
      mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
      if (mProcessingSettings.deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) {
        // Retry without profiling if the platform rejects the property.
        GPUError("GPU device timers not supported by OpenCL platform, disabling");
        mProcessingSettings.deviceTimers = 0;
        prop = 0;
        mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
      }
#else
      mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error);
#endif
      if (GPUFailedMsgI(ocl_error)) {
        GPUErrorReturn("Error creating OpenCL command queue");
      }
    }
    // Force the buffers to be physically placed on the device now.
    if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) {
      GPUErrorReturn("Error migrating buffer");
    }
    if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) {
      GPUErrorReturn("Error migrating buffer");
    }

    mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      GPUErrorReturn("Error allocating pinned host memory");
    }

    // Run a tiny ad-hoc kernel that writes the device-side addresses of the
    // GPU and constant buffers into host memory, so the host can compute
    // device pointers by offsetting from mDeviceMemoryBase.
    const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
    cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      GPUErrorReturn("Error creating program object");
    }
    ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr);
    if (GPUFailedMsgI(ocl_error)) {
      char build_log[16384];
      clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr);
      GPUImportant("Build Log:\n\n%s\n\n", build_log);
      GPUErrorReturn("Error compiling program");
    }
    cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      GPUErrorReturn("Error creating kernel");
    }

    if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) ||
        GPUFailedMsgI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) ||
        GPUFailedMsgI(clFinish(mInternals->command_queue[0])) ||
        GPUFailedMsgI(clReleaseKernel(kernel)) ||
        GPUFailedMsgI(clReleaseProgram(program))) {
      GPUErrorReturn("Error obtaining device memory ptr");
    }

    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("Mapping hostmemory");
    }
    // Map the pinned host buffer permanently into the host address space.
    mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error);
    if (GPUFailedMsgI(ocl_error)) {
      GPUErrorReturn("Error allocating Page Locked Host Memory");
    }

    // First size_t written by krnlGetPtr is the device address of mem_gpu.
    mDeviceMemoryBase = ((void**)mHostMemoryBase)[0];

    if (mProcessingSettings.debugLevel >= 1) {
      GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase);
      memset(mHostMemoryBase, 0xDD, mHostMemorySize);
    }

    // NOTE(review): device_vendor / device_name are std::string arguments to
    // a %s-style format — expected .c_str(); confirm against the full source.
    GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
  } else {
    // Slave instance: share all device state with the master.
    GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
    mBlockCount = master->mBlockCount;
    mWarpSize = master->mWarpSize;
    mMaxBackendThreads = master->mMaxBackendThreads;
    mDeviceName = master->mDeviceName;
    mDeviceConstantMem = master->mDeviceConstantMem;
    mInternals = master->mInternals;
  }

  // Placement-new cl_event arrays into the pre-sized event storage.
  for (uint32_t i = 0; i < mEvents.size(); i++) {
    cl_event* events = (cl_event*)mEvents[i].data();
    new (events) cl_event[mEvents[i].size()];
  }

  return (0);
}
416
{
  // Uninitialize OPENCL
  // Releases buffers, kernels, queues, program and context. Only the master
  // owns these resources; slaves merely reset their local pointers.

  if (mMaster == nullptr) {
    if (mDeviceMemoryBase) {
      clReleaseMemObject(mInternals->mem_gpu);
      clReleaseMemObject(mInternals->mem_constant);
      for (uint32_t i = 0; i < mInternals->kernels.size(); i++) {
        clReleaseKernel(mInternals->kernels[i].first);
      }
      mInternals->kernels.clear();
    }
    if (mHostMemoryBase) {
      // Unmap the pinned host buffer before releasing the queues that own
      // the mapping, then release the host buffer itself.
      clEnqueueUnmapMemObject(mInternals->command_queue[0], mInternals->mem_host, mHostMemoryBase, 0, nullptr, nullptr);
      for (int32_t i = 0; i < mNStreams; i++) {
        clReleaseCommandQueue(mInternals->command_queue[i]);
      }
      clReleaseMemObject(mInternals->mem_host);
    }

    clReleaseProgram(mInternals->program);
    clReleaseContext(mInternals->context);
    GPUInfo("OPENCL Uninitialized");
  }
  mDeviceMemoryBase = nullptr;
  mHostMemoryBase = nullptr;

  return (0);
}
448
size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
{
  // Copy size bytes between host and the device buffer, or device-to-device
  // within mem_gpu when toGPU == -2. Device-side offsets are computed
  // relative to mDeviceMemoryBase. stream == -1 selects queue 0 and makes
  // the transfer blocking. Returns the number of bytes copied.
  if (evList == nullptr) {
    nEvents = 0;
  }
  if (mProcessingSettings.debugLevel >= 3) {
    stream = -1; // force blocking transfers in deep-debug mode
  }
  if (stream == -1) {
    // NOTE(review): branch body not visible in this extract — presumably a
    // full synchronization before the blocking transfer; confirm against
    // the full source.
  }
  if (toGPU == -2) {
    // Device-to-device copy: both src and dst are device pointers inside mem_gpu.
    GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  } else if (toGPU) {
    GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  } else {
    GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  }
  if (mProcessingSettings.serializeGPU & 2) {
    GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
  }
  return size;
}
472
{
  // Write size bytes from src into the device constant-memory buffer at the
  // given offset; stream == -1 selects queue 0 and makes the write blocking.
  // Returns the number of bytes written.
  if (stream == -1) {
    // NOTE(review): branch body not visible in this extract — confirm
    // against the full source.
  }
  // NOTE(review): ev defaults to nullptr per the declaration; getEventList
  // is presumably null-safe — confirm.
  GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList<cl_event>()));
  if (mProcessingSettings.serializeGPU & 2) {
    GPUDebug("WriteToConstantMemory", stream, true);
  }
  return size;
}
484
485void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get<cl_event>())); }
486
487void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList<cl_event>())); }
488
{
  // With stuck protection enabled, poll the event status instead of calling
  // a blocking clFinish, and permanently disable this component if the event
  // never completes; otherwise simply drain the stream.
  if (mProcessingSettings.stuckProtection) {
    cl_int tmp = 0;
    // Poll up to stuckProtection / 50 + 1 times with a 50 us sleep between
    // polls. NOTE(review): usleep takes microseconds — confirm the intended
    // unit of the stuckProtection setting.
    for (int32_t i = 0; i <= mProcessingSettings.stuckProtection / 50; i++) {
      usleep(50);
      clGetEventInfo(event.get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr);
      if (tmp == CL_COMPLETE) {
        break;
      }
    }
    if (tmp != CL_COMPLETE) {
      mGPUStuck = 1; // marks the component as unusable for further processing
      GPUErrorReturn("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp);
    }
  } else {
    clFinish(mInternals->command_queue[stream]);
  }
  return 0;
}
509
{
  // Synchronize all streams of this backend.
  for (int32_t i = 0; i < mNStreams; i++) {
    // NOTE(review): loop body not visible in this extract — presumably
    // clFinish(mInternals->command_queue[i]); confirm against the full source.
  }
}
516
518
519void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList<cl_event>())); }
520
{
  // Make the given stream wait (asynchronously) for the listed events by
  // enqueuing a marker with a wait list; a no-op when nEvents is 0.
  if (nEvents) {
    GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList<cl_event>(), nullptr));
  }
}
527
{
  // Return true only if every event in evList has reached CL_COMPLETE.
  cl_int eventdone;
  for (int32_t i = 0; i < nEvents; i++) {
    GPUFailedMsg(clGetEventInfo(evList[i].get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr));
    if (eventdone != CL_COMPLETE) {
      return false;
    }
  }
  return true;
}
539
540int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, bool force)
541{
542 // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode
543 if (!force && mProcessingSettings.debugLevel <= 0) {
544 return (0);
545 }
546 for (int32_t i = 0; i < mNStreams; i++) {
547 if (GPUFailedMsgI(clFinish(mInternals->command_queue[i]))) {
548 GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i);
549 }
550 }
551 if (mProcessingSettings.debugLevel >= 3) {
552 GPUInfo("GPU Sync Done");
553 }
554 return (0);
555}
556
template <class T, int32_t I, typename... Args>
{
  // Pick the single- or multi-block variant of the kernel depending on the
  // requested block count, then forward the stored argument tuple to the
  // internal launcher.
  cl_kernel k = args.s.y.num > 1 ? getKernelObject<cl_kernel, T, I, true>() : getKernelObject<cl_kernel, T, I, false>();
  return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v);
}
563
template <class S, class T, int32_t I, bool MULTI>
{
  // Resolve the kernel index for <T, I, MULTI> exactly once (function-local
  // static) and return the cached cl_kernel object from mInternals.
  static uint32_t krnl = FindKernel<T, I>(MULTI ? 2 : 1);
  return mInternals->kernels[krnl].first;
}
570
{
  // Create mInternals->program either from the embedded SPIR-V IL (preferred
  // on OpenCL >= 2.1 unless compilation from sources is forced) or by
  // compiling the embedded OpenCL sources, build it, and create all kernel
  // objects listed in GPUReconstructionKernelList.h. Returns 0 on success.
  cl_int ocl_error;

  const char* oclBuildFlags = GetProcessingSettings().oclOverrideSourceBuildFlags != "" ? GetProcessingSettings().oclOverrideSourceBuildFlags.c_str() : GPUCA_M_STR(GPUCA_OCL_BUILD_FLAGS);

#ifdef OPENCL_ENABLED_SPIRV // clang-format off
  if (mOclVersion >= 2.1f && !GetProcessingSettings().oclCompileFromSources) {
    GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", mOclVersion);
    mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error);
    oclBuildFlags = ""; // build flags only apply to source compilation
  } else
#endif // clang-format on
  {
    GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", mOclVersion);
    size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len};
    char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start};
    mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error);
  }

  if (GPUFailedMsgI(ocl_error)) {
    GPUError("Error creating OpenCL program from binary");
    return 1;
  }

  if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) {
    // On a build error, retrieve and print the build log before failing.
    cl_build_status status;
    if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) {
      size_t log_size;
      clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
      std::unique_ptr<char[]> build_log(new char[log_size + 1]);
      clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr);
      build_log[log_size] = 0; // ensure NUL termination
      GPUError("Build Log:\n\n%s\n", build_log.get());
    }
    return 1;
  }

// Expand the kernel list into one AddKernel<> call per kernel, in both
// single- and multi-block variants; any failure aborts with return 1.
#define GPUCA_KRNL(...) \
  GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__)
#define GPUCA_KRNL_LOAD_single(x_class, ...)              \
  if (AddKernel<GPUCA_M_KRNL_TEMPLATE(x_class)>(false)) { \
    return 1;                                             \
  }
#define GPUCA_KRNL_LOAD_multi(x_class, ...)              \
  if (AddKernel<GPUCA_M_KRNL_TEMPLATE(x_class)>(true)) { \
    return 1;                                            \
  }
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
#undef GPUCA_KRNL_LOAD_single
#undef GPUCA_KRNL_LOAD_multi

  return 0;
}
benchmark::State & state
int32_t i
#define GPUCA_M_STR(a)
#define GPUFailedMsgI(x)
#define GPUFailedMsg(x)
#define GPUCA_GPUReconstructionUpdateDefaults()
#define GPUErrorReturn(...)
GPUReconstruction * GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend &cfg)
int32_t retVal
uint16_t pos
Definition RawData.h:3
void SynchronizeStream(int32_t stream) override
int32_t runKernelBackend(const krnlSetupArgs< T, I, Args... > &args)
int32_t GPUFailedMsgAI(const int64_t error, const char *file, int32_t line)
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false) override
void RecordMarker(deviceEvent *ev, int32_t stream) override
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override
int32_t runKernelBackendInternal(const krnlSetupTime &_xyz, K &k, const Args &... args)
GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend &cfg)
void GPUFailedMsgA(const int64_t error, const char *file, int32_t line)
GPUReconstructionOCLInternals * mInternals
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1) override
int32_t DoStuckProtection(int32_t stream, deviceEvent event) override
void ReleaseEvent(deviceEvent ev) override
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1) override
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1) override
std::vector< std::vector< deviceEvent > > mEvents
GPUConstantMem * mDeviceConstantMem
GPUSettingsProcessing mProcessingSettings
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
const GPUSettingsProcessing & GetProcessingSettings() const
GPUSettingsDeviceBackend mDeviceBackendSettings
struct _cl_event * event
Definition glcorearb.h:2982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLbitfield GLuint program
Definition glcorearb.h:1905
GLenum GLenum dst
Definition glcorearb.h:1767
GLboolean * data
Definition glcorearb.h:298
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GPUReconstructionKernels< GPUReconstructionOCLBackend > GPUReconstructionOCL
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
#define QGET_LD_BINARY_SYMBOLS(filename)
cl_command_queue command_queue[GPUCA_MAX_STREAMS]
std::vector< std::pair< cl_kernel, std::string > > kernels
std::tuple< typename std::conditional<(sizeof(Args) > sizeof(void *)), const Args &, const Args >::type... > v
const int nEvents
Definition test_Fifo.cxx:27