GPUReconstructionOCL.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUDefParametersLoad.inc"
17#include "GPUConstantMem.h"
18
19#include <map>
20
21static_assert(std::is_convertible<cl_event, void*>::value, "OpenCL event type incompatible to deviceEvent");
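// deviceEvent stores an opaque void*, so cl_event handles can be placed into it directly (see the
// placement-new over mEvents further below); this assertion guards that assumption at compile time.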
22
23#define GPUErrorReturn(...) \
24 { \
25 GPUError(__VA_ARGS__); \
26 return (1); \
27 }
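// Convenience macro for the initialization path: log the message and bail out with error code 1, e.g.
//   if (num_platforms == 0) { GPUErrorReturn("No OpenCL Platform found"); }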
28
30QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
31#ifdef OPENCL_ENABLED_SPIRV
32QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv);
33#endif
34
36
45
46GPUReconstructionOCL::~GPUReconstructionOCL()
47{
48 Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit
49 if (mMaster == nullptr) {
50 delete mInternals;
51 }
52}
53
54static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0);
55int32_t GPUReconstructionOCL::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const
56{
57 // Check for an OpenCL error and, if one occurred, print the corresponding error string
58 if (error != CL_SUCCESS) {
59 GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line);
60 }
61 return error != CL_SUCCESS;
62}
63
64int32_t GPUReconstructionOCL::InitDevice_Runtime()
65{
66 if (mMaster == nullptr) {
67 cl_int ocl_error;
68 cl_uint num_platforms;
69 if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) {
70 GPUErrorReturn("Error getting OpenCL Platform Count");
71 }
72 if (num_platforms == 0) {
73 GPUErrorReturn("No OpenCL Platform found");
74 }
75 if (GetProcessingSettings().debugLevel >= 2) {
76 GPUInfo("%d OpenCL Platforms found", num_platforms);
77 }
78
79 // Query platforms and devices
80 std::unique_ptr<cl_platform_id[]> platforms;
81 platforms.reset(new cl_platform_id[num_platforms]);
82 if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) {
83 GPUErrorReturn("Error getting OpenCL Platforms");
84 }
85
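// OpenCL string properties are queried in two steps (first the size, then the contents);
// this helper wraps both calls and returns the result as a std::string.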
86 auto query = [&](auto func, auto obj, auto var) {
87 size_t size;
88 func(obj, var, 0, nullptr, &size);
89 std::string retVal(size - 1, ' ');
90 func(obj, var, size, retVal.data(), nullptr);
91 return retVal;
92 };
93
94 std::string platform_profile, platform_version, platform_name, platform_vendor;
95 float platform_version_f;
96 auto queryPlatform = [&](auto platform) {
97 platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
98 platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
99 platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
100 platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
101 sscanf(platform_version.c_str(), "OpenCL %f", &platform_version_f);
102 };
103
104 std::vector<cl_device_id> devices;
105 std::string device_vendor, device_name, device_il_version;
106 cl_device_type device_type;
107 cl_uint device_freq, device_shaders, device_nbits;
108 cl_bool device_endian;
109 auto queryDevice = [&](auto device) {
110 device_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
111 device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
112 device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
113 clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, nullptr);
114 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(device_freq), &device_freq, nullptr);
115 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_shaders), &device_shaders, nullptr);
116 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(device_nbits), &device_nbits, nullptr);
117 clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE, sizeof(device_endian), &device_endian, nullptr);
118 };
119
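// Scan all platforms and their devices, check compatibility, and select the fastest suitable device,
// unless a specific platform / device is enforced via oclPlatformNum / deviceNum.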
120 cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1; double bestDeviceSpeed = -1.;
121 for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
122 if (GetProcessingSettings().oclPlatformNum >= 0) {
123 if (GetProcessingSettings().oclPlatformNum >= (int32_t)num_platforms) {
124 GPUErrorReturn("Invalid platform specified");
125 }
126 iPlatform = GetProcessingSettings().oclPlatformNum;
127 }
128 std::string platformUsageInfo;
129 bool platformCompatible = false;
130 queryPlatform(platforms[iPlatform]);
131 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) {
132 if (GetProcessingSettings().oclPlatformNum >= 0) {
133 GPUErrorReturn("No device in requested platform or error obtaining device count");
134 }
135 platformUsageInfo += " - no devices";
136 } else {
137 if (platform_version_f >= 2.1f) {
138 platformUsageInfo += " - OpenCL 2.2 capable";
139 platformCompatible = true;
140 }
141 }
142
143 if (GetProcessingSettings().oclPlatformNum >= 0 || GetProcessingSettings().debugLevel >= 2) {
144 GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", GetProcessingSettings().oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() : "");
145 }
146
147 if (platformCompatible || GetProcessingSettings().oclPlatformNum >= 0 || (GetProcessingSettings().oclPlatformNum == -2 && deviceCount)) {
148 if (deviceCount > devices.size()) {
149 devices.resize(deviceCount);
150 }
151 if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) {
152 if (GetProcessingSettings().oclPlatformNum >= 0) {
153 GPUErrorReturn("Error getting OpenCL devices");
154 }
155 continue;
156 }
157
158 for (uint32_t i = 0; i < deviceCount; i++) {
159 if (GetProcessingSettings().deviceNum >= 0) {
160 if (GetProcessingSettings().deviceNum >= (signed)deviceCount) {
161 GPUErrorReturn("Requested device ID %d does not exist", GetProcessingSettings().deviceNum);
162 }
163 i = GetProcessingSettings().deviceNum;
164 }
165 bool deviceOK = true;
166 queryDevice(devices[i]);
167 std::string deviceFailure;
168 if (GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
169 deviceOK = false;
170 deviceFailure += " - No GPU device";
171 }
172 if (device_nbits / 8 != sizeof(void*)) {
173 deviceOK = false;
174 deviceFailure += " - No 64 bit device";
175 }
176 if (!device_endian) {
177 deviceOK = false;
178 deviceFailure += " - No Little Endian Mode";
179 }
180 if (!GetProcessingSettings().oclCompileFromSources) {
181 size_t pos = 0;
182 while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) {
183 float spirvVersion;
184 sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion);
185 if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
186 break;
187 }
188 pos += strlen("SPIR-V_0.0");
189 }
190 if (pos == std::string::npos) {
191 deviceOK = false;
192 deviceFailure += " - No SPIR-V " + std::to_string(GPUCA_OCL_SPIRV_VERSION) + " (" + device_il_version + ")";
193 }
194 }
195
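// Rate devices by clock frequency times number of compute units; the fastest compatible device wins.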
196 double deviceSpeed = (double)device_freq * (double)device_shaders;
197 if (GetProcessingSettings().debugLevel >= 2) {
198 GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str());
199 }
200 if (!deviceOK) {
201 if (GetProcessingSettings().deviceNum >= 0) {
202 GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, GetProcessingSettings().deviceNum);
203 break;
204 }
205 continue;
206 }
207 if (deviceSpeed > bestDeviceSpeed) {
208 bestDevice = i;
209 bestPlatform = iPlatform;
210 bestDeviceSpeed = deviceSpeed;
211 mOclVersion = platform_version_f;
212 }
213 if (GetProcessingSettings().deviceNum >= 0) {
214 break;
215 }
216 }
217 }
218 if (GetProcessingSettings().oclPlatformNum >= 0) {
219 break;
220 }
221 }
222
223 if (bestDevice == (cl_uint)-1) {
224 GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
225 }
226 mInternals->platform = platforms[bestPlatform];
227 GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr));
228 mInternals->device = devices[bestDevice];
229 queryDevice(mInternals->device);
230
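// Query the remaining properties (memory sizes, work-group limits) of the selected device.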
231 cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
232 std::string deviceVersion;
233 size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
234 clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(deviceGlobalMem), &deviceGlobalMem, nullptr);
235 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(deviceConstantBuffer), &deviceConstantBuffer, nullptr);
236 clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMem), &deviceLocalMem, nullptr);
237 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup, nullptr);
238 clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(deviceMaxWorkItems), deviceMaxWorkItems, nullptr);
239 deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION);
240 int versionMajor, versionMinor;
241 sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor);
242 if (GetProcessingSettings().debugLevel >= 2) {
243 GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
244 GPUInfo("\tVersion = %s", deviceVersion.c_str());
245 GPUInfo("\tFrequency = %d", (int32_t)device_freq);
246 GPUInfo("\tShaders = %d", (int32_t)device_shaders);
247 GPUInfo("\tGlobalMemory = %ld", (int64_t)deviceGlobalMem);
248 GPUInfo("\tConstantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
249 GPUInfo("\tLocalMemory = %ld", (int64_t)deviceLocalMem);
250 GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
251 GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
252 GPUInfo(" ");
253 }
254#ifndef GPUCA_NO_CONSTANT_MEMORY
255 if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
256 GPUErrorReturn("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
257 }
258#endif
259
260 mDeviceName = device_name.c_str();
261 mDeviceName += " (OpenCL)";
262 mBlockCount = device_shaders;
263 mWarpSize = 32;
264 mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);
265
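// Create the OpenCL context for the selected device; all buffers, programs and command queues below belong to it.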
266 mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
267 if (GPUChkErrI(ocl_error)) {
268 GPUErrorReturn("Could not create OPENCL Device Context!");
269 }
270
271 if (GetOCLPrograms()) {
272 return 1;
273 }
274
275 if (GetProcessingSettings().debugLevel >= 2) {
276 GPUInfo("OpenCL program and kernels loaded successfully");
277 }
278
279 mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error);
280 if (GPUChkErrI(ocl_error)) {
281 clReleaseContext(mInternals->context);
282 GPUErrorReturn("OPENCL Memory Allocation Error");
283 }
284
285 mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error);
286 if (GPUChkErrI(ocl_error)) {
287 clReleaseMemObject(mInternals->mem_gpu);
288 clReleaseContext(mInternals->context);
289 GPUErrorReturn("OPENCL Constant Memory Allocation Error");
290 }
291
292 if (device_type & CL_DEVICE_TYPE_CPU) {
293 if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel >= 2) {
294 GPUInfo("Disabling device timers for CPU device");
295 }
296 mProcessingSettings->deviceTimers = 0;
297 }
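// Create one command queue per stream; profiling is enabled only when device timers are requested (OpenCL >= 2.0).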
298 for (int32_t i = 0; i < mNStreams; i++) {
299#ifdef CL_VERSION_2_0
300 cl_queue_properties prop = 0;
301 if (versionMajor >= 2 && IsGPU() && GetProcessingSettings().deviceTimers) {
302 prop |= CL_QUEUE_PROFILING_ENABLE;
303 }
304 mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
305 if (GetProcessingSettings().deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) {
306 GPUError("GPU device timers not supported by OpenCL platform, disabling");
307 mProcessingSettings->deviceTimers = 0;
308 prop = 0;
309 mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
310 }
311#else
312 mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error);
313#endif
314 if (GPUChkErrI(ocl_error)) {
315 GPUErrorReturn("Error creating OpenCL command queue");
316 }
317 }
318 if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) {
319 GPUErrorReturn("Error migrating buffer");
320 }
321 if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) {
322 GPUErrorReturn("Error migrating buffer");
323 }
324
325 mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error);
326 if (GPUChkErrI(ocl_error)) {
327 GPUErrorReturn("Error allocating pinned host memory");
328 }
329
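// Obtain the raw device addresses of mem_gpu / mem_constant: a tiny helper kernel writes them into the
// pinned host buffer, so that later transfers can address device memory as offsets from these base pointers.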
330 const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
331 cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error);
332 if (GPUChkErrI(ocl_error)) {
333 GPUErrorReturn("Error creating program object");
334 }
335 ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr);
336 if (GPUChkErrI(ocl_error)) {
337 char build_log[16384];
338 clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr);
339 GPUImportant("Build Log:\n\n%s\n\n", build_log);
340 GPUErrorReturn("Error compiling program");
341 }
342 cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error);
343 if (GPUChkErrI(ocl_error)) {
344 GPUErrorReturn("Error creating kernel");
345 }
346
347 if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) ||
348 GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) ||
349 GPUChkErrI(clFinish(mInternals->command_queue[0])) ||
350 GPUChkErrI(clReleaseKernel(kernel)) ||
351 GPUChkErrI(clReleaseProgram(program))) {
352 GPUErrorReturn("Error obtaining device memory ptr");
353 }
354
355 if (GetProcessingSettings().debugLevel >= 2) {
356 GPUInfo("Mapping hostmemory");
357 }
358 mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error);
359 if (GPUChkErrI(ocl_error)) {
360 GPUErrorReturn("Error allocating Page Locked Host Memory");
361 }
362
363 mDeviceMemoryBase = ((void**)mHostMemoryBase)[0];
364 mDeviceConstantMem = (GPUConstantMem*)((void**)mHostMemoryBase)[1];
365
366 if (GetProcessingSettings().debugLevel >= 1) {
367 GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase);
368 memset(mHostMemoryBase, 0xDD, mHostMemorySize);
369 }
370
371 GPUInfo("OPENCL Initialisation successful (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mHostMemorySize, (int64_t)mDeviceMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
372 } else {
373 GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
374 mBlockCount = master->mBlockCount;
375 mWarpSize = master->mWarpSize;
377 mDeviceName = master->mDeviceName;
379 mInternals = master->mInternals;
380 }
381
382 for (uint32_t i = 0; i < mEvents.size(); i++) {
383 cl_event* events = (cl_event*)mEvents[i].data();
384 new (events) cl_event[mEvents[i].size()];
385 }
386
387 return (0);
388}
389
390int32_t GPUReconstructionOCL::ExitDevice_Runtime()
391{
392 // Uninitialize OPENCL
394
395 if (mMaster == nullptr) {
396 if (mDeviceMemoryBase) {
397 clReleaseMemObject(mInternals->mem_gpu);
398 clReleaseMemObject(mInternals->mem_constant);
399 for (uint32_t i = 0; i < mInternals->kernels.size(); i++) {
400 clReleaseKernel(mInternals->kernels[i]);
401 }
402 mInternals->kernels.clear();
403 }
404 if (mHostMemoryBase) {
405 clEnqueueUnmapMemObject(mInternals->command_queue[0], mInternals->mem_host, mHostMemoryBase, 0, nullptr, nullptr);
406 for (int32_t i = 0; i < mNStreams; i++) {
407 clReleaseCommandQueue(mInternals->command_queue[i]);
408 }
409 clReleaseMemObject(mInternals->mem_host);
410 }
411
412 clReleaseProgram(mInternals->program);
413 clReleaseContext(mInternals->context);
414 GPUInfo("OPENCL Uninitialized");
415 }
416 mDeviceMemoryBase = nullptr;
417 mHostMemoryBase = nullptr;
418
419 return (0);
420}
421
422size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
423{
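// toGPU == -2 performs a device-to-device copy, toGPU != 0 copies host->device, toGPU == 0 device->host.
// Offsets into mem_gpu are computed relative to mDeviceMemoryBase, since all device memory lives in one cl_mem buffer.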
424 if (evList == nullptr) {
425 nEvents = 0;
426 }
427 if (GetProcessingSettings().debugLevel >= 3) {
428 stream = -1;
429 }
430 if (stream == -1) {
431 SynchronizeGPU();
432 }
433 if (size == 0) {
434 if (ev || nEvents) { // Workaround for OpenCL runtimes that may return an error for a transfer of size 0
435 GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
436 }
437 } else if (toGPU == -2) {
438 GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
439 } else if (toGPU) {
440 GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
441 } else {
442 GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
443 }
444 if (GetProcessingSettings().serializeGPU & 2) {
445 GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
446 }
447 return size;
448}
449
450size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev)
451{
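// A negative stream requests a blocking write on queue 0 into the constant-memory buffer.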
452 if (stream == -1) {
453 SynchronizeGPU();
454 }
455 GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList<cl_event>()));
456 if (GetProcessingSettings().serializeGPU & 2) {
457 GPUDebug("WriteToConstantMemory", stream, true);
458 }
459 return size;
460}
461
462void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get<cl_event>())); }
463
464void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList<cl_event>())); }
465
466int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent event)
467{
468 if (GetProcessingSettings().stuckProtection) {
469 cl_int tmp = 0;
470 for (int32_t i = 0; i <= GetProcessingSettings().stuckProtection / 50; i++) {
471 usleep(50);
472 clGetEventInfo(event.get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr);
473 if (tmp == CL_COMPLETE) {
474 break;
475 }
476 }
477 if (tmp != CL_COMPLETE) {
478 mGPUStuck = 1;
479 GPUErrorReturn("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp);
480 }
481 } else {
482 clFinish(mInternals->command_queue[stream]);
483 }
484 return 0;
485}
486
487void GPUReconstructionOCL::SynchronizeGPU()
488{
489 for (int32_t i = 0; i < mNStreams; i++) {
490 GPUChkErr(clFinish(mInternals->command_queue[i]));
491 }
492}
493
494void GPUReconstructionOCL::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); }
495
496void GPUReconstructionOCL::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList<cl_event>())); }
497
498void GPUReconstructionOCL::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents)
499{
500 if (nEvents) {
501 GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList<cl_event>(), nullptr));
502 }
503}
504
505bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents)
506{
507 cl_int eventdone;
508 for (int32_t i = 0; i < nEvents; i++) {
509 GPUChkErr(clGetEventInfo(evList[i].get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr));
510 if (eventdone != CL_COMPLETE) {
511 return false;
512 }
513 }
514 return true;
515}
516
517int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool force)
518{
519 // In debug mode (or when forced), wait for the OpenCL kernels to finish and check for OpenCL errors afterwards
520 if (!force && GetProcessingSettings().debugLevel <= 0) {
521 return (0);
522 }
523 for (int32_t i = 0; i < mNStreams; i++) {
524 if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) {
525 GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i);
526 }
527 }
528 if (GetProcessingSettings().debugLevel >= 3) {
529 GPUInfo("GPU Sync Done");
530 }
531 return (0);
532}
533
534int32_t GPUReconstructionOCL::GetOCLPrograms()
535{
536 cl_int ocl_error;
537
538 const char* oclBuildFlags = GetProcessingSettings().oclOverrideSourceBuildFlags != "" ? GetProcessingSettings().oclOverrideSourceBuildFlags.c_str() : GPUCA_M_STR(GPUCA_OCL_BUILD_FLAGS);
539
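// Prefer the precompiled SPIR-V IL if the platform supports OpenCL >= 2.1 and compilation from sources
// was not explicitly requested; otherwise build the embedded OpenCL C sources with the configured flags.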
540#ifdef OPENCL_ENABLED_SPIRV // clang-format off
541 if (mOclVersion >= 2.1f && !GetProcessingSettings().oclCompileFromSources) {
542 GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", mOclVersion);
543 mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error);
544 oclBuildFlags = "";
545 } else
546#endif // clang-format on
547 {
548 GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", mOclVersion);
549 size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len};
550 char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start};
551 mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error);
552 }
553
554 if (GPUChkErrI(ocl_error)) {
555 GPUError("Error creating OpenCL program from binary");
556 return 1;
557 }
558
559 if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) {
560 cl_build_status status;
561 if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) {
562 size_t log_size;
563 clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
564 std::unique_ptr<char[]> build_log(new char[log_size + 1]);
565 clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr);
566 build_log[log_size] = 0;
567 GPUError("Build Log:\n\n%s\n", build_log.get());
568 }
569 return 1;
570 }
571
572 return AddKernels();
573}
574
575const char* GPUReconstructionOCL::convertErrorToString(int32_t errorcode)
576{
577 static const std::map<cl_int, const char*> error_map = {
578 {CL_SUCCESS, "CL_SUCCESS"},
579 {CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND"},
580 {CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE"},
581 {CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE"},
582 {CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
583 {CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES"},
584 {CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY"},
585 {CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE"},
586 {CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP"},
587 {CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH"},
588 {CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
589 {CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE"},
590 {CL_MAP_FAILURE, "CL_MAP_FAILURE"},
591 {CL_MISALIGNED_SUB_BUFFER_OFFSET, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
592 {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
593 {CL_COMPILE_PROGRAM_FAILURE, "CL_COMPILE_PROGRAM_FAILURE"},
594 {CL_LINKER_NOT_AVAILABLE, "CL_LINKER_NOT_AVAILABLE"},
595 {CL_LINK_PROGRAM_FAILURE, "CL_LINK_PROGRAM_FAILURE"},
596 {CL_DEVICE_PARTITION_FAILED, "CL_DEVICE_PARTITION_FAILED"},
597 {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},
598 {CL_INVALID_VALUE, "CL_INVALID_VALUE"},
599 {CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE"},
600 {CL_INVALID_PLATFORM, "CL_INVALID_PLATFORM"},
601 {CL_INVALID_DEVICE, "CL_INVALID_DEVICE"},
602 {CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT"},
603 {CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES"},
604 {CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE"},
605 {CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR"},
606 {CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT"},
607 {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
608 {CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE"},
609 {CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER"},
610 {CL_INVALID_BINARY, "CL_INVALID_BINARY"},
611 {CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS"},
612 {CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM"},
613 {CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE"},
614 {CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME"},
615 {CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION"},
616 {CL_INVALID_KERNEL, "CL_INVALID_KERNEL"},
617 {CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX"},
618 {CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE"},
619 {CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE"},
620 {CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS"},
621 {CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION"},
622 {CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE"},
623 {CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE"},
624 {CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET"},
625 {CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST"},
626 {CL_INVALID_EVENT, "CL_INVALID_EVENT"},
627 {CL_INVALID_OPERATION, "CL_INVALID_OPERATION"},
628 {CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT"},
629 {CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE"},
630 {CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL"},
631 {CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE"},
632 {CL_INVALID_PROPERTY, "CL_INVALID_PROPERTY"},
633 {CL_INVALID_IMAGE_DESCRIPTOR, "CL_INVALID_IMAGE_DESCRIPTOR"},
634 {CL_INVALID_COMPILER_OPTIONS, "CL_INVALID_COMPILER_OPTIONS"},
635 {CL_INVALID_LINKER_OPTIONS, "CL_INVALID_LINKER_OPTIONS"},
636 {CL_INVALID_DEVICE_PARTITION_COUNT, "CL_INVALID_DEVICE_PARTITION_COUNT"},
637 {CL_INVALID_PIPE_SIZE, "CL_INVALID_PIPE_SIZE"},
638 {CL_INVALID_DEVICE_QUEUE, "CL_INVALID_DEVICE_QUEUE"},
639 {CL_INVALID_SPEC_ID, "CL_INVALID_SPEC_ID"},
640 {CL_MAX_SIZE_RESTRICTION_EXCEEDED, "CL_MAX_SIZE_RESTRICTION_EXCEEDED"}};
641 auto entry = error_map.find(errorcode);
642 return (entry != error_map.end()) ? entry->second : "Unknown Errorcode";
643}