Project
Loading...
Searching...
No Matches
GPUReconstructionOCL.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUDefParametersLoad.inc"
17
18#include <map>
19
20static_assert(std::is_convertible<cl_event, void*>::value, "OpenCL event type incompatible to deviceEvent");
21
// Log an error via GPUError and return 1 (failure) from the enclosing function.
// Wrapped in do { } while (0) so the macro expands to a single statement and is
// safe in unbraced if/else contexts followed by a semicolon (the bare-brace
// form would leave a stray ';' and break `if (c) GPUErrorReturn(...); else ...`).
#define GPUErrorReturn(...) \
  do {                      \
    GPUError(__VA_ARGS__);  \
    return (1);             \
  } while (0)
27
29QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
30#ifdef OPENCL_ENABLED_SPIRV
31QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv);
32#endif
33
35
// Constructor body: loads device default parameters for a standalone (master)
// instance and tags this backend as OpenCL.
// NOTE(review): the signature line was lost in extraction - presumably
// GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg)
// per the class declaration; confirm against the repository.
{
  if (mMaster == nullptr) {
    // NOTE(review): one statement is missing here in this extract (doc-generator
    // artifact) - likely the allocation of mInternals, which the destructor
    // deletes for the master instance; verify against the repository.
    *mParDevice = o2::gpu::internal::GPUDefParametersLoad();
  }
  // Mark this backend as OpenCL for the generic reconstruction framework.
  mDeviceBackendSettings.deviceType = DeviceType::OCL;
}
44
// Destructor body: tears down processing state, then frees the OpenCL internals
// (only the master owns them; slaves share the master's mInternals pointer).
// NOTE(review): the signature line was lost in extraction - presumably the
// class destructor; confirm against the repository.
{
  Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit
  if (mMaster == nullptr) {
    delete mInternals;
  }
}
52
53static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0);
54int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const
55{
56 // Check for OPENCL Error and in the case of an error display the corresponding error string
57 if (error != CL_SUCCESS) {
58 GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line);
59 }
60 return error != CL_SUCCESS;
61}
62
67
// One-time OpenCL initialization for the master instance: enumerate platforms
// and devices, pick the best compatible device, create context / program /
// buffers / command queues, and resolve the device-side memory pointers.
// A slave instance instead copies the shared state from its master.
// NOTE(review): the signature line was lost in extraction - presumably
// int32_t GPUReconstructionOCLBackend::InitDevice(); confirm against the repository.
{
  if (mMaster == nullptr) {
    cl_int ocl_error;
    cl_uint num_platforms;
    // Count available OpenCL platforms first, then fetch their IDs below.
    if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) {
      GPUErrorReturn("Error getting OpenCL Platform Count");
    }
    if (num_platforms == 0) {
      GPUErrorReturn("No OpenCL Platform found");
    }
    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("%d OpenCL Platforms found", num_platforms);
    }

    // Query platforms and devices
    std::unique_ptr<cl_platform_id[]> platforms;
    platforms.reset(new cl_platform_id[num_platforms]);
    if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) {
      GPUErrorReturn("Error getting OpenCL Platforms");
    }

    // Generic string-property query helper: first call obtains the size
    // (including the trailing NUL), second call fills the buffer. The string
    // holds size-1 characters; the query's terminating NUL lands on
    // std::string's own terminator slot.
    auto query = [&](auto func, auto obj, auto var) {
      size_t size;
      func(obj, var, 0, nullptr, &size);
      std::string retVal(size - 1, ' ');
      func(obj, var, size, retVal.data(), nullptr);
      return retVal;
    };

    std::string platform_profile, platform_version, platform_name, platform_vendor;
    float platform_version_f;
    // Fills the platform_* captures above for one platform; parses the numeric
    // version out of the "OpenCL x.y ..." version string.
    auto queryPlatform = [&](auto platform) {
      platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE);
      platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION);
      platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME);
      platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR);
      sscanf(platform_version.c_str(), "OpenCL %f", &platform_version_f);
    };

    std::vector<cl_device_id> devices;
    std::string device_vendor, device_name, device_il_version;
    cl_device_type device_type;
    cl_uint device_freq, device_shaders, device_nbits;
    cl_bool device_endian;
    // Fills the device_* captures above for one device.
    // NOTE(review): the device name is stored into platform_name here, while
    // device_name is the variable consumed below (debug prints, mDeviceName) -
    // this looks like it should assign device_name; confirm against the repository.
    auto queryDevice = [&](auto device) {
      platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME);
      device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR);
      device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION);
      clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(device_freq), &device_freq, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_shaders), &device_shaders, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(device_nbits), &device_nbits, nullptr);
      clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE, sizeof(device_endian), &device_endian, nullptr);
    };

    // Scan all platforms/devices (or only the ones forced via settings) and
    // remember the fastest compatible device. (cl_uint)-1 = "none found yet".
    cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1;
    for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) {
      if (mProcessingSettings.oclPlatformNum >= 0) {
        if (mProcessingSettings.oclPlatformNum >= (int32_t)num_platforms) {
          GPUErrorReturn("Invalid platform specified");
        }
        // Jump directly to the user-requested platform (loop exits below).
        iPlatform = mProcessingSettings.oclPlatformNum;
      }
      std::string platformUsageInfo;
      bool platformCompatible = false;
      queryPlatform(platforms[iPlatform]);
      if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) {
        if (mProcessingSettings.oclPlatformNum >= 0) {
          GPUErrorReturn("No device in requested platform or error obtaining device count");
        }
        platformUsageInfo += " - no devices";
      } else {
        // Platforms reporting OpenCL >= 2.1 are considered usable.
        if (platform_version_f >= 2.1f) {
          platformUsageInfo += " - OpenCL 2.2 capable";
          platformCompatible = true;
        }
      }

      if (mProcessingSettings.oclPlatformNum >= 0 || mProcessingSettings.debugLevel >= 2) {
        GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", mProcessingSettings.oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() : "");
      }

      // oclPlatformNum == -2 means: accept any platform that has devices.
      if (platformCompatible || mProcessingSettings.oclPlatformNum >= 0 || (mProcessingSettings.oclPlatformNum == -2 && deviceCount)) {
        if (deviceCount > devices.size()) {
          devices.resize(deviceCount);
        }
        if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) {
          if (mProcessingSettings.oclPlatformNum >= 0) {
            GPUErrorReturn("Error getting OpenCL devices");
          }
          continue;
        }

        // Evaluate each device of this platform against our requirements.
        for (uint32_t i = 0; i < deviceCount; i++) {
          if (mProcessingSettings.deviceNum >= 0) {
            if (mProcessingSettings.deviceNum >= (signed)deviceCount) {
              GPUErrorReturn("Requested device ID %d does not exist", mProcessingSettings.deviceNum);
            }
            // Jump directly to the user-requested device (loop exits below).
            i = mProcessingSettings.deviceNum;
          }
          bool deviceOK = true;
          queryDevice(devices[i]);
          std::string deviceFailure;
          if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) {
            deviceOK = false;
            deviceFailure += " - No GPU device";
          }
          // Device pointer width must match the host (64 bit) for the pointer
          // exchange via krnlGetPtr below.
          if (device_nbits / 8 != sizeof(void*)) {
            deviceOK = false;
            deviceFailure += " - No 64 bit device";
          }
          if (!device_endian) {
            deviceOK = false;
            deviceFailure += " - No Little Endian Mode";
          }
          // Unless compiling from source, the device must accept our SPIR-V IL
          // version; scan the IL version string for a sufficient "SPIR-V_x.y".
          if (!GetProcessingSettings().oclCompileFromSources) {
            size_t pos = 0;
            while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) {
              float spirvVersion;
              sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion);
              if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) {
                break;
              }
              pos += strlen("SPIR-V_0.0");
            }
            if (pos == std::string::npos) {
              deviceOK = false;
              deviceFailure += " - No SPIR-V " + std::to_string(GPUCA_OCL_SPIRV_VERSION) + " (" + device_il_version + ")";
            }
          }

          // Rank devices by clock frequency * compute units.
          // NOTE(review): bestDeviceSpeed is (re)declared to -1 on every loop
          // iteration, so the comparison below always succeeds and the LAST
          // acceptable device wins, not the fastest - it likely belongs outside
          // the loops; confirm intent against the repository.
          double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders;
          if (mProcessingSettings.debugLevel >= 2) {
            GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str());
          }
          if (!deviceOK) {
            if (mProcessingSettings.deviceNum >= 0) {
              GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, mProcessingSettings.deviceNum);
              break;
            }
            continue;
          }
          if (deviceSpeed > bestDeviceSpeed) {
            bestDevice = i;
            bestPlatform = iPlatform;
            bestDeviceSpeed = deviceSpeed;
            mOclVersion = platform_version_f;
          }
          if (mProcessingSettings.deviceNum >= 0) {
            break;
          }
        }
      }
      if (mProcessingSettings.oclPlatformNum >= 0) {
        break;
      }
    }

    if (bestDevice == (cl_uint)-1) {
      GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation");
    }
    mInternals->platform = platforms[bestPlatform];
    // Re-fetch the device list of the winning platform so devices[bestDevice]
    // refers to it, then re-query its properties into the device_* variables.
    GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr));
    mInternals->device = devices[bestDevice];
    queryDevice(mInternals->device);

    cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem;
    std::string deviceVersion;
    size_t deviceMaxWorkGroup, deviceMaxWorkItems[3];
    clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(deviceGlobalMem), &deviceGlobalMem, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(deviceConstantBuffer), &deviceConstantBuffer, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMem), &deviceLocalMem, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup, nullptr);
    clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(deviceMaxWorkItems), deviceMaxWorkItems, nullptr);
    deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION);
    int versionMajor, versionMinor;
    sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor);
    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str());
      // NOTE(review): deviceVersion is a std::string passed to a %s conversion -
      // if GPUInfo is printf-style this is undefined behavior and should be
      // deviceVersion.c_str(); confirm against GPUInfo's implementation.
      GPUInfo("\tVersion = %s", deviceVersion);
      GPUInfo("\tFrequency = %d", (int32_t)device_freq);
      GPUInfo("\tShaders = %d", (int32_t)device_shaders);
      GPUInfo("\tGLobalMemory = %ld", (int64_t)deviceGlobalMem);
      GPUInfo("\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer);
      GPUInfo("\tLocalMemory = %ld", (int64_t)deviceLocalMem);
      GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup);
      GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]);
      GPUInfo(" ");
    }
#ifndef GPUCA_NO_CONSTANT_MEMORY
    // The constant-parameter block must fit into __constant memory.
    if (gGPUConstantMemBufferSize > deviceConstantBuffer) {
      GPUErrorReturn("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize);
    }
#endif

    mDeviceName = device_name.c_str();
    mDeviceName += " (OpenCL)";
    mBlockCount = device_shaders;
    mWarpSize = 32;
    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);

    mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      GPUErrorReturn("Could not create OPENCL Device Context!");
    }

    // Build / load the reconstruction kernels (SPIR-V or source, see GetOCLPrograms).
    if (GetOCLPrograms()) {
      return 1;
    }

    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("OpenCL program and kernels loaded successfully");
    }

    // Device-resident working memory and the constant-parameter buffer.
    mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      clReleaseContext(mInternals->context);
      GPUErrorReturn("OPENCL Memory Allocation Error");
    }

    mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      clReleaseMemObject(mInternals->mem_gpu);
      clReleaseContext(mInternals->context);
      GPUErrorReturn("OPENCL Constant Memory Allocation Error");
    }

    if (device_type & CL_DEVICE_TYPE_CPU) {
      if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel >= 2) {
        GPUInfo("Disabling device timers for CPU device");
      }
      mProcessingSettings.deviceTimers = 0;
    }
    // One command queue per stream; enable profiling only if device timers are
    // requested and supported (retry without profiling otherwise).
    for (int32_t i = 0; i < mNStreams; i++) {
#ifdef CL_VERSION_2_0
      // NOTE(review): clCreateCommandQueueWithProperties formally expects a
      // zero-terminated key/value property list; passing the address of a
      // single cl_queue_properties value is only well-formed when it is 0 -
      // verify the profiling-enabled case against the OpenCL specification.
      cl_queue_properties prop = 0;
      if (versionMajor >= 2 && IsGPU() && mProcessingSettings.deviceTimers) {
        prop |= CL_QUEUE_PROFILING_ENABLE;
      }
      mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
      if (mProcessingSettings.deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) {
        GPUError("GPU device timers not supported by OpenCL platform, disabling");
        mProcessingSettings.deviceTimers = 0;
        prop = 0;
        mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error);
      }
#else
      mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error);
#endif
      if (GPUChkErrI(ocl_error)) {
        GPUErrorReturn("Error creating OpenCL command queue");
      }
    }
    // Force the buffers to be physically placed on the device now.
    if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) {
      GPUErrorReturn("Error migrating buffer");
    }
    if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) {
      GPUErrorReturn("Error migrating buffer");
    }

    mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      GPUErrorReturn("Error allocating pinned host memory");
    }

    // Tiny helper kernel: writes the device-side addresses of the GPU and
    // constant buffers into the host buffer so the host learns the raw pointers.
    const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}";
    cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      GPUErrorReturn("Error creating program object");
    }
    ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr);
    if (GPUChkErrI(ocl_error)) {
      char build_log[16384];
      clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr);
      GPUImportant("Build Log:\n\n%s\n\n", build_log);
      GPUErrorReturn("Error compiling program");
    }
    cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      GPUErrorReturn("Error creating kernel");
    }

    if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) ||
        GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) ||
        GPUChkErrI(clFinish(mInternals->command_queue[0])) ||
        GPUChkErrI(clReleaseKernel(kernel)) ||
        GPUChkErrI(clReleaseProgram(program))) {
      GPUErrorReturn("Error obtaining device memory ptr");
    }

    if (mProcessingSettings.debugLevel >= 2) {
      GPUInfo("Mapping hostmemory");
    }
    // Map the pinned host buffer permanently; its first two size_t slots now
    // contain the device pointers written by krnlGetPtr above.
    mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error);
    if (GPUChkErrI(ocl_error)) {
      GPUErrorReturn("Error allocating Page Locked Host Memory");
    }

    mDeviceMemoryBase = ((void**)mHostMemoryBase)[0];
    // NOTE(review): one statement is missing here in this extract (doc-generator
    // artifact) - presumably reading slot [1] (the constant-memory device
    // pointer written by krnlGetPtr) into mDeviceConstantMem; verify against
    // the repository.

    if (mProcessingSettings.debugLevel >= 1) {
      GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase);
      memset(mHostMemoryBase, 0xDD, mHostMemorySize);
    }

    // NOTE(review): device_vendor / device_name are std::string passed to %s -
    // if GPUInfo is printf-style this is undefined behavior and should use
    // .c_str(); confirm against GPUInfo's implementation.
    GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
  } else {
    // Slave instance: share the already-initialized state of the master.
    GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
    mBlockCount = master->mBlockCount;
    mWarpSize = master->mWarpSize;
    mMaxBackendThreads = master->mMaxBackendThreads;
    mDeviceName = master->mDeviceName;
    mDeviceConstantMem = master->mDeviceConstantMem;
    mInternals = master->mInternals;
  }

  // Placement-new cl_event storage into the generic deviceEvent slots.
  for (uint32_t i = 0; i < mEvents.size(); i++) {
    cl_event* events = (cl_event*)mEvents[i].data();
    new (events) cl_event[mEvents[i].size()];
  }

  return (0);
}
393
// Release all OpenCL resources (master only; slaves merely reset their local
// pointers, since mInternals is owned by the master).
// NOTE(review): the signature line was lost in extraction - presumably the
// ExitDevice runtime counterpart of InitDevice, returning int32_t; confirm
// against the repository.
{
  // Uninitialize OPENCL
  // NOTE(review): one statement is missing here in this extract (doc-generator
  // artifact) - likely a full-device synchronization before teardown; verify
  // against the repository.

  if (mMaster == nullptr) {
    if (mDeviceMemoryBase) {
      clReleaseMemObject(mInternals->mem_gpu);
      clReleaseMemObject(mInternals->mem_constant);
      for (uint32_t i = 0; i < mInternals->kernels.size(); i++) {
        clReleaseKernel(mInternals->kernels[i]);
      }
      mInternals->kernels.clear();
    }
    if (mHostMemoryBase) {
      // Unmap before releasing the pinned host buffer and the queues.
      clEnqueueUnmapMemObject(mInternals->command_queue[0], mInternals->mem_host, mHostMemoryBase, 0, nullptr, nullptr);
      for (int32_t i = 0; i < mNStreams; i++) {
        clReleaseCommandQueue(mInternals->command_queue[i]);
      }
      clReleaseMemObject(mInternals->mem_host);
    }

    clReleaseProgram(mInternals->program);
    clReleaseContext(mInternals->context);
    GPUInfo("OPENCL Uninitialized");
  }
  mDeviceMemoryBase = nullptr;
  mHostMemoryBase = nullptr;

  return (0);
}
425
// Copy size bytes between host memory and the device buffer mem_gpu.
// toGPU = 1: host -> device, toGPU = 0: device -> host, toGPU = -2: device ->
// device within mem_gpu. Device-side "pointers" are translated to offsets into
// mem_gpu via mDeviceMemoryBase. stream == -1 selects queue 0 and makes the
// transfer blocking; ev receives a completion event, evList/nEvents form the
// wait list. Returns the number of bytes requested.
size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
{
  if (evList == nullptr) {
    nEvents = 0; // no wait list supplied
  }
  if (mProcessingSettings.debugLevel >= 3) {
    stream = -1; // deep-debug mode forces blocking transfers
  }
  if (stream == -1) {
    // NOTE(review): one statement is missing here in this extract
    // (doc-generator artifact) - presumably a full synchronization before the
    // blocking transfer; verify against the repository.
  }
  // NOTE(review): evList (and ev) may be nullptr below while
  // ->getEventList<cl_event>() is still invoked - this relies on the member
  // merely computing a pointer without touching *this; confirm deviceEvent's
  // implementation.
  if (size == 0) {
    if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0
      GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
    }
  } else if (toGPU == -2) {
    GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  } else if (toGPU) {
    GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  } else {
    GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
  }
  if (mProcessingSettings.serializeGPU & 2) {
    GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
  }
  return size;
}
453
// Write size bytes from src into the device constant-memory buffer at offset.
// stream == -1 selects queue 0 and makes the write blocking; ev (if non-null)
// receives the completion event.
// NOTE(review): the signature line was lost in extraction - per the class
// declaration it is size_t WriteToConstantMemory(size_t offset, const void*
// src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr); confirm.
{
  if (stream == -1) {
    // NOTE(review): one statement is missing here in this extract
    // (doc-generator artifact) - presumably a full synchronization before the
    // blocking write; verify against the repository.
  }
  // NOTE(review): ev defaults to nullptr while ->getEventList<cl_event>() is
  // still invoked - relies on the member not touching *this; confirm.
  GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList<cl_event>()));
  if (mProcessingSettings.serializeGPU & 2) {
    GPUDebug("WriteToConstantMemory", stream, true);
  }
  return size;
}
465
466void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get<cl_event>())); }
467
468void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList<cl_event>())); }
469
// Wait for the given event with an optional timeout guard: if stuckProtection
// is enabled, poll the event status instead of blocking, and flag the GPU as
// stuck (disabling further processing) on timeout; otherwise simply drain the
// stream. Returns 0 on success, 1 via GPUErrorReturn on timeout.
// NOTE(review): the signature line was lost in extraction - per the class
// declaration it is int32_t DoStuckProtection(int32_t stream, deviceEvent
// event); confirm against the repository.
{
  if (mProcessingSettings.stuckProtection) {
    cl_int tmp = 0;
    // Poll every 50 us; iteration count makes the total wait roughly
    // stuckProtection microseconds (usleep takes microseconds).
    for (int32_t i = 0; i <= mProcessingSettings.stuckProtection / 50; i++) {
      usleep(50);
      clGetEventInfo(event.get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr);
      if (tmp == CL_COMPLETE) {
        break;
      }
    }
    if (tmp != CL_COMPLETE) {
      mGPUStuck = 1;
      GPUErrorReturn("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp);
    }
  } else {
    // No protection requested: block until the stream is fully drained.
    clFinish(mInternals->command_queue[stream]);
  }
  return 0;
}
490
// Block the host until every command queue (stream) has drained.
// NOTE(review): the signature line was lost in extraction - presumably
// void GPUReconstructionOCLBackend::SynchronizeGPU(); confirm against the
// repository.
{
  for (int32_t i = 0; i < mNStreams; i++) {
    GPUChkErr(clFinish(mInternals->command_queue[i]));
  }
}
497
499
500void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList<cl_event>())); }
501
// Make the given stream wait (device-side, via a marker) for the listed
// events; no-op when the list is empty.
// NOTE(review): the signature line was lost in extraction - per the class
// declaration it is void StreamWaitForEvents(int32_t stream, deviceEvent*
// evList, int32_t nEvents = 1); confirm against the repository.
{
  if (nEvents) {
    GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList<cl_event>(), nullptr));
  }
}
508
// Non-blocking check whether all listed events have completed; returns false
// as soon as one event is not yet CL_COMPLETE.
// NOTE(review): the signature line was lost in extraction - per the class
// declaration it is bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1);
// confirm against the repository.
{
  cl_int eventdone;
  for (int32_t i = 0; i < nEvents; i++) {
    GPUChkErr(clGetEventInfo(evList[i].get<cl_event>(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr));
    if (eventdone != CL_COMPLETE) {
      return false;
    }
  }
  return true;
}
520
521int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, bool force)
522{
523 // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode
524 if (!force && mProcessingSettings.debugLevel <= 0) {
525 return (0);
526 }
527 for (int32_t i = 0; i < mNStreams; i++) {
528 if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) {
529 GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i);
530 }
531 }
532 if (mProcessingSettings.debugLevel >= 3) {
533 GPUInfo("GPU Sync Done");
534 }
535 return (0);
536}
537
// Create and build the main reconstruction program: from the embedded SPIR-V
// IL when the platform supports it (and source compilation is not forced),
// otherwise from the embedded OpenCL C sources. On build failure the build log
// is printed. Returns 0 on success (after registering all kernels), 1 on error.
// NOTE(review): the signature line was lost in extraction - presumably
// int32_t GPUReconstructionOCLBackend::GetOCLPrograms(); confirm against the
// repository.
{
  cl_int ocl_error;

  // Build flags: user override from settings wins over the compile-time default.
  const char* oclBuildFlags = GetProcessingSettings().oclOverrideSourceBuildFlags != "" ? GetProcessingSettings().oclOverrideSourceBuildFlags.c_str() : GPUCA_M_STR(GPUCA_OCL_BUILD_FLAGS);

#ifdef OPENCL_ENABLED_SPIRV // clang-format off
  if (mOclVersion >= 2.1f && !GetProcessingSettings().oclCompileFromSources) {
    GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", mOclVersion);
    mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error);
    oclBuildFlags = ""; // IL is pre-compiled; source flags do not apply
  } else
#endif // clang-format on
  {
    GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", mOclVersion);
    size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len};
    char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start};
    mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error);
  }

  if (GPUChkErrI(ocl_error)) {
    GPUError("Error creating OpenCL program from binary");
    return 1;
  }

  if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) {
    // Fetch and print the build log only if the failure was a genuine build error.
    cl_build_status status;
    if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) {
      size_t log_size;
      clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
      std::unique_ptr<char[]> build_log(new char[log_size + 1]);
      clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr);
      build_log[log_size] = 0; // ensure NUL termination
      GPUError("Build Log:\n\n%s\n", build_log.get());
    }
    return 1;
  }

  return AddKernels();
}
578
579const char* GPUReconstructionOCLBackend::convertErrorToString(int32_t errorcode)
580{
581 static const std::map<cl_int, const char*> error_map = {
582 {CL_SUCCESS, "CL_SUCCESS"},
583 {CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND"},
584 {CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE"},
585 {CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE"},
586 {CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
587 {CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES"},
588 {CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY"},
589 {CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE"},
590 {CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP"},
591 {CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH"},
592 {CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
593 {CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE"},
594 {CL_MAP_FAILURE, "CL_MAP_FAILURE"},
595 {CL_MISALIGNED_SUB_BUFFER_OFFSET, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
596 {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
597 {CL_COMPILE_PROGRAM_FAILURE, "CL_COMPILE_PROGRAM_FAILURE"},
598 {CL_LINKER_NOT_AVAILABLE, "CL_LINKER_NOT_AVAILABLE"},
599 {CL_LINK_PROGRAM_FAILURE, "CL_LINK_PROGRAM_FAILURE"},
600 {CL_DEVICE_PARTITION_FAILED, "CL_DEVICE_PARTITION_FAILED"},
601 {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},
602 {CL_INVALID_VALUE, "CL_INVALID_VALUE"},
603 {CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE"},
604 {CL_INVALID_PLATFORM, "CL_INVALID_PLATFORM"},
605 {CL_INVALID_DEVICE, "CL_INVALID_DEVICE"},
606 {CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT"},
607 {CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES"},
608 {CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE"},
609 {CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR"},
610 {CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT"},
611 {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
612 {CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE"},
613 {CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER"},
614 {CL_INVALID_BINARY, "CL_INVALID_BINARY"},
615 {CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS"},
616 {CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM"},
617 {CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE"},
618 {CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME"},
619 {CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION"},
620 {CL_INVALID_KERNEL, "CL_INVALID_KERNEL"},
621 {CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX"},
622 {CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE"},
623 {CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE"},
624 {CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS"},
625 {CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION"},
626 {CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE"},
627 {CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE"},
628 {CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET"},
629 {CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST"},
630 {CL_INVALID_EVENT, "CL_INVALID_EVENT"},
631 {CL_INVALID_OPERATION, "CL_INVALID_OPERATION"},
632 {CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT"},
633 {CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE"},
634 {CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL"},
635 {CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE"},
636 {CL_INVALID_PROPERTY, "CL_INVALID_PROPERTY"},
637 {CL_INVALID_IMAGE_DESCRIPTOR, "CL_INVALID_IMAGE_DESCRIPTOR"},
638 {CL_INVALID_COMPILER_OPTIONS, "CL_INVALID_COMPILER_OPTIONS"},
639 {CL_INVALID_LINKER_OPTIONS, "CL_INVALID_LINKER_OPTIONS"},
640 {CL_INVALID_DEVICE_PARTITION_COUNT, "CL_INVALID_DEVICE_PARTITION_COUNT"},
641 {CL_INVALID_PIPE_SIZE, "CL_INVALID_PIPE_SIZE"},
642 {CL_INVALID_DEVICE_QUEUE, "CL_INVALID_DEVICE_QUEUE"},
643 {CL_INVALID_SPEC_ID, "CL_INVALID_SPEC_ID"},
644 {CL_MAX_SIZE_RESTRICTION_EXCEEDED, "CL_MAX_SIZE_RESTRICTION_EXCEEDED"}};
645 auto entry = error_map.find(errorcode);
646 return (entry != error_map.end()) ? entry->second : "Unknown Errorcode";
647}
benchmark::State & state
int32_t i
#define GPUChkErrI(x)
#define GPUChkErr(x)
#define GPUCA_M_STR(a)
#define GPUCA_GPUReconstructionUpdateDefaults()
#define GPUErrorReturn(...)
GPUReconstruction * GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend &cfg)
int32_t retVal
uint16_t pos
Definition RawData.h:3
void SynchronizeStream(int32_t stream) override
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false) override
void RecordMarker(deviceEvent *ev, int32_t stream) override
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override
GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend &cfg)
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const override
GPUReconstructionOCLInternals * mInternals
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1) override
int32_t DoStuckProtection(int32_t stream, deviceEvent event) override
void ReleaseEvent(deviceEvent ev) override
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1) override
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1) override
std::vector< std::vector< deviceEvent > > mEvents
GPUConstantMem * mDeviceConstantMem
GPUSettingsProcessing mProcessingSettings
const GPUSettingsProcessing & GetProcessingSettings() const
GPUSettingsDeviceBackend mDeviceBackendSettings
struct _cl_event * event
Definition glcorearb.h:2982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLuint entry
Definition glcorearb.h:5735
GLsizeiptr size
Definition glcorearb.h:659
GLbitfield GLuint program
Definition glcorearb.h:1905
GLenum GLenum dst
Definition glcorearb.h:1767
GLboolean * data
Definition glcorearb.h:298
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GPUReconstructionKernels< GPUReconstructionOCLBackend > GPUReconstructionOCL
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
#define QGET_LD_BINARY_SYMBOLS(filename)
cl_command_queue command_queue[GPUCA_MAX_STREAMS]
const int nEvents
Definition test_Fifo.cxx:27