Project
Loading...
Searching...
No Matches
GPUReconstructionOCLInternals.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15// All OpenCL-header related stuff goes here, so we can run CING over GPUReconstructionOCL
16
17#ifndef GPUTPCGPUTRACKEROPENCLINTERNALS_H
18#define GPUTPCGPUTRACKEROPENCLINTERNALS_H
19
20#define CL_TARGET_OPENCL_VERSION 220
21#include <CL/opencl.h>
22#include <CL/cl_ext.h>
23#include <vector>
24#include <string>
25#include <memory>
26#include "GPULogging.h"
27
28namespace o2::gpu
29{
30
31static const char* opencl_error_string(int32_t errorcode)
32{
33 switch (errorcode) {
34 case CL_SUCCESS:
35 return "Success!";
36 case CL_DEVICE_NOT_FOUND:
37 return "Device not found.";
38 case CL_DEVICE_NOT_AVAILABLE:
39 return "Device not available";
40 case CL_COMPILER_NOT_AVAILABLE:
41 return "Compiler not available";
42 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
43 return "Memory object allocation failure";
44 case CL_OUT_OF_RESOURCES:
45 return "Out of resources";
46 case CL_OUT_OF_HOST_MEMORY:
47 return "Out of host memory";
48 case CL_PROFILING_INFO_NOT_AVAILABLE:
49 return "Profiling information not available";
50 case CL_MEM_COPY_OVERLAP:
51 return "Memory copy overlap";
52 case CL_IMAGE_FORMAT_MISMATCH:
53 return "Image format mismatch";
54 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
55 return "Image format not supported";
56 case CL_BUILD_PROGRAM_FAILURE:
57 return "Program build failure";
58 case CL_MAP_FAILURE:
59 return "Map failure";
60 case CL_INVALID_VALUE:
61 return "Invalid value";
62 case CL_INVALID_DEVICE_TYPE:
63 return "Invalid device type";
64 case CL_INVALID_PLATFORM:
65 return "Invalid platform";
66 case CL_INVALID_DEVICE:
67 return "Invalid device";
68 case CL_INVALID_CONTEXT:
69 return "Invalid context";
70 case CL_INVALID_QUEUE_PROPERTIES:
71 return "Invalid queue properties";
72 case CL_INVALID_COMMAND_QUEUE:
73 return "Invalid command queue";
74 case CL_INVALID_HOST_PTR:
75 return "Invalid host pointer";
76 case CL_INVALID_MEM_OBJECT:
77 return "Invalid memory object";
78 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
79 return "Invalid image format descriptor";
80 case CL_INVALID_IMAGE_SIZE:
81 return "Invalid image size";
82 case CL_INVALID_SAMPLER:
83 return "Invalid sampler";
84 case CL_INVALID_BINARY:
85 return "Invalid binary";
86 case CL_INVALID_BUILD_OPTIONS:
87 return "Invalid build options";
88 case CL_INVALID_PROGRAM:
89 return "Invalid program";
90 case CL_INVALID_PROGRAM_EXECUTABLE:
91 return "Invalid program executable";
92 case CL_INVALID_KERNEL_NAME:
93 return "Invalid kernel name";
94 case CL_INVALID_KERNEL_DEFINITION:
95 return "Invalid kernel definition";
96 case CL_INVALID_KERNEL:
97 return "Invalid kernel";
98 case CL_INVALID_ARG_INDEX:
99 return "Invalid argument index";
100 case CL_INVALID_ARG_VALUE:
101 return "Invalid argument value";
102 case CL_INVALID_ARG_SIZE:
103 return "Invalid argument size";
104 case CL_INVALID_KERNEL_ARGS:
105 return "Invalid kernel arguments";
106 case CL_INVALID_WORK_DIMENSION:
107 return "Invalid work dimension";
108 case CL_INVALID_WORK_GROUP_SIZE:
109 return "Invalid work group size";
110 case CL_INVALID_WORK_ITEM_SIZE:
111 return "Invalid work item size";
112 case CL_INVALID_GLOBAL_OFFSET:
113 return "Invalid global offset";
114 case CL_INVALID_EVENT_WAIT_LIST:
115 return "Invalid event wait list";
116 case CL_INVALID_EVENT:
117 return "Invalid event";
118 case CL_INVALID_OPERATION:
119 return "Invalid operation";
120 case CL_INVALID_GL_OBJECT:
121 return "Invalid OpenGL object";
122 case CL_INVALID_BUFFER_SIZE:
123 return "Invalid buffer size";
124 case CL_INVALID_MIP_LEVEL:
125 return "Invalid mip-map level";
126 default:
127 return "Unknown Errorcode";
128 }
129}
130
131#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__)
132#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__)
133
134static inline int64_t OCLsetKernelParameters_helper(cl_kernel& k, int32_t i)
135{
136 return 0;
137}
138
139template <typename T, typename... Args>
140static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters)
141{
142 int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter);
143 if (retVal) {
144 return retVal;
145 }
146 return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...);
147}
148
149template <typename... Args>
150static inline int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args)
151{
152 return OCLsetKernelParameters_helper(kernel, 0, args...);
153}
154
155static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr, cl_int nWaitEvents = 1)
156{
157 return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent);
158}
159
161 cl_platform_id platform;
162 cl_device_id device;
163 cl_context context;
165 cl_mem mem_gpu;
167 cl_mem mem_host;
168 cl_program program;
169
170 std::vector<std::pair<cl_kernel, std::string>> kernels;
171};
172
173template <typename K, typename... Args>
174inline int32_t GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args)
175{
176 auto& x = _xyz.x;
177 auto& y = _xyz.y;
178 auto& z = _xyz.z;
179 if (y.num <= 1) {
180 GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...));
181 } else {
182 GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...));
183 }
184
185 cl_event ev;
186 cl_event* evr;
187 bool tmpEvent = false;
188 if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) {
189 evr = &ev;
190 tmpEvent = true;
191 } else {
192 evr = (cl_event*)z.ev;
193 }
194 GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents));
195 if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) {
196 cl_ulong time_start, time_end;
197 GPUFailedMsg(clWaitForEvents(1, evr));
198 GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr));
199 GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr));
200 _xyz.t = (time_end - time_start) * 1.e-9f;
201 if (tmpEvent) {
202 GPUFailedMsg(clReleaseEvent(ev));
203 }
204 }
205 return 0;
206}
207
208template <class T, int32_t I>
210{
211 std::string name(GetKernelName<T, I>());
212 if (multi) {
213 name += "_multi";
214 }
215 std::string kname("krnl_" + name);
216
217 cl_int ocl_error;
218 cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error);
219 if (GPUFailedMsgI(ocl_error)) {
220 GPUError("Error creating OPENCL Kernel: %s", name.c_str());
221 return 1;
222 }
223 mInternals->kernels.emplace_back(krnl, name);
224 return 0;
225}
226
227template <class T, int32_t I>
229{
230 std::string name(GetKernelName<T, I>());
231 if (num > 1) {
232 name += "_multi";
233 }
234
235 for (uint32_t k = 0; k < mInternals->kernels.size(); k++) {
236 if (mInternals->kernels[k].second == name) {
237 return (k);
238 }
239 }
240 GPUError("Could not find OpenCL kernel %s", name.c_str());
241 throw ::std::runtime_error("Requested unsupported OpenCL kernel");
242}
243
244static_assert(std::is_convertible<cl_event, void*>::value, "OpenCL event type incompatible to deviceEvent");
245} // namespace o2::gpu
246
247#endif
int32_t i
#define GPUCA_MAX_STREAMS
#define GPUFailedMsgI(x)
#define GPUFailedMsg(x)
int32_t retVal
double num
int32_t runKernelBackendInternal(const krnlSetupTime &_xyz, K &k, const Args &... args)
GPUReconstructionOCLInternals * mInternals
GPUSettingsProcessing mProcessingSettings
GLint GLenum GLint x
Definition glcorearb.h:403
GLuint const GLchar * name
Definition glcorearb.h:781
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
cl_command_queue command_queue[GPUCA_MAX_STREAMS]
std::vector< std::pair< cl_kernel, std::string > > kernels