Project
Loading...
Searching...
No Matches
Utils.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
15
16#ifndef ITSTRACKINGGPU_UTILS_H_
17#define ITSTRACKINGGPU_UTILS_H_
18
19#include <vector>
20#include <string>
21#include <tuple>
22
25
26#include "GPUCommonDef.h"
27#include "GPUCommonHelpers.h"
28#include "GPUCommonLogger.h"
29#include "GPUCommonDefAPI.h"
30
31#ifdef GPUCA_GPUCODE
32#include <thrust/device_ptr.h>
33#ifndef __HIPCC__
34#define THRUST_NAMESPACE thrust::cuda
35#else
36#define THRUST_NAMESPACE thrust::hip
37#endif
38#endif
39
40#ifdef ITS_GPU_LOG
41#define GPULog(...) LOGP(info, __VA_ARGS__)
42#else
43#define GPULog(...)
44#endif
45
46namespace o2::its
47{
48// FWD declarations
49template <int>
50class IndexTableUtils;
51class Tracklet;
52
53template <typename T1, typename T2>
54using gpuPair = std::pair<T1, T2>;
55
56namespace gpu
57{
58
59template <typename T>
60struct gpuSpan {
61 using value_type = T;
62 using ptr = T*;
63 using ref = T&;
64
65 GPUd() gpuSpan() : _data(nullptr), _size(0) {}
66 GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
67 GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
68 GPUd() unsigned int size() const { return _size; }
69 GPUd() bool empty() const { return _size == 0; }
70 GPUd() ref front() const { return _data[0]; }
71 GPUd() ref back() const { return _data[_size - 1]; }
72 GPUd() ptr begin() const { return _data; }
73 GPUd() ptr end() const { return _data + _size; }
74
75 protected:
77 unsigned int _size;
78};
79
80template <typename T>
81struct gpuSpan<const T> {
82 using value_type = T;
83 using ptr = const T*;
84 using ref = const T&;
85
86 GPUd() gpuSpan() : _data(nullptr), _size(0) {}
87 GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
89 GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
90 GPUd() unsigned int size() const { return _size; }
91 GPUd() bool empty() const { return _size == 0; }
92 GPUd() ref front() const { return _data[0]; }
93 GPUd() ref back() const { return _data[_size - 1]; }
94 GPUd() ptr begin() const { return _data; }
95 GPUd() ptr end() const { return _data + _size; }
96
97 protected:
99 unsigned int _size;
100};
101
// Abstract stream class
// RAII wrapper around a CUDA/HIP stream plus one companion (timing-disabled)
// event. On non-GPU builds it degrades to a no-op holder around a void* handle.
class Stream
{
 public:
#if defined(__HIPCC__)
  using Handle = hipStream_t;
  static constexpr Handle DefaultStream = 0;
  static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
  using Event = hipEvent_t;
#elif defined(__CUDACC__)
  using Handle = cudaStream_t;
  static constexpr Handle DefaultStream = 0;
  static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
  using Event = cudaEvent_t;
#else
  using Handle = void*;
  static constexpr Handle DefaultStream = nullptr;
  static constexpr unsigned int DefaultFlag = 0;
  using Event = void*;
#endif

  /// Creates a new stream (non-blocking by default) and its companion event.
  Stream(unsigned int flags = DefaultFlag)
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
    GPUChkErrS(hipEventCreateWithFlags(&mEvent, hipEventDisableTiming));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
    GPUChkErrS(cudaEventCreateWithFlags(&mEvent, cudaEventDisableTiming));
#endif
  }

  /// Wraps an externally created handle; no companion event is created for it.
  Stream(Handle h) : mHandle(h) {}
  ~Stream()
  {
    if (mHandle != DefaultStream) {
      // NOTE(review): when constructed from an external Handle, mEvent stays
      // null and EventDestroy receives a null event — confirm intended usage.
      // NOTE(review): the implicit copy constructor shares mHandle, so copies
      // destroy the same stream twice; consider move-only semantics.
#if defined(__HIPCC__)
      GPUChkErrS(hipStreamDestroy(mHandle));
      GPUChkErrS(hipEventDestroy(mEvent));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaStreamDestroy(mHandle));
      GPUChkErrS(cudaEventDestroy(mEvent));
#endif
    }
  }

  /// True when this wraps a real (non-default) stream.
  operator bool() const { return mHandle != DefaultStream; }
  const Handle& get() { return mHandle; }
  const Handle& getStream() { return mHandle; }
  const Event& getEvent() { return mEvent; }
  /// Blocks the host until all work queued on this stream has completed.
  void sync() const
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamSynchronize(mHandle));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamSynchronize(mHandle));
#endif
  }
  /// Records the companion event at the current point in the stream.
  void record()
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipEventRecord(mEvent, mHandle));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaEventRecord(mEvent, mHandle));
#endif
  }

 private:
  Handle mHandle{DefaultStream};
  Event mEvent{nullptr};
};
173
174// Abstract vector for streams.
176{
177 public:
178 size_t size() const noexcept { return mStreams.size(); }
179 void resize(size_t n) { mStreams.resize(n); }
180 void clear() { mStreams.clear(); }
181 auto& operator[](size_t i) { return mStreams[i]; }
182 void push_back(const Stream& stream) { mStreams.push_back(stream); }
183 void sync(bool device = true)
184 {
185 if (device) {
186#if defined(__HIPCC__)
187 GPUChkErrS(hipDeviceSynchronize());
188#elif defined(__CUDACC__)
189 GPUChkErrS(cudaDeviceSynchronize());
190#endif
191 } else {
192 for (auto& s : mStreams) {
193 s.sync();
194 }
195 }
196 }
197 void waitEvent(size_t iStream, size_t iEvent)
198 {
199#if defined(__HIPCC__)
200 GPUChkErrS(hipStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
201#elif defined(__CUDACC__)
202 GPUChkErrS(cudaStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
203#endif
204 }
205
206 private:
207 std::vector<Stream> mStreams;
208};
209
#ifdef ITS_MEASURE_GPU_TIME
// RAII timer: records one start event per tracked stream at construction and,
// at destruction, records a stop event, synchronises it and logs the elapsed
// time per stream.
class GPUTimer
{
 public:
  /// Times the default stream only.
  GPUTimer(const std::string& name)
    : mName(name)
  {
    mStreams.emplace_back(Stream::DefaultStream);
    startTimers();
  }
  /// Times every stream in the collection.
  GPUTimer(Streams& streams, const std::string& name)
    : mName(name)
  {
    for (size_t i{0}; i < streams.size(); ++i) {
      mStreams.push_back(streams[i].get());
    }
    startTimers();
  }
  /// Times the half-open stream range [start, end).
  GPUTimer(Streams& streams, const std::string& name, size_t end, size_t start = 0)
    : mName(name)
  {
    for (size_t sta{start}; sta < end; ++sta) {
      mStreams.push_back(streams[sta].get());
    }
    startTimers();
  }
  /// Times a single stream; id is appended to the label to tell instances apart.
  GPUTimer(Stream& stream, const std::string& name, const int id = 0)
    : mName(name)
  {
    mStreams.push_back(stream.get());
    mName += ":id" + std::to_string(id);
    startTimers();
  }
  ~GPUTimer()
  {
    for (size_t i{0}; i < mStreams.size(); ++i) {
      float ms = 0.0f;
#if defined(__HIPCC__)
      GPUChkErrS(hipEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(hipEventSynchronize(mStops[i]));
      GPUChkErrS(hipEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(hipEventDestroy(mStarts[i]));
      GPUChkErrS(hipEventDestroy(mStops[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(cudaEventSynchronize(mStops[i]));
      GPUChkErrS(cudaEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(cudaEventDestroy(mStarts[i]));
      GPUChkErrS(cudaEventDestroy(mStops[i]));
#endif
      LOGP(info, "Elapsed time for {}:{} {} ms", mName, i, ms);
    }
  }

  /// Creates start/stop event pairs and records the start on each stream.
  void startTimers()
  {
    mStarts.resize(mStreams.size());
    mStops.resize(mStreams.size());
    for (size_t i{0}; i < mStreams.size(); ++i) {
#if defined(__HIPCC__)
      GPUChkErrS(hipEventCreate(&mStarts[i]));
      GPUChkErrS(hipEventCreate(&mStops[i]));
      GPUChkErrS(hipEventRecord(mStarts[i], mStreams[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventCreate(&mStarts[i]));
      GPUChkErrS(cudaEventCreate(&mStops[i]));
      GPUChkErrS(cudaEventRecord(mStarts[i], mStreams[i]));
#endif
    }
  }

 private:
  std::string mName;
  std::vector<Stream::Event> mStarts, mStops;
  std::vector<Stream::Handle> mStreams;
};
#else // ITS_MEASURE_GPU_TIME not defined
// No-op stand-in so call sites compile unchanged when timing is disabled.
class GPUTimer
{
 public:
  template <typename... Args>
  GPUTimer(Args&&...)
  {
  }
};
#endif
296
297#ifdef GPUCA_GPUCODE
298template <typename T>
299struct TypedAllocator {
300 using value_type = T;
301 using pointer = thrust::device_ptr<T>;
302 using const_pointer = thrust::device_ptr<const T>;
303 using size_type = std::size_t;
304 using difference_type = std::ptrdiff_t;
305
306 TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
307 explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}
308
309 template <typename U>
310 TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
311 {
312 }
313
314 pointer allocate(size_type n)
315 {
316 void* raw = mInternalAllocator->allocate(n * sizeof(T));
317 return thrust::device_pointer_cast(static_cast<T*>(raw));
318 }
319
320 void deallocate(pointer p, size_type n) noexcept
321 {
322 if (!p) {
323 return;
324 }
325 void* raw = thrust::raw_pointer_cast(p);
326 mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
327 }
328
329 bool operator==(TypedAllocator const& o) const noexcept
330 {
331 return mInternalAllocator == o.mInternalAllocator;
332 }
333 bool operator!=(TypedAllocator const& o) const noexcept
334 {
335 return !(*this == o);
336 }
337
338 private:
339 ExternalAllocator* mInternalAllocator;
340};
341
342template <int nLayers>
343GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
344 const o2::its::IndexTableUtils<nLayers>* utils,
345 const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
346{
347 const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
348 const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
349 const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
350 const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
351
352 if (zRangeMax < -utils->getLayerZ(layerIndex) ||
353 zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
354 return {};
355 }
356
357 return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
358 utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
359 o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
360 utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
361}
362
363GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
364 const int* roframesPV,
365 const int nROF,
366 const uint8_t* mask,
367 const Vertex* vertices)
368{
369 const int start_pv_id = roframesPV[rof];
370 const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
371 size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
372 return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
373};
374
375GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
376 const int romax,
377 const int* roframesPV,
378 const int nROF,
379 const Vertex* vertices)
380{
381 const int start_pv_id = roframesPV[romin];
382 const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
383 return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
384};
385
386GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
387 const int totROFs,
388 const int layer,
389 const int** roframesClus,
390 const Cluster** clusters)
391{
392 if (rof < 0 || rof >= totROFs) {
393 return gpuSpan<const Cluster>();
394 }
395 const int start_clus_id{roframesClus[layer][rof]};
396 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
397 const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
398 return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
399}
400
401GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
402 const int totROFs,
403 const int mode,
404 const int** roframesClus,
405 const Tracklet** tracklets)
406{
407 if (rof < 0 || rof >= totROFs) {
408 return gpuSpan<const Tracklet>();
409 }
410 const int start_clus_id{roframesClus[1][rof]};
411 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
412 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
413 return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
414}
415
416GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
417 const int totROFs,
418 const int mode,
419 const int** roframesClus,
420 int** ntracklets)
421{
422 if (rof < 0 || rof >= totROFs) {
423 return gpuSpan<int>();
424 }
425 const int start_clus_id{roframesClus[1][rof]};
426 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
427 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
428 return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
429}
430
431GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
432 const int totROFs,
433 const int mode,
434 const int** roframesClus,
435 const int** ntracklets)
436{
437 if (rof < 0 || rof >= totROFs) {
438 return gpuSpan<const int>();
439 }
440 const int start_clus_id{roframesClus[1][rof]};
441 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
442 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
443 return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
444}
445
446GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
447 const int totROFs,
448 const int** roframesClus,
449 int* nlines)
450{
451 if (rof < 0 || rof >= totROFs) {
452 return gpuSpan<int>();
453 }
454 const int start_clus_id{roframesClus[1][rof]};
455 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
456 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
457 return gpuSpan<int>(&(nlines[start_clus_id]), delta);
458}
459
460GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
461 const int totROFs,
462 const int** roframesClus,
463 const int* nlines)
464{
465 if (rof < 0 || rof >= totROFs) {
466 return gpuSpan<const int>();
467 }
468 const int start_clus_id{roframesClus[1][rof]};
469 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
470 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
471 return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
472}
473#endif
474} // namespace gpu
475} // namespace o2::its
476
477#endif
int32_t i
#define GPUdii()
#define GPUChkErrS(x)
Class for time synchronization of RawReader instances.
HMPID cluster implementation.
Definition Cluster.h:27
GPUTimer(Args &&...)
Definition Utils.h:291
const Handle & getStream()
Definition Utils.h:150
void sync() const
Definition Utils.h:152
const Handle & get()
Definition Utils.h:149
static constexpr Handle DefaultStream
Definition Utils.h:118
static constexpr unsigned int DefaultFlag
Definition Utils.h:119
const Event & getEvent()
Definition Utils.h:151
Stream(unsigned int flags=DefaultFlag)
Definition Utils.h:123
Stream(Handle h)
Definition Utils.h:134
void resize(size_t n)
Definition Utils.h:179
void push_back(const Stream &stream)
Definition Utils.h:182
void waitEvent(size_t iStream, size_t iEvent)
Definition Utils.h:197
void sync(bool device=true)
Definition Utils.h:183
auto & operator[](size_t i)
Definition Utils.h:181
size_t size() const noexcept
Definition Utils.h:178
GLdouble n
Definition glcorearb.h:1982
GLenum mode
Definition glcorearb.h:266
GLenum void ** pointer
Definition glcorearb.h:805
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
GLuint const GLchar * name
Definition glcorearb.h:781
GLboolean * data
Definition glcorearb.h:298
GLbitfield flags
Definition glcorearb.h:1570
GLenum GLuint GLint GLint layer
Definition glcorearb.h:1310
GLuint start
Definition glcorearb.h:469
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLuint GLuint stream
Definition glcorearb.h:1806
GLint GLuint mask
Definition glcorearb.h:291
constexpr float TwoPI
constexpr float PI
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pair< T1, T2 > gpuPair
Definition Utils.h:54
o2::dataformats::Vertex< o2::dataformats::TimeStamp< int > > Vertex
Definition Definitions.h:38
bool operator!=(const DsChannelId &a, const DsChannelId &b)
Definition DsChannelId.h:66
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
Common utility functions.
void empty(int)
GPUd() gpuSpan(const gpuSpan< T > &other)
Definition Utils.h:88
GPUd() ref front() const
Definition Utils.h:92
GPUd() bool empty() const
Definition Utils.h:91
GPUd() ref operator[](unsigned int idx) const
Definition Utils.h:89
GPUd() ptr end() const
Definition Utils.h:95
GPUd() unsigned int size() const
Definition Utils.h:90
GPUd() ptr begin() const
Definition Utils.h:94
GPUd() ref back() const
Definition Utils.h:93
GPUd() bool empty() const
Definition Utils.h:69
unsigned int dim
Definition Utils.h:66
GPUd() ptr begin() const
Definition Utils.h:72
GPUd() ref operator[](unsigned int idx) const
Definition Utils.h:67
GPUd() gpuSpan()
Definition Utils.h:65
GPUd() ptr end() const
Definition Utils.h:73
GPUd() unsigned int size() const
Definition Utils.h:68
unsigned int _size
Definition Utils.h:77
GPUd() ref back() const
Definition Utils.h:71
GPUd() ref front() const
Definition Utils.h:70
bool operator==(const CoarseLocation &a, const CoarseLocation &b)
VectorOfTObjectPtrs other
std::vector< Cluster > clusters
std::vector< Tracklet64 > tracklets