Project
Loading...
Searching...
No Matches
Utils.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
15
16#ifndef ITSTRACKINGGPU_UTILS_H_
17#define ITSTRACKINGGPU_UTILS_H_
18
19#include <vector>
20#include <string>
21#include <tuple>
22
25
26#include "GPUCommonDef.h"
27#include "GPUCommonHelpers.h"
28#include "GPUCommonLogger.h"
29#include "GPUCommonDefAPI.h"
30
31#ifdef GPUCA_GPUCODE
32#include <thrust/device_ptr.h>
33#ifndef __HIPCC__
34#define THRUST_NAMESPACE thrust::cuda
35#else
36#define THRUST_NAMESPACE thrust::hip
37#endif
38#endif
39
// Optional verbose GPU logging, enabled at compile time via ITS_GPU_LOG.
// Each log call synchronizes the whole device so messages appear in order
// with respect to kernel execution (costly: debug builds only).
// NOTE(review): uses cudaDeviceSynchronize() unconditionally — confirm this
// path is only compiled with the CUDA backend (not HIP).
#ifdef ITS_GPU_LOG
#define GPULog(...) \
  do { \
    LOGP(info, __VA_ARGS__); \
    GPUChkErrS(cudaDeviceSynchronize()); \
  } while (0)
#else
// Logging disabled: the macro expands to nothing.
#define GPULog(...)
#endif
49
50namespace o2::its
51{
// FWD declarations — avoid pulling the full tracking headers into this utility header.
template <int>
class IndexTableUtils;
class Tracklet;

// Device-friendly alias for a pair of values.
template <typename T1, typename T2>
using gpuPair = std::pair<T1, T2>;
59
60namespace gpu
61{
62
63template <typename T>
64struct gpuSpan {
65 using value_type = T;
66 using ptr = T*;
67 using ref = T&;
68
69 GPUd() gpuSpan() : _data(nullptr), _size(0) {}
70 GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
71 GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
72 GPUd() unsigned int size() const { return _size; }
73 GPUd() bool empty() const { return _size == 0; }
74 GPUd() ref front() const { return _data[0]; }
75 GPUd() ref back() const { return _data[_size - 1]; }
76 GPUd() ptr begin() const { return _data; }
77 GPUd() ptr end() const { return _data + _size; }
78
79 protected:
81 unsigned int _size;
82};
83
84template <typename T>
85struct gpuSpan<const T> {
86 using value_type = T;
87 using ptr = const T*;
88 using ref = const T&;
89
90 GPUd() gpuSpan() : _data(nullptr), _size(0) {}
91 GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
93 GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
94 GPUd() unsigned int size() const { return _size; }
95 GPUd() bool empty() const { return _size == 0; }
96 GPUd() ref front() const { return _data[0]; }
97 GPUd() ref back() const { return _data[_size - 1]; }
98 GPUd() ptr begin() const { return _data; }
99 GPUd() ptr end() const { return _data + _size; }
100
101 protected:
103 unsigned int _size;
104};
105
// Abstract stream class: RAII wrapper around a CUDA/HIP stream handle plus
// an event used for inter-stream synchronization. Without a GPU backend the
// handle and event degenerate to inert void pointers and all ops are no-ops.
class Stream
{
 public:
#if defined(__HIPCC__)
  using Handle = hipStream_t;
  static constexpr Handle DefaultStream = 0;
  static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
  using Event = hipEvent_t;
#elif defined(__CUDACC__)
  using Handle = cudaStream_t;
  static constexpr Handle DefaultStream = 0;
  static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
  using Event = cudaEvent_t;
#else
  using Handle = void*;
  static constexpr Handle DefaultStream = nullptr;
  static constexpr unsigned int DefaultFlag = 0;
  using Event = void*;
#endif

  /// Create a new stream and its bookkeeping event with the given flags.
  Stream(unsigned int flags = DefaultFlag)
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
    GPUChkErrS(hipEventCreateWithFlags(&mEvent, hipEventDisableTiming));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
    GPUChkErrS(cudaEventCreateWithFlags(&mEvent, cudaEventDisableTiming));
#endif
  }

  /// Wrap an externally created handle.
  /// NOTE(review): no event is created here, yet the destructor destroys both
  /// handle and event when the handle is non-default — confirm wrapped
  /// handles are meant to be owned and that destroying a null event is OK.
  Stream(Handle h) : mHandle(h) {}

  /// Destructor signature restored (line lost in extraction): releases only
  /// streams we may own; the default stream is never destroyed.
  /// NOTE(review): copy operations are compiler-generated, so copying a
  /// Stream and destroying both copies would destroy the underlying stream
  /// twice — verify call sites (e.g. Streams::push_back/resize).
  ~Stream()
  {
    if (mHandle != DefaultStream) {
#if defined(__HIPCC__)
      GPUChkErrS(hipStreamDestroy(mHandle));
      GPUChkErrS(hipEventDestroy(mEvent));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaStreamDestroy(mHandle));
      GPUChkErrS(cudaEventDestroy(mEvent));
#endif
    }
  }

  /// True when this wraps a non-default stream.
  operator bool() const { return mHandle != DefaultStream; }
  const Handle& get() { return mHandle; }
  const Handle& getStream() { return mHandle; }
  const Event& getEvent() { return mEvent; }
  /// Block the host until all work queued on this stream has completed.
  void sync() const
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamSynchronize(mHandle));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamSynchronize(mHandle));
#endif
  }
  /// Record this stream's event at the current point in the stream.
  void record()
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipEventRecord(mEvent, mHandle));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaEventRecord(mEvent, mHandle));
#endif
  }

 private:
  Handle mHandle{DefaultStream};
  Event mEvent{nullptr};
};
177
178// Abstract vector for streams.
180{
181 public:
182 size_t size() const noexcept { return mStreams.size(); }
183 void resize(size_t n) { mStreams.resize(n); }
184 void clear() { mStreams.clear(); }
185 auto& operator[](size_t i) { return mStreams[i]; }
186 void push_back(const Stream& stream) { mStreams.push_back(stream); }
187 void sync(bool device = true)
188 {
189 if (device) {
190#if defined(__HIPCC__)
191 GPUChkErrS(hipDeviceSynchronize());
192#elif defined(__CUDACC__)
193 GPUChkErrS(cudaDeviceSynchronize());
194#endif
195 } else {
196 for (auto& s : mStreams) {
197 s.sync();
198 }
199 }
200 }
201 void waitEvent(size_t iStream, size_t iEvent)
202 {
203#if defined(__HIPCC__)
204 GPUChkErrS(hipStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
205#elif defined(__CUDACC__)
206 GPUChkErrS(cudaStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
207#endif
208 }
209
210 private:
211 std::vector<Stream> mStreams;
212};
213
#ifdef ITS_MEASURE_GPU_TIME
// Scoped timer: records a start event on every associated stream at
// construction; at destruction records a stop event, synchronizes it and
// logs the elapsed time per stream.
class GPUTimer
{
 public:
  /// Time work on the default stream.
  GPUTimer(const std::string& name)
    : mName(name)
  {
    mStreams.emplace_back(Stream::DefaultStream);
    startTimers();
  }
  /// Time every stream in `streams`.
  GPUTimer(Streams& streams, const std::string& name)
    : mName(name)
  {
    for (size_t i{0}; i < streams.size(); ++i) {
      mStreams.push_back(streams[i].get());
    }
    startTimers();
  }
  /// Time the half-open subset [start, end) of `streams`.
  GPUTimer(Streams& streams, const std::string& name, size_t end, size_t start = 0)
    : mName(name)
  {
    for (size_t sta{start}; sta < end; ++sta) {
      mStreams.push_back(streams[sta].get());
    }
    startTimers();
  }
  /// Time a single stream, tagging the report with `id`.
  GPUTimer(Stream& stream, const std::string& name, const int id = 0)
    : mName(name)
  {
    mStreams.push_back(stream.get());
    mName += ":id" + std::to_string(id);
    startTimers();
  }
  ~GPUTimer()
  {
    for (size_t i{0}; i < mStreams.size(); ++i) {
      float ms = 0.0f;
#if defined(__HIPCC__)
      GPUChkErrS(hipEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(hipEventSynchronize(mStops[i]));
      GPUChkErrS(hipEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(hipEventDestroy(mStarts[i]));
      GPUChkErrS(hipEventDestroy(mStops[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(cudaEventSynchronize(mStops[i]));
      GPUChkErrS(cudaEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(cudaEventDestroy(mStarts[i]));
      GPUChkErrS(cudaEventDestroy(mStops[i]));
#endif
      LOGP(info, "Elapsed time for {}:{} {} ms", mName, i, ms);
    }
  }

  /// Create and record a start event on every tracked stream.
  void startTimers()
  {
    mStarts.resize(mStreams.size());
    mStops.resize(mStreams.size());
    for (size_t i{0}; i < mStreams.size(); ++i) {
#if defined(__HIPCC__)
      GPUChkErrS(hipEventCreate(&mStarts[i]));
      GPUChkErrS(hipEventCreate(&mStops[i]));
      GPUChkErrS(hipEventRecord(mStarts[i], mStreams[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventCreate(&mStarts[i]));
      GPUChkErrS(cudaEventCreate(&mStops[i]));
      GPUChkErrS(cudaEventRecord(mStarts[i], mStreams[i]));
#endif
    }
  }

 private:
  std::string mName;
  std::vector<Stream::Event> mStarts, mStops;
  std::vector<Stream::Handle> mStreams;
};
#else // ITS_MEASURE_GPU_TIME not defined
// Inert stand-in used when timing is disabled: accepts any constructor
// arguments and does nothing, so call sites compile unchanged (class
// declaration restored — line lost in extraction).
class GPUTimer
{
 public:
  template <typename... Args>
  GPUTimer(Args&&...)
  {
  }
};
#endif
300
301#ifdef GPUCA_GPUCODE
302template <typename T>
303struct TypedAllocator {
304 using value_type = T;
305 using pointer = thrust::device_ptr<T>;
306 using const_pointer = thrust::device_ptr<const T>;
307 using size_type = std::size_t;
308 using difference_type = std::ptrdiff_t;
309
310 TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
311 explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}
312
313 template <typename U>
314 TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
315 {
316 }
317
318 pointer allocate(size_type n)
319 {
320 void* raw = mInternalAllocator->allocateStack(n * sizeof(T));
321 return thrust::device_pointer_cast(static_cast<T*>(raw));
322 }
323
324 void deallocate(pointer p, size_type n) noexcept
325 {
326 if (!p) {
327 return;
328 }
329 void* raw = thrust::raw_pointer_cast(p);
330 mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
331 }
332
333 bool operator==(TypedAllocator const& o) const noexcept
334 {
335 return mInternalAllocator == o.mInternalAllocator;
336 }
337 bool operator!=(TypedAllocator const& o) const noexcept
338 {
339 return !(*this == o);
340 }
341
342 private:
343 ExternalAllocator* mInternalAllocator;
344};
345
346GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
347 const int* roframesPV,
348 const int nROF,
349 const uint8_t* mask,
350 const Vertex* vertices)
351{
352 const int start_pv_id = roframesPV[rof];
353 const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
354 size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
355 return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
356};
357
358GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
359 const int romax,
360 const int* roframesPV,
361 const int nROF,
362 const Vertex* vertices)
363{
364 const int start_pv_id = roframesPV[romin];
365 const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
366 return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
367};
368
369GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
370 const int totROFs,
371 const int layer,
372 const int** roframesClus,
373 const Cluster** clusters)
374{
375 if (rof < 0 || rof >= totROFs) {
376 return gpuSpan<const Cluster>();
377 }
378 const int start_clus_id{roframesClus[layer][rof]};
379 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
380 const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
381 return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
382}
383
384GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
385 const int totROFs,
386 const int mode,
387 const int** roframesClus,
388 const Tracklet** tracklets)
389{
390 if (rof < 0 || rof >= totROFs) {
391 return gpuSpan<const Tracklet>();
392 }
393 const int start_clus_id{roframesClus[1][rof]};
394 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
395 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
396 return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
397}
398
399GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
400 const int totROFs,
401 const int mode,
402 const int** roframesClus,
403 int** ntracklets)
404{
405 if (rof < 0 || rof >= totROFs) {
406 return gpuSpan<int>();
407 }
408 const int start_clus_id{roframesClus[1][rof]};
409 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
410 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
411 return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
412}
413
414GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
415 const int totROFs,
416 const int mode,
417 const int** roframesClus,
418 const int** ntracklets)
419{
420 if (rof < 0 || rof >= totROFs) {
421 return gpuSpan<const int>();
422 }
423 const int start_clus_id{roframesClus[1][rof]};
424 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
425 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
426 return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
427}
428
429GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
430 const int totROFs,
431 const int** roframesClus,
432 int* nlines)
433{
434 if (rof < 0 || rof >= totROFs) {
435 return gpuSpan<int>();
436 }
437 const int start_clus_id{roframesClus[1][rof]};
438 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
439 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
440 return gpuSpan<int>(&(nlines[start_clus_id]), delta);
441}
442
443GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
444 const int totROFs,
445 const int** roframesClus,
446 const int* nlines)
447{
448 if (rof < 0 || rof >= totROFs) {
449 return gpuSpan<const int>();
450 }
451 const int start_clus_id{roframesClus[1][rof]};
452 const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
453 const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
454 return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
455}
456#endif
457} // namespace gpu
458} // namespace o2::its
459
460#endif
int32_t i
#define GPUdii()
#define GPUChkErrS(x)
o2::raw::RawFileWriter * raw
Class for time synchronization of RawReader instances.
HMPID cluster implementation.
Definition Cluster.h:27
GPUTimer(Args &&...)
Definition Utils.h:295
const Handle & getStream()
Definition Utils.h:154
void sync() const
Definition Utils.h:156
const Handle & get()
Definition Utils.h:153
static constexpr Handle DefaultStream
Definition Utils.h:122
static constexpr unsigned int DefaultFlag
Definition Utils.h:123
const Event & getEvent()
Definition Utils.h:155
Stream(unsigned int flags=DefaultFlag)
Definition Utils.h:127
Stream(Handle h)
Definition Utils.h:138
void resize(size_t n)
Definition Utils.h:183
void push_back(const Stream &stream)
Definition Utils.h:186
void waitEvent(size_t iStream, size_t iEvent)
Definition Utils.h:201
void sync(bool device=true)
Definition Utils.h:187
auto & operator[](size_t i)
Definition Utils.h:185
size_t size() const noexcept
Definition Utils.h:182
GLdouble n
Definition glcorearb.h:1982
GLenum mode
Definition glcorearb.h:266
GLenum void ** pointer
Definition glcorearb.h:805
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
GLuint const GLchar * name
Definition glcorearb.h:781
GLboolean * data
Definition glcorearb.h:298
GLbitfield flags
Definition glcorearb.h:1570
GLenum GLuint GLint GLint layer
Definition glcorearb.h:1310
GLuint start
Definition glcorearb.h:469
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLuint GLuint stream
Definition glcorearb.h:1806
GLint GLuint mask
Definition glcorearb.h:291
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pair< T1, T2 > gpuPair
Definition Utils.h:58
o2::dataformats::Vertex< o2::its::TimeEstBC > Vertex
Definition Vertex.h:26
bool operator!=(const DsChannelId &a, const DsChannelId &b)
Definition DsChannelId.h:66
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
void empty(int)
GPUd() gpuSpan(const gpuSpan< T > &other)
Definition Utils.h:92
GPUd() ref front() const
Definition Utils.h:96
GPUd() bool empty() const
Definition Utils.h:95
GPUd() ref operator[](unsigned int idx) const
Definition Utils.h:93
GPUd() ptr end() const
Definition Utils.h:99
GPUd() unsigned int size() const
Definition Utils.h:94
GPUd() ptr begin() const
Definition Utils.h:98
GPUd() ref back() const
Definition Utils.h:97
GPUd() bool empty() const
Definition Utils.h:73
unsigned int dim
Definition Utils.h:70
GPUd() ptr begin() const
Definition Utils.h:76
GPUd() ref operator[](unsigned int idx) const
Definition Utils.h:71
GPUd() gpuSpan()
Definition Utils.h:69
GPUd() ptr end() const
Definition Utils.h:77
GPUd() unsigned int size() const
Definition Utils.h:72
unsigned int _size
Definition Utils.h:81
GPUd() ref back() const
Definition Utils.h:75
GPUd() ref front() const
Definition Utils.h:74
bool operator==(const CoarseLocation &a, const CoarseLocation &b)
VectorOfTObjectPtrs other
std::vector< Cluster > clusters
std::vector< Tracklet64 > tracklets