#ifndef ITSTRACKINGGPU_UTILS_H_
#define ITSTRACKINGGPU_UTILS_H_

#include <thrust/device_ptr.h>
#if defined(__CUDACC__)
#define THRUST_NAMESPACE thrust::cuda
#elif defined(__HIPCC__)
#define THRUST_NAMESPACE thrust::hip
#endif
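// THRUST_NAMESPACE lets call sites pick the backend-specific Thrust execution
// policy without per-call #ifdefs. Illustrative sketch (not part of the
// original header); `stream`, `keys`, and `n` are hypothetical:
//
//   thrust::device_ptr<int> keys = /* device buffer */;
//   thrust::sort(THRUST_NAMESPACE::par.on(stream), keys, keys + n);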
#define GPULog(...) LOGP(info, __VA_ARGS__)
template <typename T1, typename T2>
using gpuPair = std::pair<T1, T2>;
// gpuSpan<T>: unchecked element access, usable in host and device code
GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
// gpuSpan<const T> (const specialization): unchecked element access
GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
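// Illustrative sketch (not part of the original header): gpuSpan is a
// non-owning view over device memory. Assumes the usual pointer+size
// constructor; `devBuffer` and `n` are hypothetical:
//
//   gpuSpan<const float> view{devBuffer, n};
//   if (!view.empty()) {
//     const float first = view[0]; // unchecked: bounds are the caller's responsibility
//   }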
class Stream
{
 public:
#if defined(__HIPCC__)
  using Handle = hipStream_t;
  static constexpr Handle DefaultStream = nullptr;
  static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
  using Event = hipEvent_t;
#elif defined(__CUDACC__)
  using Handle = cudaStream_t;
  static constexpr Handle DefaultStream = nullptr;
  static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
  using Event = cudaEvent_t;
#endif
#if defined(__HIPCC__)
    GPUChkErrS(hipEventCreateWithFlags(&mEvent, hipEventDisableTiming));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaEventCreateWithFlags(&mEvent, cudaEventDisableTiming));
#endif
#if defined(__HIPCC__)
    // ...
#elif defined(__CUDACC__)
    // ...
#endif
#if defined(__HIPCC__)
    // ...
#elif defined(__CUDACC__)
    // ...
#endif

#if defined(__HIPCC__)
    // ...
#elif defined(__CUDACC__)
    // ...
#endif
  Event mEvent{nullptr};
};
class Streams
{
 public:
  size_t size() const noexcept { return mStreams.size(); }
#if defined(__HIPCC__)
    // ...
#elif defined(__CUDACC__)
    // ...
#endif
    for (auto& s : mStreams) {
      // ...
    }
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
#endif
  std::vector<Stream> mStreams;
};
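// Illustrative sketch (not part of the original header): ordering work across
// two streams of the pool, assuming the per-stream event has been recorded on
// stream 0 before waitEvent is called. `launchProducer` and `launchConsumer`
// are hypothetical:
//
//   Streams streams;
//   streams.push_back(Stream{});
//   streams.push_back(Stream{});
//   launchProducer(streams[0].get());
//   streams.waitEvent(1, 0); // stream 1 waits on the event of stream 0
//   launchConsumer(streams[1].get());
//   streams.sync();          // sync(bool device = true): default syncs the whole device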
#ifdef ITS_MEASURE_GPU_TIME
class GPUTimer
{
 public:
  GPUTimer(const std::string& name) : mName(name)
  {
    // ...
  }
  GPUTimer(Streams& streams, const std::string& name) : mName(name)
  {
    for (size_t i{0}; i < streams.size(); ++i) {
      mStreams.push_back(streams[i].get());
    }
    // ...
  }
    // (constructor over the sub-range [start, end) of a stream pool; signature elided in this listing)
    for (size_t sta{start}; sta < end; ++sta) {
      mStreams.push_back(streams[sta].get());
    }
    mStreams.push_back(stream.get());
    // (reporting loop; enclosing function, presumably the destructor, elided in this listing)
    for (size_t i{0}; i < mStreams.size(); ++i) {
      float ms;
#if defined(__HIPCC__)
      // ...
      GPUChkErrS(hipEventElapsedTime(&ms, mStarts[i], mStops[i]));
      // ...
#elif defined(__CUDACC__)
      // ...
      GPUChkErrS(cudaEventElapsedTime(&ms, mStarts[i], mStops[i]));
      // ...
#endif
      LOGP(info, "Elapsed time for {}:{} {} ms", mName, i, ms);
    }
    // (timer (re)start; enclosing method signature elided in this listing)
    mStarts.resize(mStreams.size());
    mStops.resize(mStreams.size());
    for (size_t i{0}; i < mStreams.size(); ++i) {
#if defined(__HIPCC__)
      // ...
#elif defined(__CUDACC__)
      // ...
#endif
    }
  std::vector<Stream::Event> mStarts, mStops;
  std::vector<Stream::Handle> mStreams;
  std::string mName;
};
#endif // ITS_MEASURE_GPU_TIME
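// Illustrative sketch (not part of the original header): GPUTimer records
// per-stream start/stop events and reports elapsed times through LOGP when it
// goes out of scope; it is only compiled when ITS_MEASURE_GPU_TIME is defined.
// `launchKernels` is hypothetical:
//
//   {
//     GPUTimer timer(streams, "tracklet-finding");
//     launchKernels(streams);
//   } // ~GPUTimer: logs "Elapsed time for tracklet-finding:<i> <ms> ms" per stream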
template <typename... Args>
// ... (templated helper elided in this listing)
template <typename T>
struct TypedAllocator {
  using value_type = T;
  using pointer = thrust::device_ptr<T>;
  using const_pointer = thrust::device_ptr<const T>;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
  explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}
  template <typename U>
  TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
  {
  }

  pointer allocate(size_type n)
  {
    void* raw = mInternalAllocator->allocate(n * sizeof(T));
    return thrust::device_pointer_cast(static_cast<T*>(raw));
  }
  void deallocate(pointer p, size_type n) noexcept
  {
    void* raw = thrust::raw_pointer_cast(p);
    mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
  }
  bool operator==(TypedAllocator const& o) const noexcept
  {
    return mInternalAllocator == o.mInternalAllocator;
  }
  bool operator!=(TypedAllocator const& o) const noexcept
  {
    return !(*this == o);
  }
  ExternalAllocator* mInternalAllocator;
};
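// Illustrative sketch (not part of the original header): because
// TypedAllocator models the Thrust allocator requirements, it can back Thrust
// containers while routing raw allocations through the framework's
// ExternalAllocator. Assumes an allocator-aware Thrust (>= 1.9); `extAlloc`
// is hypothetical:
//
//   ExternalAllocator* extAlloc = /* provided by the framework */;
//   TypedAllocator<int> alloc{extAlloc};
//   thrust::device_vector<int, TypedAllocator<int>> buffer(1024, alloc);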
template <int nLayers>
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
                                const o2::its::IndexTableUtils<nLayers>* utils,
                                const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
{
  const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
  const float phiRangeMin = currentCluster.phi - maxdeltaphi; // (phi-window bound; exact clamping elided in this listing)
  const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
  const float phiRangeMax = currentCluster.phi + maxdeltaphi; // (phi-window bound; exact clamping elided in this listing)

  if (zRangeMax < -utils->getLayerZ(layerIndex) ||
      zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
    // ... (out of acceptance: return an empty bins rectangle)
  }

  return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
              o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
}
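// The returned int4 packs {zBinMin, phiBinMin, zBinMax, phiBinMax}.
// Illustrative iteration sketch (not part of the original header): phi wraps
// around, so the phi loop runs modulo the number of phi bins
// (utils->getNphiBins() is assumed here):
//
//   const int4 rect = getBinsRect(cluster, iLayer, utils, z1, z2, dz, dphi);
//   for (int zBin{rect.x}; zBin <= rect.z; ++zBin) {
//     int phiBin{rect.y};
//     do {
//       // visit bin (zBin, phiBin)
//       phiBin = (phiBin + 1) % utils->getNphiBins();
//     } while (phiBin != (rect.w + 1) % utils->getNphiBins());
//   }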
// (here and in the getters below, parameter lists dropped by the extraction
// are reconstructed from the function bodies)
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
                                                  const int* roframesPV,
                                                  const int nROF,
                                                  const unsigned char* mask,
                                                  const Vertex* vertices)
{
  const int start_pv_id = roframesPV[rof];
  const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
  size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0;
  return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
}
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
                                                  const int romax,
                                                  const int* roframesPV,
                                                  const int nROF,
                                                  const Vertex* vertices)
{
  const int start_pv_id = roframesPV[romin];
  const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
  return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
}
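// Illustrative device-side sketch (not part of the original header): fetching
// the vertices of one readout frame inside a kernel. Buffer names are
// hypothetical and the parameter types follow the reconstruction above:
//
//   GPUg() void processVerticesKernel(const int nROF, const int* pvROFrames,
//                                     const unsigned char* pvMask, const Vertex* pvs)
//   {
//     for (int rof{static_cast<int>(blockIdx.x)}; rof < nROF; rof += gridDim.x) {
//       const auto pvSpan = getPrimaryVertices(rof, pvROFrames, nROF, pvMask, pvs);
//       for (unsigned int iV{threadIdx.x}; iV < pvSpan.size(); iV += blockDim.x) {
//         const Vertex& vtx = pvSpan[iV];
//         // ...
//       }
//     }
//   }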
GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
                                                   const int totROFs,
                                                   const int layer,
                                                   const int** roframesClus,
                                                   const Cluster** clusters)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const Cluster>();
  }
  const int start_clus_id{roframesClus[layer][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
  return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
}
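// Illustrative sketch (not part of the original header): the getters below
// all share this exclusive-prefix-offset pattern, e.g. iterating one layer's
// clusters in a given readout frame:
//
//   const auto rofClusters = getClustersOnLayer(rof, totROFs, layer, roframesClus, clusters);
//   for (unsigned int iC{0}; iC < rofClusters.size(); ++iC) {
//     const Cluster& cluster = rofClusters[iC];
//     // ...
//   }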
GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
                                                        const int totROFs,
                                                        const int mode,
                                                        const int** roframesClus,
                                                        const Tracklet** tracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const Tracklet>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
}
GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
                                              const int totROFs,
                                              const int mode,
                                              const int** roframesClus,
                                              int** ntracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
}
GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
                                                    const int totROFs,
                                                    const int mode,
                                                    const int** roframesClus,
                                                    const int** ntracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
}
GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
                                          const int totROFs,
                                          const int** roframesClus,
                                          int* nlines)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<int>(&(nlines[start_clus_id]), delta);
}
GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
                                                const int totROFs,
                                                const int** roframesClus,
                                                const int* nlines)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
}

#endif // ITSTRACKINGGPU_UTILS_H_