#ifndef ITSTRACKINGGPU_UTILS_H_
#define ITSTRACKINGGPU_UTILS_H_

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

#include <thrust/device_ptr.h>

#if defined(__CUDACC__)
#define THRUST_NAMESPACE thrust::cuda
#elif defined(__HIPCC__)
#define THRUST_NAMESPACE thrust::hip
#endif

#define GPULog(...) LOGP(info, __VA_ARGS__)
using Vertex = o2::dataformats::Vertex<o2::dataformats::TimeStamp<int>>;

template <typename T1, typename T2>
using gpuPair = std::pair<T1, T2>;

// Minimal non-owning span usable in device code
template <typename T>
struct gpuSpan {
  using value_type = T;
  using ptr = T*;
  using ref = T&;

  GPUd() gpuSpan() : _data(nullptr), _size(0) {}
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
  GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return _size == 0; }

  ptr _data;
  unsigned int _size;
};

// Read-only specialisation, also constructible from a mutable span
template <typename T>
struct gpuSpan<const T> {
  using value_type = T;
  using ptr = const T*;
  using ref = const T&;

  GPUd() gpuSpan() : _data(nullptr), _size(0) {}
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
  GPUd() gpuSpan(const gpuSpan<T>& other) : _data(other._data), _size(other._size) {}
  GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return _size == 0; }

  ptr _data;
  unsigned int _size;
};
 
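// Usage sketch (illustrative, not part of the original header): viewing a raw
// device buffer through gpuSpan inside a kernel. `exampleKernel`, `buffer` and
// `nElems` are hypothetical names.
//
//   __global__ void exampleKernel(const float* buffer, unsigned int nElems)
//   {
//     gpuSpan<const float> view{buffer, nElems};  // non-owning view
//     if (threadIdx.x < view.size()) {
//       const float v = view[threadIdx.x];        // no bounds checking is done
//       (void)v;
//     }
//   }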
class Stream
{
 public:
#if defined(__HIPCC__)
  using Handle = hipStream_t;
  static constexpr Handle DefaultStream{nullptr};
  static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
  using Event = hipEvent_t;
#elif defined(__CUDACC__)
  using Handle = cudaStream_t;
  static constexpr Handle DefaultStream{nullptr};
  static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
  using Event = cudaEvent_t;
#endif
 
  Stream(unsigned int flags = DefaultFlag)
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamCreateWithFlags(&mStream, flags));
    GPUChkErrS(hipEventCreateWithFlags(&mEvent, hipEventDisableTiming));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamCreateWithFlags(&mStream, flags));
    GPUChkErrS(cudaEventCreateWithFlags(&mEvent, cudaEventDisableTiming));
#endif
  }

  ~Stream()
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipEventDestroy(mEvent));
    GPUChkErrS(hipStreamDestroy(mStream));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaEventDestroy(mEvent));
    GPUChkErrS(cudaStreamDestroy(mStream));
#endif
  }

  const Handle& get() const { return mStream; }
  Event getEvent() const { return mEvent; }

  // Block the host until all work queued on this stream has completed
  void sync() const
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamSynchronize(mStream));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamSynchronize(mStream));
#endif
  }

  // Record the stream's event at the current point of execution
  void recordEvent()
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipEventRecord(mEvent, mStream));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaEventRecord(mEvent, mStream));
#endif
  }

 private:
  Handle mStream{nullptr};
  Event mEvent{nullptr};
};
 
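// Usage sketch (illustrative, not part of the original header): launching a
// kernel on a dedicated stream and synchronising. `someKernel`, `grid` and
// `block` are hypothetical names; method names follow the class as
// reconstructed above.
//
//   Stream stream;                                        // non-blocking stream
//   someKernel<<<grid, block, 0, stream.get()>>>(/* args */);
//   stream.recordEvent();                                 // mark completion point
//   stream.sync();                                        // host waits for the stream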
class Streams
{
 public:
  size_t size() const noexcept { return mStreams.size(); }
  auto& operator[](size_t i) { return mStreams[i]; }
  void push_back(const Stream& stream) { mStreams.push_back(stream); }

  // Synchronise either the whole device or each managed stream in turn
  void sync(bool device = true)
  {
    if (device) {
#if defined(__HIPCC__)
      GPUChkErrS(hipDeviceSynchronize());
#elif defined(__CUDACC__)
      GPUChkErrS(cudaDeviceSynchronize());
#endif
    } else {
      for (auto& s : mStreams) {
        s.sync();
      }
    }
  }

  // Make stream iStream wait on the event recorded by stream iEvent
  void waitEvent(size_t iStream, size_t iEvent)
  {
#if defined(__HIPCC__)
    GPUChkErrS(hipStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
#elif defined(__CUDACC__)
    GPUChkErrS(cudaStreamWaitEvent(mStreams[iStream].get(), mStreams[iEvent].getEvent()));
#endif
  }

 private:
  std::vector<Stream> mStreams;
};
 
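// Usage sketch (illustrative, not part of the original header): serialising two
// streams through an event. `producer`, `consumer`, `grid` and `block` are
// hypothetical names.
//
//   Streams streams;
//   streams.push_back(Stream{});
//   streams.push_back(Stream{});
//   producer<<<grid, block, 0, streams[0].get()>>>(/* args */);
//   streams[0].recordEvent();  // event marks the end of the producer's work
//   streams.waitEvent(1, 0);   // stream 1 waits for stream 0's event
//   consumer<<<grid, block, 0, streams[1].get()>>>(/* args */);
//   streams.sync();            // full device synchronisation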
#ifdef ITS_MEASURE_GPU_TIME
// RAII timer: records a start event per stream on construction and, on
// destruction, records and synchronises a stop event per stream and logs the
// elapsed times.
class GPUTimer
{
 public:
  GPUTimer(const std::string& name) : mName(name)
  {
    mStreams.push_back(Stream::DefaultStream);
    init();
  }

  GPUTimer(Streams& streams, const std::string& name) : mName(name)
  {
    for (size_t i{0}; i < streams.size(); ++i) {
      mStreams.push_back(streams[i].get());
    }
    init();
  }

  GPUTimer(Streams& streams, const std::string& name, size_t start, size_t end) : mName(name)
  {
    for (size_t sta{start}; sta < end; ++sta) {
      mStreams.push_back(streams[sta].get());
    }
    init();
  }

  GPUTimer(Stream& stream, const std::string& name) : mName(name)
  {
    mStreams.push_back(stream.get());
    init();
  }

  ~GPUTimer()
  {
    for (size_t i{0}; i < mStreams.size(); ++i) {
      float ms{0.f};
#if defined(__HIPCC__)
      GPUChkErrS(hipEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(hipEventSynchronize(mStops[i]));
      GPUChkErrS(hipEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(hipEventDestroy(mStarts[i]));
      GPUChkErrS(hipEventDestroy(mStops[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventRecord(mStops[i], mStreams[i]));
      GPUChkErrS(cudaEventSynchronize(mStops[i]));
      GPUChkErrS(cudaEventElapsedTime(&ms, mStarts[i], mStops[i]));
      GPUChkErrS(cudaEventDestroy(mStarts[i]));
      GPUChkErrS(cudaEventDestroy(mStops[i]));
#endif
      LOGP(info, "Elapsed time for {}:{} {} ms", mName, i, ms);
    }
  }

 private:
  void init()
  {
    mStarts.resize(mStreams.size());
    mStops.resize(mStreams.size());
    for (size_t i{0}; i < mStreams.size(); ++i) {
#if defined(__HIPCC__)
      GPUChkErrS(hipEventCreate(&mStarts[i]));
      GPUChkErrS(hipEventCreate(&mStops[i]));
      GPUChkErrS(hipEventRecord(mStarts[i], mStreams[i]));
#elif defined(__CUDACC__)
      GPUChkErrS(cudaEventCreate(&mStarts[i]));
      GPUChkErrS(cudaEventCreate(&mStops[i]));
      GPUChkErrS(cudaEventRecord(mStarts[i], mStreams[i]));
#endif
    }
  }

  std::string mName;
  std::vector<Stream::Event> mStarts, mStops;
  std::vector<Stream::Handle> mStreams;
};
#else
// No-op stand-in when timing is disabled: accepts and ignores any arguments
class GPUTimer
{
 public:
  template <typename... Args>
  GPUTimer(Args&&...) {}
};
#endif
 
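// Usage sketch (illustrative, not part of the original header): scoped timing
// of kernels submitted to a populated Streams instance `streams`;
// `launchKernels` is a hypothetical helper. With ITS_MEASURE_GPU_TIME
// undefined the timer compiles to a no-op.
//
//   {
//     GPUTimer timer(streams, "tracklet-finder");  // records start events
//     launchKernels(streams);
//   }  // destructor records stop events, synchronises and logs elapsed times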
template <typename T>
struct TypedAllocator {
  using value_type = T;
  using pointer = thrust::device_ptr<T>;
  using const_pointer = thrust::device_ptr<const T>;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
  explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}

  template <typename U>
  TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator) {}

  pointer allocate(size_type n)
  {
    void* raw = mInternalAllocator->allocate(n * sizeof(T));
    return thrust::device_pointer_cast(static_cast<T*>(raw));
  }

  void deallocate(pointer p, size_type n) noexcept
  {
    void* raw = thrust::raw_pointer_cast(p);
    mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
  }

  bool operator==(TypedAllocator const& o) const noexcept
  {
    return mInternalAllocator == o.mInternalAllocator;
  }

  bool operator!=(TypedAllocator const& o) const noexcept
  {
    return !(*this == o);
  }

  ExternalAllocator* mInternalAllocator;
};
 
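// Usage sketch (illustrative, not part of the original header): backing a
// thrust::device_vector with an externally managed memory pool.
// `getExternalAllocator` is a hypothetical factory, and the allocator-taking
// vector constructor requires a reasonably recent Thrust.
//
//   ExternalAllocator* external = getExternalAllocator();  // hypothetical
//   TypedAllocator<int> alloc{external};
//   thrust::device_vector<int, TypedAllocator<int>> buffer(1024, alloc);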
template <int nLayers>
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
                                const o2::its::IndexTableUtils<nLayers>* utils,
                                const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
{
  const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
  const float phiRangeMin = currentCluster.phi - maxdeltaphi;
  const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
  const float phiRangeMax = currentCluster.phi + maxdeltaphi;

  if (zRangeMax < -utils->getLayerZ(layerIndex) ||
      zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
    return int4{0, 0, 0, 0}; // empty bins rectangle
  }

  return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
              o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
              utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
}
 
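// Usage sketch (illustrative, not part of the original header): iterating the
// returned rectangle. Per the return statement above, x/z bound the z bins and
// y/w the phi bins; `cluster`, `utils` and the loop body are hypothetical.
//
//   const int4 rect = getBinsRect(cluster, layer, utils, zMin, zMax, dz, dphi);
//   for (int zBin{rect.x}; zBin <= rect.z; ++zBin) {
//     // phi bins run from rect.y to rect.w and may wrap around the 2*pi boundary
//   }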
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
                                                  const int* roframesPV,
                                                  const int nROF,
                                                  const uint8_t* mask,
                                                  const Vertex* vertices)
{
  const int start_pv_id = roframesPV[rof];
  const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
  size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // empty span for masked-out ROFs
  return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
}
 
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
                                                  const int romax,
                                                  const int* roframesPV,
                                                  const int nROF,
                                                  const Vertex* vertices)
{
  const int start_pv_id = roframesPV[romin];
  const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
  return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
}
 
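// Usage sketch (illustrative, not part of the original header): roframesPV is
// an offsets table mapping each readout frame to its first vertex, so the span
// for ROF r covers [roframesPV[r], roframesPV[r+1]). Names are hypothetical.
//
//   gpuSpan<const Vertex> pvs = getPrimaryVertices(rof, roframesPV, nROF, mask, vertices);
//   for (unsigned int i{0}; i < pvs.size(); ++i) {
//     const Vertex& v = pvs[i];  // vertices of this readout frame only
//   }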
GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
                                                   const int totROFs,
                                                   const int layer,
                                                   const int** roframesClus,
                                                   const Cluster** clusters)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const Cluster>();
  }
  const int start_clus_id{roframesClus[layer][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
  return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
}
 
GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
                                                        const int totROFs,
                                                        const int mode,
                                                        const int** roframesClus,
                                                        const Tracklet** tracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const Tracklet>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
}
 
GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
                                              const int totROFs,
                                              const int mode,
                                              const int** roframesClus,
                                              int** ntracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
}
 
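// Usage sketch (illustrative, not part of the original header): the getters
// above all slice flat per-ROF arrays through the roframesClus offsets tables
// (the hard-coded index 1 in their bodies selects layer 1's offsets). Names
// are hypothetical.
//
//   auto clus   = getClustersOnLayer(rof, totROFs, layer, roframesClus, clusters);
//   auto counts = getNTrackletsPerCluster(rof, totROFs, mode, roframesClus, ntracklets);
//   for (unsigned int i{0}; i < counts.size(); ++i) {
//     const int n = counts[i];  // tracklets attached to cluster i of this ROF
//   }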
GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
                                                    const int totROFs,
                                                    const int mode,
                                                    const int** roframesClus,
                                                    const int** ntracklets)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
}
 
GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
                                          const int totROFs,
                                          const int** roframesClus,
                                          int* nlines)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<int>(&(nlines[start_clus_id]), delta);
}
 
GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
                                                const int totROFs,
                                                const int** roframesClus,
                                                const int* nlines)
{
  if (rof < 0 || rof >= totROFs) {
    return gpuSpan<const int>();
  }
  const int start_clus_id{roframesClus[1][rof]};
  const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
  const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
  return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
}

#endif // ITSTRACKINGGPU_UTILS_H_
 