Project
Loading...
Searching...
No Matches
TimeFrameGPU.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
14#define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
15
16#include <gsl/gsl>
17#include <bitset>
18
23
24namespace o2::its::gpu
25{
26
27template <int nLayers = 7>
28class TimeFrameGPU final : public TimeFrame<nLayers>
29{
30 using typename TimeFrame<nLayers>::CellSeedN;
32
33 public:
34 TimeFrameGPU() = default;
35 ~TimeFrameGPU() = default;
36
38 void pushMemoryStack(const int);
39 void popMemoryStack(const int);
40 void registerHostMemory(const int);
41 void unregisterHostMemory(const int);
42 void initialise(const int, const TrackingParameters&, const int, IndexTableUtilsN* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
43 void initDevice(IndexTableUtilsN*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
45 void loadIndexTableUtils(const int);
46 void loadTrackingFrameInfoDevice(const int, const int);
48 void loadUnsortedClustersDevice(const int, const int);
49 void createUnsortedClustersDeviceArray(const int, const int = nLayers);
50 void loadClustersDevice(const int, const int);
51 void createClustersDeviceArray(const int, const int = nLayers);
52 void loadClustersIndexTables(const int, const int);
54 void createUsedClustersDevice(const int, const int);
55 void createUsedClustersDeviceArray(const int, const int = nLayers);
57 void loadROFrameClustersDevice(const int, const int);
59 void loadMultiplicityCutMask(const int);
60 void loadVertices(const int);
61
63 void createTrackletsLUTDevice(const int, const int);
73 void createTrackletsBuffers(const int);
75 void createCellsBuffers(const int);
76 void createCellsBuffersArray(const int);
78 void createCellsLUTDevice(const int);
81 void createNeighboursDevice(const unsigned int layer);
82 void createNeighboursLUTDevice(const int, const unsigned int);
85 void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
89
91 void createVtxTrackletsLUTDevice(const int32_t);
92 void createVtxTrackletsBuffers(const int32_t);
93 void createVtxLinesLUTDevice(const int32_t);
94 void createVtxLinesBuffer(const int32_t);
95
97 auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
98 auto& getStreams() { return mGpuStreams; }
99 void syncStream(const size_t stream);
100 void syncStreams(const bool = true);
101 void waitEvent(const int, const int);
102 void recordEvent(const int);
103 void recordEvents(const int = 0, const int = nLayers);
104
106 virtual void wipe() final;
107
109 virtual bool isGPU() const noexcept final { return true; }
110 virtual const char* getName() const noexcept { return "GPU"; }
111 int getNClustersInRofSpan(const int, const int, const int) const;
112 IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
113 int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
114 auto& getTrackITSExt() { return mTrackITSExt; }
115 Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
116 int* getDeviceROFramesPV() { return mROFramesPVDevice; }
117 unsigned char* getDeviceUsedClusters(const int);
119
120 // Hybrid
121 Road<nLayers - 2>* getDeviceRoads() { return mRoadsDevice; }
122 TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
123 int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
124 gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
125 gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
126 std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
127 int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
128 int** getDeviceNeighboursArray() { return mNeighboursDevice.data(); }
130 const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
131 const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
132 const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
133 const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
134 std::vector<unsigned int> getClusterSizes();
135 uint8_t** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
136 const int** getDeviceROFrameClusters() const { return mROFramesClustersDeviceArray; }
137 Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
138 int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
139 int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
140 int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
141 CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
142 CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
143 o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
144 float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
145 int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
146 uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
147
148 // Vertexer
149 auto& getDeviceNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDevice; }
150 auto& getDeviceNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDevice; }
151 auto& getDeviceNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDevice; }
152 int32_t** getDeviceArrayNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDeviceArray; }
153 int32_t** getDeviceArrayNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDeviceArray; }
154 int32_t** getDeviceArrayNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDeviceArray; }
155 uint8_t* getDeviceUsedTracklets() const noexcept { return mUsedTrackletsDevice; }
156 int32_t* getDeviceNLinesPerCluster() const noexcept { return mNLinesPerClusterDevice; }
157 int32_t* getDeviceNLinesPerClusterSum() const noexcept { return mNLinesPerClusterSumDevice; }
158 Line* getDeviceLines() const noexcept { return mLinesDevice; }
159 gsl::span<int*> getDeviceTrackletsPerROFs() { return mNTrackletsPerROFDevice; }
160
162
163 // Host-specific getters
164 gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
165 gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
166 auto& getArrayNCells() { return mNCells; }
167 gsl::span<int, nLayers - 3> getNNeighbours() { return mNNeighbours; }
168 auto& getArrayNNeighbours() { return mNNeighbours; }
169
170 // Host-available device getters
171 gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
172 gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
173 gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
174 gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }
175
176 // Overridden getters
177 int getNumberOfTracklets() const final;
178 int getNumberOfCells() const final;
179 int getNumberOfNeighbours() const final;
180
181 private:
182 void allocMemAsync(void**, size_t, Stream&, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on specific stream
183 void allocMem(void**, size_t, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on default stream
184 TimeFrameGPUParameters mGpuParams;
185
186 // Host-available device buffer sizes
187 std::array<int, nLayers - 1> mNTracklets;
188 std::array<int, nLayers - 2> mNCells;
189 std::array<int, nLayers - 3> mNNeighbours;
190
191 // Device pointers
192 IndexTableUtilsN* mIndexTableUtilsDevice;
193
194 // Hybrid pref
195 uint8_t* mMultMaskDevice;
196 Vertex* mPrimaryVerticesDevice;
197 int* mROFramesPVDevice;
198 std::array<Cluster*, nLayers> mClustersDevice;
199 std::array<Cluster*, nLayers> mUnsortedClustersDevice;
200 std::array<int*, nLayers> mClustersIndexTablesDevice;
201 std::array<unsigned char*, nLayers> mUsedClustersDevice;
202 std::array<int*, nLayers> mROFramesClustersDevice;
203 const Cluster** mClustersDeviceArray;
204 const Cluster** mUnsortedClustersDeviceArray;
205 const int** mClustersIndexTablesDeviceArray;
206 uint8_t** mUsedClustersDeviceArray;
207 const int** mROFramesClustersDeviceArray;
208 std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
209 std::array<int*, nLayers - 1> mTrackletsLUTDevice;
210 std::array<int*, nLayers - 2> mCellsLUTDevice;
211 std::array<int*, nLayers - 3> mNeighboursLUTDevice;
212
213 Tracklet** mTrackletsDeviceArray{nullptr};
214 int** mCellsLUTDeviceArray{nullptr};
215 int** mNeighboursCellDeviceArray{nullptr};
216 int** mNeighboursCellLUTDeviceArray{nullptr};
217 int** mTrackletsLUTDeviceArray{nullptr};
218 std::array<CellSeedN*, nLayers - 2> mCellsDevice;
219 CellSeedN** mCellsDeviceArray;
220 std::array<int*, nLayers - 3> mNeighboursIndexTablesDevice;
221 CellSeedN* mTrackSeedsDevice{nullptr};
222 std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
223 o2::track::TrackParCovF** mCellSeedsDeviceArray;
224 std::array<float*, nLayers - 2> mCellSeedsChi2Device;
225 float** mCellSeedsChi2DeviceArray;
226
227 Road<nLayers - 2>* mRoadsDevice;
228 TrackITSExt* mTrackITSExtDevice;
229 std::array<gpuPair<int, int>*, nLayers - 2> mNeighbourPairsDevice;
230 std::array<int*, nLayers - 2> mNeighboursDevice;
231 std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
232 const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
233
235 std::array<int32_t*, 2> mNTrackletsPerROFDevice;
236 std::array<int32_t*, 2> mNTrackletsPerClusterDevice;
237 std::array<int32_t*, 2> mNTrackletsPerClusterSumDevice;
238 uint8_t* mUsedTrackletsDevice;
239 int32_t* mNLinesPerClusterDevice;
240 int32_t* mNLinesPerClusterSumDevice;
241 int32_t** mNTrackletsPerROFDeviceArray;
242 int32_t** mNTrackletsPerClusterDeviceArray;
243 int32_t** mNTrackletsPerClusterSumDeviceArray;
244 Line* mLinesDevice;
245
246 // State
247 Streams mGpuStreams;
248 std::bitset<nLayers + 1> mPinnedUnsortedClusters{0};
249 std::bitset<nLayers + 1> mPinnedClusters{0};
250 std::bitset<nLayers + 1> mPinnedClustersIndexTables{0};
251 std::bitset<nLayers + 1> mPinnedUsedClusters{0};
252 std::bitset<nLayers + 1> mPinnedROFramesClusters{0};
253 std::bitset<nLayers + 1> mPinnedTrackingFrameInfo{0};
254
255 // Temporary buffer for storing output tracks from GPU tracking
256 bounded_vector<TrackITSExt> mTrackITSExt;
257};
258
259template <int nLayers>
260inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, const int rofSpanSize, const int layerId) const
261{
262 return static_cast<int>(this->mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < this->mROFramesClusters.size() ? rofIdstart + rofSpanSize : this->mROFramesClusters.size() - 1] - this->mROFramesClusters[layerId][rofIdstart]);
263}
264
265template <int nLayers>
266inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
267{
268 std::vector<unsigned int> sizes(this->mUnsortedClusters.size());
269 std::transform(this->mUnsortedClusters.begin(), this->mUnsortedClusters.end(), sizes.begin(),
270 [](const auto& v) { return static_cast<unsigned int>(v.size()); });
271 return sizes;
272}
273
274template <int nLayers>
276{
277 return std::accumulate(mNTracklets.begin(), mNTracklets.end(), 0);
278}
279
280template <int nLayers>
282{
283 return std::accumulate(mNCells.begin(), mNCells.end(), 0);
284}
285
286template <int nLayers>
288{
289 return std::accumulate(mNNeighbours.begin(), mNNeighbours.end(), 0);
290}
291
292} // namespace o2::its::gpu
293
294#endif
const TrackingFrameInfo ** getDeviceArrayTrackingFrameInfo() const
gsl::span< int * > getDeviceCellLUTs()
uint8_t * getDeviceUsedTracklets() const noexcept
void initialise(const int, const TrackingParameters &, const int, IndexTableUtilsN *utils=nullptr, const TimeFrameGPUParameters *pars=nullptr)
gsl::span< CellSeedN * > getDeviceCells()
void createCellsBuffersArray(const int)
void loadTrackSeedsDevice(bounded_vector< CellSeedN > &)
const int ** getDeviceArrayClustersIndexTables() const
auto & getDeviceNTrackletsPerROF() const noexcept
auto & getDeviceNTrackletsPerClusterSum() const noexcept
virtual const char * getName() const noexcept
auto & getDeviceNTrackletsPerCluster() const noexcept
const Cluster ** getDeviceArrayUnsortedClusters() const
virtual bool isGPU() const noexcept final
interface
int * getDeviceNeighbours(const int layer)
int * getDeviceNeighboursIndexTables(const int layer)
void loadIndexTableUtils(const int)
virtual void wipe() final
cleanup
const int ** getDeviceROFrameClusters() const
float ** getDeviceArrayTrackSeedsChi2()
void createNeighboursLUTDevice(const int, const unsigned int)
Tracklet ** getDeviceArrayTracklets()
TrackingFrameInfo * getDeviceTrackingFrameInfo(const int)
int32_t ** getDeviceArrayNTrackletsPerCluster() const noexcept
void createClustersIndexTablesArray(const int)
void createTrackletsBuffersArray(const int)
int getNClustersInRofSpan(const int, const int, const int) const
gsl::span< int, nLayers - 2 > getNCells()
void loadTrackingFrameInfoDevice(const int, const int)
void syncStreams(const bool=true)
int32_t ** getDeviceArrayNTrackletsPerROF() const noexcept
std::array< int *, nLayers - 2 > & getDeviceNeighboursAll()
void createVtxLinesBuffer(const int32_t)
void createUnsortedClustersDeviceArray(const int, const int=nLayers)
void downloadNeighboursLUTDevice(bounded_vector< int > &, const int)
void loadVertices(const int)
void createCellsLUTDevice(const int)
int * getDeviceNeighboursLUT(const int layer)
void waitEvent(const int, const int)
Road< nLayers - 2 > * getDeviceRoads()
void createVtxTrackletsBuffers(const int32_t)
void createClustersDeviceArray(const int, const int=nLayers)
void createTrackITSExtDevice(bounded_vector< CellSeedN > &)
void loadClustersDevice(const int, const int)
void initDevice(IndexTableUtilsN *, const TrackingParameters &trkParam, const TimeFrameGPUParameters &, const int, const int)
void registerHostMemory(const int)
o2::track::TrackParCovF ** getDeviceArrayTrackSeeds()
int getNumberOfCells() const final
void loadClustersIndexTables(const int, const int)
gsl::span< int * > getDeviceNeighboursLUTs()
std::vector< unsigned int > getClusterSizes()
const o2::base::Propagator * getChainPropagator()
void syncStream(const size_t stream)
void createVtxTrackletsLUTDevice(const int32_t)
Vertexer.
void createUsedClustersDeviceArray(const int, const int=nLayers)
int ** getDeviceArrayNeighboursCellLUT() const
CellSeedN ** getDeviceArrayCells()
int32_t * getDeviceNLinesPerClusterSum() const noexcept
void createVtxLinesLUTDevice(const int32_t)
void recordEvents(const int=0, const int=nLayers)
int ** getDeviceArrayTrackletsLUT() const
void setDevicePropagator(const o2::base::PropagatorImpl< float > *p) final
int getNumberOfTracklets() const final
gsl::span< int * > getDeviceTrackletsLUTs()
void downloadCellsNeighboursDevice(std::vector< bounded_vector< std::pair< int, int > > > &, const int)
void recordEvent(const int)
void createCellsBuffers(const int)
void createTrackingFrameInfoDeviceArray(const int)
unsigned char * getDeviceUsedClusters(const int)
void pushMemoryStack(const int)
Most relevant operations.
gpuPair< int, int > * getDeviceNeighbourPairs(const int layer)
void createTrackletsLUTDevice(const int, const int)
void createTrackletsBuffers(const int)
void downloadTrackITSExtDevice(bounded_vector< CellSeedN > &)
gsl::span< Tracklet * > getDeviceTracklets()
TrackITSExt * getDeviceTrackITSExt()
void loadMultiplicityCutMask(const int)
int32_t ** getDeviceArrayNTrackletsPerClusterSum() const noexcept
void loadUnsortedClustersDevice(const int, const int)
auto & getStream(const size_t stream)
synchronization
int ** getDeviceArrayCellsLUT() const
IndexTableUtilsN * getDeviceIndexTableUtils()
void popMemoryStack(const int)
int32_t * getDeviceNLinesPerCluster() const noexcept
void createNeighboursDevice(const unsigned int layer)
gsl::span< int, nLayers - 3 > getNNeighbours()
int * getDeviceROFramesClusters(const int layer)
const Cluster ** getDeviceArrayClusters() const
void createTrackletsLUTDeviceArray(const int)
CellSeedN * getDeviceTrackSeeds()
uint8_t ** getDeviceArrayUsedClusters() const
void unregisterHostMemory(const int)
Line * getDeviceLines() const noexcept
gsl::span< int * > getDeviceTrackletsPerROFs()
int getNumberOfNeighbours() const final
void createUsedClustersDevice(const int, const int)
void createNeighboursIndexTablesDevice(const int)
gsl::span< int, nLayers - 1 > getNTracklets()
void createROFrameClustersDeviceArray(const int)
void createCellsLUTDeviceArray(const int)
void loadROFrameClustersDevice(const int, const int)
const GLdouble * v
Definition glcorearb.h:832
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition glcorearb.h:2595
GLenum array
Definition glcorearb.h:4274
GLenum GLuint GLint GLint layer
Definition glcorearb.h:1310
GLuint GLuint stream
Definition glcorearb.h:1806
uint8_t itsSharedClusterMap uint8_t
std::pair< T1, T2 > gpuPair
Definition Utils.h:54
std::pmr::vector< T > bounded_vector
TrackParametrizationWithError< float > TrackParCovF
Definition Track.h:31
constexpr int nLayers
Definition Specs.h:45
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
Common utility functions.
const o2::base::PropagatorImpl< float > * mPropagatorDevice
Definition TimeFrame.h:294
CellSeed< nLayers > CellSeedN
Definition TimeFrame.h:68
IndexTableUtils< nLayers > IndexTableUtilsN
Definition TimeFrame.h:67