Project
Loading...
Searching...
No Matches
TimeFrameGPU.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
14#define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
15
16#include <gsl/gsl>
17#include <bitset>
18
23
24namespace o2::its::gpu
25{
26
27template <int nLayers = 7>
28class TimeFrameGPU final : public TimeFrame<nLayers>
29{
30 using typename TimeFrame<nLayers>::CellSeedN;
32
33 public:
34 TimeFrameGPU() = default;
35 ~TimeFrameGPU() = default;
36
38 void pushMemoryStack(const int);
39 void popMemoryStack(const int);
40 void registerHostMemory(const int);
41 void unregisterHostMemory(const int);
42 void initialise(const int, const TrackingParameters&, const int, IndexTableUtilsN* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
43 void initDevice(IndexTableUtilsN*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
45 void loadIndexTableUtils(const int);
46 void loadTrackingFrameInfoDevice(const int, const int);
48 void loadUnsortedClustersDevice(const int, const int);
49 void createUnsortedClustersDeviceArray(const int, const int = nLayers);
50 void loadClustersDevice(const int, const int);
51 void createClustersDeviceArray(const int, const int = nLayers);
52 void loadClustersIndexTables(const int, const int);
54 void createUsedClustersDevice(const int, const int);
55 void createUsedClustersDeviceArray(const int, const int = nLayers);
57 void loadROFrameClustersDevice(const int, const int);
59 void loadMultiplicityCutMask(const int);
60 void loadVertices(const int);
61
63 void createTrackletsLUTDevice(const int, const int);
73 void createTrackletsBuffers(const int);
75 void createCellsBuffers(const int);
76 void createCellsBuffersArray(const int);
78 void createCellsLUTDevice(const int);
81 void createNeighboursDevice(const unsigned int layer);
82 void createNeighboursLUTDevice(const int, const unsigned int);
83 void createTrackITSExtDevice(const size_t);
85 void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
89
91 void createVtxTrackletsLUTDevice(const int32_t);
92 void createVtxTrackletsBuffers(const int32_t);
93 void createVtxLinesLUTDevice(const int32_t);
94 void createVtxLinesBuffer(const int32_t);
95
97 auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
98 auto& getStreams() { return mGpuStreams; }
99 void syncStream(const size_t stream);
100 void syncStreams(const bool = true);
101 void waitEvent(const int, const int);
102 void recordEvent(const int);
103 void recordEvents(const int = 0, const int = nLayers);
104
106 virtual void wipe() final;
107
109 virtual bool isGPU() const noexcept final { return true; }
110 virtual const char* getName() const noexcept { return "GPU"; }
111 int getNClustersInRofSpan(const int, const int, const int) const;
112 IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
113 int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
114 auto& getTrackITSExt() { return mTrackITSExt; }
115 Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
116 int* getDeviceROFramesPV() { return mROFramesPVDevice; }
117 unsigned char* getDeviceUsedClusters(const int);
119
120 // Hybrid
121 Road<nLayers - 2>* getDeviceRoads() { return mRoadsDevice; }
122 TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
123 int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
124 gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
125 gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
126 std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
127 int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
128 int** getDeviceNeighboursArray() { return mNeighboursDevice.data(); }
130 const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
131 const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
132 const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
133 const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
134 std::vector<unsigned int> getClusterSizes();
135 uint8_t** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
136 const int** getDeviceROFrameClusters() const { return mROFramesClustersDeviceArray; }
137 Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
138 int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
139 int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
140 int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
141 CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
142 CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
143 int* getDeviceTrackSeedsLUT() { return mTrackSeedsLUTDevice; }
144 auto getNTrackSeeds() const { return mNTracks; }
145 o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
146 float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
147 int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
148 uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
149
150 // Vertexer
151 auto& getDeviceNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDevice; }
152 auto& getDeviceNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDevice; }
153 auto& getDeviceNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDevice; }
154 int32_t** getDeviceArrayNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDeviceArray; }
155 int32_t** getDeviceArrayNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDeviceArray; }
156 int32_t** getDeviceArrayNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDeviceArray; }
157 uint8_t* getDeviceUsedTracklets() const noexcept { return mUsedTrackletsDevice; }
158 int32_t* getDeviceNLinesPerCluster() const noexcept { return mNLinesPerClusterDevice; }
159 int32_t* getDeviceNLinesPerClusterSum() const noexcept { return mNLinesPerClusterSumDevice; }
160 Line* getDeviceLines() const noexcept { return mLinesDevice; }
161 gsl::span<int*> getDeviceTrackletsPerROFs() { return mNTrackletsPerROFDevice; }
162
164
165 // Host-specific getters
166 gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
167 gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
168 auto& getArrayNCells() { return mNCells; }
169 gsl::span<int, nLayers - 3> getNNeighbours() { return mNNeighbours; }
170 auto& getArrayNNeighbours() { return mNNeighbours; }
171
172 // Host-available device getters
173 gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
174 gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
175 gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
176 gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }
177
178 // Overridden getters
179 int getNumberOfTracklets() const final;
180 int getNumberOfCells() const final;
181 int getNumberOfNeighbours() const final;
182
183 private:
184 void allocMemAsync(void**, size_t, Stream&, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on specific stream
185 void allocMem(void**, size_t, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on default stream
186 TimeFrameGPUParameters mGpuParams;
187
188 // Host-available device buffer sizes
189 std::array<int, nLayers - 1> mNTracklets;
190 std::array<int, nLayers - 2> mNCells;
191 std::array<int, nLayers - 3> mNNeighbours;
192
193 // Device pointers
194 IndexTableUtilsN* mIndexTableUtilsDevice;
195
196 // Hybrid pref
197 uint8_t* mMultMaskDevice;
198 Vertex* mPrimaryVerticesDevice;
199 int* mROFramesPVDevice;
200 std::array<Cluster*, nLayers> mClustersDevice;
201 std::array<Cluster*, nLayers> mUnsortedClustersDevice;
202 std::array<int*, nLayers> mClustersIndexTablesDevice;
203 std::array<unsigned char*, nLayers> mUsedClustersDevice;
204 std::array<int*, nLayers> mROFramesClustersDevice;
205 const Cluster** mClustersDeviceArray;
206 const Cluster** mUnsortedClustersDeviceArray;
207 const int** mClustersIndexTablesDeviceArray;
208 uint8_t** mUsedClustersDeviceArray;
209 const int** mROFramesClustersDeviceArray;
210 std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
211 std::array<int*, nLayers - 1> mTrackletsLUTDevice;
212 std::array<int*, nLayers - 2> mCellsLUTDevice;
213 std::array<int*, nLayers - 3> mNeighboursLUTDevice;
214
215 Tracklet** mTrackletsDeviceArray{nullptr};
216 int** mCellsLUTDeviceArray{nullptr};
217 int** mNeighboursCellDeviceArray{nullptr};
218 int** mNeighboursCellLUTDeviceArray{nullptr};
219 int** mTrackletsLUTDeviceArray{nullptr};
220 std::array<CellSeedN*, nLayers - 2> mCellsDevice;
221 CellSeedN** mCellsDeviceArray;
222 std::array<int*, nLayers - 3> mNeighboursIndexTablesDevice;
223 CellSeedN* mTrackSeedsDevice{nullptr};
224 int* mTrackSeedsLUTDevice{nullptr};
225 unsigned int mNTracks{0};
226 std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
227 o2::track::TrackParCovF** mCellSeedsDeviceArray;
228 std::array<float*, nLayers - 2> mCellSeedsChi2Device;
229 float** mCellSeedsChi2DeviceArray;
230
231 Road<nLayers - 2>* mRoadsDevice;
232 TrackITSExt* mTrackITSExtDevice;
233 std::array<gpuPair<int, int>*, nLayers - 2> mNeighbourPairsDevice;
234 std::array<int*, nLayers - 2> mNeighboursDevice;
235 std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
236 const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
237
239 std::array<int32_t*, 2> mNTrackletsPerROFDevice;
240 std::array<int32_t*, 2> mNTrackletsPerClusterDevice;
241 std::array<int32_t*, 2> mNTrackletsPerClusterSumDevice;
242 uint8_t* mUsedTrackletsDevice;
243 int32_t* mNLinesPerClusterDevice;
244 int32_t* mNLinesPerClusterSumDevice;
245 int32_t** mNTrackletsPerROFDeviceArray;
246 int32_t** mNTrackletsPerClusterDeviceArray;
247 int32_t** mNTrackletsPerClusterSumDeviceArray;
248 Line* mLinesDevice;
249
250 // State
251 Streams mGpuStreams;
252 std::bitset<nLayers + 1> mPinnedUnsortedClusters{0};
253 std::bitset<nLayers + 1> mPinnedClusters{0};
254 std::bitset<nLayers + 1> mPinnedClustersIndexTables{0};
255 std::bitset<nLayers + 1> mPinnedUsedClusters{0};
256 std::bitset<nLayers + 1> mPinnedROFramesClusters{0};
257 std::bitset<nLayers + 1> mPinnedTrackingFrameInfo{0};
258
259 // Temporary buffer for storing output tracks from GPU tracking
260 bounded_vector<TrackITSExt> mTrackITSExt;
261};
262
263template <int nLayers>
264inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, const int rofSpanSize, const int layerId) const
265{
266 return static_cast<int>(this->mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < this->mROFramesClusters.size() ? rofIdstart + rofSpanSize : this->mROFramesClusters.size() - 1] - this->mROFramesClusters[layerId][rofIdstart]);
267}
268
269template <int nLayers>
270inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
271{
272 std::vector<unsigned int> sizes(this->mUnsortedClusters.size());
273 std::transform(this->mUnsortedClusters.begin(), this->mUnsortedClusters.end(), sizes.begin(),
274 [](const auto& v) { return static_cast<unsigned int>(v.size()); });
275 return sizes;
276}
277
// NOTE(review): the function-header line between the template clause and the
// body is absent from this listing; presumably this is the definition of
// TimeFrameGPU<nLayers>::getNumberOfTracklets() const — confirm against the
// original header.
// The body returns the sum of all entries of mNTracklets, starting from 0.
278template <int nLayers>
280{
281 return std::accumulate(mNTracklets.begin(), mNTracklets.end(), 0);
282}
283
// NOTE(review): the function-header line between the template clause and the
// body is absent from this listing; presumably this is the definition of
// TimeFrameGPU<nLayers>::getNumberOfCells() const — confirm against the
// original header.
// The body returns the sum of all entries of mNCells, starting from 0.
284template <int nLayers>
286{
287 return std::accumulate(mNCells.begin(), mNCells.end(), 0);
288}
289
// NOTE(review): the function-header line between the template clause and the
// body is absent from this listing; presumably this is the definition of
// TimeFrameGPU<nLayers>::getNumberOfNeighbours() const — confirm against the
// original header.
// The body returns the sum of all entries of mNNeighbours, starting from 0.
290template <int nLayers>
292{
293 return std::accumulate(mNNeighbours.begin(), mNNeighbours.end(), 0);
294}
295
296} // namespace o2::its::gpu
297
298#endif
const TrackingFrameInfo ** getDeviceArrayTrackingFrameInfo() const
gsl::span< int * > getDeviceCellLUTs()
uint8_t * getDeviceUsedTracklets() const noexcept
void initialise(const int, const TrackingParameters &, const int, IndexTableUtilsN *utils=nullptr, const TimeFrameGPUParameters *pars=nullptr)
gsl::span< CellSeedN * > getDeviceCells()
void createCellsBuffersArray(const int)
void loadTrackSeedsDevice(bounded_vector< CellSeedN > &)
const int ** getDeviceArrayClustersIndexTables() const
auto & getDeviceNTrackletsPerROF() const noexcept
auto & getDeviceNTrackletsPerClusterSum() const noexcept
virtual const char * getName() const noexcept
auto & getDeviceNTrackletsPerCluster() const noexcept
const Cluster ** getDeviceArrayUnsortedClusters() const
virtual bool isGPU() const noexcept final
interface
int * getDeviceNeighbours(const int layer)
int * getDeviceNeighboursIndexTables(const int layer)
void loadIndexTableUtils(const int)
virtual void wipe() final
cleanup
const int ** getDeviceROFrameClusters() const
float ** getDeviceArrayTrackSeedsChi2()
void createTrackITSExtDevice(const size_t)
void createNeighboursLUTDevice(const int, const unsigned int)
Tracklet ** getDeviceArrayTracklets()
TrackingFrameInfo * getDeviceTrackingFrameInfo(const int)
int32_t ** getDeviceArrayNTrackletsPerCluster() const noexcept
void createClustersIndexTablesArray(const int)
void createTrackletsBuffersArray(const int)
int getNClustersInRofSpan(const int, const int, const int) const
gsl::span< int, nLayers - 2 > getNCells()
void loadTrackingFrameInfoDevice(const int, const int)
void syncStreams(const bool=true)
int32_t ** getDeviceArrayNTrackletsPerROF() const noexcept
std::array< int *, nLayers - 2 > & getDeviceNeighboursAll()
void createVtxLinesBuffer(const int32_t)
void createUnsortedClustersDeviceArray(const int, const int=nLayers)
void downloadNeighboursLUTDevice(bounded_vector< int > &, const int)
void loadVertices(const int)
void createCellsLUTDevice(const int)
int * getDeviceNeighboursLUT(const int layer)
void waitEvent(const int, const int)
Road< nLayers - 2 > * getDeviceRoads()
void createVtxTrackletsBuffers(const int32_t)
void createClustersDeviceArray(const int, const int=nLayers)
void loadClustersDevice(const int, const int)
void initDevice(IndexTableUtilsN *, const TrackingParameters &trkParam, const TimeFrameGPUParameters &, const int, const int)
void registerHostMemory(const int)
o2::track::TrackParCovF ** getDeviceArrayTrackSeeds()
int getNumberOfCells() const final
void loadClustersIndexTables(const int, const int)
gsl::span< int * > getDeviceNeighboursLUTs()
std::vector< unsigned int > getClusterSizes()
const o2::base::Propagator * getChainPropagator()
void syncStream(const size_t stream)
void createVtxTrackletsLUTDevice(const int32_t)
Vertexer.
void createUsedClustersDeviceArray(const int, const int=nLayers)
int ** getDeviceArrayNeighboursCellLUT() const
CellSeedN ** getDeviceArrayCells()
int32_t * getDeviceNLinesPerClusterSum() const noexcept
void createVtxLinesLUTDevice(const int32_t)
void recordEvents(const int=0, const int=nLayers)
int ** getDeviceArrayTrackletsLUT() const
void setDevicePropagator(const o2::base::PropagatorImpl< float > *p) final
int getNumberOfTracklets() const final
gsl::span< int * > getDeviceTrackletsLUTs()
void downloadCellsNeighboursDevice(std::vector< bounded_vector< std::pair< int, int > > > &, const int)
void recordEvent(const int)
void createCellsBuffers(const int)
void createTrackingFrameInfoDeviceArray(const int)
unsigned char * getDeviceUsedClusters(const int)
void pushMemoryStack(const int)
Most relevant operations.
gpuPair< int, int > * getDeviceNeighbourPairs(const int layer)
void createTrackletsLUTDevice(const int, const int)
void createTrackletsBuffers(const int)
gsl::span< Tracklet * > getDeviceTracklets()
TrackITSExt * getDeviceTrackITSExt()
void loadMultiplicityCutMask(const int)
int32_t ** getDeviceArrayNTrackletsPerClusterSum() const noexcept
void loadUnsortedClustersDevice(const int, const int)
auto & getStream(const size_t stream)
synchronization
int ** getDeviceArrayCellsLUT() const
IndexTableUtilsN * getDeviceIndexTableUtils()
void popMemoryStack(const int)
int32_t * getDeviceNLinesPerCluster() const noexcept
void createNeighboursDevice(const unsigned int layer)
gsl::span< int, nLayers - 3 > getNNeighbours()
int * getDeviceROFramesClusters(const int layer)
const Cluster ** getDeviceArrayClusters() const
void createTrackletsLUTDeviceArray(const int)
CellSeedN * getDeviceTrackSeeds()
uint8_t ** getDeviceArrayUsedClusters() const
void unregisterHostMemory(const int)
Line * getDeviceLines() const noexcept
gsl::span< int * > getDeviceTrackletsPerROFs()
int getNumberOfNeighbours() const final
void createUsedClustersDevice(const int, const int)
void createNeighboursIndexTablesDevice(const int)
gsl::span< int, nLayers - 1 > getNTracklets()
void createROFrameClustersDeviceArray(const int)
void createCellsLUTDeviceArray(const int)
void loadROFrameClustersDevice(const int, const int)
const GLdouble * v
Definition glcorearb.h:832
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition glcorearb.h:2595
GLenum array
Definition glcorearb.h:4274
GLenum GLuint GLint GLint layer
Definition glcorearb.h:1310
GLuint GLuint stream
Definition glcorearb.h:1806
uint8_t itsSharedClusterMap uint8_t
std::pair< T1, T2 > gpuPair
Definition Utils.h:54
std::pmr::vector< T > bounded_vector
TrackParametrizationWithError< float > TrackParCovF
Definition Track.h:31
constexpr int nLayers
Definition Specs.h:45
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
Common utility functions.
const o2::base::PropagatorImpl< float > * mPropagatorDevice
Definition TimeFrame.h:294
CellSeed< nLayers > CellSeedN
Definition TimeFrame.h:68
IndexTableUtils< nLayers > IndexTableUtilsN
Definition TimeFrame.h:67