Project
Loading...
Searching...
No Matches
TrackerTraitsGPU.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#include <array>
14#include <vector>
15#include <unistd.h>
16
18
23
24namespace o2::its
25{
26
27template <int NLayers>
29{
30 mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], NLayers);
31 // on default stream
32 mTimeFrameGPU->loadVertices(iteration);
33 // TODO these tables can be put in persistent memory
34 mTimeFrameGPU->loadROFOverlapTable(iteration); // this can be put in constant memory actually
35 mTimeFrameGPU->loadROFVertexLookupTable(iteration);
36 // once the tables are in persistent memory just update the vertex one
37 // mTimeFrameGPU->updateROFVertexLookupTable(iteration);
38 mTimeFrameGPU->loadIndexTableUtils(iteration);
39 mTimeFrameGPU->loadROFCutMask(iteration);
40 // pinned on host
41 mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
42 mTimeFrameGPU->createClustersDeviceArray(iteration);
43 mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
44 mTimeFrameGPU->createClustersIndexTablesArray(iteration);
45 mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
46 mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
47 // device array
48 mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
49 mTimeFrameGPU->createTrackletsBuffersArray(iteration);
50 mTimeFrameGPU->createCellsBuffersArray(iteration);
51 mTimeFrameGPU->createCellsLUTDeviceArray(iteration);
52 // push every create artefact on the stack
53 mTimeFrameGPU->pushMemoryStack(iteration);
54}
55
56template <int NLayers>
58{
59 mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<NLayers>*>(tf);
60 this->mTimeFrame = static_cast<TimeFrame<NLayers>*>(tf);
61}
62
63template <int NLayers>
64void TrackerTraitsGPU<NLayers>::computeLayerTracklets(const int iteration, int iVertex)
65{
66 // start by queuing loading needed of two last layers
67 for (int iLayer{NLayers}; iLayer-- > NLayers - 2;) {
68 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer);
69 mTimeFrameGPU->loadClustersDevice(iteration, iLayer);
70 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer);
71 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer);
72 mTimeFrameGPU->recordEvent(iLayer);
73 }
74
75 for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) {
76 if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels
77 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer - 1);
78 mTimeFrameGPU->loadClustersDevice(iteration, iLayer - 1);
79 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer - 1);
80 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer - 1);
81 mTimeFrameGPU->recordEvent(iLayer - 1);
82 }
83 mTimeFrameGPU->createTrackletsLUTDevice(iteration, iLayer);
84 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
85 countTrackletsInROFsHandler<NLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
86 mTimeFrameGPU->getDeviceROFMaskTableView(),
87 iLayer,
88 mTimeFrameGPU->getDeviceROFOverlapTableView(),
89 mTimeFrameGPU->getDeviceROFVertexLookupTableView(),
90 iVertex,
91 mTimeFrameGPU->getDeviceVertices(),
92 mTimeFrameGPU->getDeviceROFramesPV(),
93 mTimeFrameGPU->getDeviceArrayClusters(),
94 mTimeFrameGPU->getClusterSizes(),
95 mTimeFrameGPU->getDeviceROFrameClusters(),
96 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
97 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
98 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
99 mTimeFrameGPU->getDeviceTrackletsLUTs(),
100 iteration,
101 this->mTrkParams[iteration].NSigmaCut,
102 mTimeFrameGPU->getPhiCuts(),
103 this->mTrkParams[iteration].PVres,
104 mTimeFrameGPU->getMinRs(),
105 mTimeFrameGPU->getMaxRs(),
106 mTimeFrameGPU->getPositionResolutions(),
107 this->mTrkParams[iteration].LayerRadii,
108 mTimeFrameGPU->getMSangles(),
109 mTimeFrameGPU->getFrameworkAllocator(),
110 mTimeFrameGPU->getStreams());
111 mTimeFrameGPU->createTrackletsBuffers(iLayer);
112 if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) {
113 continue;
114 }
115 computeTrackletsInROFsHandler<NLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
116 mTimeFrameGPU->getDeviceROFMaskTableView(),
117 iLayer,
118 mTimeFrameGPU->getDeviceROFOverlapTableView(),
119 mTimeFrameGPU->getDeviceROFVertexLookupTableView(),
120 iVertex,
121 mTimeFrameGPU->getDeviceVertices(),
122 mTimeFrameGPU->getDeviceROFramesPV(),
123 mTimeFrameGPU->getDeviceArrayClusters(),
124 mTimeFrameGPU->getClusterSizes(),
125 mTimeFrameGPU->getDeviceROFrameClusters(),
126 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
127 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
128 mTimeFrameGPU->getDeviceArrayTracklets(),
129 mTimeFrameGPU->getDeviceTracklets(),
130 mTimeFrameGPU->getNTracklets(),
131 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
132 mTimeFrameGPU->getDeviceTrackletsLUTs(),
133 iteration,
134 this->mTrkParams[iteration].NSigmaCut,
135 mTimeFrameGPU->getPhiCuts(),
136 this->mTrkParams[iteration].PVres,
137 mTimeFrameGPU->getMinRs(),
138 mTimeFrameGPU->getMaxRs(),
139 mTimeFrameGPU->getPositionResolutions(),
140 this->mTrkParams[iteration].LayerRadii,
141 mTimeFrameGPU->getMSangles(),
142 mTimeFrameGPU->getFrameworkAllocator(),
143 mTimeFrameGPU->getStreams());
144 }
145}
146
147template <int NLayers>
149{
150 // start by queuing loading needed of three last layers
151 for (int iLayer{NLayers}; iLayer-- > NLayers - 3;) {
152 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer);
153 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer);
154 mTimeFrameGPU->recordEvent(iLayer);
155 }
156
157 for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) {
158 if (iLayer) {
159 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer - 1);
160 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer - 1);
161 mTimeFrameGPU->recordEvent(iLayer - 1);
162 }
163
164 // if there are no tracklets skip entirely
165 const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
166 if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) {
167 mTimeFrameGPU->getNCells()[iLayer] = 0;
168 continue;
169 }
170
171 mTimeFrameGPU->createCellsLUTDevice(iLayer);
172 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
173 mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available
174 countCellsHandler<NLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
175 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
176 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
177 mTimeFrameGPU->getDeviceArrayTracklets(),
178 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
179 currentLayerTrackletsNum,
180 iLayer,
181 nullptr,
182 mTimeFrameGPU->getDeviceArrayCellsLUT(),
183 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
184 this->mBz,
185 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
186 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
187 this->mTrkParams[iteration].NSigmaCut,
188 this->mTrkParams[iteration].LayerxX0,
189 mTimeFrameGPU->getFrameworkAllocator(),
190 mTimeFrameGPU->getStreams());
191 mTimeFrameGPU->createCellsBuffers(iLayer);
192 if (mTimeFrameGPU->getNCells()[iLayer] == 0) {
193 continue;
194 }
195 computeCellsHandler<NLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
196 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
197 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
198 mTimeFrameGPU->getDeviceArrayTracklets(),
199 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
200 currentLayerTrackletsNum,
201 iLayer,
202 mTimeFrameGPU->getDeviceCells()[iLayer],
203 mTimeFrameGPU->getDeviceArrayCellsLUT(),
204 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
205 this->mBz,
206 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
207 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
208 this->mTrkParams[iteration].NSigmaCut,
209 this->mTrkParams[iteration].LayerxX0,
210 mTimeFrameGPU->getStreams());
211 }
212}
213
214template <int NLayers>
216{
217 for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) {
218 const int currentLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer])};
219 const int nextLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer + 1])};
220 if (!nextLayerCellsNum || !currentLayerCellsNum) {
221 mTimeFrameGPU->getNNeighbours()[iLayer] = 0;
222 continue;
223 }
224 mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer);
225 mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
226 countCellNeighboursHandler<NLayers>(mTimeFrameGPU->getDeviceArrayCells(),
227 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here.
228 mTimeFrameGPU->getDeviceArrayCellsLUT(),
229 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
230 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
231 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
232 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
233 this->mBz,
234 iLayer,
235 currentLayerCellsNum,
236 nextLayerCellsNum,
237 1e2,
238 mTimeFrameGPU->getFrameworkAllocator(),
239 mTimeFrameGPU->getStream(iLayer));
240 mTimeFrameGPU->createNeighboursDevice(iLayer);
241 if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) {
242 continue;
243 }
244 computeCellNeighboursHandler<NLayers>(mTimeFrameGPU->getDeviceArrayCells(),
245 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer),
246 mTimeFrameGPU->getDeviceArrayCellsLUT(),
247 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
248 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
249 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
250 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
251 this->mBz,
252 iLayer,
253 currentLayerCellsNum,
254 nextLayerCellsNum,
255 1e2,
256 mTimeFrameGPU->getStream(iLayer));
257 mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
258 mTimeFrameGPU->getDeviceNeighbours(iLayer),
259 mTimeFrameGPU->getArrayNNeighbours()[iLayer],
260 mTimeFrameGPU->getStream(iLayer),
261 mTimeFrameGPU->getFrameworkAllocator());
262 }
263 mTimeFrameGPU->syncStreams(false);
264}
265
266template <int NLayers>
268{
269 bounded_vector<bounded_vector<int>> firstClusters(this->mTrkParams[iteration].NLayers, bounded_vector<int>(this->getMemoryPool().get()), this->getMemoryPool().get());
270 bounded_vector<bounded_vector<int>> sharedFirstClusters(this->mTrkParams[iteration].NLayers, bounded_vector<int>(this->getMemoryPool().get()), this->getMemoryPool().get());
271 firstClusters.resize(this->mTrkParams[iteration].NLayers);
272 sharedFirstClusters.resize(this->mTrkParams[iteration].NLayers);
273 for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
274 const int minimumLayer{startLevel - 1};
275 bounded_vector<TrackSeed<NLayers>> trackSeeds(this->getMemoryPool().get());
276 for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
277 if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
278 continue;
279 }
280 processNeighboursHandler<NLayers>(startLayer,
281 startLevel,
282 mTimeFrameGPU->getDeviceArrayCells(),
283 mTimeFrameGPU->getDeviceCells()[startLayer],
284 mTimeFrameGPU->getArrayNCells(),
285 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
286 mTimeFrameGPU->getDeviceNeighboursAll(),
287 mTimeFrameGPU->getDeviceNeighboursLUTs(),
288 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
289 trackSeeds,
290 this->mBz,
291 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
292 this->mTrkParams[iteration].MaxChi2NDF,
293 this->mTrkParams[iteration].LayerxX0,
294 mTimeFrameGPU->getDevicePropagator(),
295 this->mTrkParams[iteration].CorrType,
296 mTimeFrameGPU->getFrameworkAllocator());
297 }
298 // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
299 if (trackSeeds.empty()) {
300 LOGP(debug, "No track seeds found, skipping track finding");
301 continue;
302 }
303 mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
304
305 // Since TrackITSExt is an enourmous class it is better to first count how many
306 // successfull fits we do and only then allocate
307 countTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(),
308 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
309 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
310 mTimeFrameGPU->getDeviceTrackSeedsLUT(),
311 this->mTrkParams[iteration].LayerRadii,
312 this->mTrkParams[iteration].MinPt,
313 this->mTrkParams[iteration].LayerxX0,
314 trackSeeds.size(),
315 this->mBz,
316 startLevel,
317 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
318 this->mTrkParams[iteration].MaxChi2NDF,
319 this->mTrkParams[iteration].ReseedIfShorter,
320 this->mTrkParams[iteration].RepeatRefitOut,
321 this->mTrkParams[iteration].ShiftRefToCluster,
322 mTimeFrameGPU->getDevicePropagator(),
323 this->mTrkParams[iteration].CorrType,
324 mTimeFrameGPU->getFrameworkAllocator());
325 mTimeFrameGPU->createTrackITSExtDevice(trackSeeds.size());
326 computeTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(),
327 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
328 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
329 mTimeFrameGPU->getDeviceTrackITSExt(),
330 mTimeFrameGPU->getDeviceTrackSeedsLUT(),
331 this->mTrkParams[iteration].LayerRadii,
332 this->mTrkParams[iteration].MinPt,
333 this->mTrkParams[iteration].LayerxX0,
334 trackSeeds.size(),
335 mTimeFrameGPU->getNTrackSeeds(),
336 this->mBz,
337 startLevel,
338 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
339 this->mTrkParams[iteration].MaxChi2NDF,
340 this->mTrkParams[iteration].ReseedIfShorter,
341 this->mTrkParams[iteration].RepeatRefitOut,
342 this->mTrkParams[iteration].ShiftRefToCluster,
343 mTimeFrameGPU->getDevicePropagator(),
344 this->mTrkParams[iteration].CorrType,
345 mTimeFrameGPU->getFrameworkAllocator());
346 mTimeFrameGPU->downloadTrackITSExtDevice();
347
348 auto& tracks = mTimeFrameGPU->getTrackITSExt();
349 this->acceptTracks(iteration, tracks, firstClusters, sharedFirstClusters);
350 mTimeFrameGPU->loadUsedClustersDevice();
351 }
352 this->markTracks(iteration, sharedFirstClusters);
353 // wipe the artefact memory
354 mTimeFrameGPU->popMemoryStack(iteration);
355};
356
357template <int NLayers>
359{
360 return mTimeFrameGPU->getNumberOfClusters();
361}
362
363template <int NLayers>
365{
366 return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
367}
368
369template <int NLayers>
371{
372 return mTimeFrameGPU->getNumberOfCells();
373}
374
375template <int NLayers>
377{
378 this->mBz = bz;
379 mTimeFrameGPU->setBz(bz);
380}
381
382template class TrackerTraitsGPU<7>;
383} // namespace o2::its
std::ostringstream debug
Shared host/device helpers for ITS tracker trait implementations.
Definition of the ITS track.
void computeLayerTracklets(const int iteration, int) final
int getTFNumberOfClusters() const override
void initialiseTimeFrame(const int iteration) final
void computeLayerCells(const int iteration) final
void adoptTimeFrame(TimeFrame< NLayers > *tf) final
void findRoads(const int iteration) final
int getTFNumberOfCells() const override
void findCellsNeighbours(const int iteration) final
int getTFNumberOfTracklets() const override
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
void countTrackSeedHandler(TrackSeed< NLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, const Cluster **unsortedClusters, int *seedLUT, const std::vector< float > &layerRadiiHost, const std::vector< float > &minPtsHost, const std::vector< float > &layerxX0Host, const unsigned int nSeeds, const float Bz, const int startLevel, const float maxChi2ClusterAttachment, const float maxChi2NDF, const int reseedIfShorter, const bool repeatRefitOut, const bool shiftRefToCluster, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator *alloc)
std::pmr::vector< T > bounded_vector
int filterCellNeighboursHandler(gpuPair< int, int > *, int *, unsigned int, gpu::Stream &, o2::its::ExternalAllocator *=nullptr)
void computeTrackSeedHandler(TrackSeed< NLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, const Cluster **unsortedClusters, o2::its::TrackITSExt *tracks, const int *seedLUT, const std::vector< float > &layerRadiiHost, const std::vector< float > &minPtsHost, const std::vector< float > &layerxX0Host, const unsigned int nSeeds, const unsigned int nTracks, const float Bz, const int startLevel, const float maxChi2ClusterAttachment, const float maxChi2NDF, const int reseedIfShorter, const bool repeatRefitOut, const bool shiftRefToCluster, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator *alloc)
std::unique_ptr< GPUReconstructionTimeframe > tf