Project
Loading...
Searching...
No Matches
TrackerTraitsGPU.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#include <array>
14#include <vector>
15#include <unistd.h>
16
18
23
24namespace o2::its
25{
26
27template <int nLayers>
29{
30 mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], nLayers);
31 // on default stream
32 mTimeFrameGPU->loadVertices(iteration);
33 mTimeFrameGPU->loadIndexTableUtils(iteration);
34 mTimeFrameGPU->loadMultiplicityCutMask(iteration);
35 // pinned on host
36 mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
37 mTimeFrameGPU->createClustersDeviceArray(iteration);
38 mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
39 mTimeFrameGPU->createClustersIndexTablesArray(iteration);
40 mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
41 mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
42 // device array
43 mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
44 mTimeFrameGPU->createTrackletsBuffersArray(iteration);
45 mTimeFrameGPU->createCellsBuffersArray(iteration);
46 mTimeFrameGPU->createCellsLUTDeviceArray(iteration);
47 // push every create artefact on the stack
48 mTimeFrameGPU->pushMemoryStack(iteration);
49}
50
51template <int nLayers>
53{
54 mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<nLayers>*>(tf);
55 this->mTimeFrame = static_cast<TimeFrame<nLayers>*>(tf);
56}
57
58template <int nLayers>
59void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int iROFslice, int iVertex)
60{
62
63 int startROF{0};
64 int endROF{mTimeFrameGPU->getNrof()};
65
66 // start by queuing loading needed of two last layers
67 for (int iLayer{nLayers}; iLayer-- > nLayers - 2;) {
68 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer);
69 mTimeFrameGPU->loadClustersDevice(iteration, iLayer);
70 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer);
71 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer);
72 mTimeFrameGPU->recordEvent(iLayer);
73 }
74
75 for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) {
76 if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels
77 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer - 1);
78 mTimeFrameGPU->loadClustersDevice(iteration, iLayer - 1);
79 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer - 1);
80 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer - 1);
81 mTimeFrameGPU->recordEvent(iLayer - 1);
82 }
83 mTimeFrameGPU->createTrackletsLUTDevice(iteration, iLayer);
84 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
85 countTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
86 mTimeFrameGPU->getDeviceMultCutMask(),
87 iLayer,
88 startROF,
89 endROF,
90 mTimeFrameGPU->getNrof(),
91 this->mTrkParams[iteration].DeltaROF,
92 iVertex,
93 mTimeFrameGPU->getDeviceVertices(),
94 mTimeFrameGPU->getDeviceROFramesPV(),
95 mTimeFrameGPU->getPrimaryVerticesNum(),
96 mTimeFrameGPU->getDeviceArrayClusters(),
97 mTimeFrameGPU->getClusterSizes(),
98 mTimeFrameGPU->getDeviceROFrameClusters(),
99 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
100 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
101 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
102 mTimeFrameGPU->getDeviceTrackletsLUTs(),
103 iteration,
104 this->mTrkParams[iteration].NSigmaCut,
105 mTimeFrameGPU->getPhiCuts(),
106 this->mTrkParams[iteration].PVres,
107 mTimeFrameGPU->getMinRs(),
108 mTimeFrameGPU->getMaxRs(),
109 mTimeFrameGPU->getPositionResolutions(),
110 this->mTrkParams[iteration].LayerRadii,
111 mTimeFrameGPU->getMSangles(),
112 mTimeFrameGPU->getFrameworkAllocator(),
113 conf.nBlocksLayerTracklets[iteration],
114 conf.nThreadsLayerTracklets[iteration],
115 mTimeFrameGPU->getStreams());
116 mTimeFrameGPU->createTrackletsBuffers(iLayer);
117 if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) {
118 continue;
119 }
120 computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
121 mTimeFrameGPU->getDeviceMultCutMask(),
122 iLayer,
123 startROF,
124 endROF,
125 mTimeFrameGPU->getNrof(),
126 this->mTrkParams[iteration].DeltaROF,
127 iVertex,
128 mTimeFrameGPU->getDeviceVertices(),
129 mTimeFrameGPU->getDeviceROFramesPV(),
130 mTimeFrameGPU->getPrimaryVerticesNum(),
131 mTimeFrameGPU->getDeviceArrayClusters(),
132 mTimeFrameGPU->getClusterSizes(),
133 mTimeFrameGPU->getDeviceROFrameClusters(),
134 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
135 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
136 mTimeFrameGPU->getDeviceArrayTracklets(),
137 mTimeFrameGPU->getDeviceTracklets(),
138 mTimeFrameGPU->getNTracklets(),
139 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
140 mTimeFrameGPU->getDeviceTrackletsLUTs(),
141 iteration,
142 this->mTrkParams[iteration].NSigmaCut,
143 mTimeFrameGPU->getPhiCuts(),
144 this->mTrkParams[iteration].PVres,
145 mTimeFrameGPU->getMinRs(),
146 mTimeFrameGPU->getMaxRs(),
147 mTimeFrameGPU->getPositionResolutions(),
148 this->mTrkParams[iteration].LayerRadii,
149 mTimeFrameGPU->getMSangles(),
150 mTimeFrameGPU->getFrameworkAllocator(),
151 conf.nBlocksLayerTracklets[iteration],
152 conf.nThreadsLayerTracklets[iteration],
153 mTimeFrameGPU->getStreams());
154 }
155}
156
157template <int nLayers>
159{
161
162 // start by queuing loading needed of three last layers
163 for (int iLayer{nLayers}; iLayer-- > nLayers - 3;) {
164 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer);
165 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer);
166 mTimeFrameGPU->recordEvent(iLayer);
167 }
168
169 for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) {
170 if (iLayer) {
171 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer - 1);
172 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer - 1);
173 mTimeFrameGPU->recordEvent(iLayer - 1);
174 }
175
176 // if there are no tracklets skip entirely
177 const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
178 if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) {
179 mTimeFrameGPU->getNCells()[iLayer] = 0;
180 continue;
181 }
182
183 mTimeFrameGPU->createCellsLUTDevice(iLayer);
184 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
185 mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available
186 countCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
187 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
188 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
189 mTimeFrameGPU->getDeviceArrayTracklets(),
190 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
191 currentLayerTrackletsNum,
192 iLayer,
193 nullptr,
194 mTimeFrameGPU->getDeviceArrayCellsLUT(),
195 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
196 this->mTrkParams[iteration].DeltaROF,
197 this->mBz,
198 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
199 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
200 this->mTrkParams[iteration].NSigmaCut,
201 mTimeFrameGPU->getFrameworkAllocator(),
202 conf.nBlocksLayerCells[iteration],
203 conf.nThreadsLayerCells[iteration],
204 mTimeFrameGPU->getStreams());
205 mTimeFrameGPU->createCellsBuffers(iLayer);
206 if (mTimeFrameGPU->getNCells()[iLayer] == 0) {
207 continue;
208 }
209 computeCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
210 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
211 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
212 mTimeFrameGPU->getDeviceArrayTracklets(),
213 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
214 currentLayerTrackletsNum,
215 iLayer,
216 mTimeFrameGPU->getDeviceCells()[iLayer],
217 mTimeFrameGPU->getDeviceArrayCellsLUT(),
218 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
219 this->mTrkParams[iteration].DeltaROF,
220 this->mBz,
221 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
222 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
223 this->mTrkParams[iteration].NSigmaCut,
224 conf.nBlocksLayerCells[iteration],
225 conf.nThreadsLayerCells[iteration],
226 mTimeFrameGPU->getStreams());
227 }
228}
229
230template <int nLayers>
232{
234
235 for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) {
236 const int currentLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer])};
237 const int nextLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer + 1])};
238 if (!nextLayerCellsNum || !currentLayerCellsNum) {
239 mTimeFrameGPU->getNNeighbours()[iLayer] = 0;
240 continue;
241 }
242 mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer);
243 mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
244 countCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
245 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here.
246 mTimeFrameGPU->getDeviceArrayCellsLUT(),
247 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
248 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
249 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
250 this->mTrkParams[0].DeltaROF,
251 this->mTrkParams[0].MaxChi2ClusterAttachment,
252 this->mBz,
253 iLayer,
254 currentLayerCellsNum,
255 nextLayerCellsNum,
256 1e2,
257 mTimeFrameGPU->getFrameworkAllocator(),
258 conf.nBlocksFindNeighbours[iteration],
259 conf.nThreadsFindNeighbours[iteration],
260 mTimeFrameGPU->getStream(iLayer));
261 mTimeFrameGPU->createNeighboursDevice(iLayer);
262 if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) {
263 continue;
264 }
265 computeCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
266 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer),
267 mTimeFrameGPU->getDeviceArrayCellsLUT(),
268 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
269 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
270 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
271 this->mTrkParams[0].DeltaROF,
272 this->mTrkParams[0].MaxChi2ClusterAttachment,
273 this->mBz,
274 iLayer,
275 currentLayerCellsNum,
276 nextLayerCellsNum,
277 1e2,
278 conf.nBlocksFindNeighbours[iteration],
279 conf.nThreadsFindNeighbours[iteration],
280 mTimeFrameGPU->getStream(iLayer));
281 mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
282 mTimeFrameGPU->getDeviceNeighbours(iLayer),
283 mTimeFrameGPU->getArrayNNeighbours()[iLayer],
284 mTimeFrameGPU->getStream(iLayer),
285 mTimeFrameGPU->getFrameworkAllocator());
286 }
287 mTimeFrameGPU->syncStreams(false);
288}
289
290template <int nLayers>
292{
294 for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
295 const int minimumLayer{startLevel - 1};
296 bounded_vector<CellSeed<nLayers>> trackSeeds(this->getMemoryPool().get());
297 for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
298 if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
299 continue;
300 }
301 processNeighboursHandler<nLayers>(startLayer,
302 startLevel,
303 mTimeFrameGPU->getDeviceArrayCells(),
304 mTimeFrameGPU->getDeviceCells()[startLayer],
305 mTimeFrameGPU->getArrayNCells(),
306 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
307 mTimeFrameGPU->getDeviceNeighboursAll(),
308 mTimeFrameGPU->getDeviceNeighboursLUTs(),
309 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
310 trackSeeds,
311 this->mBz,
312 this->mTrkParams[0].MaxChi2ClusterAttachment,
313 this->mTrkParams[0].MaxChi2NDF,
314 mTimeFrameGPU->getDevicePropagator(),
315 this->mTrkParams[0].CorrType,
316 mTimeFrameGPU->getFrameworkAllocator(),
317 conf.nBlocksProcessNeighbours[iteration],
318 conf.nThreadsProcessNeighbours[iteration]);
319 }
320 // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
321 if (trackSeeds.empty()) {
322 LOGP(debug, "No track seeds found, skipping track finding");
323 continue;
324 }
325 mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
326
327 // Since TrackITSExt is an enourmous class it is better to first count how many
328 // successfull fits we do and only then allocate
329 countTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(),
330 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
331 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
332 mTimeFrameGPU->getDeviceTrackSeedsLUT(),
333 this->mTrkParams[iteration].LayerRadii,
334 this->mTrkParams[iteration].MinPt,
335 trackSeeds.size(),
336 this->mBz,
337 startLevel,
338 this->mTrkParams[0].MaxChi2ClusterAttachment,
339 this->mTrkParams[0].MaxChi2NDF,
340 this->mTrkParams[0].ReseedIfShorter,
341 this->mTrkParams[0].RepeatRefitOut,
342 this->mTrkParams[0].ShiftRefToCluster,
343 mTimeFrameGPU->getDevicePropagator(),
344 this->mTrkParams[0].CorrType,
345 mTimeFrameGPU->getFrameworkAllocator(),
346 conf.nBlocksTracksSeeds[iteration],
347 conf.nThreadsTracksSeeds[iteration]);
348 mTimeFrameGPU->createTrackITSExtDevice(trackSeeds.size());
349 computeTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(),
350 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
351 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
352 mTimeFrameGPU->getDeviceTrackITSExt(),
353 mTimeFrameGPU->getDeviceTrackSeedsLUT(),
354 this->mTrkParams[iteration].LayerRadii,
355 this->mTrkParams[iteration].MinPt,
356 trackSeeds.size(),
357 mTimeFrameGPU->getNTrackSeeds(),
358 this->mBz,
359 startLevel,
360 this->mTrkParams[0].MaxChi2ClusterAttachment,
361 this->mTrkParams[0].MaxChi2NDF,
362 this->mTrkParams[0].ReseedIfShorter,
363 this->mTrkParams[0].RepeatRefitOut,
364 this->mTrkParams[0].ShiftRefToCluster,
365 mTimeFrameGPU->getDevicePropagator(),
366 this->mTrkParams[0].CorrType,
367 mTimeFrameGPU->getFrameworkAllocator(),
368 conf.nBlocksTracksSeeds[iteration],
369 conf.nThreadsTracksSeeds[iteration]);
370 mTimeFrameGPU->downloadTrackITSExtDevice();
371
372 auto& tracks = mTimeFrameGPU->getTrackITSExt();
373
374 for (auto& track : tracks) {
375 if (!track.getChi2()) {
376 continue; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised.
377 }
378 int nShared = 0;
379 bool isFirstShared{false};
380 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
381 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
382 continue;
383 }
384 nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer)));
385 isFirstShared |= !iLayer && mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer));
386 }
387
388 if (nShared > this->mTrkParams[0].ClusterSharing) {
389 continue;
390 }
391
392 std::array<int, 3> rofs{INT_MAX, INT_MAX, INT_MAX};
393 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
394 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
395 continue;
396 }
397 mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer));
398 int currentROF = mTimeFrameGPU->getClusterROF(iLayer, track.getClusterIndex(iLayer));
399 for (int iR{0}; iR < 3; ++iR) {
400 if (rofs[iR] == INT_MAX) {
401 rofs[iR] = currentROF;
402 }
403 if (rofs[iR] == currentROF) {
404 break;
405 }
406 }
407 }
408 if (rofs[2] != INT_MAX) {
409 continue;
410 }
411 if (rofs[1] != INT_MAX) {
412 track.setNextROFbit();
413 }
414 mTimeFrameGPU->getTracks(std::min(rofs[0], rofs[1])).emplace_back(track);
415 }
416 mTimeFrameGPU->loadUsedClustersDevice();
417 }
418 // wipe the artefact memory
419 mTimeFrameGPU->popMemoryStack(iteration);
420};
421
422template <int nLayers>
424{
425 return mTimeFrameGPU->getNumberOfClusters();
426}
427
428template <int nLayers>
430{
431 return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
432}
433
434template <int nLayers>
436{
437 return mTimeFrameGPU->getNumberOfCells();
438}
439
440template <int nLayers>
442{
443 this->mBz = bz;
444 mTimeFrameGPU->setBz(bz);
445}
446
447template class TrackerTraitsGPU<7>;
448} // namespace o2::its
std::ostringstream debug
Definition of the ITS track.
int getTFNumberOfClusters() const override
void initialiseTimeFrame(const int iteration) final
void findCellsNeighbours(const int iteration) final
void adoptTimeFrame(TimeFrame< nLayers > *tf) final
void computeLayerCells(const int iteration) final
void computeLayerTracklets(const int iteration, int, int) final
int getTFNumberOfTracklets() const override
int getTFNumberOfCells() const override
void findRoads(const int iteration) final
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pmr::vector< T > bounded_vector
int filterCellNeighboursHandler(gpuPair< int, int > *, int *, unsigned int, gpu::Stream &, o2::its::ExternalAllocator *=nullptr)
void countTrackSeedHandler(CellSeed< nLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, const Cluster **unsortedClusters, int *seedLUT, const std::vector< float > &layerRadiiHost, const std::vector< float > &minPtsHost, const unsigned int nSeeds, const float Bz, const int startLevel, const float maxChi2ClusterAttachment, const float maxChi2NDF, const int reseedIfShorter, const bool repeatRefitOut, const bool shiftRefToCluster, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator *alloc, const int nBlocks, const int nThreads)
void computeTrackSeedHandler(CellSeed< nLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, const Cluster **unsortedClusters, o2::its::TrackITSExt *tracks, const int *seedLUT, const std::vector< float > &layerRadiiHost, const std::vector< float > &minPtsHost, const unsigned int nSeeds, const unsigned int nTracks, const float Bz, const int startLevel, const float maxChi2ClusterAttachment, const float maxChi2NDF, const int reseedIfShorter, const bool repeatRefitOut, const bool shiftRefToCluster, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, o2::its::ExternalAllocator *alloc, const int nBlocks, const int nThreads)
std::unique_ptr< GPUReconstructionTimeframe > tf