Project
Loading...
Searching...
No Matches
TrackerTraitsGPU.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#include <array>
14#include <vector>
15#include <unistd.h>
16
18
23
24namespace o2::its
25{
26
27template <int nLayers>
29{
30 mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], nLayers);
31
32 // on default stream
33 mTimeFrameGPU->loadVertices(iteration);
34 mTimeFrameGPU->loadIndexTableUtils(iteration);
35 mTimeFrameGPU->loadMultiplicityCutMask(iteration);
36 mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
37 mTimeFrameGPU->createClustersDeviceArray(iteration);
38 mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
39 mTimeFrameGPU->createClustersIndexTablesArray(iteration);
40 mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
41 mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
42 mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
43 mTimeFrameGPU->createTrackletsBuffersArray(iteration);
44 mTimeFrameGPU->createCellsBuffersArray(iteration);
45 mTimeFrameGPU->createCellsLUTDeviceArray(iteration);
46}
47
48template <int nLayers>
50{
51 mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<nLayers>*>(tf);
52 this->mTimeFrame = static_cast<TimeFrame<nLayers>*>(tf);
53}
54
55template <int nLayers>
56void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int iROFslice, int iVertex)
57{
59
60 int startROF{0};
61 int endROF{mTimeFrameGPU->getNrof()};
62
63 // start by queuing loading needed of two last layers
64 for (int iLayer{nLayers}; iLayer-- > nLayers - 2;) {
65 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer);
66 mTimeFrameGPU->loadClustersDevice(iteration, iLayer);
67 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer);
68 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer);
69 mTimeFrameGPU->recordEvent(iLayer);
70 }
71
72 for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) {
73 if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels
74 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer - 1);
75 mTimeFrameGPU->loadClustersDevice(iteration, iLayer - 1);
76 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer - 1);
77 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer - 1);
78 mTimeFrameGPU->recordEvent(iLayer - 1);
79 }
80 mTimeFrameGPU->createTrackletsLUTDevice(iteration, iLayer);
81 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
82 countTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
83 mTimeFrameGPU->getDeviceMultCutMask(),
84 iLayer,
85 startROF,
86 endROF,
87 mTimeFrameGPU->getNrof(),
88 this->mTrkParams[iteration].DeltaROF,
89 iVertex,
90 mTimeFrameGPU->getDeviceVertices(),
91 mTimeFrameGPU->getDeviceROFramesPV(),
92 mTimeFrameGPU->getPrimaryVerticesNum(),
93 mTimeFrameGPU->getDeviceArrayClusters(),
94 mTimeFrameGPU->getClusterSizes(),
95 mTimeFrameGPU->getDeviceROFrameClusters(),
96 mTimeFrameGPU->getDeviceArrayUsedClusters(),
97 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
98 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
99 mTimeFrameGPU->getDeviceTrackletsLUTs(),
100 iteration,
101 this->mTrkParams[iteration].NSigmaCut,
102 mTimeFrameGPU->getPhiCuts(),
103 this->mTrkParams[iteration].PVres,
104 mTimeFrameGPU->getMinRs(),
105 mTimeFrameGPU->getMaxRs(),
106 mTimeFrameGPU->getPositionResolutions(),
107 this->mTrkParams[iteration].LayerRadii,
108 mTimeFrameGPU->getMSangles(),
109 mTimeFrameGPU->getExternalAllocator(),
110 conf.nBlocksLayerTracklets[iteration],
111 conf.nThreadsLayerTracklets[iteration],
112 mTimeFrameGPU->getStreams());
113 mTimeFrameGPU->createTrackletsBuffers(iLayer);
114 if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) {
115 continue;
116 }
117 computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
118 mTimeFrameGPU->getDeviceMultCutMask(),
119 iLayer,
120 startROF,
121 endROF,
122 mTimeFrameGPU->getNrof(),
123 this->mTrkParams[iteration].DeltaROF,
124 iVertex,
125 mTimeFrameGPU->getDeviceVertices(),
126 mTimeFrameGPU->getDeviceROFramesPV(),
127 mTimeFrameGPU->getPrimaryVerticesNum(),
128 mTimeFrameGPU->getDeviceArrayClusters(),
129 mTimeFrameGPU->getClusterSizes(),
130 mTimeFrameGPU->getDeviceROFrameClusters(),
131 mTimeFrameGPU->getDeviceArrayUsedClusters(),
132 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
133 mTimeFrameGPU->getDeviceArrayTracklets(),
134 mTimeFrameGPU->getDeviceTracklets(),
135 mTimeFrameGPU->getNTracklets(),
136 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
137 mTimeFrameGPU->getDeviceTrackletsLUTs(),
138 iteration,
139 this->mTrkParams[iteration].NSigmaCut,
140 mTimeFrameGPU->getPhiCuts(),
141 this->mTrkParams[iteration].PVres,
142 mTimeFrameGPU->getMinRs(),
143 mTimeFrameGPU->getMaxRs(),
144 mTimeFrameGPU->getPositionResolutions(),
145 this->mTrkParams[iteration].LayerRadii,
146 mTimeFrameGPU->getMSangles(),
147 mTimeFrameGPU->getExternalAllocator(),
148 conf.nBlocksLayerTracklets[iteration],
149 conf.nThreadsLayerTracklets[iteration],
150 mTimeFrameGPU->getStreams());
151 }
152}
153
154template <int nLayers>
156{
158
159 // start by queuing loading needed of three last layers
160 for (int iLayer{nLayers}; iLayer-- > nLayers - 3;) {
161 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer);
162 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer);
163 mTimeFrameGPU->recordEvent(iLayer);
164 }
165
166 for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) {
167 if (iLayer) {
168 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer - 1);
169 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer - 1);
170 mTimeFrameGPU->recordEvent(iLayer - 1);
171 }
172
173 // if there are no tracklets skip entirely
174 const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
175 if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) {
176 mTimeFrameGPU->getNCells()[iLayer] = 0;
177 continue;
178 }
179
180 mTimeFrameGPU->createCellsLUTDevice(iLayer);
181 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
182 mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available
183 countCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
184 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
185 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
186 mTimeFrameGPU->getDeviceArrayTracklets(),
187 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
188 currentLayerTrackletsNum,
189 iLayer,
190 nullptr,
191 mTimeFrameGPU->getDeviceArrayCellsLUT(),
192 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
193 this->mTrkParams[iteration].DeltaROF,
194 this->mBz,
195 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
196 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
197 this->mTrkParams[iteration].NSigmaCut,
198 mTimeFrameGPU->getExternalAllocator(),
199 conf.nBlocksLayerCells[iteration],
200 conf.nThreadsLayerCells[iteration],
201 mTimeFrameGPU->getStreams());
202 mTimeFrameGPU->createCellsBuffers(iLayer);
203 if (mTimeFrameGPU->getNCells()[iLayer] == 0) {
204 continue;
205 }
206 computeCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
207 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
208 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
209 mTimeFrameGPU->getDeviceArrayTracklets(),
210 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
211 currentLayerTrackletsNum,
212 iLayer,
213 mTimeFrameGPU->getDeviceCells()[iLayer],
214 mTimeFrameGPU->getDeviceArrayCellsLUT(),
215 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
216 this->mTrkParams[iteration].DeltaROF,
217 this->mBz,
218 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
219 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
220 this->mTrkParams[iteration].NSigmaCut,
221 conf.nBlocksLayerCells[iteration],
222 conf.nThreadsLayerCells[iteration],
223 mTimeFrameGPU->getStreams());
224 }
225}
226
227template <int nLayers>
229{
231
232 for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) {
233 const int currentLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer])};
234 const int nextLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer + 1])};
235 if (!nextLayerCellsNum || !currentLayerCellsNum) {
236 mTimeFrameGPU->getNNeighbours()[iLayer] = 0;
237 continue;
238 }
239 mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer);
240 mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
241 countCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
242 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here.
243 mTimeFrameGPU->getDeviceArrayCellsLUT(),
244 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
245 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
246 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
247 this->mTrkParams[0].DeltaROF,
248 this->mTrkParams[0].MaxChi2ClusterAttachment,
249 this->mBz,
250 iLayer,
251 currentLayerCellsNum,
252 nextLayerCellsNum,
253 1e2,
254 mTimeFrameGPU->getExternalAllocator(),
255 conf.nBlocksFindNeighbours[iteration],
256 conf.nThreadsFindNeighbours[iteration],
257 mTimeFrameGPU->getStream(iLayer));
258 mTimeFrameGPU->createNeighboursDevice(iLayer);
259 if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) {
260 continue;
261 }
262 computeCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
263 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer),
264 mTimeFrameGPU->getDeviceArrayCellsLUT(),
265 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
266 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
267 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
268 this->mTrkParams[0].DeltaROF,
269 this->mTrkParams[0].MaxChi2ClusterAttachment,
270 this->mBz,
271 iLayer,
272 currentLayerCellsNum,
273 nextLayerCellsNum,
274 1e2,
275 conf.nBlocksFindNeighbours[iteration],
276 conf.nThreadsFindNeighbours[iteration],
277 mTimeFrameGPU->getStream(iLayer));
278 mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
279 mTimeFrameGPU->getDeviceNeighbours(iLayer),
280 mTimeFrameGPU->getArrayNNeighbours()[iLayer],
281 mTimeFrameGPU->getStream(iLayer),
282 mTimeFrameGPU->getExternalAllocator());
283 }
284 mTimeFrameGPU->syncStreams(false);
285}
286
287template <int nLayers>
289{
291 for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
292 const int minimumLayer{startLevel - 1};
293 bounded_vector<CellSeed<nLayers>> trackSeeds(this->getMemoryPool().get());
294 for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
295 if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
296 continue;
297 }
298 processNeighboursHandler<nLayers>(startLayer,
299 startLevel,
300 mTimeFrameGPU->getDeviceArrayCells(),
301 mTimeFrameGPU->getDeviceCells()[startLayer],
302 mTimeFrameGPU->getArrayNCells(),
303 mTimeFrameGPU->getDeviceArrayUsedClusters(),
304 mTimeFrameGPU->getDeviceNeighboursAll(),
305 mTimeFrameGPU->getDeviceNeighboursLUTs(),
306 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
307 trackSeeds,
308 this->mBz,
309 this->mTrkParams[0].MaxChi2ClusterAttachment,
310 this->mTrkParams[0].MaxChi2NDF,
311 mTimeFrameGPU->getDevicePropagator(),
312 this->mTrkParams[0].CorrType,
313 mTimeFrameGPU->getExternalAllocator(),
314 conf.nBlocksProcessNeighbours[iteration],
315 conf.nThreadsProcessNeighbours[iteration]);
316 }
317 // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
318 if (trackSeeds.empty()) {
319 LOGP(debug, "No track seeds found, skipping track finding");
320 continue;
321 }
322 mTimeFrameGPU->createTrackITSExtDevice(trackSeeds);
323 mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
324
325 trackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(), // CellSeed* trackSeeds
326 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), // TrackingFrameInfo** foundTrackingFrameInfo
327 mTimeFrameGPU->getDeviceTrackITSExt(), // o2::its::TrackITSExt* tracks
328 this->mTrkParams[iteration].MinPt, // std::vector<float>& minPtsHost,
329 trackSeeds.size(), // const size_t nSeeds
330 this->mBz, // const float Bz
331 startLevel, // const int startLevel,
332 this->mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
333 this->mTrkParams[0].MaxChi2NDF, // float maxChi2NDF
334 mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator
335 this->mTrkParams[0].CorrType, // o2::base::PropagatorImpl<float>::MatCorrType
336 conf.nBlocksTracksSeeds[iteration],
337 conf.nThreadsTracksSeeds[iteration]);
338
339 mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
340
341 auto& tracks = mTimeFrameGPU->getTrackITSExt();
342
343 for (auto& track : tracks) {
344 if (!track.getChi2()) {
345 continue; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised.
346 }
347 int nShared = 0;
348 bool isFirstShared{false};
349 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
350 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
351 continue;
352 }
353 nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer)));
354 isFirstShared |= !iLayer && mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer));
355 }
356
357 if (nShared > this->mTrkParams[0].ClusterSharing) {
358 continue;
359 }
360
361 std::array<int, 3> rofs{INT_MAX, INT_MAX, INT_MAX};
362 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
363 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
364 continue;
365 }
366 mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer));
367 int currentROF = mTimeFrameGPU->getClusterROF(iLayer, track.getClusterIndex(iLayer));
368 for (int iR{0}; iR < 3; ++iR) {
369 if (rofs[iR] == INT_MAX) {
370 rofs[iR] = currentROF;
371 }
372 if (rofs[iR] == currentROF) {
373 break;
374 }
375 }
376 }
377 if (rofs[2] != INT_MAX) {
378 continue;
379 }
380 if (rofs[1] != INT_MAX) {
381 track.setNextROFbit();
382 }
383 mTimeFrameGPU->getTracks(std::min(rofs[0], rofs[1])).emplace_back(track);
384 }
385 mTimeFrameGPU->loadUsedClustersDevice();
386 }
387};
388
389template <int nLayers>
391{
392 return mTimeFrameGPU->getNumberOfClusters();
393}
394
395template <int nLayers>
397{
398 return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
399}
400
401template <int nLayers>
403{
404 return mTimeFrameGPU->getNumberOfCells();
405}
406
407template <int nLayers>
409{
410 this->mBz = bz;
411 mTimeFrameGPU->setBz(bz);
412}
413
414template class TrackerTraitsGPU<7>;
415} // namespace o2::its
Definition of the ITS track.
std::ostringstream debug
int getTFNumberOfClusters() const override
void initialiseTimeFrame(const int iteration) final
void findCellsNeighbours(const int iteration) final
void adoptTimeFrame(TimeFrame< nLayers > *tf) final
void computeLayerCells(const int iteration) final
void computeLayerTracklets(const int iteration, int, int) final
int getTFNumberOfTracklets() const override
int getTFNumberOfCells() const override
void findRoads(const int iteration) final
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pmr::vector< T > bounded_vector
int filterCellNeighboursHandler(gpuPair< int, int > *, int *, unsigned int, gpu::Stream &, o2::its::ExternalAllocator *=nullptr)
void trackSeedHandler(CellSeed< nLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, o2::its::TrackITSExt *tracks, std::vector< float > &minPtsHost, const unsigned int nSeeds, const float Bz, const int startLevel, float maxChi2ClusterAttachment, float maxChi2NDF, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, const int nBlocks, const int nThreads)
std::unique_ptr< GPUReconstructionTimeframe > tf