Project
Loading...
Searching...
No Matches
TrackerTraitsGPU.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#include <array>
14#include <vector>
15#include <unistd.h>
16
18
23
24namespace o2::its
25{
26
27template <int nLayers>
29{
30 mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], nLayers);
31 // on default stream
32 mTimeFrameGPU->loadVertices(iteration);
33 mTimeFrameGPU->loadIndexTableUtils(iteration);
34 mTimeFrameGPU->loadMultiplicityCutMask(iteration);
35 // pinned on host
36 mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
37 mTimeFrameGPU->createClustersDeviceArray(iteration);
38 mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
39 mTimeFrameGPU->createClustersIndexTablesArray(iteration);
40 mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
41 mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
42 // device array
43 mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
44 mTimeFrameGPU->createTrackletsBuffersArray(iteration);
45 mTimeFrameGPU->createCellsBuffersArray(iteration);
46 mTimeFrameGPU->createCellsLUTDeviceArray(iteration);
47 // push every create artefact on the stack
48 mTimeFrameGPU->pushMemoryStack(iteration);
49}
50
51template <int nLayers>
53{
54 mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<nLayers>*>(tf);
55 this->mTimeFrame = static_cast<TimeFrame<nLayers>*>(tf);
56}
57
58template <int nLayers>
59void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int iROFslice, int iVertex)
60{
62
63 int startROF{0};
64 int endROF{mTimeFrameGPU->getNrof()};
65
66 // start by queuing loading needed of two last layers
67 for (int iLayer{nLayers}; iLayer-- > nLayers - 2;) {
68 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer);
69 mTimeFrameGPU->loadClustersDevice(iteration, iLayer);
70 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer);
71 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer);
72 mTimeFrameGPU->recordEvent(iLayer);
73 }
74
75 for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) {
76 if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels
77 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer - 1);
78 mTimeFrameGPU->loadClustersDevice(iteration, iLayer - 1);
79 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer - 1);
80 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer - 1);
81 mTimeFrameGPU->recordEvent(iLayer - 1);
82 }
83 mTimeFrameGPU->createTrackletsLUTDevice(iteration, iLayer);
84 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
85 countTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
86 mTimeFrameGPU->getDeviceMultCutMask(),
87 iLayer,
88 startROF,
89 endROF,
90 mTimeFrameGPU->getNrof(),
91 this->mTrkParams[iteration].DeltaROF,
92 iVertex,
93 mTimeFrameGPU->getDeviceVertices(),
94 mTimeFrameGPU->getDeviceROFramesPV(),
95 mTimeFrameGPU->getPrimaryVerticesNum(),
96 mTimeFrameGPU->getDeviceArrayClusters(),
97 mTimeFrameGPU->getClusterSizes(),
98 mTimeFrameGPU->getDeviceROFrameClusters(),
99 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
100 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
101 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
102 mTimeFrameGPU->getDeviceTrackletsLUTs(),
103 iteration,
104 this->mTrkParams[iteration].NSigmaCut,
105 mTimeFrameGPU->getPhiCuts(),
106 this->mTrkParams[iteration].PVres,
107 mTimeFrameGPU->getMinRs(),
108 mTimeFrameGPU->getMaxRs(),
109 mTimeFrameGPU->getPositionResolutions(),
110 this->mTrkParams[iteration].LayerRadii,
111 mTimeFrameGPU->getMSangles(),
112 mTimeFrameGPU->getFrameworkAllocator(),
113 conf.nBlocksLayerTracklets[iteration],
114 conf.nThreadsLayerTracklets[iteration],
115 mTimeFrameGPU->getStreams());
116 mTimeFrameGPU->createTrackletsBuffers(iLayer);
117 if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) {
118 continue;
119 }
120 computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
121 mTimeFrameGPU->getDeviceMultCutMask(),
122 iLayer,
123 startROF,
124 endROF,
125 mTimeFrameGPU->getNrof(),
126 this->mTrkParams[iteration].DeltaROF,
127 iVertex,
128 mTimeFrameGPU->getDeviceVertices(),
129 mTimeFrameGPU->getDeviceROFramesPV(),
130 mTimeFrameGPU->getPrimaryVerticesNum(),
131 mTimeFrameGPU->getDeviceArrayClusters(),
132 mTimeFrameGPU->getClusterSizes(),
133 mTimeFrameGPU->getDeviceROFrameClusters(),
134 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
135 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
136 mTimeFrameGPU->getDeviceArrayTracklets(),
137 mTimeFrameGPU->getDeviceTracklets(),
138 mTimeFrameGPU->getNTracklets(),
139 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
140 mTimeFrameGPU->getDeviceTrackletsLUTs(),
141 iteration,
142 this->mTrkParams[iteration].NSigmaCut,
143 mTimeFrameGPU->getPhiCuts(),
144 this->mTrkParams[iteration].PVres,
145 mTimeFrameGPU->getMinRs(),
146 mTimeFrameGPU->getMaxRs(),
147 mTimeFrameGPU->getPositionResolutions(),
148 this->mTrkParams[iteration].LayerRadii,
149 mTimeFrameGPU->getMSangles(),
150 mTimeFrameGPU->getFrameworkAllocator(),
151 conf.nBlocksLayerTracklets[iteration],
152 conf.nThreadsLayerTracklets[iteration],
153 mTimeFrameGPU->getStreams());
154 }
155}
156
157template <int nLayers>
159{
161
162 // start by queuing loading needed of three last layers
163 for (int iLayer{nLayers}; iLayer-- > nLayers - 3;) {
164 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer);
165 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer);
166 mTimeFrameGPU->recordEvent(iLayer);
167 }
168
169 for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) {
170 if (iLayer) {
171 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer - 1);
172 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer - 1);
173 mTimeFrameGPU->recordEvent(iLayer - 1);
174 }
175
176 // if there are no tracklets skip entirely
177 const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
178 if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) {
179 mTimeFrameGPU->getNCells()[iLayer] = 0;
180 continue;
181 }
182
183 mTimeFrameGPU->createCellsLUTDevice(iLayer);
184 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
185 mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available
186 countCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
187 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
188 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
189 mTimeFrameGPU->getDeviceArrayTracklets(),
190 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
191 currentLayerTrackletsNum,
192 iLayer,
193 nullptr,
194 mTimeFrameGPU->getDeviceArrayCellsLUT(),
195 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
196 this->mTrkParams[iteration].DeltaROF,
197 this->mBz,
198 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
199 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
200 this->mTrkParams[iteration].NSigmaCut,
201 mTimeFrameGPU->getFrameworkAllocator(),
202 conf.nBlocksLayerCells[iteration],
203 conf.nThreadsLayerCells[iteration],
204 mTimeFrameGPU->getStreams());
205 mTimeFrameGPU->createCellsBuffers(iLayer);
206 if (mTimeFrameGPU->getNCells()[iLayer] == 0) {
207 continue;
208 }
209 computeCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
210 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
211 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
212 mTimeFrameGPU->getDeviceArrayTracklets(),
213 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
214 currentLayerTrackletsNum,
215 iLayer,
216 mTimeFrameGPU->getDeviceCells()[iLayer],
217 mTimeFrameGPU->getDeviceArrayCellsLUT(),
218 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
219 this->mTrkParams[iteration].DeltaROF,
220 this->mBz,
221 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
222 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
223 this->mTrkParams[iteration].NSigmaCut,
224 conf.nBlocksLayerCells[iteration],
225 conf.nThreadsLayerCells[iteration],
226 mTimeFrameGPU->getStreams());
227 }
228}
229
230template <int nLayers>
232{
234
235 for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) {
236 const int currentLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer])};
237 const int nextLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer + 1])};
238 if (!nextLayerCellsNum || !currentLayerCellsNum) {
239 mTimeFrameGPU->getNNeighbours()[iLayer] = 0;
240 continue;
241 }
242 mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer);
243 mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
244 countCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
245 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here.
246 mTimeFrameGPU->getDeviceArrayCellsLUT(),
247 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
248 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
249 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
250 this->mTrkParams[0].DeltaROF,
251 this->mTrkParams[0].MaxChi2ClusterAttachment,
252 this->mBz,
253 iLayer,
254 currentLayerCellsNum,
255 nextLayerCellsNum,
256 1e2,
257 mTimeFrameGPU->getFrameworkAllocator(),
258 conf.nBlocksFindNeighbours[iteration],
259 conf.nThreadsFindNeighbours[iteration],
260 mTimeFrameGPU->getStream(iLayer));
261 mTimeFrameGPU->createNeighboursDevice(iLayer);
262 if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) {
263 continue;
264 }
265 computeCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
266 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer),
267 mTimeFrameGPU->getDeviceArrayCellsLUT(),
268 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
269 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
270 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
271 this->mTrkParams[0].DeltaROF,
272 this->mTrkParams[0].MaxChi2ClusterAttachment,
273 this->mBz,
274 iLayer,
275 currentLayerCellsNum,
276 nextLayerCellsNum,
277 1e2,
278 conf.nBlocksFindNeighbours[iteration],
279 conf.nThreadsFindNeighbours[iteration],
280 mTimeFrameGPU->getStream(iLayer));
281 mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
282 mTimeFrameGPU->getDeviceNeighbours(iLayer),
283 mTimeFrameGPU->getArrayNNeighbours()[iLayer],
284 mTimeFrameGPU->getStream(iLayer),
285 mTimeFrameGPU->getFrameworkAllocator());
286 }
287 mTimeFrameGPU->syncStreams(false);
288}
289
290template <int nLayers>
292{
294 for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
295 const int minimumLayer{startLevel - 1};
296 bounded_vector<CellSeed<nLayers>> trackSeeds(this->getMemoryPool().get());
297 for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
298 if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
299 continue;
300 }
301 processNeighboursHandler<nLayers>(startLayer,
302 startLevel,
303 mTimeFrameGPU->getDeviceArrayCells(),
304 mTimeFrameGPU->getDeviceCells()[startLayer],
305 mTimeFrameGPU->getArrayNCells(),
306 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
307 mTimeFrameGPU->getDeviceNeighboursAll(),
308 mTimeFrameGPU->getDeviceNeighboursLUTs(),
309 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
310 trackSeeds,
311 this->mBz,
312 this->mTrkParams[0].MaxChi2ClusterAttachment,
313 this->mTrkParams[0].MaxChi2NDF,
314 mTimeFrameGPU->getDevicePropagator(),
315 this->mTrkParams[0].CorrType,
316 mTimeFrameGPU->getFrameworkAllocator(),
317 conf.nBlocksProcessNeighbours[iteration],
318 conf.nThreadsProcessNeighbours[iteration]);
319 }
320 // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
321 if (trackSeeds.empty()) {
322 LOGP(debug, "No track seeds found, skipping track finding");
323 continue;
324 }
325 mTimeFrameGPU->createTrackITSExtDevice(trackSeeds);
326 mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
327
328 trackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(), // CellSeed* trackSeeds
329 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), // TrackingFrameInfo** foundTrackingFrameInfo
330 mTimeFrameGPU->getDeviceTrackITSExt(), // o2::its::TrackITSExt* tracks
331 this->mTrkParams[iteration].MinPt, // std::vector<float>& minPtsHost,
332 trackSeeds.size(), // const size_t nSeeds
333 this->mBz, // const float Bz
334 startLevel, // const int startLevel,
335 this->mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
336 this->mTrkParams[0].MaxChi2NDF, // float maxChi2NDF
337 mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator
338 this->mTrkParams[0].CorrType, // o2::base::PropagatorImpl<float>::MatCorrType
339 conf.nBlocksTracksSeeds[iteration],
340 conf.nThreadsTracksSeeds[iteration]);
341
342 mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
343
344 auto& tracks = mTimeFrameGPU->getTrackITSExt();
345
346 for (auto& track : tracks) {
347 if (!track.getChi2()) {
348 continue; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised.
349 }
350 int nShared = 0;
351 bool isFirstShared{false};
352 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
353 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
354 continue;
355 }
356 nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer)));
357 isFirstShared |= !iLayer && mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer));
358 }
359
360 if (nShared > this->mTrkParams[0].ClusterSharing) {
361 continue;
362 }
363
364 std::array<int, 3> rofs{INT_MAX, INT_MAX, INT_MAX};
365 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
366 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
367 continue;
368 }
369 mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer));
370 int currentROF = mTimeFrameGPU->getClusterROF(iLayer, track.getClusterIndex(iLayer));
371 for (int iR{0}; iR < 3; ++iR) {
372 if (rofs[iR] == INT_MAX) {
373 rofs[iR] = currentROF;
374 }
375 if (rofs[iR] == currentROF) {
376 break;
377 }
378 }
379 }
380 if (rofs[2] != INT_MAX) {
381 continue;
382 }
383 if (rofs[1] != INT_MAX) {
384 track.setNextROFbit();
385 }
386 mTimeFrameGPU->getTracks(std::min(rofs[0], rofs[1])).emplace_back(track);
387 }
388 mTimeFrameGPU->loadUsedClustersDevice();
389 }
390 // wipe the artefact memory
391 mTimeFrameGPU->popMemoryStack(iteration);
392};
393
394template <int nLayers>
396{
397 return mTimeFrameGPU->getNumberOfClusters();
398}
399
400template <int nLayers>
402{
403 return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
404}
405
406template <int nLayers>
408{
409 return mTimeFrameGPU->getNumberOfCells();
410}
411
412template <int nLayers>
414{
415 this->mBz = bz;
416 mTimeFrameGPU->setBz(bz);
417}
418
419template class TrackerTraitsGPU<7>;
420} // namespace o2::its
std::ostringstream debug
Definition of the ITS track.
int getTFNumberOfClusters() const override
void initialiseTimeFrame(const int iteration) final
void findCellsNeighbours(const int iteration) final
void adoptTimeFrame(TimeFrame< nLayers > *tf) final
void computeLayerCells(const int iteration) final
void computeLayerTracklets(const int iteration, int, int) final
int getTFNumberOfTracklets() const override
int getTFNumberOfCells() const override
void findRoads(const int iteration) final
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pmr::vector< T > bounded_vector
int filterCellNeighboursHandler(gpuPair< int, int > *, int *, unsigned int, gpu::Stream &, o2::its::ExternalAllocator *=nullptr)
void trackSeedHandler(CellSeed< nLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, o2::its::TrackITSExt *tracks, std::vector< float > &minPtsHost, const unsigned int nSeeds, const float Bz, const int startLevel, float maxChi2ClusterAttachment, float maxChi2NDF, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, const int nBlocks, const int nThreads)
std::unique_ptr< GPUReconstructionTimeframe > tf