Project
Loading...
Searching...
No Matches
TrackerTraitsGPU.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
12
13#include <array>
14#include <vector>
15#include <unistd.h>
16
18
23
24namespace o2::its
25{
26
27template <int nLayers>
29{
30 mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], nLayers);
31
32 // on default stream
33 mTimeFrameGPU->loadVertices(iteration);
34 mTimeFrameGPU->loadIndexTableUtils(iteration);
35 mTimeFrameGPU->loadMultiplicityCutMask(iteration);
36 // pinned on host
37 mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
38 mTimeFrameGPU->createClustersDeviceArray(iteration);
39 mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
40 mTimeFrameGPU->createClustersIndexTablesArray(iteration);
41 mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
42 mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
43 // device array
44 mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
45 mTimeFrameGPU->createTrackletsBuffersArray(iteration);
46 mTimeFrameGPU->createCellsBuffersArray(iteration);
47 mTimeFrameGPU->createCellsLUTDeviceArray(iteration);
48}
49
50template <int nLayers>
52{
53 mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<nLayers>*>(tf);
54 this->mTimeFrame = static_cast<TimeFrame<nLayers>*>(tf);
55}
56
57template <int nLayers>
58void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int iROFslice, int iVertex)
59{
61
62 int startROF{0};
63 int endROF{mTimeFrameGPU->getNrof()};
64
65 // start by queuing loading needed of two last layers
66 for (int iLayer{nLayers}; iLayer-- > nLayers - 2;) {
67 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer);
68 mTimeFrameGPU->loadClustersDevice(iteration, iLayer);
69 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer);
70 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer);
71 mTimeFrameGPU->recordEvent(iLayer);
72 }
73
74 for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) {
75 if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels
76 mTimeFrameGPU->createUsedClustersDevice(iteration, iLayer - 1);
77 mTimeFrameGPU->loadClustersDevice(iteration, iLayer - 1);
78 mTimeFrameGPU->loadClustersIndexTables(iteration, iLayer - 1);
79 mTimeFrameGPU->loadROFrameClustersDevice(iteration, iLayer - 1);
80 mTimeFrameGPU->recordEvent(iLayer - 1);
81 }
82 mTimeFrameGPU->createTrackletsLUTDevice(iteration, iLayer);
83 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
84 countTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
85 mTimeFrameGPU->getDeviceMultCutMask(),
86 iLayer,
87 startROF,
88 endROF,
89 mTimeFrameGPU->getNrof(),
90 this->mTrkParams[iteration].DeltaROF,
91 iVertex,
92 mTimeFrameGPU->getDeviceVertices(),
93 mTimeFrameGPU->getDeviceROFramesPV(),
94 mTimeFrameGPU->getPrimaryVerticesNum(),
95 mTimeFrameGPU->getDeviceArrayClusters(),
96 mTimeFrameGPU->getClusterSizes(),
97 mTimeFrameGPU->getDeviceROFrameClusters(),
98 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
99 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
100 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
101 mTimeFrameGPU->getDeviceTrackletsLUTs(),
102 iteration,
103 this->mTrkParams[iteration].NSigmaCut,
104 mTimeFrameGPU->getPhiCuts(),
105 this->mTrkParams[iteration].PVres,
106 mTimeFrameGPU->getMinRs(),
107 mTimeFrameGPU->getMaxRs(),
108 mTimeFrameGPU->getPositionResolutions(),
109 this->mTrkParams[iteration].LayerRadii,
110 mTimeFrameGPU->getMSangles(),
111 mTimeFrameGPU->getExternalDeviceAllocator(),
112 conf.nBlocksLayerTracklets[iteration],
113 conf.nThreadsLayerTracklets[iteration],
114 mTimeFrameGPU->getStreams());
115 mTimeFrameGPU->createTrackletsBuffers(iLayer);
116 if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) {
117 continue;
118 }
119 computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
120 mTimeFrameGPU->getDeviceMultCutMask(),
121 iLayer,
122 startROF,
123 endROF,
124 mTimeFrameGPU->getNrof(),
125 this->mTrkParams[iteration].DeltaROF,
126 iVertex,
127 mTimeFrameGPU->getDeviceVertices(),
128 mTimeFrameGPU->getDeviceROFramesPV(),
129 mTimeFrameGPU->getPrimaryVerticesNum(),
130 mTimeFrameGPU->getDeviceArrayClusters(),
131 mTimeFrameGPU->getClusterSizes(),
132 mTimeFrameGPU->getDeviceROFrameClusters(),
133 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
134 mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
135 mTimeFrameGPU->getDeviceArrayTracklets(),
136 mTimeFrameGPU->getDeviceTracklets(),
137 mTimeFrameGPU->getNTracklets(),
138 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
139 mTimeFrameGPU->getDeviceTrackletsLUTs(),
140 iteration,
141 this->mTrkParams[iteration].NSigmaCut,
142 mTimeFrameGPU->getPhiCuts(),
143 this->mTrkParams[iteration].PVres,
144 mTimeFrameGPU->getMinRs(),
145 mTimeFrameGPU->getMaxRs(),
146 mTimeFrameGPU->getPositionResolutions(),
147 this->mTrkParams[iteration].LayerRadii,
148 mTimeFrameGPU->getMSangles(),
149 mTimeFrameGPU->getExternalDeviceAllocator(),
150 conf.nBlocksLayerTracklets[iteration],
151 conf.nThreadsLayerTracklets[iteration],
152 mTimeFrameGPU->getStreams());
153 }
154}
155
156template <int nLayers>
158{
160
161 // start by queuing loading needed of three last layers
162 for (int iLayer{nLayers}; iLayer-- > nLayers - 3;) {
163 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer);
164 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer);
165 mTimeFrameGPU->recordEvent(iLayer);
166 }
167
168 for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) {
169 if (iLayer) {
170 mTimeFrameGPU->loadUnsortedClustersDevice(iteration, iLayer - 1);
171 mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration, iLayer - 1);
172 mTimeFrameGPU->recordEvent(iLayer - 1);
173 }
174
175 // if there are no tracklets skip entirely
176 const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
177 if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) {
178 mTimeFrameGPU->getNCells()[iLayer] = 0;
179 continue;
180 }
181
182 mTimeFrameGPU->createCellsLUTDevice(iLayer);
183 mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available
184 mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available
185 countCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
186 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
187 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
188 mTimeFrameGPU->getDeviceArrayTracklets(),
189 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
190 currentLayerTrackletsNum,
191 iLayer,
192 nullptr,
193 mTimeFrameGPU->getDeviceArrayCellsLUT(),
194 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
195 this->mTrkParams[iteration].DeltaROF,
196 this->mBz,
197 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
198 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
199 this->mTrkParams[iteration].NSigmaCut,
200 mTimeFrameGPU->getExternalDeviceAllocator(),
201 conf.nBlocksLayerCells[iteration],
202 conf.nThreadsLayerCells[iteration],
203 mTimeFrameGPU->getStreams());
204 mTimeFrameGPU->createCellsBuffers(iLayer);
205 if (mTimeFrameGPU->getNCells()[iLayer] == 0) {
206 continue;
207 }
208 computeCellsHandler<nLayers>(mTimeFrameGPU->getDeviceArrayClusters(),
209 mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
210 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
211 mTimeFrameGPU->getDeviceArrayTracklets(),
212 mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
213 currentLayerTrackletsNum,
214 iLayer,
215 mTimeFrameGPU->getDeviceCells()[iLayer],
216 mTimeFrameGPU->getDeviceArrayCellsLUT(),
217 mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
218 this->mTrkParams[iteration].DeltaROF,
219 this->mBz,
220 this->mTrkParams[iteration].MaxChi2ClusterAttachment,
221 this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
222 this->mTrkParams[iteration].NSigmaCut,
223 conf.nBlocksLayerCells[iteration],
224 conf.nThreadsLayerCells[iteration],
225 mTimeFrameGPU->getStreams());
226 }
227}
228
229template <int nLayers>
231{
233
234 for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) {
235 const int currentLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer])};
236 const int nextLayerCellsNum{static_cast<int>(mTimeFrameGPU->getNCells()[iLayer + 1])};
237 if (!nextLayerCellsNum || !currentLayerCellsNum) {
238 mTimeFrameGPU->getNNeighbours()[iLayer] = 0;
239 continue;
240 }
241 mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer);
242 mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
243 countCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
244 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here.
245 mTimeFrameGPU->getDeviceArrayCellsLUT(),
246 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
247 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
248 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
249 this->mTrkParams[0].DeltaROF,
250 this->mTrkParams[0].MaxChi2ClusterAttachment,
251 this->mBz,
252 iLayer,
253 currentLayerCellsNum,
254 nextLayerCellsNum,
255 1e2,
256 mTimeFrameGPU->getExternalDeviceAllocator(),
257 conf.nBlocksFindNeighbours[iteration],
258 conf.nThreadsFindNeighbours[iteration],
259 mTimeFrameGPU->getStream(iLayer));
260 mTimeFrameGPU->createNeighboursDevice(iLayer);
261 if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) {
262 continue;
263 }
264 computeCellNeighboursHandler<nLayers>(mTimeFrameGPU->getDeviceArrayCells(),
265 mTimeFrameGPU->getDeviceNeighboursLUT(iLayer),
266 mTimeFrameGPU->getDeviceArrayCellsLUT(),
267 mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
268 mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
269 (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
270 this->mTrkParams[0].DeltaROF,
271 this->mTrkParams[0].MaxChi2ClusterAttachment,
272 this->mBz,
273 iLayer,
274 currentLayerCellsNum,
275 nextLayerCellsNum,
276 1e2,
277 conf.nBlocksFindNeighbours[iteration],
278 conf.nThreadsFindNeighbours[iteration],
279 mTimeFrameGPU->getStream(iLayer));
280 mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
281 mTimeFrameGPU->getDeviceNeighbours(iLayer),
282 mTimeFrameGPU->getArrayNNeighbours()[iLayer],
283 mTimeFrameGPU->getStream(iLayer),
284 mTimeFrameGPU->getExternalDeviceAllocator());
285 }
286 mTimeFrameGPU->syncStreams(false);
287}
288
289template <int nLayers>
291{
293 for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
294 const int minimumLayer{startLevel - 1};
295 bounded_vector<CellSeed<nLayers>> trackSeeds(this->getMemoryPool().get());
296 for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
297 if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
298 continue;
299 }
300 processNeighboursHandler<nLayers>(startLayer,
301 startLevel,
302 mTimeFrameGPU->getDeviceArrayCells(),
303 mTimeFrameGPU->getDeviceCells()[startLayer],
304 mTimeFrameGPU->getArrayNCells(),
305 (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(),
306 mTimeFrameGPU->getDeviceNeighboursAll(),
307 mTimeFrameGPU->getDeviceNeighboursLUTs(),
308 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
309 trackSeeds,
310 this->mBz,
311 this->mTrkParams[0].MaxChi2ClusterAttachment,
312 this->mTrkParams[0].MaxChi2NDF,
313 mTimeFrameGPU->getDevicePropagator(),
314 this->mTrkParams[0].CorrType,
315 mTimeFrameGPU->getExternalDeviceAllocator(),
316 conf.nBlocksProcessNeighbours[iteration],
317 conf.nThreadsProcessNeighbours[iteration]);
318 }
319 // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
320 if (trackSeeds.empty()) {
321 LOGP(debug, "No track seeds found, skipping track finding");
322 continue;
323 }
324 mTimeFrameGPU->createTrackITSExtDevice(trackSeeds);
325 mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
326
327 trackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(), // CellSeed* trackSeeds
328 mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), // TrackingFrameInfo** foundTrackingFrameInfo
329 mTimeFrameGPU->getDeviceTrackITSExt(), // o2::its::TrackITSExt* tracks
330 this->mTrkParams[iteration].MinPt, // std::vector<float>& minPtsHost,
331 trackSeeds.size(), // const size_t nSeeds
332 this->mBz, // const float Bz
333 startLevel, // const int startLevel,
334 this->mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
335 this->mTrkParams[0].MaxChi2NDF, // float maxChi2NDF
336 mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator
337 this->mTrkParams[0].CorrType, // o2::base::PropagatorImpl<float>::MatCorrType
338 conf.nBlocksTracksSeeds[iteration],
339 conf.nThreadsTracksSeeds[iteration]);
340
341 mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
342
343 auto& tracks = mTimeFrameGPU->getTrackITSExt();
344
345 for (auto& track : tracks) {
346 if (!track.getChi2()) {
347 continue; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised.
348 }
349 int nShared = 0;
350 bool isFirstShared{false};
351 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
352 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
353 continue;
354 }
355 nShared += int(mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer)));
356 isFirstShared |= !iLayer && mTimeFrameGPU->isClusterUsed(iLayer, track.getClusterIndex(iLayer));
357 }
358
359 if (nShared > this->mTrkParams[0].ClusterSharing) {
360 continue;
361 }
362
363 std::array<int, 3> rofs{INT_MAX, INT_MAX, INT_MAX};
364 for (int iLayer{0}; iLayer < this->mTrkParams[0].NLayers; ++iLayer) {
365 if (track.getClusterIndex(iLayer) == constants::UnusedIndex) {
366 continue;
367 }
368 mTimeFrameGPU->markUsedCluster(iLayer, track.getClusterIndex(iLayer));
369 int currentROF = mTimeFrameGPU->getClusterROF(iLayer, track.getClusterIndex(iLayer));
370 for (int iR{0}; iR < 3; ++iR) {
371 if (rofs[iR] == INT_MAX) {
372 rofs[iR] = currentROF;
373 }
374 if (rofs[iR] == currentROF) {
375 break;
376 }
377 }
378 }
379 if (rofs[2] != INT_MAX) {
380 continue;
381 }
382 if (rofs[1] != INT_MAX) {
383 track.setNextROFbit();
384 }
385 mTimeFrameGPU->getTracks(std::min(rofs[0], rofs[1])).emplace_back(track);
386 }
387 mTimeFrameGPU->loadUsedClustersDevice();
388 }
389};
390
391template <int nLayers>
393{
394 return mTimeFrameGPU->getNumberOfClusters();
395}
396
397template <int nLayers>
399{
400 return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
401}
402
403template <int nLayers>
405{
406 return mTimeFrameGPU->getNumberOfCells();
407}
408
409template <int nLayers>
411{
412 this->mBz = bz;
413 mTimeFrameGPU->setBz(bz);
414}
415
416template class TrackerTraitsGPU<7>;
417} // namespace o2::its
Definition of the ITS track.
std::ostringstream debug
int getTFNumberOfClusters() const override
void initialiseTimeFrame(const int iteration) final
void findCellsNeighbours(const int iteration) final
void adoptTimeFrame(TimeFrame< nLayers > *tf) final
void computeLayerCells(const int iteration) final
void computeLayerTracklets(const int iteration, int, int) final
int getTFNumberOfTracklets() const override
int getTFNumberOfCells() const override
void findRoads(const int iteration) final
auto get(const std::byte *buffer, size_t=0)
Definition DataHeader.h:454
std::pmr::vector< T > bounded_vector
int filterCellNeighboursHandler(gpuPair< int, int > *, int *, unsigned int, gpu::Stream &, o2::its::ExternalAllocator *=nullptr)
void trackSeedHandler(CellSeed< nLayers > *trackSeeds, const TrackingFrameInfo **foundTrackingFrameInfo, o2::its::TrackITSExt *tracks, std::vector< float > &minPtsHost, const unsigned int nSeeds, const float Bz, const int startLevel, float maxChi2ClusterAttachment, float maxChi2NDF, const o2::base::Propagator *propagator, const o2::base::PropagatorF::MatCorrType matCorrType, const int nBlocks, const int nThreads)
std::unique_ptr< GPUReconstructionTimeframe > tf