GPUTPCGMMerger.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#define GPUCA_CADEBUG 0
16#define GPUCA_MERGE_LOOPER_MC 0
17
18#include "GPUCommonDef.h"
19
20#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
22#include "GPUROOTDump.h"
23#endif
24
25#ifndef GPUCA_GPUCODE_DEVICE
26#include <cstdio>
27#include <cstring>
28#include <cmath>
29#include "GPUReconstruction.h"
30#endif
31
32#include "GPUTPCTracker.h"
33#include "GPUTPCClusterData.h"
34#include "GPUTPCTrackParam.h"
35#include "GPUTPCGMMerger.h"
36#include "GPUO2DataTypes.h"
37#include "TPCFastTransform.h"
38#include "GPUTPCConvertImpl.h"
39
40#include "GPUCommonMath.h"
41#include "GPUCommonAlgorithm.h"
42#include "GPUCommonConstants.h"
43
44#include "GPUTPCTrackParam.h"
45#include "GPUTPCSectorOutput.h"
46#include "GPUTPCGMMergedTrack.h"
47#include "GPUParam.h"
49
50#include "GPUTPCGMTrackParam.h"
51#include "GPUTPCGMSectorTrack.h"
52#include "GPUTPCGMBorderTrack.h"
53
56#ifndef GPUCA_GPUCODE
59#endif
60
61namespace o2::gpu::internal
62{
63}
64using namespace o2::gpu;
65using namespace o2::gpu::internal;
66using namespace o2::tpc;
67using namespace gputpcgmmergertypes;
68
69static constexpr int32_t kMaxParts = 400;
70static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;
71
72namespace o2::gpu::internal
73{
74struct MergeLooperParam {
75 float refz;
76 float x;
77 float y;
78 uint32_t id;
79};
80} // namespace o2::gpu::internal
81
82#ifndef GPUCA_GPUCODE
83
84#include "GPUQA.h"
86
88{
89 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
90 mNextSectorInd[iSector] = iSector + 1;
91 mPrevSectorInd[iSector] = iSector - 1;
92 }
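  // Close the sector-neighbor lists separately for the two TPC sides: sectors [0, NSECTORS/2) and [NSECTORS/2, NSECTORS) each form their own ring.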
93 int32_t mid = NSECTORS / 2 - 1;
94 int32_t last = NSECTORS - 1;
95 mNextSectorInd[mid] = 0;
96 mPrevSectorInd[0] = mid;
97 mNextSectorInd[last] = NSECTORS / 2;
98 mPrevSectorInd[NSECTORS / 2] = last;
99}
100
101// DEBUG CODE
102#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
103#include "GPUQAHelper.h"
104
105void GPUTPCGMMerger::CheckMergedTracks()
106{
107 std::vector<bool> trkUsed(SectorTrackInfoLocalTotal());
108 for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
109 trkUsed[i] = false;
110 }
111
112 for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) {
113 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
114 if (track.PrevSegmentNeighbour() >= 0) {
115 continue;
116 }
117 if (track.PrevNeighbour() >= 0) {
118 continue;
119 }
120 int32_t leg = 0;
121 GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
122 while (true) {
123 int32_t iTrk = tr - mSectorTrackInfos;
124 if (trkUsed[iTrk]) {
125 GPUError("FAILURE: double use");
126 }
127 trkUsed[iTrk] = true;
128
129 int32_t jtr = tr->NextSegmentNeighbour();
130 if (jtr >= 0) {
131 tr = &(mSectorTrackInfos[jtr]);
132 continue;
133 }
134 jtr = trbase->NextNeighbour();
135 if (jtr >= 0) {
136 trbase = &(mSectorTrackInfos[jtr]);
137 tr = trbase;
138 if (tr->PrevSegmentNeighbour() >= 0) {
139 break;
140 }
141 leg++;
142 continue;
143 }
144 break;
145 }
146 }
147 for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
148 if (trkUsed[i] == false) {
149 GPUError("FAILURE: trk missed");
150 }
151 }
152}
153
154template <class T>
155inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
156{
157 return a;
158}
159template <>
160inline const auto* resolveMCLabels<AliHLTTPCClusterMCLabel>(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
161{
162 return b;
163}
164
165template <class T, class S>
166int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const
167{
168 GPUTPCGMSectorTrack* sectorTrack = nullptr;
169 int32_t nClusters = 0;
170 if constexpr (std::is_same<S, GPUTPCGMBorderTrack&>::value) {
171 sectorTrack = &mSectorTrackInfos[trk.TrackID()];
172 nClusters = sectorTrack->OrigTrack()->NHits();
173 } else {
174 nClusters = trk.NClusters();
175 }
176 auto acc = GPUTPCTrkLbl<false, GPUTPCTrkLbl_ret>(resolveMCLabels<T>(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f);
177 for (int32_t i = 0; i < nClusters; i++) {
178 int32_t id;
179 if constexpr (std::is_same<S, GPUTPCGMBorderTrack&>::value) {
180 const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()];
181 const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i];
182 id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0];
183 } else {
184 id = mClusters[trk.FirstClusterRef() + i].num;
185 }
186 acc.addLabel(id);
187 }
188 return acc.computeLabel().id;
189}
190
191template <class S>
192int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const
193{
194#ifdef GPUCA_TPC_GEOMETRY_O2
195 if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) {
196 return GetTrackLabelA<o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>, S>(trk);
197 } else
198#endif
199 {
200 return GetTrackLabelA<AliHLTTPCClusterMCLabel, S>(trk);
201 }
202}
203
204#endif
205// END DEBUG CODE
206
207void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const
208{
209 const GPUTPCGMSectorTrack* orgTrack = trk;
210 while (trk->PrevSegmentNeighbour() >= 0) {
211 trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()];
212 }
213 const GPUTPCGMSectorTrack* orgTower = trk;
214 while (trk->PrevNeighbour() >= 0) {
215 trk = &mSectorTrackInfos[trk->PrevNeighbour()];
216 }
217
218 int32_t nextId = trk - mSectorTrackInfos;
219 out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n";
220 while (nextId >= 0) {
221 trk = &mSectorTrackInfos[nextId];
222 if (trk->PrevSegmentNeighbour() >= 0) {
223 out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n";
224 }
225 out << (trk == orgTower ? "--" : " ");
226 while (nextId >= 0) {
227 GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId];
228 if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) {
229 out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") ";
230 }
231 char tmp[128];
232 snprintf(tmp, 128, " %s%5d(%5.2f)", trk2 == orgTrack ? "!" : " ", nextId, trk2->QPt());
233 out << tmp;
234 nextId = trk2->NextSegmentNeighbour();
235 }
236 out << "\n";
237 nextId = trk->NextNeighbour();
238 }
239}
240
242
244{
245 computePointerWithAlignment(mem, mSectorTrackInfos, mNTotalSectorTracks);
246 computePointerWithAlignment(mem, mSectorTrackInfoIndex, NSECTORS * 2 + 1);
247 if (mRec->GetProcessingSettings().deterministicGPUReconstruction) {
248 computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSectorTracks, mNMaxTracks * 2));
249 }
250
251 void* memBase = mem;
252 computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSectorTracks); // MergeBorders & Resolve
253 computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSectorTracks);
254 int32_t nTracks = 0;
255 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
256 const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
257 mBorder[iSector] = mBorderMemory + 2 * nTracks;
258 mBorder[NSECTORS + iSector] = mBorderMemory + 2 * nTracks + n;
259 mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks;
260 nTracks += n;
261 }
262 computePointerWithAlignment(mem, mTrackLinks, mNTotalSectorTracks);
263 computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSectorTracks);
264 void* memMax = mem;
265 mem = memBase;
266 computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal
267 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
268 mem = memBase;
269 computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareClustersForFit0 - SortTracksQPt - PrepareClustersForFit1 - PrepareClustersForFit1 / Finalize0 - Finalize2
270 computePointerWithAlignment(mem, mSharedCount, mNMaxClusters);
271 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
272 mem = memBase;
273 computePointerWithAlignment(mem, mLoopData, mNMaxTracks); // GPUTPCGMMergerTrackFit - GPUTPCGMMergerFollowLoopers
274 computePointerWithAlignment(mem, mRetryRefitIds, mNMaxTracks); // Reducing mNMaxTracks for mLoopData / mRetryRefitIds does not save memory, since the other parts are larger anyway
275 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
276 mem = memBase;
277 computePointerWithAlignment(mem, mLooperCandidates, mNMaxLooperMatches); // MergeLoopers 1-3
278 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
279 return memMax;
280}
281
283{
284 computePointerWithAlignment(mem, mMemory);
285 return mem;
286}
287
289{
290 computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks);
291 if (mRec->GetProcessingSettings().mergerSortTracks) {
292 computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks);
293 }
294 return mem;
295}
296
298{
299 computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks);
301 computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks);
302 }
303 computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters);
304 if (mRec->GetParam().par.earlyTpcTransform) {
305 computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters);
306 }
307 computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters);
308 return mem;
309}
310
312{
313 if ((mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) || mRec->GetProcessingSettings().outputSharedClusterMap) {
314 computePointerWithAlignment(mem, mClusterStateExt, mNMaxClusters);
315 } else {
316 mClusterStateExt = nullptr;
317 }
318 return mem;
319}
320
322{
323 computePointerWithAlignment(mem, mOutputTracksTPCO2, HostProcessor(this).NOutputTracksTPCO2());
324 return mem;
325}
326
328{
329 computePointerWithAlignment(mem, mOutputClusRefsTPCO2, HostProcessor(this).NOutputClusRefsTPCO2());
330 return mem;
331}
332
334{
335 computePointerWithAlignment(mem, mOutputTracksTPCO2MC, HostProcessor(this).NOutputTracksTPCO2());
336 return mem;
337}
338
340{
341 computePointerWithAlignment(mem, mTrackSortO2, mNMaxTracks);
342 computePointerWithAlignment(mem, mClusRefTmp, mNMaxTracks);
343 return mem;
344}
345
347{
353 if (mRec->GetProcessingSettings().createO2Output) {
357 if (mRec->GetProcessingSettings().runMC) {
359 }
360 }
362}
363
365{
366 mNTotalSectorTracks = 0;
367 mNClusters = 0;
368 mNMaxSingleSectorTracks = 0;
369 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
370 uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
371 mNTotalSectorTracks += ntrk;
372 mNClusters += *mRec->GetConstantMem().tpcTrackers[iSector].NTrackHits();
373 if (mNMaxSingleSectorTracks < ntrk) {
374 mNMaxSingleSectorTracks = ntrk;
375 }
376 }
377 mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters);
378 if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) {
379 mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks);
380 } else {
381 mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks);
382 }
383 if (io.clustersNative) {
384 mNMaxClusters = io.clustersNative->nClustersTotal;
385 } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) {
386 mNMaxClusters = 0;
387 for (int32_t i = 0; i < NSECTORS; i++) {
388 mNMaxClusters += mRec->GetConstantMem().tpcTrackers[i].NHitsTotal();
389 }
390 } else {
391 mNMaxClusters = mNClusters;
392 }
393 mNMaxLooperMatches = mNMaxClusters / 4; // We have that much scratch memory anyway
394}
395
397{
398 for (int32_t i = 0; i < NSECTORS; i++) {
399 if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSectorTracks) {
400 throw std::runtime_error("mNMaxSingleSectorTracks too small");
401 }
402 }
403 if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) {
404 throw std::runtime_error("Must run also sector tracking");
405 }
406 return 0;
407}
408
409#endif // GPUCA_GPUCODE
410
411GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output)
412{
413 const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal();
414 for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) {
415 mTrackLinks[i] = -1;
416 }
417}
418
419GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector)
420{
421 GPUTPCGMPropagator prop;
422 prop.SetMaterialTPC();
423 prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI);
424 prop.SetToyMCEventsFlag(false);
425 prop.SetSeedingErrors(true); // Larger errors for seeds, better since we don't start with a good hypothesis
426 prop.SetFitInProjections(false);
427 prop.SetPolynomialField(&Param().polynomialField);
428 GPUTPCGMTrackParam trk;
429 trk.X() = inTrack->Param().GetX();
430 trk.Y() = inTrack->Param().GetY();
431 trk.Z() = inTrack->Param().GetZ();
432 trk.SinPhi() = inTrack->Param().GetSinPhi();
433 trk.DzDs() = inTrack->Param().GetDzDs();
434 trk.QPt() = inTrack->Param().GetQPt();
435 trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin);
436 trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead
437 sectorTrack.SetX2(0.f);
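  // Two fit passes follow: pass 0 runs over the hits in forward order and stores the result via Set(); pass 1 refits in reverse order with Bz-only propagation and stores it via SetParam2().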
438 for (int32_t way = 0; way < 2; way++) {
439 if (way) {
440 prop.SetFitInProjections(true);
441 prop.SetPropagateBzOnly(true);
442 }
443 trk.ResetCovariance();
444 prop.SetTrack(&trk, alpha);
445 int32_t start = way ? inTrack->NHits() - 1 : 0;
446 int32_t end = way ? 0 : (inTrack->NHits() - 1);
447 int32_t incr = way ? -1 : 1;
448 for (int32_t i = start; i != end; i += incr) {
449 float x, y, z;
450 int32_t row, flags;
451 const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector];
452 const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i];
453 int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex());
454 row = ic.RowIndex();
455 const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex];
456 flags = cl.getFlags();
457 if (Param().par.earlyTpcTransform) {
458 x = tracker.Data().ClusterData()[clusterIndex].x;
459 y = tracker.Data().ClusterData()[clusterIndex].y;
460 z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset();
461 } else {
462 GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset());
463 }
464 if (prop.PropagateToXAlpha(x, alpha, true)) {
465 return way == 0;
466 }
467 trk.ConstrainSinPhi();
468 if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge
469 return way == 0;
470 }
471 trk.ConstrainSinPhi();
472 }
473 if (way) {
474 sectorTrack.SetParam2(trk);
475 } else {
476 sectorTrack.Set(trk, inTrack, alpha, sector);
477 }
478 }
479 return 0;
480}
481
482GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr)
483{
484 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
485 const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()];
486 const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1];
487 int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex());
488 int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex());
489 if (Param().par.earlyTpcTransform) {
490 track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z);
491 } else {
492 const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0];
493 track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime());
494 }
495}
496
497GPUd() void GPUTPCGMMerger::UnpackSaveNumber(int32_t id)
498{
499 mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks;
500}
501
502GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
503{
504 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
505 float alpha = Param().Alpha(iSector);
506 const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector];
507 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
508 uint32_t nTracks = *trk.NTracks();
509 for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) {
510 sectorTr = &trk.Tracks()[itr];
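    // LocalTrackId() packs the sector of the seeding local track into the upper bits and its index into the lower 24 bits; mTrackIDs maps this to the merger track created from that local track (-1 if it was dropped).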
511 int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)];
512 if (localId == -1) {
513 continue;
514 }
515 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
516 GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack];
517 SetTrackClusterZT(track, iSector, sectorTr);
518 track.Set(this, sectorTr, alpha, iSector);
519 track.SetGlobalSectorTrackCov();
520 track.SetPrevNeighbour(-1);
521 track.SetNextNeighbour(-1);
522 track.SetNextSegmentNeighbour(-1);
523 track.SetPrevSegmentNeighbour(-1);
524 track.SetLocalTrackId(localId);
525 }
526}
527
528GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
529{
530 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
531 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
532 for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) {
533 mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1;
534 }
535}
536
537GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
538{
539 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
540 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
541
542 float alpha = Param().Alpha(iSector);
543 const GPUTPCTrack* sectorTr = nullptr;
544
545 for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) {
546 sectorTr = &trk.Tracks()[itr];
547 GPUTPCGMSectorTrack track;
548 SetTrackClusterZT(track, iSector, sectorTr);
549 if (Param().rec.tpc.mergerCovSource == 0) {
550 track.Set(this, sectorTr, alpha, iSector);
551 if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) {
552 continue;
553 }
554 } else if (Param().rec.tpc.mergerCovSource == 1) {
555 track.Set(this, sectorTr, alpha, iSector);
556 track.CopyBaseTrackCov();
557 } else if (Param().rec.tpc.mergerCovSource == 2) {
558 if (RefitSectorTrack(track, sectorTr, alpha, iSector)) {
559 track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail? It shouldn't; this workaround should be removed
560 if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) {
561 continue;
562 }
563 }
564 }
565
566 CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs()));
567 track.SetPrevNeighbour(-1);
568 track.SetNextNeighbour(-1);
569 track.SetNextSegmentNeighbour(-1);
570 track.SetPrevSegmentNeighbour(-1);
571 track.SetExtrapolatedTrackId(0, -1);
572 track.SetExtrapolatedTrackId(1, -1);
573 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
574 mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack;
575 mSectorTrackInfos[myTrack] = track;
576 }
577}
578
579GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
580{
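  // Attach each extrapolated track to its seeding local track: slot 0 is claimed atomically via CAS, otherwise the track goes into slot 1.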
581 for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) {
582 GPUTPCGMSectorTrack& extrapolatedTrack = mSectorTrackInfos[itr];
583 GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId()];
584 if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) {
585 localTrack.SetExtrapolatedTrackId(1, itr);
586 }
587 }
588}
589
590GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam)
591{
592 //* prepare sector tracks for merging with the next/previous/same sector
593 //* each track is transported to the border line
594
595 float fieldBz = Param().bzCLight;
596
597 float dAlpha = Param().par.dAlpha / 2;
598 float x0 = 0;
599
600 if (iBorder == 0) { // transport to the left edge of the sector and rotate horizontally
601 dAlpha = dAlpha - CAMath::Pi() / 2;
602 } else if (iBorder == 1) { // transport to the right edge of the sector and rotate horizontally
603 dAlpha = -dAlpha - CAMath::Pi() / 2;
604 } else if (iBorder == 2) { // transport to the middle of the sector and rotate vertically to the border on the left
605 x0 = Param().tpcGeometry.Row2X(63);
606 } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right
607 dAlpha = -dAlpha;
608 x0 = Param().tpcGeometry.Row2X(63);
609 } else if (iBorder == 4) { // transport to the middle of the sector, w/o rotation
610 dAlpha = 0;
611 x0 = Param().tpcGeometry.Row2X(63);
612 }
613
614 const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
615 float cosAlpha = CAMath::Cos(dAlpha);
616 float sinAlpha = CAMath::Sin(dAlpha);
617
618 GPUTPCGMSectorTrack trackTmp;
619 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
620 const GPUTPCGMSectorTrack* track = &mSectorTrackInfos[itr];
621 int32_t iSector = track->Sector();
622
623 if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) {
624 continue;
625 }
626 if (useOrigTrackParam) { // TODO: Check how far this makes sense with sector track refit
627 if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) {
628 continue;
629 }
630 const GPUTPCGMSectorTrack* trackMin = track;
631 while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) {
632 track = &mSectorTrackInfos[track->NextSegmentNeighbour()];
633 if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) {
634 trackMin = track;
635 }
636 }
637 trackTmp = *trackMin;
638 track = &trackTmp;
639 if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) {
640 trackTmp.UseParam2();
641 } else {
642 trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector());
643 }
644 } else {
645 if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) {
646 if (iBorder == 0 && track->NextNeighbour() >= 0) {
647 continue;
648 }
649 if (iBorder == 1 && track->PrevNeighbour() >= 0) {
650 continue;
651 }
652 }
653 }
654 GPUTPCGMBorderTrack b;
655
656 if (track->TransportToXAlpha(this, x0, sinAlpha, cosAlpha, fieldBz, b, maxSin)) {
657 b.SetTrackID(itr);
658 b.SetNClusters(track->NClusters());
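      // Clamp the covariance entries of the border track to maximum values before the merge comparisons.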
659 for (int32_t i = 0; i < 4; i++) {
660 if (CAMath::Abs(b.Cov()[i]) >= 5.0f) {
661 b.SetCov(i, 5.0f);
662 }
663 }
664 if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
665 b.SetCov(4, 0.5f);
666 }
667 uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u);
668 B[iSector][myTrack] = b;
669 }
670 }
671}
672
673template <>
674GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
675{
676 CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
677 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
678 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
679 bool sameSector = (iSector1 == iSector2);
680 for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
681 GPUTPCGMBorderTrack& b = B1[itr];
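    // d is the half-width of the search window around the track's Z (Par()[1] + linear Z offset): at least 0.5, capped at 3, doubled for very high q/pt tracks.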
682 float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
683 if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
684 d *= 2;
685 } else if (d > 3) {
686 d = 3;
687 }
688 CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
689 GPUTPCGMBorderRange range;
690 range.fId = itr;
691 range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
692 range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
693 range1[itr] = range;
694 if (sameSector) {
695 range2[itr] = range;
696 }
697 }
698 if (!sameSector) {
699 for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
700 GPUTPCGMBorderTrack& b = B2[itr];
701 float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
702 if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
703 d *= 2;
704 } else if (d > 3) {
705 d = 3;
706 }
707 CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
708 GPUTPCGMBorderRange range;
709 range.fId = itr;
710 range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
711 range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
712 range2[itr] = range;
713 }
714 }
715}
716
717template <>
718GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
719{
720#if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
721 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
722 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
723
724 if (iThread == 0) {
725 if (iBlock == 0) {
726#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
727 GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); });
728#else
729 GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
730#endif
731 } else if (iBlock == 1) {
732#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
733 GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); });
734#else
735 GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
736#endif
737 }
738 }
739#else
740 printf("This sorting variant is disabled for RTC\n");
741#endif
742}
743
744#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3>
745namespace o2::gpu::internal
746{
747namespace // anonymous
748{
749struct MergeBorderTracks_compMax {
750 GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
751 {
752#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
753 return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
754#else
755 return a.fMax < b.fMax;
756#endif
757 }
758};
759struct MergeBorderTracks_compMin {
760 GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
761 {
762#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
763 return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId);
764#else
765 return a.fMin < b.fMin;
766#endif
767 }
768};
769} // anonymous namespace
770} // namespace o2::gpu::internal
771
772template <>
773inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
774{
775 thrust::device_ptr<GPUTPCGMBorderRange> p(range);
777 if (cmpMax) {
778 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax());
779 } else {
780 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMin());
781 }
782}
783#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3>
784
785template <>
786GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
787{
788#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
789 if (iThread == 0) {
790 if (cmpMax) {
791 GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
792 } else {
793 GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
794 }
795 }
796#endif
797}
798
799template <>
800GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
801{
802 // int32_t statAll = 0, statMerged = 0;
803 float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
804 float factor2zt = Param().rec.tpc.trackMergerFactor2ZT;
805 float factor2k = Param().rec.tpc.trackMergerFactor2K;
806 float factor2General = Param().rec.tpc.trackMergerFactor2General;
807
808 factor2k = factor2General * factor2k;
809 factor2ys = factor2General * factor2ys;
810 factor2zt = factor2General * factor2zt;
811
812 int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits;
813 int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits;
814
815 bool sameSector = (iSector1 == iSector2);
816
817 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
818 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
819
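  // Sweep over the two range lists: range1 is sorted by fMin and range2 by fMax, so i2 only advances; B2 tracks whose window ends before the current B1 window starts can be skipped for all following B1 tracks.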
820 int32_t i2 = 0;
821 for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) {
822 GPUTPCGMBorderRange r1 = range1[i1];
823 while (i2 < N2 && range2[i2].fMax < r1.fMin) {
824 i2++;
825 }
826
827 GPUTPCGMBorderTrack& b1 = B1[r1.fId];
828 if (b1.NClusters() < minNPartHits) {
829 continue;
830 }
831 int32_t iBest2 = -1;
832 int32_t lBest2 = 0;
833 // statAll++;
834 for (int32_t k2 = i2; k2 < N2; k2++) {
835 GPUTPCGMBorderRange r2 = range2[k2];
836 if (r2.fMin > r1.fMax) {
837 break;
838 }
839 if (sameSector && (r1.fId >= r2.fId)) {
840 continue;
841 }
842 // do check
843
844 GPUTPCGMBorderTrack& b2 = B2[r2.fId];
845#if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE)
846 int64_t label1 = GetTrackLabel(b1);
847 int64_t label2 = GetTrackLabel(b2);
848 if (label1 != label2 && label1 != -1) // DEBUG CODE, match by MC label
849#endif
850 {
851 CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {printf("Comparing track %3d to %3d: ", r1.fId, r2.fId); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Cov()[i]); } printf("\n%28s", ""); });
852 CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Cov()[i]); } printf(" - %5s - ", GetTrackLabel(b1) == GetTrackLabel(b2) ? "CLONE" : "FAKE"); });
853 if (b2.NClusters() < lBest2) {
854 CADEBUG2(continue, printf("!NCl1\n"));
855 }
856 if (mergeMode > 0) {
857 // Merging CE tracks
858 int32_t maxRowDiff = mergeMode == 2 ? 1 : 3; // TODO: check cut
859 if (CAMath::Abs(b1.Row() - b2.Row()) > maxRowDiff) {
860 CADEBUG2(continue, printf("!ROW\n"));
861 }
862 if (CAMath::Abs(b1.Par()[2] - b2.Par()[2]) > 0.5f || CAMath::Abs(b1.Par()[3] - b2.Par()[3]) > 0.5f) {
863 CADEBUG2(continue, printf("!CE SinPhi/Tgl\n")); // Crude cut to avoid totally wrong matches, TODO: check cut
864 }
865 }
866
867 GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); });
868
869 if (!b1.CheckChi2Y(b2, factor2ys)) {
870 CADEBUG2(continue, printf("!Y\n"));
871 }
872 // if( !b1.CheckChi2Z(b2, factor2zt ) ) CADEBUG2(continue, printf("!NCl1\n"));
873 if (!b1.CheckChi2QPt(b2, factor2k)) {
874 CADEBUG2(continue, printf("!QPt\n"));
875 }
876 float fys = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2ys : (2.f * factor2ys);
877 float fzt = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2zt : (2.f * factor2zt);
878 if (!b1.CheckChi2YS(b2, fys)) {
879 CADEBUG2(continue, printf("!YS\n"));
880 }
881 if (!b1.CheckChi2ZT(b2, fzt)) {
882 CADEBUG2(continue, printf("!ZT\n"));
883 }
884 if (CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20) {
885 if (b2.NClusters() < minNPartHits) {
886 CADEBUG2(continue, printf("!NCl2\n"));
887 }
888 if (b1.NClusters() + b2.NClusters() < minNTotalHits) {
889 CADEBUG2(continue, printf("!NCl3\n"));
890 }
891 }
892 CADEBUG(printf("OK: dZ %8.3f D1 %8.3f D2 %8.3f\n", CAMath::Abs(b1.Par()[1] - b2.Par()[1]), 3.5f * sqrt(b1.Cov()[1]), 3.5f * sqrt(b2.Cov()[1])));
893 } // DEBUG CODE, match by MC label
894 lBest2 = b2.NClusters();
895 iBest2 = b2.TrackID();
896 }
897
898 if (iBest2 < 0) {
899 continue;
900 }
901 GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); });
902
903 // statMerged++;
904
905 CADEBUG(GPUInfo("Found match %d %d", b1.TrackID(), iBest2));
906
907 mTrackLinks[b1.TrackID()] = iBest2;
908 if (mergeMode > 0) {
909#if defined(GPUCA_NO_FAST_MATH) // TODO: Use a better define as switch
910 CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID());
911#else
912 mTrackLinks[iBest2] = b1.TrackID();
913#endif
914 }
915 }
916 // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged);
917}
918
919GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const
920{
921 if (withinSector == 1) { // Merge tracks within the same sector
922 jSector = iSector;
923 n1 = n2 = mMemory->tmpCounter[iSector];
924 b1 = b2 = mBorder[iSector];
925 } else if (withinSector == -1) { // Merge tracks across the central electrode
926 jSector = (iSector + NSECTORS / 2);
927 const int32_t offset = mergeMode == 2 ? NSECTORS : 0;
928 n1 = mMemory->tmpCounter[iSector + offset];
929 n2 = mMemory->tmpCounter[jSector + offset];
930 b1 = mBorder[iSector + offset];
931 b2 = mBorder[jSector + offset];
932 } else { // Merge tracks of adjacent sectors
933 jSector = mNextSectorInd[iSector];
934 n1 = mMemory->tmpCounter[iSector];
935 n2 = mMemory->tmpCounter[NSECTORS + jSector];
936 b1 = mBorder[iSector];
937 b2 = mBorder[NSECTORS + jSector];
938 }
939}
940
941template <int32_t I>
942GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode)
943{
944 int32_t n1, n2;
945 GPUTPCGMBorderTrack *b1, *b2;
946 int32_t jSector;
947 MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode);
948 MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode);
949}
950
951#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
952template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
953template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
954template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
955#endif
956
957GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
958{
959 float x0 = Param().tpcGeometry.Row2X(63);
960 const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
961
962 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
963 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
964 int32_t iSector = track.Sector();
965 GPUTPCGMBorderTrack b;
966 if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) {
967 b.SetTrackID(itr);
968 CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n"));
969 b.SetNClusters(track.NClusters());
970 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u);
971 mBorder[iSector][myTrack] = b;
972 }
973 }
974}
975
976GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam)
977{
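  // Even blocks handle border0, odd blocks handle border1 using the second half of the counter / border arrays; the block index is halved before forwarding to MergeSectorsPrepareStep2.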
978 bool part2 = iBlock & 1;
979 int32_t border = part2 ? border1 : border0;
980 GPUAtomic(uint32_t)* n = mMemory->tmpCounter;
981 GPUTPCGMBorderTrack** b = mBorder;
982 if (part2) {
983 n += NSECTORS;
984 b += NSECTORS;
985 }
986 MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam);
987}
988
989GPUdi() void GPUTPCGMMerger::setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end)
990{
991 start = (elems + nBlocks - 1) / nBlocks * iBlock;
992 end = (elems + nBlocks - 1) / nBlocks * (iBlock + 1);
993 end = CAMath::Min(elems, end);
994}
995
996GPUd() void GPUTPCGMMerger::hookEdge(int32_t u, int32_t v)
997{
998 if (v < 0) {
999 return;
1000 }
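  // Union the components of u and v: repeatedly hook the higher-indexed root under the lower one via atomic CAS, retrying if another thread changed the root concurrently.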
1001 while (true) {
1002 u = mTrackCCRoots[u];
1003 v = mTrackCCRoots[v];
1004 if (u == v) {
1005 break;
1006 }
1007 int32_t h = CAMath::Max(u, v);
1008 int32_t l = CAMath::Min(u, v);
1009
1010 int32_t old = CAMath::AtomicCAS(&mTrackCCRoots[h], h, l);
1011 if (old == h) {
1012 break;
1013 }
1014
1015 u = mTrackCCRoots[h];
1016 v = l;
1017 }
1018}
1019
1020GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1021{
1022 int32_t start, end;
1023 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1024 for (int32_t i = start + iThread; i < end; i += nThreads) {
1025 mTrackCCRoots[i] = i;
1026 }
1027}
1028
1029GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1030{
1031 // Compute connected components in parallel, step 1.
1032 // Source: Adaptive Work-Efficient Connected Components on the GPU, Sutton et al, 2016 (https://arxiv.org/pdf/1612.01178.pdf)
1033 int32_t start, end;
1034 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1035 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1036 hookEdge(itr, mTrackLinks[itr]);
1037 }
1038}
1039
1040GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1041{
1042 // Compute connected components in parallel, step 1 - Part 2.
1043 nBlocks = nBlocks / 4 * 4;
1044 if (iBlock >= nBlocks) {
1045 return;
1046 }
1047
1048 int32_t start, end;
1049 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end);
1050
1051 int32_t myNeighbor = iBlock % 4;
1052
1053 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1054 int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor);
1055 hookEdge(itr, v);
1056 }
1057}
1058
1059GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1060{
1061 // Compute connected components in parallel, step 2.
1062 int32_t start, end;
1063 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
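  // Pointer jumping: follow the chain of roots to its fixed point and write the final root back, flattening the component tree.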
1064 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1065 int32_t root = itr;
1066 int32_t next = mTrackCCRoots[root];
1067 if (root == next) {
1068 continue;
1069 }
1070 do {
1071 root = next;
1072 next = mTrackCCRoots[next];
1073 } while (root != next);
1074 mTrackCCRoots[itr] = root;
1075 }
1076}
1077
1078GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll)
1079{
1080 if (!mergeAll) {
1081 /*int32_t neighborType = useOrigTrackParam ? 1 : 0;
1082 int32_t old1 = newTrack2.PrevNeighbour(0);
1083 int32_t old2 = newTrack1.NextNeighbour(0);
1084 if (old1 < 0 && old2 < 0) neighborType = 0;
1085 if (old1 == itr) continue;
1086 if (neighborType) old1 = newTrack2.PrevNeighbour(1);
1087 if ( old1 >= 0 )
1088 {
1089 GPUTPCGMSectorTrack &oldTrack1 = mSectorTrackInfos[old1];
1090 if ( oldTrack1.NClusters() < newTrack1.NClusters() ) {
1091 newTrack2.SetPrevNeighbour( -1, neighborType );
1092 oldTrack1.SetNextNeighbour( -1, neighborType );
1093 } else continue;
1094 }
1095
1096 if (old2 == itr2) continue;
1097 if (neighborType) old2 = newTrack1.NextNeighbour(1);
1098 if ( old2 >= 0 )
1099 {
1100 GPUTPCGMSectorTrack &oldTrack2 = mSectorTrackInfos[old2];
1101 if ( oldTrack2.NClusters() < newTrack2.NClusters() )
1102 {
1103 oldTrack2.SetPrevNeighbour( -1, neighborType );
1104 } else continue;
1105 }
1106 newTrack1.SetNextNeighbour( itr2, neighborType );
1107 newTrack2.SetPrevNeighbour( itr, neighborType );*/
1108 }
1109
1110 int32_t start, end;
1111 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1112
1113 for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) {
1114 int32_t itr = baseIdx + iThread;
1115 bool inRange = itr < SectorTrackInfoLocalTotal();
1116
1117 int32_t itr2 = -1;
1118 if (inRange) {
1119 itr2 = mTrackLinks[itr];
1120 }
1121
1122 bool resolveSector = (itr2 > -1);
1123 if (resolveSector) {
1124 int32_t root = mTrackCCRoots[itr];
1125 resolveSector &= (start <= root) && (root < end);
1126 }
1127
1128 int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector));
1129
1130 if (resolveSector) {
1131 smem.iTrack1[smemIdx - 1] = itr;
1132 smem.iTrack2[smemIdx - 1] = itr2;
1133 }
1134 GPUbarrier();
1135
1136 if (iThread < nThreads - 1) {
1137 continue;
1138 }
1139
1140 const int32_t nSectors = smemIdx;
1141
1142 for (int32_t i = 0; i < nSectors; i++) {
1143 itr = smem.iTrack1[i];
1144 itr2 = smem.iTrack2[i];
1145
1146 GPUTPCGMSectorTrack* track1 = &mSectorTrackInfos[itr];
1147 GPUTPCGMSectorTrack* track2 = &mSectorTrackInfos[itr2];
1148 GPUTPCGMSectorTrack* track1Base = track1;
1149 GPUTPCGMSectorTrack* track2Base = track2;
1150
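      // Treat the two tracks as parts of the same segment if the longer one has |q/pt| (B5-scaled) below 2 or both have the same charge sign; otherwise they are linked as separate legs of a looper.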
1151 bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0;
1152 // GPUInfo("\nMerge %d with %d - same segment %d", itr, itr2, (int32_t) sameSegment);
1153 // PrintMergeGraph(track1, std::cout);
1154 // PrintMergeGraph(track2, std::cout);
1155
1156 while (track2->PrevSegmentNeighbour() >= 0) {
1157 track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()];
1158 }
1159 if (sameSegment) {
1160 if (track1 == track2) {
1161 continue;
1162 }
1163 while (track1->PrevSegmentNeighbour() >= 0) {
1164 track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
1165 if (track1 == track2) {
1166 goto NextTrack;
1167 }
1168 }
1169 GPUCommonAlgorithm::swap(track1, track1Base);
1170 for (int32_t k = 0; k < 2; k++) {
1171 GPUTPCGMSectorTrack* tmp = track1Base;
1172 while (tmp->Neighbour(k) >= 0) {
1173 tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
1174 if (tmp == track2) {
1175 goto NextTrack;
1176 }
1177 }
1178 }
1179
1180 while (track1->NextSegmentNeighbour() >= 0) {
1181 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1182 if (track1 == track2) {
1183 goto NextTrack;
1184 }
1185 }
1186 } else {
1187 while (track1->PrevSegmentNeighbour() >= 0) {
1188 track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
1189 }
1190
1191 if (track1 == track2) {
1192 continue;
1193 }
1194 for (int32_t k = 0; k < 2; k++) {
1195 GPUTPCGMSectorTrack* tmp = track1;
1196 while (tmp->Neighbour(k) >= 0) {
1197 tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
1198 if (tmp == track2) {
1199 goto NextTrack;
1200 }
1201 }
1202 }
1203
1204 float z1min, z1max, z2min, z2max;
1205 z1min = track1->MinClusterZT();
1206 z1max = track1->MaxClusterZT();
1207 z2min = track2->MinClusterZT();
1208 z2max = track2->MaxClusterZT();
1209 if (track1 != track1Base) {
1210 z1min = CAMath::Min(z1min, track1Base->MinClusterZT());
1211 z1max = CAMath::Max(z1max, track1Base->MaxClusterZT());
1212 }
1213 if (track2 != track2Base) {
1214 z2min = CAMath::Min(z2min, track2Base->MinClusterZT());
1215 z2max = CAMath::Max(z2max, track2Base->MaxClusterZT());
1216 }
1217 bool goUp = z2max - z1min > z1max - z2min;
1218
1219 if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) {
1220 track1->SetNeighbor(track2 - mSectorTrackInfos, goUp);
1221 track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp);
1222 // GPUInfo("Result (simple neighbor)");
1223 // PrintMergeGraph(track1, std::cout);
1224 continue;
1225 } else if (track1->Neighbour(goUp) < 0) {
1226 track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)];
1227 GPUCommonAlgorithm::swap(track1, track2);
1228 } else if (track2->Neighbour(!goUp) < 0) {
1229 track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
1230 } else { // Both would work, but we use the simpler one
1231 track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
1232 }
1233 track1Base = track1;
1234 }
1235
1236 track2Base = track2;
1237 if (!sameSegment) {
1238 while (track1->NextSegmentNeighbour() >= 0) {
1239 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1240 }
1241 }
1242 track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos);
1243 track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
1244 // k = 0: Merge right side
1245 // k = 1: Merge left side
1246 for (int32_t k = 0; k < 2; k++) {
1247 track1 = track1Base;
1248 track2 = track2Base;
1249 while (track2->Neighbour(k) >= 0) {
1250 if (track1->Neighbour(k) >= 0) {
1251 GPUTPCGMSectorTrack* track1new = &mSectorTrackInfos[track1->Neighbour(k)];
1252 GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
1253 track2->SetNeighbor(-1, k);
1254 track2new->SetNeighbor(-1, k ^ 1);
1255 track1 = track1new;
1256 while (track1->NextSegmentNeighbour() >= 0) {
1257 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1258 }
1259 track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos);
1260 track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
1261 track1 = track1new;
1262 track2 = track2new;
1263 } else {
1264 GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
1265 track1->SetNeighbor(track2->Neighbour(k), k);
1266 track2->SetNeighbor(-1, k);
1267 track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1);
1268 }
1269 }
1270 }
1271 // GPUInfo("Result");
1272 // PrintMergeGraph(track1, std::cout);
1273 NextTrack:;
1274 }
1275 }
1276}
1277
1278GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr)
1279{
1280 if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) {
1281 return;
1282 }
1283
1284 float z = 0;
1285 if (Param().par.earlyTpcTransform) {
1286 z = clsXYZ->z;
1287 } else {
1288 float x, y;
1289 auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num];
1290 GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z);
1291 }
1292
1293 if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) {
1294 return;
1295 }
1296 int32_t sector = track->Sector();
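  // Two attempts to build a CE border track: first transport to the reference row 63, then, if that fails, to the row of the cluster itself; the attempt selects the first or second half of the border array.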
1297 for (int32_t attempt = 0; attempt < 2; attempt++) {
1298 GPUTPCGMBorderTrack b;
1299 const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 63 : cls.row);
1300 if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) {
1301 b.SetTrackID(itr);
1302 b.SetNClusters(mOutputTracks[itr].NClusters());
1303 if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
1304 b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack?
1305 }
1306 if (track->CSide()) {
1307 b.SetPar(1, b.Par()[1] - 2 * (z - b.ZOffsetLinear()));
1308 b.SetZOffsetLinear(-b.ZOffsetLinear());
1309 }
1310 b.SetRow(cls.row);
1311 uint32_t id = sector + attempt * NSECTORS;
1312 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u);
1313 mBorder[id][myTrack] = b;
1314 break;
1315 }
1316 }
1317}
1318
1319GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1320{
1321 const ClusterNative* cls = Param().par.earlyTpcTransform ? nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear;
1322 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
1323 if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) {
1324 if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) {
1325 continue;
1326 }
1327 GPUTPCGMMergedTrack* trk[2] = {&mOutputTracks[i], &mOutputTracks[mTrackLinks[i]]};
1328
1329 if (!trk[1]->OK() || trk[1]->CCE()) {
1330 continue;
1331 }
1332 bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0);
1333 bool looper = trk[0]->Looper() || trk[1]->Looper() || celooper;
1334 if (!looper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) {
1335 continue;
1336 }
1337
1338 uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
1339 if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) {
1340 raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters);
1341 for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) {
1342 mClusters[k].num = 0;
1343 mClusters[k].state = 0;
1344 }
1345 CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
1346 return;
1347 }
1348
1349 bool needswap = false;
1350 if (looper) {
1351 float z0max, z1max;
1352 if (Param().par.earlyTpcTransform) {
1353 z0max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z));
1354 z1max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z));
1355 } else {
1356 z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime());
1357 z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime());
1358 }
1359 if (z1max < z0max) {
1360 needswap = true;
1361 }
1362 } else {
1363 if (mClusters[trk[0]->FirstClusterRef()].row > mClusters[trk[1]->FirstClusterRef()].row) {
1364 needswap = true;
1365 }
1366 }
1367 if (needswap) {
1368 GPUCommonAlgorithm::swap(trk[0], trk[1]);
1369 }
1370
1371 bool reverse[2] = {false, false};
1372 if (looper) {
1373 if (Param().par.earlyTpcTransform) {
1374 reverse[0] = (mClustersXYZ[trk[0]->FirstClusterRef()].z > mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) ^ (trk[0]->CSide() > 0);
1375 reverse[1] = (mClustersXYZ[trk[1]->FirstClusterRef()].z < mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) ^ (trk[1]->CSide() > 0);
1376 } else {
1377 reverse[0] = cls[mClusters[trk[0]->FirstClusterRef()].num].getTime() < cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime();
1378 reverse[1] = cls[mClusters[trk[1]->FirstClusterRef()].num].getTime() > cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime();
1379 }
1380 }
1381
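    // In continuous mode the merged halves must share one Z/time offset: derive it from the endpoint clusters (Z for early transform, otherwise the latest cluster time minus the maximum drift time) and shift trk[1]'s Z to stay consistent with the new offset.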
1382 if (Param().par.continuousTracking) {
1383 if (Param().par.earlyTpcTransform) {
1384 const float z0 = trk[0]->CSide() ? CAMath::Max(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z);
1385 const float z1 = trk[1]->CSide() ? CAMath::Max(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z);
1386 const float offset = CAMath::Abs(z1) > CAMath::Abs(z0) ? -z0 : z1;
1387 trk[1]->Param().Z() += trk[1]->Param().TZOffset() - offset;
1388 trk[1]->Param().TZOffset() = offset;
1389 } else {
1390 GPUTPCGMMergedTrackHit* clsmax;
1391 const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(),
1392 cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(),
1393 &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1],
1394 &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax);
1395 const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f);
1396 trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset);
1397 trk[1]->Param().TZOffset() = offset;
1398 }
1399 }
1400
1401 int32_t pos = newRef;
1402 int32_t leg = -1;
1403 int32_t lastLeg = -1;
1404#pragma unroll
1405 for (int32_t k = 1; k >= 0; k--) {
1406 int32_t loopstart = reverse[k] ? (trk[k]->NClusters() - 1) : 0;
1407 int32_t loopend = reverse[k] ? -1 : (int32_t)trk[k]->NClusters();
1408 int32_t loopinc = reverse[k] ? -1 : 1;
1409 for (int32_t j = loopstart; j != loopend; j += loopinc) {
1410 if (Param().par.earlyTpcTransform) {
1411 mClustersXYZ[pos] = mClustersXYZ[trk[k]->FirstClusterRef() + j];
1412 }
1413 mClusters[pos] = mClusters[trk[k]->FirstClusterRef() + j];
1414 if (looper) {
1415 if (mClusters[trk[k]->FirstClusterRef() + j].leg != lastLeg) {
1416 leg++;
1417 lastLeg = mClusters[trk[k]->FirstClusterRef() + j].leg;
1418 }
1419 mClusters[pos].leg = leg;
1420 }
1421 pos++;
1422 }
1423 if (celooper) {
1424 lastLeg = -1;
1425 }
1426 }
1427 trk[1]->SetFirstClusterRef(newRef);
1428 trk[1]->SetNClusters(trk[0]->NClusters() + trk[1]->NClusters());
1429 if (trk[1]->NClusters() > GPUCA_MERGER_MAX_TRACK_CLUSTERS) {
1430 trk[1]->SetFirstClusterRef(trk[1]->FirstClusterRef() + trk[1]->NClusters() - GPUCA_MERGER_MAX_TRACK_CLUSTERS);
1431 trk[1]->SetNClusters(GPUCA_MERGER_MAX_TRACK_CLUSTERS);
1432 }
1433 trk[1]->SetCCE(true);
1434 if (looper) {
1435 trk[1]->SetLooper(true);
1436 trk[1]->SetLegs(leg + 1);
1437 }
1438 trk[0]->SetNClusters(0);
1439 trk[0]->SetOK(false);
1440 }
1441 }
1442
1443 // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks
1444}
1445
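// Cluster comparison functors used by CollectMergedTracks below: for looping tracks the
// ordering alternates with the cluster leg and the propagation direction (outwards flag),
// otherwise clusters are ordered by row, with the cluster id as tie-breaker.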
1446namespace o2::gpu::internal
1447{
1448namespace // anonymous
1449{
1450struct GPUTPCGMMerger_CompareClusterIdsLooper {
1451 struct clcomparestruct {
1452 uint8_t leg;
1453 };
1454
1455 const uint8_t leg;
1456 const bool outwards;
1457 const GPUTPCGMMerger::trackCluster* const cmp1;
1458 const clcomparestruct* const cmp2;
1459 GPUd() GPUTPCGMMerger_CompareClusterIdsLooper(uint8_t l, bool o, const GPUTPCGMMerger::trackCluster* c1, const clcomparestruct* c2) : leg(l), outwards(o), cmp1(c1), cmp2(c2) {}
1460 GPUd() bool operator()(const int16_t aa, const int16_t bb)
1461 {
1462 const clcomparestruct& a = cmp2[aa];
1463 const clcomparestruct& b = cmp2[bb];
1464 const GPUTPCGMMerger::trackCluster& a1 = cmp1[aa];
1465 const GPUTPCGMMerger::trackCluster& b1 = cmp1[bb];
1466 if (a.leg != b.leg) {
1467 return ((leg > 0) ^ (a.leg > b.leg));
1468 }
1469 if (a1.row != b1.row) {
1470 return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards);
1471 }
1472#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1473 if (a1.id != b1.id) {
1474 return (a1.id > b1.id);
1475 }
1476 return aa > bb;
1477#else
1478 return a1.id > b1.id;
1479#endif
1480 }
1481};
1482
1483struct GPUTPCGMMerger_CompareClusterIds {
1484 const GPUTPCGMMerger::trackCluster* const mCmp;
1485 GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp) : mCmp(cmp) {}
1486 GPUd() bool operator()(const int16_t aa, const int16_t bb)
1487 {
1488 const GPUTPCGMMerger::trackCluster& a = mCmp[aa];
1489 const GPUTPCGMMerger::trackCluster& b = mCmp[bb];
1490 if (a.row != b.row) {
1491 return (a.row > b.row);
1492 }
1493#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1494 if (a.id != b.id) {
1495 return (a.id > b.id);
1496 }
1497 return aa > bb;
1498#else
1499 return (a.id > b.id);
1500#endif
1501 }
1502};
1503} // anonymous namespace
1504} // namespace o2::gpu::internal
1505
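// CollectMergedTracks walks the neighbour / segment chains built by the merger, gathers up
// to kMaxParts sector-track segments (and their extrapolated copies) per chain, unpacks and
// orders their clusters, and stores the result as one merged output track.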
1506GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1507{
1508 GPUTPCGMSectorTrack* trackParts[kMaxParts];
1509
1510 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
1511
1512 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
1513
1514 if (track.PrevSegmentNeighbour() >= 0) {
1515 continue;
1516 }
1517 if (track.PrevNeighbour() >= 0) {
1518 continue;
1519 }
1520 int32_t nParts = 0;
1521 int32_t nHits = 0;
1522 int32_t leg = 0;
1523 GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
1524 tr->SetPrevSegmentNeighbour(1000000000);
1525 while (true) {
1526 if (nParts >= kMaxParts) {
1527 break;
1528 }
1529 if (nHits + tr->NClusters() > kMaxClusters) {
1530 break;
1531 }
1532 nHits += tr->NClusters();
1533
1534 tr->SetLeg(leg);
1535 trackParts[nParts++] = tr;
1536 for (int32_t i = 0; i < 2; i++) {
1537 if (tr->ExtrapolatedTrackId(i) != -1) {
1538 if (nParts >= kMaxParts) {
1539 break;
1540 }
1541 if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) {
1542 break;
1543 }
1544 trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)];
1545 trackParts[nParts++]->SetLeg(leg);
1546 nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters();
1547 }
1548 }
1549 int32_t jtr = tr->NextSegmentNeighbour();
1550 if (jtr >= 0) {
1551 tr = &(mSectorTrackInfos[jtr]);
1552 tr->SetPrevSegmentNeighbour(1000000002);
1553 continue;
1554 }
1555 jtr = trbase->NextNeighbour();
1556 if (jtr >= 0) {
1557 trbase = &(mSectorTrackInfos[jtr]);
1558 tr = trbase;
1559 if (tr->PrevSegmentNeighbour() >= 0) {
1560 break;
1561 }
1562 tr->SetPrevSegmentNeighbour(1000000001);
1563 leg++;
1564 continue;
1565 }
1566 break;
1567 }
1568
1569 // unpack and sort clusters
1570 if (nParts > 1 && leg == 0) {
1571 GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
1572#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1573 if (a->X() != b->X()) {
1574 return (a->X() > b->X());
1575 }
1576 if (a->Y() != b->Y()) {
1577 return (a->Y() > b->Y());
1578 }
1579 if (a->Z() != b->Z()) {
1580 return (a->Z() > b->Z());
1581 }
1582 return a->QPt() > b->QPt();
1583#else
1584 return (a->X() > b->X());
1585#endif
1586 });
1587 }
1588
1589 if (Param().rec.tpc.dropLoopers && leg > 0) {
1590 nParts = 1;
1591 leg = 0;
1592 }
1593
1594 trackCluster trackClusters[kMaxClusters];
1595 nHits = 0;
1596 for (int32_t ipart = 0; ipart < nParts; ipart++) {
1597 const GPUTPCGMSectorTrack* t = trackParts[ipart];
1598 CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs()));
1599 int32_t nTrackHits = t->NClusters();
1600 trackCluster* c2 = trackClusters + nHits + nTrackHits - 1;
1601 for (int32_t i = 0; i < nTrackHits; i++, c2--) {
1602 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()];
1603 const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i];
1604 uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0];
1605 *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()};
1606 }
1607 nHits += nTrackHits;
1608 }
1609 if (nHits < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(track.QPt() * Param().qptB5Scaler)) {
1610 continue;
1611 }
1612
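// Clusters can be kept as-is only if they already come in non-increasing row order with no
// repeated consecutive cluster id; otherwise they are sorted (with the looper-aware comparator
// for multi-leg tracks) and duplicate ids are filtered out below.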
1613 int32_t ordered = leg == 0;
1614 if (ordered) {
1615 for (int32_t i = 1; i < nHits; i++) {
1616 if (trackClusters[i].row > trackClusters[i - 1].row || trackClusters[i].id == trackClusters[i - 1].id) {
1617 ordered = 0;
1618 break;
1619 }
1620 }
1621 }
1622 int32_t firstTrackIndex = 0;
1623 int32_t lastTrackIndex = nParts - 1;
1624 if (ordered == 0) {
1625 int32_t nTmpHits = 0;
1626 trackCluster trackClustersUnsorted[kMaxClusters];
1627 int16_t clusterIndices[kMaxClusters];
1628 for (int32_t i = 0; i < nHits; i++) {
1629 trackClustersUnsorted[i] = trackClusters[i];
1630 clusterIndices[i] = i;
1631 }
1632
1633 if (leg > 0) {
1634 // Find QPt and DzDs for the segment closest to the vertex, if low/mid Pt
1635 float baseZT = 1e9;
1636 uint8_t baseLeg = 0;
1637 for (int32_t i = 0; i < nParts; i++) {
1638 if (trackParts[i]->Leg() == 0 || trackParts[i]->Leg() == leg) {
1639 float zt;
1640 if (Param().par.earlyTpcTransform) {
1641 zt = CAMath::Min(CAMath::Abs(trackParts[i]->ClusterZT0()), CAMath::Abs(trackParts[i]->ClusterZTN()));
1642 } else {
1643 zt = -trackParts[i]->MinClusterZT(); // Negative time ~ smallest z, to behave the same way // TODO: Check all these min / max ZT
1644 }
1645 if (zt < baseZT) {
1646 baseZT = zt;
1647 baseLeg = trackParts[i]->Leg();
1648 }
1649 }
1650 }
1651 int32_t iLongest = 1e9;
1652 int32_t length = 0;
1653 for (int32_t i = (baseLeg ? (nParts - 1) : 0); baseLeg ? (i >= 0) : (i < nParts); baseLeg ? i-- : i++) {
1654 if (trackParts[i]->Leg() != baseLeg) {
1655 break;
1656 }
1657 if (trackParts[i]->OrigTrack()->NHits() > length) {
1658 iLongest = i;
1659 length = trackParts[i]->OrigTrack()->NHits();
1660 }
1661 }
1662 bool outwards;
1663 if (Param().par.earlyTpcTransform) {
1664 outwards = (trackParts[iLongest]->ClusterZT0() > trackParts[iLongest]->ClusterZTN()) ^ trackParts[iLongest]->CSide();
1665 } else {
1666 outwards = trackParts[iLongest]->ClusterZT0() < trackParts[iLongest]->ClusterZTN();
1667 }
1668 GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters];
1669 for (int32_t iPart = 0; iPart < nParts; iPart++) {
1670 const GPUTPCGMSectorTrack* t = trackParts[iPart];
1671 int32_t nTrackHits = t->NClusters();
1672 for (int32_t j = 0; j < nTrackHits; j++) {
1673 int32_t i = nTmpHits + j;
1674 clusterSort[i].leg = t->Leg();
1675 }
1676 nTmpHits += nTrackHits;
1677 }
1678
1679 GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIdsLooper(baseLeg, outwards, trackClusters, clusterSort));
1680 } else {
1681 GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters));
1682 }
1683 nTmpHits = 0;
1684 firstTrackIndex = lastTrackIndex = -1;
1685 for (int32_t i = 0; i < nParts; i++) {
1686 nTmpHits += trackParts[i]->NClusters();
1687 if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) {
1688 firstTrackIndex = i;
1689 }
1690 if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) {
1691 lastTrackIndex = i;
1692 }
1693 }
1694
1695 int32_t nFilteredHits = 0;
1696 int32_t indPrev = -1;
1697 for (int32_t i = 0; i < nHits; i++) {
1698 int32_t ind = clusterIndices[i];
1699 if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) {
1700 continue;
1701 }
1702 indPrev = ind;
1703 trackClusters[nFilteredHits] = trackClustersUnsorted[ind];
1704 nFilteredHits++;
1705 }
1706 nHits = nFilteredHits;
1707 }
1708
1709 uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits);
1710 if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) {
1711 raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters);
1712 CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
1713 continue;
1714 }
1715
1716 GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster;
1717 GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster;
1718
1719 for (int32_t i = 0; i < nHits; i++) {
1720 uint8_t state;
1721 if (Param().par.earlyTpcTransform) {
1722 const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()];
1723 clXYZ[i].x = c.x;
1724 clXYZ[i].y = c.y;
1725 clXYZ[i].z = c.z;
1726 clXYZ[i].amp = c.amp;
1727#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME
1728 clXYZ[i].pad = c.mPad;
1729 clXYZ[i].time = c.mTime;
1730#endif
1731 state = c.flags;
1732 } else {
1733 const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id];
1734 state = c.getFlags();
1735 }
1736 cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags
1737 cl[i].row = trackClusters[i].row;
1738 cl[i].num = trackClusters[i].id;
1739 cl[i].sector = trackClusters[i].sector;
1740 cl[i].leg = trackClusters[i].leg;
1741 }
1742
1743 uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u);
1744 if (iOutputTrack >= mNMaxTracks) {
1745 raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks);
1746 CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks);
1747 continue;
1748 }
1749
1750 GPUTPCGMMergedTrack& mergedTrack = mOutputTracks[iOutputTrack];
1751
1752 mergedTrack.SetFlags(0);
1753 mergedTrack.SetOK(1);
1754 mergedTrack.SetLooper(leg > 0);
1755 mergedTrack.SetLegs(leg);
1756 mergedTrack.SetNClusters(nHits);
1757 mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster);
1758 GPUTPCGMTrackParam& p1 = mergedTrack.Param();
1759 const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex];
1760 mergedTrack.SetCSide(p2.CSide());
1761
1762 GPUTPCGMBorderTrack b;
1763 const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : Param().tpcGeometry.Row2X(cl[0].row);
1764 if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) {
1765 p1.X() = toX;
1766 p1.Y() = b.Par()[0];
1767 p1.Z() = b.Par()[1];
1768 p1.SinPhi() = b.Par()[2];
1769 } else {
1770 p1.X() = p2.X();
1771 p1.Y() = p2.Y();
1772 p1.Z() = p2.Z();
1773 p1.SinPhi() = p2.SinPhi();
1774 }
1775 p1.TZOffset() = p2.TZOffset();
1776 p1.DzDs() = p2.DzDs();
1777 p1.QPt() = p2.QPt();
1778 mergedTrack.SetAlpha(p2.Alpha());
1779 if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) {
1780 p1.QPt() = 100.f / Param().rec.bz0Pt10MeV;
1781 }
1782
1783 // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nOutputTracks, p1.QPt(), nParts, nHits);
1784
1785 /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nOutputTracks))
1786 {
1787 mergedTrack.SetOK(0);
1788 mergedTrack.SetNClusters(0);
1789 }
1790 if (mergedTrack.NClusters() && mergedTrack.OK()) */
1791 if (Param().rec.tpc.mergeCE) {
1792 bool CEside;
1793 if (Param().par.earlyTpcTransform) {
1794 CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z);
1795 } else {
1796 auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
1797 CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
1798 }
1799 MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack);
1800 }
1801 } // itr
1802}
1803
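// The following kernels prepare and apply two orderings of the output tracks: mTrackOrderProcess
// defines the order in which the merged tracks are subsequently processed, while mTrackSort /
// mTrackOrderAttach provide a q/pt ordering used for the cluster attachment below.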
1804GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1805{
1806 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
1807 mTrackOrderProcess[i] = i;
1808 }
1809}
1810
1811GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1812{
1813 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) {
1814 mTrackSort[i] = i;
1815 }
1816}
1817
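// On backends with specialized thrust sorts, the two track orderings are produced directly with
// thrust::sort and the device comparators defined below; otherwise the SortTracks / SortTracksQPt
// kernels further down perform the same comparisons with a generic device sort.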
1818#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
1819namespace o2::gpu::internal
1820{
1821namespace // anonymous
1822{
1823struct GPUTPCGMMergerSortTracks_comp {
1824 const GPUTPCGMMergedTrack* const mCmp;
1825 GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
1826 GPUd() bool operator()(const int32_t aa, const int32_t bb)
1827 {
1828 const GPUTPCGMMergedTrack& a = mCmp[aa];
1829 const GPUTPCGMMergedTrack& b = mCmp[bb];
1830 if (a.CCE() != b.CCE()) {
1831 return a.CCE() > b.CCE();
1832 }
1833 if (a.Legs() != b.Legs()) {
1834 return a.Legs() > b.Legs();
1835 }
1836#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1837 if (a.NClusters() != b.NClusters()) {
1838 return a.NClusters() > b.NClusters();
1839 }
1840 if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1841 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1842 }
1843 if (a.GetParam().GetY() != b.GetParam().GetY()) {
1844 return a.GetParam().GetY() > b.GetParam().GetY();
1845 }
1846 return aa > bb;
1847#else
1848 return a.NClusters() > b.NClusters();
1849#endif
1850 }
1851};
1852
1853struct GPUTPCGMMergerSortTracksQPt_comp {
1854 const GPUTPCGMMergedTrack* const mCmp;
1855 GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
1856 GPUd() bool operator()(const int32_t aa, const int32_t bb)
1857 {
1858 const GPUTPCGMMergedTrack& a = mCmp[aa];
1859 const GPUTPCGMMergedTrack& b = mCmp[bb];
1860#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1861 if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1862 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1863 }
1864 if (a.GetParam().GetY() != b.GetParam().GetY()) {
1865 return a.GetParam().GetY() > b.GetParam().GetY();
1866 }
1867 return a.GetParam().GetZ() > b.GetParam().GetZ();
1868#else
1869 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1870#endif
1871 }
1872};
1873} // anonymous namespace
1874} // namespace o2::gpu::internal
1875
1876template <>
1877inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
1878{
1879 thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess());
1881 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1882}
1883
1884template <>
1885inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
1886{
1887 thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort());
1889 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1890}
1891#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
1892
1893GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1894{
1895#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1896 if (iThread || iBlock) {
1897 return;
1898 }
1899 // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object
1900 auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
1901 const GPUTPCGMMergedTrack& a = cmp[aa];
1902 const GPUTPCGMMergedTrack& b = cmp[bb];
1903 if (a.CCE() != b.CCE()) {
1904 return a.CCE() > b.CCE();
1905 }
1906 if (a.Legs() != b.Legs()) {
1907 return a.Legs() > b.Legs();
1908 }
1909#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1910 if (a.NClusters() != b.NClusters()) {
1911 return a.NClusters() > b.NClusters();
1912 }
1913 if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1914 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1915 }
1916 if (a.GetParam().GetY() != b.GetParam().GetY()) {
1917 return a.GetParam().GetY() > b.GetParam().GetY();
1918 }
1919 return aa > bb;
1920#else
1921 return a.NClusters() > b.NClusters();
1922#endif
1923 };
1924
1925 GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp);
1926#endif
1927}
1928
1929GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1930{
1931#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1932 if (iThread || iBlock) {
1933 return;
1934 }
1935 // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object
1936 auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
1937 const GPUTPCGMMergedTrack& a = cmp[aa];
1938 const GPUTPCGMMergedTrack& b = cmp[bb];
1939#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as switch
1940 if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1941 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1942 }
1943 if (a.GetParam().GetY() != b.GetParam().GetY()) {
1944 return a.GetParam().GetY() > b.GetParam().GetY();
1945 }
1946 return a.GetParam().GetZ() > b.GetParam().GetZ();
1947#else
1948 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1949#endif
1950 };
1951
1952 GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp);
1953#endif
1954}
1955
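// PrepareClustersForFit1 records the attach order of each output track and counts how many tracks
// use each cluster; PrepareClustersForFit2 then marks clusters referenced more than once as shared.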
1956GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1957{
1958 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) {
1959 mTrackOrderAttach[mTrackSort[i]] = i;
1960 const GPUTPCGMMergedTrack& trk = mOutputTracks[i];
1961 if (trk.OK()) {
1962 for (uint32_t j = 0; j < trk.NClusters(); j++) {
1963 mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood;
1964 CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
1965 }
1966 }
1967 }
1968}
1969
1970GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1971{
1972 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) {
1973 if (mSharedCount[mClusters[i].num] > 1) {
1974 mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared;
1975 }
1976 }
1977 if (mClusterStateExt) {
1978 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nBlocks * nThreads) {
1979 uint8_t state = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[i].getFlags();
1980 if (mSharedCount[i] > 1) {
1981 state |= GPUTPCGMMergedTrackHit::flagShared;
1982 }
1983 mClusterStateExt[i] = state;
1984 }
1985 }
1986}
1987
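// Finalize0-2 build the final cluster-attachment map: attachment entries of attached clusters are
// reset, each good track then competes for its clusters with a weight combining the attach flags
// and its position in the attach order, and the stored index is finally remapped to the output
// track number via mTrackSort.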
1988GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1989{
1990 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
1991 mTrackSort[mTrackOrderAttach[i]] = i;
1992 }
1993 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) {
1994 mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below
1995 }
1996}
1997
1998GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1999{
2000 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
2001 const GPUTPCGMMergedTrack& trk = mOutputTracks[i];
2002 if (!trk.OK() || trk.NClusters() == 0) {
2003 continue;
2004 }
2005 uint8_t goodLeg = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].leg;
2006 for (uint32_t j = 0; j < trk.NClusters(); j++) {
2007 int32_t id = mClusters[trk.FirstClusterRef() + j].num;
2008 uint32_t weight = mTrackOrderAttach[i] | attachAttached;
2009 uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state;
2010 if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) {
2011 weight |= attachGood;
2012 } else if (clusterState & GPUTPCGMMergedTrackHit::flagNotFit) {
2013 weight |= attachHighIncl;
2014 }
2015 if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) {
2016 weight |= attachGoodLeg;
2017 }
2018 CAMath::AtomicMax(&mClusterAttachment[id], weight);
2019 }
2020 }
2021}
2022
2023GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
2024{
2025 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nThreads * nBlocks) {
2026 if (mClusterAttachment[i] != 0) {
2027 mClusterAttachment[i] = (mClusterAttachment[i] & attachFlagMask) | mTrackSort[mClusterAttachment[i] & attachTrackMask];
2028 }
2029 }
2030}
2031
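// Looper merging: MergeLoopersInit stores, for each low-pt candidate track, the helix center in
// global xy plus a reference z (refz); the candidates are then sorted by |refz|, and
// MergeLoopersMain flags pairs with compatible center position, z phase, slope and q/pt as
// merged loopers.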
2032GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
2033{
2034 const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold
2035 for (uint32_t i = get_global_id(0); i < mMemory->nOutputTracks; i += get_global_size(0)) {
2036 const auto& trk = mOutputTracks[i];
2037 const auto& p = trk.GetParam();
2038 const float qptabs = CAMath::Abs(p.GetQPt());
2039 if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) {
2040 const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector;
2041 const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100);
2042 float sinA, cosA;
2043 CAMath::SinCos(trk.GetAlpha(), sinA, cosA);
2044 float gx = cosA * p.GetX() - sinA * p.GetY();
2045 float gy = cosA * p.GetY() + sinA * p.GetX();
2046 float bz = Param().polynomialField.GetFieldBz(gx, gy, p.GetZ());
2047 const float r1 = p.GetQPt() * bz;
2048 const float r = CAMath::Abs(r1) > 0.0001f ? (1.f / r1) : 10000;
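 // (mx, my) is the center of the track's circular trajectory in sector-local coordinates,
 // one signed radius r away from the track position perpendicular to its direction; it is
 // rotated to global coordinates (gmx, gmy) below for comparison between candidates.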
2049 const float mx = p.GetX() + r * p.GetSinPhi();
2050 const float my = p.GetY() - r * CAMath::Sqrt(1 - p.GetSinPhi() * p.GetSinPhi());
2051 const float gmx = cosA * mx - sinA * my;
2052 const float gmy = cosA * my + sinA * mx;
2053 uint32_t myId = CAMath::AtomicAdd(&mMemory->nLooperMatchCandidates, 1u);
2054 if (myId >= mNMaxLooperMatches) {
2055 raiseError(GPUErrors::ERROR_LOOPER_MATCH_OVERFLOW, myId, mNMaxLooperMatches);
2056 CAMath::AtomicExch(&mMemory->nLooperMatchCandidates, mNMaxLooperMatches);
2057 return;
2058 }
2059 mLooperCandidates[myId] = MergeLooperParam{refz, gmx, gmy, i};
2060
2061 /*printf("Track %u Sanity qpt %f snp %f bz %f\n", mMemory->nLooperMatchCandidates, p.GetQPt(), p.GetSinPhi(), bz);
2062 for (uint32_t k = 0;k < trk.NClusters();k++) {
2063 float xx, yy, zz;
2064 if (Param().par.earlyTpcTransform) {
2065 const float zOffset = (mClusters[trk.FirstClusterRef() + k].sector < 18) == (mClusters[trk.FirstClusterRef() + 0].sector < 18) ? p.GetTZOffset() : -p.GetTZOffset();
2066 xx = mClustersXYZ[trk.FirstClusterRef() + k].x;
2067 yy = mClustersXYZ[trk.FirstClusterRef() + k].y;
2068 zz = mClustersXYZ[trk.FirstClusterRef() + k].z - zOffset;
2069 } else {
2070 const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num];
2071 GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset());
2072 }
2073 float sa2, ca2;
2074 CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2);
2075 float cx = ca2 * xx - sa2 * yy;
2076 float cy = ca2 * yy + sa2 * xx;
2077 float dist = CAMath::Sqrt((cx - gmx) * (cx - gmx) + (cy - gmy) * (cy - gmy));
2078 printf("Hit %3d/%3d sector %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].sector, cx, cy, dist);
2079 }*/
2080 }
2081 }
2082}
2083
2084GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
2085{
2086#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
2087 if (iThread || iBlock) {
2088 return;
2089 }
2090 auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); };
2091 GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp);
2092#endif
2093}
2094
2095#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
2096namespace o2::gpu::internal
2097{
2098namespace // anonymous
2099{
2100struct GPUTPCGMMergerMergeLoopers_comp {
2101 GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
2102 {
2103 return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
2104 }
2105};
2106} // anonymous namespace
2107} // namespace o2::gpu::internal
2108
2109template <>
2110inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
2111{
2112 thrust::device_ptr<MergeLooperParam> params(mProcessorsShadow->tpcMerger.LooperCandidates());
2114 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
2115}
2116#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
2117
2118GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
2119{
2120 const MergeLooperParam* params = mLooperCandidates;
2121
2122#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2123 std::vector<int64_t> paramLabels(mMemory->nLooperMatchCandidates);
2124 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
2125 paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]);
2126 }
2127 /*std::vector<bool> dropped(mMemory->nLooperMatchCandidates);
2128 std::vector<bool> droppedMC(mMemory->nLooperMatchCandidates);
2129 std::vector<int32_t> histMatch(101);
2130 std::vector<int32_t> histFail(101);*/
2131 if (!mRec->GetProcessingSettings().runQA) {
2132 throw std::runtime_error("Need QA enabled for the Merge Loopers MC QA");
2133 }
2134#endif
2135
2136 for (uint32_t i = get_global_id(0); i < mMemory->nLooperMatchCandidates; i += get_global_size(0)) {
2137 for (uint32_t j = i + 1; j < mMemory->nLooperMatchCandidates; j++) {
2138 // int32_t bs = 0;
2139 if (CAMath::Abs(params[j].refz) > CAMath::Abs(params[i].refz) + 100.f) {
2140 break;
2141 }
2142 const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y);
2143 if (d2xy > 15.f) {
2144 //bs |= 1;
2145 continue;
2146 }
2147 const auto& trk1 = mOutputTracks[params[i].id];
2148 const auto& trk2 = mOutputTracks[params[j].id];
2149 const auto& param1 = trk1.GetParam();
2150 const auto& param2 = trk2.GetParam();
2151 if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) {
2152 //bs |= 2;
2153 continue;
2154 }
2155
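 // dznormalized measures the z distance between the two candidates in units of the z advance per
 // helix revolution; phasecorr accounts for the azimuthal phase difference of the two track
 // parameters, so for segments of the same looper dzcorr should lie close to a multiple of half
 // a revolution, which is what the dzcorrlimit check below tests.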
2156 const float dznormalized = (CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz())));
2157 const float phasecorr = CAMath::Modf((CAMath::ASin(param1.GetSinPhi()) + trk1.GetAlpha() - CAMath::ASin(param2.GetSinPhi()) - trk2.GetAlpha()) / CAMath::TwoPi() + 5.5f, 1.f) - 0.5f;
2158 const float phasecorrdirection = (params[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1;
2159 const float dzcorr = dznormalized + phasecorr * phasecorrdirection;
2160 const bool sameside = !(trk1.CSide() ^ trk2.CSide());
2161 const float dzcorrlimit[4] = {sameside ? 0.018f : 0.012f, sameside ? 0.12f : 0.025f, 0.14f, 0.15f};
2162 const int32_t dzcorrcount = sameside ? 4 : 2;
2163 bool dzcorrok = false;
2164 float dznorm = 0.f;
2165 for (int32_t k = 0; k < dzcorrcount; k++) {
2166 const float d = CAMath::Abs(dzcorr - 0.5f * k);
2167 if (d <= dzcorrlimit[k]) {
2168 dzcorrok = true;
2169 dznorm = d / dzcorrlimit[k];
2170 break;
2171 }
2172 }
2173 if (!dzcorrok) {
2174 //bs |= 4;
2175 continue;
2176 }
2177
2178 const float dtgl = param1.GetDzDs() - (param1.GetQPt() * param2.GetQPt() > 0 ? param2.GetDzDs() : -param2.GetDzDs());
2179 const float dqpt = (CAMath::Abs(param1.GetQPt()) - CAMath::Abs(param2.GetQPt())) / CAMath::Min(param1.GetQPt(), param2.GetQPt());
2180 float d = CAMath::Sum2(dtgl * (1.f / 0.03f), dqpt * (1.f / 0.04f)) + d2xy * (1.f / 4.f) + dznorm * (1.f / 0.3f);
2181 bool EQ = d < 6.f;
2182#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2183 const int64_t label1 = paramLabels[i];
2184 const int64_t label2 = paramLabels[j];
2185 bool labelEQ = label1 != -1 && label1 == label2;
2186 if (1 || EQ || labelEQ) {
2187 // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mOutputTracks[params[i].id].CSide(), (int32_t)mOutputTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y);
2188 static auto& tup = GPUROOTDump<TNtuple>::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs");
2189 tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs);
2190 static auto tup2 = GPUROOTDump<TNtuple>::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr");
2191 tup2.Fill((float)labelEQ, params[i].refz, params[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr);
2192 }
2193 /*if (EQ) {
2194 dropped[j] = true;
2195 }
2196 if (labelEQ) {
2197 droppedMC[j] = true;
2198 histMatch[CAMath::Min<int32_t>(100, d * 10.f)]++;
2199 }
2200 if (d < 10.f && !labelEQ) {
2201 histFail[CAMath::Min<int32_t>(100, d * 10.f)]++;
2202 }*/
2203#endif
2204 if (EQ) {
2205 mOutputTracks[params[j].id].SetMergedLooper(true);
2206 if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) {
2207 mOutputTracks[params[i].id].SetMergedLooper(true);
2208 }
2209 }
2210 }
2211 }
2212 /*#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2213 int32_t total = 0, totalmc = 0, good = 0, missed = 0, fake = 0;
2214 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
2215 total += dropped[i];
2216 totalmc += droppedMC[i];
2217 good += dropped[i] && droppedMC[i];
2218 missed += droppedMC[i] && !dropped[i];
2219 fake += dropped[i] && !droppedMC[i];
2220 }
2221 if (good) {
2222 printf("%20s: %8d\n", "Total", total);
2223 printf("%20s: %8d\n", "TotalMC", totalmc);
2224 printf("%20s: %8d (%8.3f%% %8.3f%%)\n", "Good", good, 100.f * good / total, 100.f * good / totalmc);
2225 printf("%20s: %8d (%8.3f%%)\n", "Missed", missed, 100.f * missed / totalmc);
2226 printf("%20s: %8d (%8.3f%%)\n", "Fake", fake, 100.f * fake / total);
2227 }
2228 printf("Match histo\n");
2229 for (uint32_t i = 0; i < histMatch.size(); i++) {
2230 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histMatch[i]);
2231 }
2232 printf("Fake histo\n");
2233 for (uint32_t i = 0; i < histFail.size(); i++) {
2234 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histFail[i]);
2235 }
2236#endif*/
2237}