// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCGMMerger.cxx
/// \author Sergey Gorbunov, David Rohr

#define GPUCA_CADEBUG 0
#define GPUCA_MERGE_LOOPER_MC 0

#include "GPUCommonDef.h"

#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
#include "GPUROOTDump.h"
#endif

#ifndef GPUCA_GPUCODE_DEVICE
#include <cstdio>
#include <cstring>
#include <cmath>
#include "GPUReconstruction.h"
#endif

#include "GPUTPCTracker.h"
#include "GPUTPCClusterData.h"
#include "GPUTPCTrackParam.h"
#include "GPUTPCGMMerger.h"
#include "GPUO2DataTypes.h"
#include "TPCFastTransform.h"
#include "GPUTPCConvertImpl.h"
#include "GPUTPCGeometry.h"
#include "GPUDefParametersRuntime.h"

#include "GPUCommonMath.h"
#include "GPUCommonAlgorithm.h"
#include "GPUCommonConstants.h"

#include "GPUTPCTrackParam.h"
#include "GPUTPCGMMergedTrack.h"
#include "GPUParam.h"

#include "GPUTPCGMTrackParam.h"
#include "GPUTPCGMSectorTrack.h"
#include "GPUTPCGMBorderTrack.h"

#ifndef GPUCA_GPUCODE
#endif

static constexpr int32_t kMaxParts = 400;
static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;

using namespace o2::gpu;
using namespace o2::tpc;
using namespace gputpcgmmergertypes;

namespace o2::gpu::internal
{
struct MergeLooperParam {
  float refz;
  float x;
  float y;
  uint32_t id;
};

struct MergeBorderTracks_compMax {
  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
  {
    return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
  }
};
struct MergeBorderTracks_compMin {
  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
  {
    return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
  }
};

struct GPUTPCGMMergerSortTracks_comp {
  const GPUTPCGMMergedTrack* const mCmp;
  GPUd() GPUTPCGMMergerSortTracks_comp(const GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
  GPUd() bool operator()(const int32_t aa, const int32_t bb)
  {
    const GPUTPCGMMergedTrack& a = mCmp[aa];
    const GPUTPCGMMergedTrack& b = mCmp[bb];
    if (a.CCE() != b.CCE()) {
      return a.CCE() > b.CCE();
    }
    if (a.Legs() != b.Legs()) {
      return a.Legs() > b.Legs();
    }
    GPUCA_DETERMINISTIC_CODE( // clang-format off
    if (a.NClusters() != b.NClusters()) {
      return a.NClusters() > b.NClusters();
    } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    } if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return aa > bb;
    , // !GPUCA_DETERMINISTIC_CODE
    return a.NClusters() > b.NClusters();
    ) // clang-format on
  }
};

struct GPUTPCGMMergerSortTracksQPt_comp {
  const GPUTPCGMMergedTrack* const mCmp;
  GPUd() GPUTPCGMMergerSortTracksQPt_comp(const GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
  GPUd() bool operator()(const int32_t aa, const int32_t bb)
  {
    const GPUTPCGMMergedTrack& a = mCmp[aa];
    const GPUTPCGMMergedTrack& b = mCmp[bb];
    GPUCA_DETERMINISTIC_CODE( // clang-format off
    if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    } if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return a.GetParam().GetZ() > b.GetParam().GetZ();
    , // !GPUCA_DETERMINISTIC_CODE
    return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    ) // clang-format on
  }
};

struct MergeLoopersParams_comp {
  GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
  {
    return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz));
  }
};

} // namespace o2::gpu::internal

using namespace o2::gpu::internal;

#ifndef GPUCA_GPUCODE

#include "GPUQA.h"
#include "GPUMemorySizeScalers.h"

GPUTPCGMMerger::GPUTPCGMMerger()
{
  for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
    mNextSectorInd[iSector] = iSector + 1;
    mPrevSectorInd[iSector] = iSector - 1;
  }
  int32_t mid = NSECTORS / 2 - 1;
  int32_t last = NSECTORS - 1;
  mNextSectorInd[mid] = 0;
  mPrevSectorInd[0] = mid;
  mNextSectorInd[last] = NSECTORS / 2;
  mPrevSectorInd[NSECTORS / 2] = last;
}
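// Note: the loops above chain the sectors into two closed rings: sectors 0..NSECTORS/2-1
// wrap around at NSECTORS/2-1 -> 0 and sectors NSECTORS/2..NSECTORS-1 wrap at
// NSECTORS-1 -> NSECTORS/2, so mNextSectorInd / mPrevSectorInd stay within one side of
// the TPC and never cross the central electrode.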

// DEBUG CODE
#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
#include "GPUQAHelper.h"

void GPUTPCGMMerger::CheckMergedTracks()
{
  std::vector<bool> trkUsed(SectorTrackInfoLocalTotal());
  for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
    trkUsed[i] = false;
  }

  for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) {
    GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
    if (track.PrevSegmentNeighbour() >= 0) {
      continue;
    }
    if (track.PrevNeighbour() >= 0) {
      continue;
    }
    int32_t leg = 0;
    GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
    while (true) {
      int32_t iTrk = tr - mSectorTrackInfos;
      if (trkUsed[iTrk]) {
        GPUError("FAILURE: double use");
      }
      trkUsed[iTrk] = true;

      int32_t jtr = tr->NextSegmentNeighbour();
      if (jtr >= 0) {
        tr = &(mSectorTrackInfos[jtr]);
        continue;
      }
      jtr = trbase->NextNeighbour();
      if (jtr >= 0) {
        trbase = &(mSectorTrackInfos[jtr]);
        tr = trbase;
        if (tr->PrevSegmentNeighbour() >= 0) {
          break;
        }
        leg++;
        continue;
      }
      break;
    }
  }
  for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
    if (trkUsed[i] == false) {
      GPUError("FAILURE: trk missed");
    }
  }
}

template <class T>
inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
{
  return a;
}
template <>
inline const auto* resolveMCLabels<AliHLTTPCClusterMCLabel>(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
{
  return b;
}

template <class T, class S>
int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const
{
  GPUTPCGMSectorTrack* sectorTrack = nullptr;
  int32_t nClusters = 0;
  if constexpr (std::is_same_v<S, GPUTPCGMBorderTrack&>) {
    sectorTrack = &mSectorTrackInfos[trk.TrackID()];
    nClusters = sectorTrack->OrigTrack()->NHits();
  } else {
    nClusters = trk.NClusters();
  }
  auto acc = GPUTPCTrkLbl<false, GPUTPCTrkLbl_ret>(resolveMCLabels<T>(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f);
  for (int32_t i = 0; i < nClusters; i++) {
    int32_t id;
    if constexpr (std::is_same_v<S, GPUTPCGMBorderTrack&>) {
      const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()];
      const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i];
      id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0];
    } else {
      id = mClusters[trk.FirstClusterRef() + i].num;
    }
    acc.addLabel(id);
  }
  return acc.computeLabel().id;
}
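// Note: GPUTPCTrkLbl accumulates the MC label of every cluster on the track and returns
// the most frequent one; the 0.5f argument appears to be the minimum fraction of
// clusters that must share that label for it to be accepted.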

template <class S>
int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) {
    return GetTrackLabelA<o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>, S>(trk);
  } else
#endif
  {
    return GetTrackLabelA<AliHLTTPCClusterMCLabel, S>(trk);
  }
}

#endif
// END DEBUG CODE

void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const
{
  const GPUTPCGMSectorTrack* orgTrack = trk;
  while (trk->PrevSegmentNeighbour() >= 0) {
    trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()];
  }
  const GPUTPCGMSectorTrack* orgTower = trk;
  while (trk->PrevNeighbour() >= 0) {
    trk = &mSectorTrackInfos[trk->PrevNeighbour()];
  }

  int32_t nextId = trk - mSectorTrackInfos;
  out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n";
  while (nextId >= 0) {
    trk = &mSectorTrackInfos[nextId];
    if (trk->PrevSegmentNeighbour() >= 0) {
      out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n";
    }
    out << (trk == orgTower ? "--" : "  ");
    while (nextId >= 0) {
      GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId];
      if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) {
        out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") ";
      }
      char tmp[128];
      snprintf(tmp, 128, " %s%5d(%5.2f)", trk2 == orgTrack ? "!" : " ", nextId, trk2->QPt());
      out << tmp;
      nextId = trk2->NextSegmentNeighbour();
    }
    out << "\n";
    nextId = trk->NextNeighbour();
  }
}

void* GPUTPCGMMerger::SetPointersMerger(void* mem)
{
  computePointerWithAlignment(mem, mSectorTrackInfos, mNTotalSectorTracks);
  computePointerWithAlignment(mem, mSectorTrackInfoIndex, NSECTORS * 2 + 1);
  if (mRec->GetProcessingSettings().deterministicGPUReconstruction) {
    computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSectorTracks, mNMaxTracks * 2));
  }

  void* memBase = mem;
  computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSectorTracks); // MergeBorders & Resolve
  computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSectorTracks);
  int32_t nTracks = 0;
  for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
    const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
    mBorder[iSector] = mBorderMemory + 2 * nTracks;
    mBorder[NSECTORS + iSector] = mBorderMemory + 2 * nTracks + n;
    mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks;
    nTracks += n;
  }
  computePointerWithAlignment(mem, mTrackLinks, mNTotalSectorTracks);
  computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSectorTracks);
  void* memMax = mem;
  mem = memBase;
  computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal
  memMax = (void*)std::max((size_t)mem, (size_t)memMax);
  mem = memBase;
  computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareClustersForFit0 - SortTracksQPt - PrepareClustersForFit1 - PrepareClustersForFit1 / Finalize0 - Finalize2
  computePointerWithAlignment(mem, mSharedCount, mNMaxClusters);
  memMax = (void*)std::max((size_t)mem, (size_t)memMax);
  mem = memBase;
  computePointerWithAlignment(mem, mLoopData, mNMaxTracks); // GPUTPCGMMergerTrackFit - GPUTPCGMMergerFollowLoopers
  computePointerWithAlignment(mem, mRetryRefitIds, mNMaxTracks); // Reducing mNMaxTracks for mLoopData / mRetryRefitIds does not save memory, since the other parts are larger anyway
  memMax = (void*)std::max((size_t)mem, (size_t)memMax);
  mem = memBase;
  computePointerWithAlignment(mem, mLooperCandidates, mNMaxLooperMatches); // MergeLoopers 1-3
  memMax = (void*)std::max((size_t)mem, (size_t)memMax);
  return memMax;
}
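// Note on the memBase / memMax pattern above: the buffer groups laid out after memBase
// are used in disjoint processing phases, so they intentionally alias the same scratch
// region. Each group is laid out starting from memBase again, and the returned memMax is
// the maximum end pointer over all groups, i.e. the size of the shared region.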

void* GPUTPCGMMerger::SetPointersMemory(void* mem)
{
  computePointerWithAlignment(mem, mMemory);
  return mem;
}

void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem)
{
  computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks);
  const bool mergerSortTracks = mRec->GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging).par_SORT_BEFORE_FIT : mRec->GetProcessingSettings().mergerSortTracks;
  if (mergerSortTracks) {
    computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks);
  }
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutput(void* mem)
{
  computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks);
  if (mRec->GetParam().dodEdxEnabled) {
    computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks);
    if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
      computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks);
    }
  }
  computePointerWithAlignment(mem, mClusters, mNMaxMergedTrackClusters);
  if (mRec->GetParam().par.earlyTpcTransform) {
    computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters);
  }
  computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters);
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutputState(void* mem)
{
  if ((mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) || mRec->GetProcessingSettings().outputSharedClusterMap) {
    computePointerWithAlignment(mem, mClusterStateExt, mNMaxClusters);
  } else {
    mClusterStateExt = nullptr;
  }
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutputO2(void* mem)
{
  computePointerWithAlignment(mem, mOutputTracksTPCO2, HostProcessor(this).NOutputTracksTPCO2());
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutputO2Clus(void* mem)
{
  computePointerWithAlignment(mem, mOutputClusRefsTPCO2, HostProcessor(this).NOutputClusRefsTPCO2());
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutputO2MC(void* mem)
{
  computePointerWithAlignment(mem, mOutputTracksTPCO2MC, HostProcessor(this).NOutputTracksTPCO2());
  return mem;
}

void* GPUTPCGMMerger::SetPointersOutputO2Scratch(void* mem)
{
  computePointerWithAlignment(mem, mTrackSortO2, mNMaxTracks);
  computePointerWithAlignment(mem, mClusRefTmp, mNMaxTracks);
  return mem;
}

void GPUTPCGMMerger::RegisterMemoryAllocation()
{
  // Registers the memory resources laid out by the SetPointers* methods above with the
  // reconstruction framework; the O2 output buffers are only registered when
  // createO2Output is set, and the MC label buffer additionally requires runMC.
  if (mRec->GetProcessingSettings().createO2Output) {
    if (mRec->GetProcessingSettings().runMC) {
    }
  }
}

void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io)
{
  mNTotalSectorTracks = 0;
  mNClusters = 0;
  mNMaxSingleSectorTracks = 0;
  for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
    uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
    mNTotalSectorTracks += ntrk;
    mNClusters += *mRec->GetConstantMem().tpcTrackers[iSector].NTrackHits();
    if (mNMaxSingleSectorTracks < ntrk) {
      mNMaxSingleSectorTracks = ntrk;
    }
  }
  mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters);
  if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) {
    mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field
  } else {
    mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks);
  }
  if (io.clustersNative) {
    mNMaxClusters = io.clustersNative->nClustersTotal;
  } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) {
    mNMaxClusters = 0;
    for (int32_t i = 0; i < NSECTORS; i++) {
      mNMaxClusters += mRec->GetConstantMem().tpcTrackers[i].NHitsTotal();
    }
  } else {
    mNMaxClusters = mNClusters;
  }
  mNMaxLooperMatches = mNMaxClusters / 4; // We have that much scratch memory anyway
}

int32_t GPUTPCGMMerger::CheckSectors()
{
  for (int32_t i = 0; i < NSECTORS; i++) {
    if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSectorTracks) {
      throw std::runtime_error("mNMaxSingleSectorTracks too small");
    }
  }
  if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) {
    throw std::runtime_error("Must run also sector tracking");
  }
  return 0;
}

#endif // GPUCA_GPUCODE

GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output)
{
  const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal();
  for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) {
    mTrackLinks[i] = -1;
  }
}

GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector)
{
  GPUTPCGMPropagator prop;
  prop.SetMaterialTPC();
  prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI);
  prop.SetSeedingErrors(true); // Larger errors for seeds, better since we don't start with a good hypothesis
  prop.SetFitInProjections(false);
  prop.SetPolynomialField(&Param().polynomialField);
  GPUTPCGMTrackParam trk;
  trk.X() = inTrack->Param().GetX();
  trk.Y() = inTrack->Param().GetY();
  trk.Z() = inTrack->Param().GetZ();
  trk.SinPhi() = inTrack->Param().GetSinPhi();
  trk.DzDs() = inTrack->Param().GetDzDs();
  trk.QPt() = inTrack->Param().GetQPt();
  trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin);
  trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead
  sectorTrack.SetX2(0.f);
  for (int32_t way = 0; way < 2; way++) {
    if (way) {
      prop.SetFitInProjections(true);
      prop.SetPropagateBzOnly(true);
    }
    trk.ResetCovariance();
    prop.SetTrack(&trk, alpha);
    int32_t start = way ? inTrack->NHits() - 1 : 0;
    int32_t end = way ? 0 : (inTrack->NHits() - 1);
    int32_t incr = way ? -1 : 1;
    for (int32_t i = start; i != end; i += incr) {
      float x, y, z;
      int32_t row, flags;
      const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector];
      const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i];
      int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex());
      row = ic.RowIndex();
      const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex];
      flags = cl.getFlags();
      if (Param().par.earlyTpcTransform) {
        x = tracker.Data().ClusterData()[clusterIndex].x;
        y = tracker.Data().ClusterData()[clusterIndex].y;
        z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset();
      } else {
        GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset());
      }
      if (prop.PropagateToXAlpha(x, alpha, true)) {
        return way == 0;
      }
      trk.ConstrainSinPhi();
      if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge
        return way == 0;
      }
      trk.ConstrainSinPhi();
    }
    if (way) {
      sectorTrack.SetParam2(trk);
    } else {
      sectorTrack.Set(trk, inTrack, alpha, sector);
    }
  }
  return 0;
}
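// Note on the two fit passes above: way == 0 fits the hits in forward order with the
// full field model and stores the result via sectorTrack.Set(); way == 1 refits in
// reverse with the faster Bz-only propagation and stores the second parameter set via
// SetParam2(), which MergeSectorsPrepareStep2() can select later through UseParam2()
// when merging with the original track parameters.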

GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()];
  const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1];
  int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex());
  int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex());
  if (Param().par.earlyTpcTransform) {
    track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z);
  } else {
    const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0];
    track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime());
  }
}

GPUd() void GPUTPCGMMerger::UnpackSaveNumber(int32_t id)
{
  mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks;
}

GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  float alpha = Param().Alpha(iSector);
  const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
  uint32_t nTracks = *trk.NTracks();
  for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) {
    sectorTr = &trk.Tracks()[itr];
    int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)];
    if (localId == -1) {
      continue;
    }
    uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
    GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack];
    SetTrackClusterZT(track, iSector, sectorTr);
    track.Set(this, sectorTr, alpha, iSector);
    track.SetGlobalSectorTrackCov();
    track.SetPrevNeighbour(-1);
    track.SetNextNeighbour(-1);
    track.SetNextSegmentNeighbour(-1);
    track.SetPrevSegmentNeighbour(-1);
    track.SetLocalTrackId(localId);
  }
}
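// Note: LocalTrackId() of an extrapolated track encodes the sector of the seeding local
// track in the upper 8 bits and its index within that sector in the lower 24 bits, which
// is why the lookup above decodes it as (id >> 24) and (id & 0xFFFFFF), while
// RefitSectorTracks below fills mTrackIDs per sector with the unpacked track indices.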

GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
  for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) {
    mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1;
  }
}

GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;

  float alpha = Param().Alpha(iSector);
  const GPUTPCTrack* sectorTr = nullptr;

  for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) {
    sectorTr = &trk.Tracks()[itr];
    GPUTPCGMSectorTrack track;
    SetTrackClusterZT(track, iSector, sectorTr);
    if (Param().rec.tpc.mergerCovSource == 0) {
      track.Set(this, sectorTr, alpha, iSector);
      if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) {
        continue;
      }
    } else if (Param().rec.tpc.mergerCovSource == 1) {
      track.Set(this, sectorTr, alpha, iSector);
      track.CopyBaseTrackCov();
    } else if (Param().rec.tpc.mergerCovSource == 2) {
      if (RefitSectorTrack(track, sectorTr, alpha, iSector)) {
        track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed
        if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) {
          continue;
        }
      }
    }

    CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs()));
    track.SetPrevNeighbour(-1);
    track.SetNextNeighbour(-1);
    track.SetNextSegmentNeighbour(-1);
    track.SetPrevSegmentNeighbour(-1);
    track.SetExtrapolatedTrackId(0, -1);
    track.SetExtrapolatedTrackId(1, -1);
    uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
    mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack;
    mSectorTrackInfos[myTrack] = track;
  }
}

GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) {
    GPUTPCGMSectorTrack& extrapolatedTrack = mSectorTrackInfos[itr];
    GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId()];
    if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) {
      localTrack.SetExtrapolatedTrackId(1, itr);
    }
  }
}
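// Note: every local track has two slots for extrapolated-track references. Slot 0 is
// claimed with an atomic compare-and-swap on the value -1; a thread that sees slot 0
// already taken, or that loses the CAS race, falls back to slot 1, so two concurrent
// writers never clobber the same slot.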

GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam)
{
  //* prepare sector tracks for merging with next/previous/same sector
  //* each track is transported to the border line

  float fieldBz = Param().bzCLight;

  float dAlpha = Param().dAlpha / 2;
  float x0 = 0;

  if (iBorder == 0) { // transport to the left edge of the sector and rotate horizontally
    dAlpha = dAlpha - CAMath::Pi() / 2;
  } else if (iBorder == 1) { // transport to the right edge of the sector and rotate horizontally
    dAlpha = -dAlpha - CAMath::Pi() / 2;
  } else if (iBorder == 2) { // transport to the middle of the sector and rotate vertically to the border on the left
    x0 = GPUTPCGeometry::Row2X(63);
  } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right
    dAlpha = -dAlpha;
    x0 = GPUTPCGeometry::Row2X(63);
  } else if (iBorder == 4) { // transport to the middle of the sector, w/o rotation
    dAlpha = 0;
    x0 = GPUTPCGeometry::Row2X(63);
  }

  const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
  float cosAlpha = CAMath::Cos(dAlpha);
  float sinAlpha = CAMath::Sin(dAlpha);

  GPUTPCGMSectorTrack trackTmp;
  for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
    const GPUTPCGMSectorTrack* track = &mSectorTrackInfos[itr];
    int32_t iSector = track->Sector();

    if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) {
      continue;
    }
    if (useOrigTrackParam) { // TODO: Check how far this makes sense with sector track refit
      if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) {
        continue;
      }
      const GPUTPCGMSectorTrack* trackMin = track;
      while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) {
        track = &mSectorTrackInfos[track->NextSegmentNeighbour()];
        if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) {
          trackMin = track;
        }
      }
      trackTmp = *trackMin;
      track = &trackTmp;
      if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) {
        trackTmp.UseParam2();
      } else {
        trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector());
      }
    } else {
      if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) {
        if (iBorder == 0 && track->NextNeighbour() >= 0) {
          continue;
        }
        if (iBorder == 1 && track->PrevNeighbour() >= 0) {
          continue;
        }
      }
    }
    GPUTPCGMBorderTrack b;

    if (track->TransportToXAlpha(this, x0, sinAlpha, cosAlpha, fieldBz, b, maxSin)) {
      b.SetTrackID(itr);
      b.SetNClusters(track->NClusters());
      for (int32_t i = 0; i < 4; i++) {
        if (CAMath::Abs(b.Cov()[i]) >= 5.0f) {
          b.SetCov(i, 5.0f);
        }
      }
      if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
        b.SetCov(4, 0.5f);
      }
      uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u);
      B[iSector][myTrack] = b;
    }
  }
}

template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
  CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
  GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
  bool sameSector = (iSector1 == iSector2);
  for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
    GPUTPCGMBorderTrack& b = B1[itr];
    float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
    if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
      d *= 2;
    } else if (d > 3) {
      d = 3;
    }
    CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
    GPUTPCGMBorderRange range;
    range.fId = itr;
    range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
    range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
    range1[itr] = range;
    if (sameSector) {
      range2[itr] = range;
    }
  }
  if (!sameSector) {
    for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
      GPUTPCGMBorderTrack& b = B2[itr];
      float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
      if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
        d *= 2;
      } else if (d > 3) {
        d = 3;
      }
      CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
      GPUTPCGMBorderRange range;
      range.fId = itr;
      range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
      range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
      range2[itr] = range;
    }
  }
}
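// Note: d above is an adaptive matching tolerance derived from the border-parameter
// covariance (3.5 sigma on Par()[1], at least 0.5): it is doubled for very low-pt tracks
// (|qPt| * qptB5Scaler >= 20) and capped at 3 otherwise, and defines the [fMin, fMax]
// search window consumed by the sweep in MergeBorderTracks<2>.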

template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
#if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
  GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();

  if (iThread == 0) {
    if (iBlock == 0) {
      GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); });
    } else if (iBlock == 1) {
      GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); });
    }
  }
#else
  printf("This sorting variant is disabled for RTC");
#endif
}

template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
{
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
  if (iThread == 0 && iBlock == 0) {
    if (cmpMax) {
      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax());
    } else {
      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin());
    }
  }
#endif
}

template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
  // int32_t statAll = 0, statMerged = 0;
  float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
  float factor2zt = Param().rec.tpc.trackMergerFactor2ZT;
  float factor2k = Param().rec.tpc.trackMergerFactor2K;
  float factor2General = Param().rec.tpc.trackMergerFactor2General;

  factor2k = factor2General * factor2k;
  factor2ys = factor2General * factor2ys;
  factor2zt = factor2General * factor2zt;

  int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits;
  int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits;

  bool sameSector = (iSector1 == iSector2);

  GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();

  int32_t i2 = 0;
  for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) {
    GPUTPCGMBorderRange r1 = range1[i1];
    while (i2 < N2 && range2[i2].fMax < r1.fMin) {
      i2++;
    }

    GPUTPCGMBorderTrack& b1 = B1[r1.fId];
    if (b1.NClusters() < minNPartHits) {
      continue;
    }
    int32_t iBest2 = -1;
    int32_t lBest2 = 0;
    // statAll++;
    for (int32_t k2 = i2; k2 < N2; k2++) {
      GPUTPCGMBorderRange r2 = range2[k2];
      if (r2.fMin > r1.fMax) {
        break;
      }
      if (sameSector && (r1.fId >= r2.fId)) {
        continue;
      }
      // do check

      GPUTPCGMBorderTrack& b2 = B2[r2.fId];
#if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE)
      int64_t label1 = GetTrackLabel(b1);
      int64_t label2 = GetTrackLabel(b2);
      if (label1 != label2 && label1 != -1) // DEBUG CODE, match by MC label
#endif
      {
        CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {printf("Comparing track %3d to %3d: ", r1.fId, r2.fId); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Cov()[i]); } printf("\n%28s", ""); });
        CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Cov()[i]); } printf(" - %5s - ", GetTrackLabel(b1) == GetTrackLabel(b2) ? "CLONE" : "FAKE"); });
        if (b2.NClusters() < lBest2) {
          CADEBUG2(continue, printf("!NCl1\n"));
        }
        if (mergeMode > 0) {
          // Merging CE tracks
          int32_t maxRowDiff = mergeMode == 2 ? 1 : 3; // TODO: check cut
          if (CAMath::Abs(b1.Row() - b2.Row()) > maxRowDiff) {
            CADEBUG2(continue, printf("!ROW\n"));
          }
          if (CAMath::Abs(b1.Par()[2] - b2.Par()[2]) > 0.5f || CAMath::Abs(b1.Par()[3] - b2.Par()[3]) > 0.5f) {
            CADEBUG2(continue, printf("!CE SinPhi/Tgl\n")); // Crude cut to avoid totally wrong matches, TODO: check cut
          }
        }

        GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); });

        if (!b1.CheckChi2Y(b2, factor2ys)) {
          CADEBUG2(continue, printf("!Y\n"));
        }
        // if( !b1.CheckChi2Z(b2, factor2zt ) ) CADEBUG2(continue, printf("!NCl1\n"));
        if (!b1.CheckChi2QPt(b2, factor2k)) {
          CADEBUG2(continue, printf("!QPt\n"));
        }
        float fys = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2ys : (2.f * factor2ys);
        float fzt = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2zt : (2.f * factor2zt);
        if (!b1.CheckChi2YS(b2, fys)) {
          CADEBUG2(continue, printf("!YS\n"));
        }
        if (!b1.CheckChi2ZT(b2, fzt)) {
          CADEBUG2(continue, printf("!ZT\n"));
        }
        if (CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20) {
          if (b2.NClusters() < minNPartHits) {
            CADEBUG2(continue, printf("!NCl2\n"));
          }
          if (b1.NClusters() + b2.NClusters() < minNTotalHits) {
            CADEBUG2(continue, printf("!NCl3\n"));
          }
        }
        CADEBUG(printf("OK: dZ %8.3f D1 %8.3f D2 %8.3f\n", CAMath::Abs(b1.Par()[1] - b2.Par()[1]), 3.5f * sqrt(b1.Cov()[1]), 3.5f * sqrt(b2.Cov()[1])));
      } // DEBUG CODE, match by MC label
      lBest2 = b2.NClusters();
      iBest2 = b2.TrackID();
    }

    if (iBest2 < 0) {
      continue;
    }
    GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); });

    // statMerged++;

    CADEBUG(GPUInfo("Found match %d %d", b1.TrackID(), iBest2));

    mTrackLinks[b1.TrackID()] = iBest2;
    if (mergeMode > 0) {
      GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID());
    }
  }
  // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged);
}
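// Note on the pairing loop above: range1 is sorted ascending by fMin and range2 by fMax
// (see MergeBorderTracks<1>/<3>), so i2 only ever advances and the inner scan terminates
// once r2.fMin exceeds r1.fMax. Each track is thus compared only against tracks with
// overlapping windows, and among the candidates passing all chi2 cuts the one with the
// most clusters wins (lBest2 / iBest2).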

GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const
{
  if (withinSector == 1) { // Merge tracks within the same sector
    jSector = iSector;
    n1 = n2 = mMemory->tmpCounter[iSector];
    b1 = b2 = mBorder[iSector];
  } else if (withinSector == -1) { // Merge tracks across the central electrode
    jSector = (iSector + NSECTORS / 2);
    const int32_t offset = mergeMode == 2 ? NSECTORS : 0;
    n1 = mMemory->tmpCounter[iSector + offset];
    n2 = mMemory->tmpCounter[jSector + offset];
    b1 = mBorder[iSector + offset];
    b2 = mBorder[jSector + offset];
  } else { // Merge tracks of adjacent sectors
    jSector = mNextSectorInd[iSector];
    n1 = mMemory->tmpCounter[iSector];
    n2 = mMemory->tmpCounter[NSECTORS + jSector];
    b1 = mBorder[iSector];
    b2 = mBorder[NSECTORS + jSector];
  }
}

template <int32_t I>
GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode)
{
  int32_t n1, n2;
  GPUTPCGMBorderTrack *b1, *b2;
  int32_t jSector;
  MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode);
  MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode);
}

#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
#endif

GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  float x0 = GPUTPCGeometry::Row2X(63);
  const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());

  for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
    GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
    int32_t iSector = track.Sector();
    GPUTPCGMBorderTrack b;
    if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) {
      b.SetTrackID(itr);
      CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n"));
      b.SetNClusters(track.NClusters());
      uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u);
      mBorder[iSector][myTrack] = b;
    }
  }
}

GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam)
{
  bool part2 = iBlock & 1;
  int32_t border = part2 ? border1 : border0;
  GPUAtomic(uint32_t)* n = mMemory->tmpCounter;
  GPUTPCGMBorderTrack** b = mBorder;
  if (part2) {
    n += NSECTORS;
    b += NSECTORS;
  }
  MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam);
}

GPUdi() void GPUTPCGMMerger::setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end)
{
  start = (elems + nBlocks - 1) / nBlocks * iBlock;
  end = (elems + nBlocks - 1) / nBlocks * (iBlock + 1);
  end = CAMath::Min(elems, end);
}
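// setBlockRange splits [0, elems) into nBlocks contiguous chunks of ceil(elems / nBlocks)
// elements each, clipping the last chunk: e.g. elems = 10, nBlocks = 4 yields the ranges
// [0,3), [3,6), [6,9), [9,10).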

GPUd() void GPUTPCGMMerger::hookEdge(int32_t u, int32_t v)
{
  if (v < 0) {
    return;
  }
  while (true) {
    u = mTrackCCRoots[u];
    v = mTrackCCRoots[v];
    if (u == v) {
      break;
    }
    int32_t h = CAMath::Max(u, v);
    int32_t l = CAMath::Min(u, v);

    int32_t old = CAMath::AtomicCAS(&mTrackCCRoots[h], h, l);
    if (old == h) {
      break;
    }

    u = mTrackCCRoots[h];
    v = l;
  }
}
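// hookEdge is the lock-free union step of the connected-components labeling: the root
// with the higher index is attached to the lower-index root via an atomic
// compare-and-swap. If the CAS fails because another thread re-rooted h first, the loop
// reloads both roots and retries until the two trees are joined.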

GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  int32_t start, end;
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t i = start + iThread; i < end; i += nThreads) {
    mTrackCCRoots[i] = i;
  }
}

GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // Compute connected components in parallel, step 1.
  // Source: Adaptive Work-Efficient Connected Components on the GPU, Sutton et al, 2016 (https://arxiv.org/pdf/1612.01178.pdf)
  int32_t start, end;
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    hookEdge(itr, mTrackLinks[itr]);
  }
}

GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // Compute connected components in parallel, step 1 - Part 2.
  nBlocks = nBlocks / 4 * 4;
  if (iBlock >= nBlocks) {
    return;
  }

  int32_t start, end;
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end);

  int32_t myNeighbor = iBlock % 4;

  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor);
    hookEdge(itr, v);
  }
}
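// Note: blocks are processed in groups of four; each block in a group hooks one of the
// four neighbour links of a track (iBlock % 4 selects which one via AnyNeighbour), so
// all neighbour edges of a track range are handled by one group concurrently.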

GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // Compute connected components in parallel, step 2.
  int32_t start, end;
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    int32_t root = itr;
    int32_t next = mTrackCCRoots[root];
    if (root == next) {
      continue;
    }
    do {
      root = next;
      next = mTrackCCRoots[next];
    } while (root != next);
    mTrackCCRoots[itr] = root;
  }
}
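// MultiJump is the path-compression step: each track follows the chain of parents to the
// final root (the fixed point of mTrackCCRoots) and stores it directly, flattening the
// union-find trees so that subsequent root lookups finish in a single step.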

GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll)
{
  if (!mergeAll) {
    /*int32_t neighborType = useOrigTrackParam ? 1 : 0;
    int32_t old1 = newTrack2.PrevNeighbour(0);
    int32_t old2 = newTrack1.NextNeighbour(0);
    if (old1 < 0 && old2 < 0) neighborType = 0;
    if (old1 == itr) continue;
    if (neighborType) old1 = newTrack2.PrevNeighbour(1);
    if ( old1 >= 0 )
    {
      GPUTPCGMSectorTrack &oldTrack1 = mSectorTrackInfos[old1];
      if ( oldTrack1.NClusters() < newTrack1.NClusters() ) {
        newTrack2.SetPrevNeighbour( -1, neighborType );
        oldTrack1.SetNextNeighbour( -1, neighborType );
      } else continue;
    }

    if (old2 == itr2) continue;
    if (neighborType) old2 = newTrack1.NextNeighbour(1);
    if ( old2 >= 0 )
    {
      GPUTPCGMSectorTrack &oldTrack2 = mSectorTrackInfos[old2];
      if ( oldTrack2.NClusters() < newTrack2.NClusters() )
      {
        oldTrack2.SetPrevNeighbour( -1, neighborType );
      } else continue;
    }
    newTrack1.SetNextNeighbour( itr2, neighborType );
    newTrack2.SetPrevNeighbour( itr, neighborType );*/
  }

  int32_t start, end;
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);

  for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) {
    int32_t itr = baseIdx + iThread;
    bool inRange = itr < SectorTrackInfoLocalTotal();

    int32_t itr2 = -1;
    if (inRange) {
      itr2 = mTrackLinks[itr];
    }

    bool resolveSector = (itr2 > -1);
    if (resolveSector) {
      int32_t root = mTrackCCRoots[itr];
      resolveSector &= (start <= root) && (root < end);
    }

    int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector));

    if (resolveSector) {
      smem.iTrack1[smemIdx - 1] = itr;
      smem.iTrack2[smemIdx - 1] = itr2;
    }
    GPUbarrier();

    if (iThread < nThreads - 1) {
      continue;
    }

    const int32_t nSectors = smemIdx;

    for (int32_t i = 0; i < nSectors; i++) {
      itr = smem.iTrack1[i];
      itr2 = smem.iTrack2[i];

      GPUTPCGMSectorTrack* track1 = &mSectorTrackInfos[itr];
      GPUTPCGMSectorTrack* track2 = &mSectorTrackInfos[itr2];
      GPUTPCGMSectorTrack* track1Base = track1;
      GPUTPCGMSectorTrack* track2Base = track2;

      bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0;
      // GPUInfo("\nMerge %d with %d - same segment %d", itr, itr2, (int32_t) sameSegment);
      // PrintMergeGraph(track1, std::cout);
      // PrintMergeGraph(track2, std::cout);

      while (track2->PrevSegmentNeighbour() >= 0) {
        track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()];
      }
      if (sameSegment) {
        if (track1 == track2) {
          continue;
        }
        while (track1->PrevSegmentNeighbour() >= 0) {
          track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
          if (track1 == track2) {
            goto NextTrack;
          }
        }
        GPUCommonAlgorithm::swap(track1, track1Base);
        for (int32_t k = 0; k < 2; k++) {
          GPUTPCGMSectorTrack* tmp = track1Base;
          while (tmp->Neighbour(k) >= 0) {
            tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
            if (tmp == track2) {
              goto NextTrack;
            }
          }
        }

        while (track1->NextSegmentNeighbour() >= 0) {
          track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
          if (track1 == track2) {
            goto NextTrack;
          }
        }
      } else {
        while (track1->PrevSegmentNeighbour() >= 0) {
          track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
        }

        if (track1 == track2) {
          continue;
        }
        for (int32_t k = 0; k < 2; k++) {
          GPUTPCGMSectorTrack* tmp = track1;
          while (tmp->Neighbour(k) >= 0) {
            tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
            if (tmp == track2) {
              goto NextTrack;
            }
          }
        }

        float z1min, z1max, z2min, z2max;
        z1min = track1->MinClusterZT();
        z1max = track1->MaxClusterZT();
        z2min = track2->MinClusterZT();
        z2max = track2->MaxClusterZT();
        if (track1 != track1Base) {
          z1min = CAMath::Min(z1min, track1Base->MinClusterZT());
          z1max = CAMath::Max(z1max, track1Base->MaxClusterZT());
        }
        if (track2 != track2Base) {
          z2min = CAMath::Min(z2min, track2Base->MinClusterZT());
          z2max = CAMath::Max(z2max, track2Base->MaxClusterZT());
        }
        bool goUp = z2max - z1min > z1max - z2min;

        if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) {
          track1->SetNeighbor(track2 - mSectorTrackInfos, goUp);
          track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp);
          // GPUInfo("Result (simple neighbor)");
          // PrintMergeGraph(track1, std::cout);
          continue;
        } else if (track1->Neighbour(goUp) < 0) {
          track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)];
          GPUCommonAlgorithm::swap(track1, track2);
        } else if (track2->Neighbour(!goUp) < 0) {
          track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
        } else { // Both would work, but we use the simpler one
          track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
        }
        track1Base = track1;
      }

      track2Base = track2;
      if (!sameSegment) {
        while (track1->NextSegmentNeighbour() >= 0) {
          track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
        }
      }
      track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos);
      track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
      // k = 0: Merge right side
      // k = 1: Merge left side
      for (int32_t k = 0; k < 2; k++) {
        track1 = track1Base;
        track2 = track2Base;
        while (track2->Neighbour(k) >= 0) {
          if (track1->Neighbour(k) >= 0) {
            GPUTPCGMSectorTrack* track1new = &mSectorTrackInfos[track1->Neighbour(k)];
            GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
            track2->SetNeighbor(-1, k);
            track2new->SetNeighbor(-1, k ^ 1);
            track1 = track1new;
            while (track1->NextSegmentNeighbour() >= 0) {
              track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
            }
            track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos);
            track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
            track1 = track1new;
            track2 = track2new;
          } else {
            GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
            track1->SetNeighbor(track2->Neighbour(k), k);
            track2->SetNeighbor(-1, k);
            track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1);
          }
        }
      }
      // GPUInfo("Result");
      // PrintMergeGraph(track1, std::cout);
    NextTrack:;
    }
  }
}
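// Note on the batching above: all threads of the block first collect the link pairs whose
// connected-component root falls into this block's range, with the inclusive prefix scan
// assigning compact shared-memory slots; afterwards only the last thread of the block
// walks the collected pairs serially, since the segment/neighbour pointer surgery on a
// component is not safe to run concurrently.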

GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr)
{
  if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) {
    return;
  }

  float z = 0;
  if (Param().par.earlyTpcTransform) {
    z = clsXYZ->z;
  } else {
    float x, y;
    auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num];
    GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z);
  }

  if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) {
    return;
  }
  int32_t sector = track->Sector();
  for (int32_t attempt = 0; attempt < 2; attempt++) {
    GPUTPCGMBorderTrack b;
    const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row);
    if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) {
      b.SetTrackID(itr);
      b.SetNClusters(mMergedTracks[itr].NClusters());
      if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
        b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack?
      }
      if (track->CSide()) {
        b.SetPar(1, b.Par()[1] - 2 * (z - b.ZOffsetLinear()));
        b.SetZOffsetLinear(-b.ZOffsetLinear());
      }
      b.SetRow(cls.row);
      uint32_t id = sector + attempt * NSECTORS;
      uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u);
      mBorder[id][myTrack] = b;
      break;
    }
  }
}
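// Note: two transport attempts are made above, first to the reference radius of row 63
// (the standard border x) and, if that propagation fails, to the radius of the cluster's
// own row; the attempt index is folded into the border-buffer id (sector + attempt *
// NSECTORS), matching the offset that MergeBorderTracksSetup applies for mergeMode == 2.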

GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  const ClusterNative* cls = Param().par.earlyTpcTransform ? nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear;
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
    if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) {
      if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) {
        continue;
      }
      GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]};

      if (!trk[1]->OK() || trk[1]->CCE()) {
        continue;
      }
      bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0);
      bool looper = trk[0]->Looper() || trk[1]->Looper() || celooper;
      if (!looper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) {
        continue;
      }

      uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
      if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) {
        raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters);
        for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) {
          mClusters[k].num = 0;
          mClusters[k].state = 0;
        }
        CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
        return;
      }

      bool needswap = false;
      if (looper) {
        float z0max, z1max;
        if (Param().par.earlyTpcTransform) {
          z0max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z));
          z1max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z));
        } else {
          z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime());
          z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime());
        }
        if (z1max < z0max) {
          needswap = true;
        }
      } else {
        if (mClusters[trk[0]->FirstClusterRef()].row > mClusters[trk[1]->FirstClusterRef()].row) {
          needswap = true;
        }
      }
      if (needswap) {
        GPUCommonAlgorithm::swap(trk[0], trk[1]);
      }

      bool reverse[2] = {false, false};
      if (looper) {
        if (Param().par.earlyTpcTransform) {
          reverse[0] = (mClustersXYZ[trk[0]->FirstClusterRef()].z > mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) ^ (trk[0]->CSide() > 0);
          reverse[1] = (mClustersXYZ[trk[1]->FirstClusterRef()].z < mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) ^ (trk[1]->CSide() > 0);
        } else {
          reverse[0] = cls[mClusters[trk[0]->FirstClusterRef()].num].getTime() < cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime();
          reverse[1] = cls[mClusters[trk[1]->FirstClusterRef()].num].getTime() > cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime();
        }
      }

      if (Param().par.continuousTracking) {
        if (Param().par.earlyTpcTransform) {
          const float z0 = trk[0]->CSide() ? CAMath::Max(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z);
          const float z1 = trk[1]->CSide() ? CAMath::Max(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z);
          const float offset = CAMath::Abs(z1) > CAMath::Abs(z0) ? -z0 : z1;
          trk[1]->Param().Z() += trk[1]->Param().TZOffset() - offset;
          trk[1]->Param().TZOffset() = offset;
        } else {
          GPUTPCGMMergedTrackHit* clsmax;
          const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(),
                                                cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(),
                                                &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1],
                                                &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax);
          const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f);
          trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset);
          trk[1]->Param().TZOffset() = offset;
        }
      }

      int32_t pos = newRef;
      int32_t leg = -1;
      int32_t lastLeg = -1;
#pragma unroll
      for (int32_t k = 1; k >= 0; k--) {
        int32_t loopstart = reverse[k] ? (trk[k]->NClusters() - 1) : 0;
        int32_t loopend = reverse[k] ? -1 : (int32_t)trk[k]->NClusters();
        int32_t loopinc = reverse[k] ? -1 : 1;
        for (int32_t j = loopstart; j != loopend; j += loopinc) {
          if (Param().par.earlyTpcTransform) {
            mClustersXYZ[pos] = mClustersXYZ[trk[k]->FirstClusterRef() + j];
          }
          mClusters[pos] = mClusters[trk[k]->FirstClusterRef() + j];
          if (looper) {
            if (mClusters[trk[k]->FirstClusterRef() + j].leg != lastLeg) {
              leg++;
              lastLeg = mClusters[trk[k]->FirstClusterRef() + j].leg;
            }
            mClusters[pos].leg = leg;
          }
          pos++;
        }
        if (celooper) {
          lastLeg = -1;
        }
      }
      trk[1]->SetFirstClusterRef(newRef);
      trk[1]->SetNClusters(trk[0]->NClusters() + trk[1]->NClusters());
      if (trk[1]->NClusters() > GPUCA_MERGER_MAX_TRACK_CLUSTERS) {
        trk[1]->SetFirstClusterRef(trk[1]->FirstClusterRef() + trk[1]->NClusters() - GPUCA_MERGER_MAX_TRACK_CLUSTERS);
        trk[1]->SetNClusters(GPUCA_MERGER_MAX_TRACK_CLUSTERS);
      }
      trk[1]->SetCCE(true);
      if (looper) {
        trk[1]->SetLooper(true);
        trk[1]->SetLegs(leg + 1);
      }
      trk[0]->SetNClusters(0);
      trk[0]->SetOK(false);
    }
  }

  // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks
}
1463
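// Comparison functors used below to order cluster indices before the refit.
// GPUTPCGMMerger_CompareClusterIdsLooper sorts the clusters of a multi-leg
// (looper) candidate: by leg first, then by row, with the row direction
// alternating with the leg parity and the overall inward/outward orientation.
// GPUTPCGMMerger_CompareClusterIds is the plain single-leg ordering by
// descending row. The id/index tie-breakers only exist to keep the sort
// reproducible under GPUCA_DETERMINISTIC_CODE.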
1464namespace o2::gpu::internal
1465{
1466namespace // anonymous
1467{
1468struct GPUTPCGMMerger_CompareClusterIdsLooper {
1469 struct clcomparestruct {
1470 uint8_t leg;
1471 };
1472
1473 const uint8_t leg;
1474 const bool outwards;
1475  const GPUTPCGMMerger::trackCluster* const cmp1;
1476  const clcomparestruct* const cmp2;
1477 GPUd() GPUTPCGMMerger_CompareClusterIdsLooper(uint8_t l, bool o, const GPUTPCGMMerger::trackCluster* c1, const clcomparestruct* c2) : leg(l), outwards(o), cmp1(c1), cmp2(c2) {}
1478 GPUd() bool operator()(const int16_t aa, const int16_t bb)
1479 {
1480 const clcomparestruct& a = cmp2[aa];
1481    const clcomparestruct& b = cmp2[bb];
1482    const GPUTPCGMMerger::trackCluster& a1 = cmp1[aa];
1483    const GPUTPCGMMerger::trackCluster& b1 = cmp1[bb];
1484 if (a.leg != b.leg) {
1485 return ((leg > 0) ^ (a.leg > b.leg));
1486 }
1487 if (a1.row != b1.row) {
1488 return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards);
1489 }
1490 return GPUCA_DETERMINISTIC_CODE((a1.id != b1.id) ? (a1.id > b1.id) : (aa > bb), a1.id > b1.id);
1491 }
1492};
1493
1494struct GPUTPCGMMerger_CompareClusterIds {
1495  const GPUTPCGMMerger::trackCluster* const mCmp;
1496  GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp) : mCmp(cmp) {}
1497 GPUd() bool operator()(const int16_t aa, const int16_t bb)
1498  {
1499    const GPUTPCGMMerger::trackCluster& a = mCmp[aa];
1500    const GPUTPCGMMerger::trackCluster& b = mCmp[bb];
1501 if (a.row != b.row) {
1502 return (a.row > b.row);
1503 }
1504 return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? (a.id > b.id) : (aa > bb), a.id > b.id);
1505 }
1506};
1507} // anonymous namespace
1508} // namespace o2::gpu::internal
1509
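// Collect the sector-track segments linked by the previous merging steps into
// final merged tracks: walk each neighbour chain, gather the clusters of all
// segments, bring them into fit order, copy them to the merged-cluster buffer,
// and create the output track with starting parameters from the segment that
// contains its first stored cluster.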
1510GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1511{
1512 GPUTPCGMSectorTrack* trackParts[kMaxParts];
1513
1514 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
1515 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
1516
1517 if (track.PrevSegmentNeighbour() >= 0) {
1518 continue;
1519 }
1520 if (track.PrevNeighbour() >= 0) {
1521 continue;
1522 }
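    // Walk the chain of linked segments starting from this chain head. The large
    // sentinel values (1000000000/1/2) written to PrevSegmentNeighbour mark a
    // segment as claimed; hitting an already-claimed neighbour ends the walk.
    // Extrapolated copies attached to a segment are collected with the same leg,
    // and the leg counter increases at every NextNeighbour hop, i.e. whenever the
    // candidate starts a new leg of a looping trajectory.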
1523 int32_t nParts = 0;
1524 int32_t nHits = 0;
1525 int32_t leg = 0;
1526 GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
1527 tr->SetPrevSegmentNeighbour(1000000000);
1528 while (true) {
1529 if (nParts >= kMaxParts) {
1530 break;
1531 }
1532 if (nHits + tr->NClusters() > kMaxClusters) {
1533 break;
1534 }
1535 nHits += tr->NClusters();
1536
1537 tr->SetLeg(leg);
1538 trackParts[nParts++] = tr;
1539 for (int32_t i = 0; i < 2; i++) {
1540 if (tr->ExtrapolatedTrackId(i) != -1) {
1541 if (nParts >= kMaxParts) {
1542 break;
1543 }
1544 if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) {
1545 break;
1546 }
1547 trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)];
1548 trackParts[nParts++]->SetLeg(leg);
1549 nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters();
1550 }
1551 }
1552 int32_t jtr = tr->NextSegmentNeighbour();
1553 if (jtr >= 0) {
1554 tr = &(mSectorTrackInfos[jtr]);
1555 tr->SetPrevSegmentNeighbour(1000000002);
1556 continue;
1557 }
1558 jtr = trbase->NextNeighbour();
1559 if (jtr >= 0) {
1560 trbase = &(mSectorTrackInfos[jtr]);
1561 tr = trbase;
1562 if (tr->PrevSegmentNeighbour() >= 0) {
1563 break;
1564 }
1565 tr->SetPrevSegmentNeighbour(1000000001);
1566 leg++;
1567 continue;
1568 }
1569 break;
1570 }
1571
1572 // unpack and sort clusters
1573 if (nParts > 1 && leg == 0) {
1574 GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
1575 GPUCA_DETERMINISTIC_CODE( // clang-format off
1576 if (a->X() != b->X()) {
1577 return (a->X() > b->X());
1578 }
1579 if (a->Y() != b->Y()) {
1580 return (a->Y() > b->Y());
1581 }
1582 if (a->Z() != b->Z()) {
1583 return (a->Z() > b->Z());
1584 }
1585 return a->QPt() > b->QPt();
1586 , // !GPUCA_DETERMINISTIC_CODE
1587 return (a->X() > b->X());
1588 ) // clang-format on
1589 });
1590 }
1591
1592 if (Param().rec.tpc.dropLoopers && leg > 0) {
1593 nParts = 1;
1594 leg = 0;
1595 }
1596
1597 trackCluster trackClusters[kMaxClusters];
1598 nHits = 0;
1599 for (int32_t ipart = 0; ipart < nParts; ipart++) {
1600 const GPUTPCGMSectorTrack* t = trackParts[ipart];
1601 CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs()));
1602 int32_t nTrackHits = t->NClusters();
1603 trackCluster* c2 = trackClusters + nHits + nTrackHits - 1;
1604 for (int32_t i = 0; i < nTrackHits; i++, c2--) {
1605 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()];
1606 const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i];
1607 uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0];
1608 *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()};
1609 }
1610 nHits += nTrackHits;
1611 }
1612 if (nHits < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(track.QPt() * Param().qptB5Scaler)) {
1613 continue;
1614 }
1615
1616 int32_t ordered = leg == 0;
1617 if (ordered) {
1618 for (int32_t i = 1; i < nHits; i++) {
1619 if (trackClusters[i].row > trackClusters[i - 1].row || trackClusters[i].id == trackClusters[i - 1].id) {
1620 ordered = 0;
1621 break;
1622 }
1623 }
1624 }
1625 int32_t firstTrackIndex = 0;
1626 int32_t lastTrackIndex = nParts - 1;
1627 if (ordered == 0) {
1628 int32_t nTmpHits = 0;
1629 trackCluster trackClustersUnsorted[kMaxClusters];
1630 int16_t clusterIndices[kMaxClusters];
1631 for (int32_t i = 0; i < nHits; i++) {
1632 trackClustersUnsorted[i] = trackClusters[i];
1633 clusterIndices[i] = i;
1634 }
1635
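      // Loopers: resort the clusters of all legs into one global fit order. The
      // leg closest to the vertex (smallest |z|, with negated time used as a z
      // proxy when the early transform is off) becomes the base leg, the longest
      // segment on it decides whether that leg points outward, and
      // CompareClusterIdsLooper sorts accordingly.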
1636 if (leg > 0) {
1637 // Find QPt and DzDs for the segment closest to the vertex, if low/mid Pt
1638 float baseZT = 1e9;
1639 uint8_t baseLeg = 0;
1640 for (int32_t i = 0; i < nParts; i++) {
1641 if (trackParts[i]->Leg() == 0 || trackParts[i]->Leg() == leg) {
1642 float zt;
1643 if (Param().par.earlyTpcTransform) {
1644 zt = CAMath::Min(CAMath::Abs(trackParts[i]->ClusterZT0()), CAMath::Abs(trackParts[i]->ClusterZTN()));
1645 } else {
1646 zt = -trackParts[i]->MinClusterZT(); // Negative time ~ smallest z, to behave the same way // TODO: Check all these min / max ZT
1647 }
1648 if (zt < baseZT) {
1649 baseZT = zt;
1650 baseLeg = trackParts[i]->Leg();
1651 }
1652 }
1653 }
1654 int32_t iLongest = 1e9;
1655 int32_t length = 0;
1656 for (int32_t i = (baseLeg ? (nParts - 1) : 0); baseLeg ? (i >= 0) : (i < nParts); baseLeg ? i-- : i++) {
1657 if (trackParts[i]->Leg() != baseLeg) {
1658 break;
1659 }
1660 if (trackParts[i]->OrigTrack()->NHits() > length) {
1661 iLongest = i;
1662 length = trackParts[i]->OrigTrack()->NHits();
1663 }
1664 }
1665 bool outwards;
1666 if (Param().par.earlyTpcTransform) {
1667 outwards = (trackParts[iLongest]->ClusterZT0() > trackParts[iLongest]->ClusterZTN()) ^ trackParts[iLongest]->CSide();
1668 } else {
1669 outwards = trackParts[iLongest]->ClusterZT0() < trackParts[iLongest]->ClusterZTN();
1670 }
1671 GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters];
1672 for (int32_t iPart = 0; iPart < nParts; iPart++) {
1673 const GPUTPCGMSectorTrack* t = trackParts[iPart];
1674 int32_t nTrackHits = t->NClusters();
1675 for (int32_t j = 0; j < nTrackHits; j++) {
1676 int32_t i = nTmpHits + j;
1677 clusterSort[i].leg = t->Leg();
1678 }
1679 nTmpHits += nTrackHits;
1680 }
1681
1682 GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIdsLooper(baseLeg, outwards, trackClusters, clusterSort));
1683 } else {
1684 GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters));
1685 }
1686 nTmpHits = 0;
1687 firstTrackIndex = lastTrackIndex = -1;
1688 for (int32_t i = 0; i < nParts; i++) {
1689 nTmpHits += trackParts[i]->NClusters();
1690 if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) {
1691 firstTrackIndex = i;
1692 }
1693 if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) {
1694 lastTrackIndex = i;
1695 }
1696 }
1697
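      // Copy the clusters back in sorted order, skipping duplicates that are
      // adjacent after sorting (the same cluster id can occur in more than one
      // overlapping segment).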
1698 int32_t nFilteredHits = 0;
1699 int32_t indPrev = -1;
1700 for (int32_t i = 0; i < nHits; i++) {
1701 int32_t ind = clusterIndices[i];
1702 if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) {
1703 continue;
1704 }
1705 indPrev = ind;
1706 trackClusters[nFilteredHits] = trackClustersUnsorted[ind];
1707 nFilteredHits++;
1708 }
1709 nHits = nFilteredHits;
1710 }
1711
1712 const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits);
1713 if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) {
1714 raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters);
1715 CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
1716 continue;
1717 }
1718
1719 GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster;
1720
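    // Store the merged hits. With the early TPC transform the cluster x/y/z (and
    // amplitude) are copied into mClustersXYZ; otherwise only the flags of the
    // native cluster are fetched. Row, id, sector and leg come from the sorted
    // trackClusters array in both cases.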
1721 for (int32_t i = 0; i < nHits; i++) {
1722 uint8_t state;
1723 if (Param().par.earlyTpcTransform) {
1724 const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()];
1725 GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster;
1726 clXYZ[i].x = c.x;
1727 clXYZ[i].y = c.y;
1728 clXYZ[i].z = c.z;
1729 clXYZ[i].amp = c.amp;
1730#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME
1731 clXYZ[i].pad = c.mPad;
1732 clXYZ[i].time = c.mTime;
1733#endif
1734 state = c.flags;
1735 } else {
1736 const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id];
1737 state = c.getFlags();
1738 }
1739 cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags
1740 cl[i].row = trackClusters[i].row;
1741 cl[i].num = trackClusters[i].id;
1742 cl[i].sector = trackClusters[i].sector;
1743 cl[i].leg = trackClusters[i].leg;
1744 }
1745
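    // Reserve an output slot and fill the merged track. Starting parameters come
    // from the part holding the first stored cluster (outermost row), transported
    // to the x of that cluster when the transport succeeds; with a (near) zero
    // nominal field the curvature is replaced by the fixed default derived from
    // rec.bz0Pt10MeV, presumably to keep the subsequent refit well-behaved.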
1746 uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u);
1747 if (iOutputTrack >= mNMaxTracks) {
1748 raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks);
1749 CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks);
1750 continue;
1751 }
1752
1753 GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack];
1754
1755 mergedTrack.SetFlags(0);
1756 mergedTrack.SetOK(1);
1757 mergedTrack.SetLooper(leg > 0);
1758 mergedTrack.SetLegs(leg);
1759 mergedTrack.SetNClusters(nHits);
1760 mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster);
1761 GPUTPCGMTrackParam& p1 = mergedTrack.Param();
1762 const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex];
1763 mergedTrack.SetCSide(p2.CSide());
1764
1765    GPUTPCGMBorderTrack b;
1766    const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row);
1767 if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) {
1768 p1.X() = toX;
1769 p1.Y() = b.Par()[0];
1770 p1.Z() = b.Par()[1];
1771 p1.SinPhi() = b.Par()[2];
1772 } else {
1773 p1.X() = p2.X();
1774 p1.Y() = p2.Y();
1775 p1.Z() = p2.Z();
1776 p1.SinPhi() = p2.SinPhi();
1777 }
1778 p1.TZOffset() = p2.TZOffset();
1779 p1.DzDs() = p2.DzDs();
1780 p1.QPt() = p2.QPt();
1781 mergedTrack.SetAlpha(p2.Alpha());
1782 if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) {
1783 p1.QPt() = 100.f / Param().rec.bz0Pt10MeV;
1784 }
1785
1786 // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits);
1787
1788 /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks))
1789 {
1790 mergedTrack.SetOK(0);
1791 mergedTrack.SetNClusters(0);
1792 }
1793 if (mergedTrack.NClusters() && mergedTrack.OK()) */
1794 if (Param().rec.tpc.mergeCE) {
1795 bool CEside;
1796 if (Param().par.earlyTpcTransform) {
1797 const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster;
1798 CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z);
1799 } else {
1800 auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
1801 CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
1802 }
1803 MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack);
1804 }
1805 } // itr
1806}
1807
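// The small kernels below build two index arrays over the merged tracks:
// mTrackOrderProcess, sorted with GPUTPCGMMergerSortTracks_comp, and mTrackSort,
// sorted by q/pT with GPUTPCGMMergerSortTracksQPt_comp. The first presumably
// fixes the processing order of the track fit; the second defines the attachment
// ranking used by PrepareClustersForFit1 and the Finalize kernels.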
1808GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1809{
1810 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1811 mTrackOrderProcess[i] = i;
1812 }
1813}
1814
1815GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1816{
1817 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) {
1818 mTrackSort[i] = i;
1819 }
1820}
1821
1822GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1823{
1824#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1825 if (iThread == 0 && iBlock == 0) {
1826 GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks));
1827 }
1828#endif
1829}
1830
1831GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1832{
1833#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1834 if (iThread == 0 && iBlock == 0) {
1835 GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks));
1836 }
1837#endif
1838}
1839
1840GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1841{
1842 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) {
1843 mTrackOrderAttach[mTrackSort[i]] = i;
1844 const GPUTPCGMMergedTrack& trk = mMergedTracks[i];
1845 if (trk.OK()) {
1846 for (uint32_t j = 0; j < trk.NClusters(); j++) {
1847 mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood;
1848 CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
1849 }
1850 }
1851 }
1852}
1853
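// Count-based shared-cluster marking: every cluster referenced by more than one
// merged track (mSharedCount, filled in PrepareClustersForFit1) gets flagShared,
// both in the merged hits and, if allocated, in the exported state array.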
1854GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1855{
1856 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) {
1857 if (mSharedCount[mClusters[i].num] > 1) {
1858 mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared;
1859 }
1860 }
1861 if (mClusterStateExt) {
1862 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nBlocks * nThreads) {
1863 uint8_t state = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[i].getFlags();
1864 if (mSharedCount[i] > 1) {
1865        state |= GPUTPCGMMergedTrackHit::flagShared;
1866      }
1867 mClusterStateExt[i] = state;
1868 }
1869 }
1870}
1871
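// Finalize0 inverts the attachment permutation (mTrackSort[mTrackOrderAttach[i]] = i)
// and zeroes the attachment word of every cluster sitting on a merged track; the
// words are rebuilt in Finalize1 and remapped to the final track order in Finalize2.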
1872GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1873{
1874 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1875 mTrackSort[mTrackOrderAttach[i]] = i;
1876 }
1877 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) {
1878 mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below
1879 }
1880}
1881
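// Finalize1 computes per cluster the best attachment word over all tracks using
// it, via CAMath::AtomicMax. The word packs a priority into one uint32_t: the low
// bits (attachTrackMask) hold the track's attach-order index, the high bits the
// flags attachAttached / attachGood / attachGoodLeg / attachHighIncl, so the
// atomic max prefers better-flagged tracks first and, among equal flags, the
// track later in the q/pT attach order. goodLeg is the leg of the outermost
// stored cluster of the track.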
1882GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1883{
1884 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1885 const GPUTPCGMMergedTrack& trk = mMergedTracks[i];
1886 if (!trk.OK() || trk.NClusters() == 0) {
1887 continue;
1888 }
1889 uint8_t goodLeg = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].leg;
1890 for (uint32_t j = 0; j < trk.NClusters(); j++) {
1891 int32_t id = mClusters[trk.FirstClusterRef() + j].num;
1892 uint32_t weight = mTrackOrderAttach[i] | attachAttached;
1893 uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state;
1894 if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) {
1895 weight |= attachGood;
1896 } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) {
1897        weight |= attachHighIncl;
1898      }
1899 if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) {
1900        weight |= attachGoodLeg;
1901      }
1902 CAMath::AtomicMax(&mClusterAttachment[id], weight);
1903 }
1904 }
1905}
1906
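// Finalize2 rewrites every non-zero attachment word so that its track-index part
// refers to the final merged-track index (via mTrackSort) instead of the attach order.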
1907GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1908{
1909 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nThreads * nBlocks) {
1910 if (mClusterAttachment[i] != 0) {
1911 mClusterAttachment[i] = (mClusterAttachment[i] & attachFlagMask) | mTrackSort[mClusterAttachment[i] & attachTrackMask];
1912 }
1913 }
1914}
1915
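// Looper merging, step 1: for every low-pT merged track (within the qptB5 window
// tested below) build a match candidate from a reference z (track z plus its
// z offset, shifted by +100 for A-side and -100 for C-side tracks) and the global
// xy position of the helix axis. In sector coordinates the circle centre is
//   r  = 1 / (q/pT * Bz)
//   mx = x + r * sinPhi,   my = y - r * cosPhi
// which is then rotated by the track alpha into the global frame.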
1916GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1917{
1918 const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold
1919 for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) {
1920 const auto& trk = mMergedTracks[i];
1921 const auto& p = trk.GetParam();
1922 const float qptabs = CAMath::Abs(p.GetQPt());
1923 if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) {
1924 const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector;
1925 const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100);
1926 float sinA, cosA;
1927 CAMath::SinCos(trk.GetAlpha(), sinA, cosA);
1928 float gx = cosA * p.GetX() - sinA * p.GetY();
1929 float gy = cosA * p.GetY() + sinA * p.GetX();
1930 float bz = Param().polynomialField.GetFieldBz(gx, gy, p.GetZ());
1931 const float r1 = p.GetQPt() * bz;
1932 const float r = CAMath::Abs(r1) > 0.0001f ? (1.f / r1) : 10000;
1933 const float mx = p.GetX() + r * p.GetSinPhi();
1934 const float my = p.GetY() - r * CAMath::Sqrt(1 - p.GetSinPhi() * p.GetSinPhi());
1935 const float gmx = cosA * mx - sinA * my;
1936 const float gmy = cosA * my + sinA * mx;
1937 uint32_t myId = CAMath::AtomicAdd(&mMemory->nLooperMatchCandidates, 1u);
1938 if (myId >= mNMaxLooperMatches) {
1939 raiseError(GPUErrors::ERROR_LOOPER_MATCH_OVERFLOW, myId, mNMaxLooperMatches);
1940 CAMath::AtomicExch(&mMemory->nLooperMatchCandidates, mNMaxLooperMatches);
1941 return;
1942 }
1943 mLooperCandidates[myId] = MergeLooperParam{refz, gmx, gmy, i};
1944
1945 /*printf("Track %u Sanity qpt %f snp %f bz %f\n", mMemory->nLooperMatchCandidates, p.GetQPt(), p.GetSinPhi(), bz);
1946 for (uint32_t k = 0;k < trk.NClusters();k++) {
1947 float xx, yy, zz;
1948 if (Param().par.earlyTpcTransform) {
1949 const float zOffset = (mClusters[trk.FirstClusterRef() + k].sector < 18) == (mClusters[trk.FirstClusterRef() + 0].sector < 18) ? p.GetTZOffset() : -p.GetTZOffset();
1950 xx = mClustersXYZ[trk.FirstClusterRef() + k].x;
1951 yy = mClustersXYZ[trk.FirstClusterRef() + k].y;
1952 zz = mClustersXYZ[trk.FirstClusterRef() + k].z - zOffset;
1953 } else {
1954 const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num];
1955 GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset());
1956 }
1957 float sa2, ca2;
1958 CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2);
1959 float cx = ca2 * xx - sa2 * yy;
1960 float cy = ca2 * yy + sa2 * xx;
1961 float dist = CAMath::Sqrt((cx - gmx) * (cx - gmx) + (cy - gmy) * (cy - gmy));
1962 printf("Hit %3d/%3d sector %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].sector, cx, cy, dist);
1963 }*/
1964 }
1965 }
1966}
1967
1968GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1969{
1970#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1971 if (iThread == 0 && iBlock == 0) {
1972 GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
1973 }
1974#endif
1975}
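// Looper merging, step 2: candidates are sorted by |refz|, so for each candidate
// only the following ones within 100 cm in |refz| have to be inspected. A pair is
// kept if the helix axes are close in xy (squared distance below 15), the signed
// dip angles are consistent with the charges, and the corrected z distance
// matches a multiple of half a revolution (see below); matched partners are
// flagged via SetMergedLooper, presumably for later rejection.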
1976
1977GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1978{
1979 const MergeLooperParam* params = mLooperCandidates;
1980
1981#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
1982 std::vector<int64_t> paramLabels(mMemory->nLooperMatchCandidates);
1983 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
1984 paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]);
1985 }
1986 /*std::vector<bool> dropped(mMemory->nLooperMatchCandidates);
1987 std::vector<bool> droppedMC(mMemory->nLooperMatchCandidates);
1988 std::vector<int32_t> histMatch(101);
1989 std::vector<int32_t> histFail(101);*/
1990 if (!mRec->GetProcessingSettings().runQA) {
1991 throw std::runtime_error("Need QA enabled for the Merge Loopers MC QA");
1992 }
1993#endif
1994
1995 for (uint32_t i = get_global_id(0); i < mMemory->nLooperMatchCandidates; i += get_global_size(0)) {
1996 for (uint32_t j = i + 1; j < mMemory->nLooperMatchCandidates; j++) {
1997 // int32_t bs = 0;
1998 if (CAMath::Abs(params[j].refz) > CAMath::Abs(params[i].refz) + 100.f) {
1999 break;
2000 }
2001 const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y);
2002 if (d2xy > 15.f) {
2003 // bs |= 1;
2004 continue;
2005 }
2006 const auto& trk1 = mMergedTracks[params[i].id];
2007 const auto& trk2 = mMergedTracks[params[j].id];
2008 const auto& param1 = trk1.GetParam();
2009 const auto& param2 = trk2.GetParam();
2010 if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) {
2011 // bs |= 2;
2012 continue;
2013 }
2014
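      // dz between the candidates, normalised to the z advance per revolution,
      // roughly 2*pi*R*tanLambda with R = 1/(q/pT*Bz), averaging the parameters
      // of both tracks. phasecorr folds the azimuthal phase difference into
      // [-0.5, 0.5) turns so that dzcorr can be tested against multiples of half
      // a turn within the dzcorrlimit windows below.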
2015 const float dznormalized = (CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz())));
2016 const float phasecorr = CAMath::Modf((CAMath::ASin(param1.GetSinPhi()) + trk1.GetAlpha() - CAMath::ASin(param2.GetSinPhi()) - trk2.GetAlpha()) / CAMath::TwoPi() + 5.5f, 1.f) - 0.5f;
2017 const float phasecorrdirection = (params[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1;
2018 const float dzcorr = dznormalized + phasecorr * phasecorrdirection;
2019 const bool sameside = !(trk1.CSide() ^ trk2.CSide());
2020 const float dzcorrlimit[4] = {sameside ? 0.018f : 0.012f, sameside ? 0.12f : 0.025f, 0.14f, 0.15f};
2021 const int32_t dzcorrcount = sameside ? 4 : 2;
2022 bool dzcorrok = false;
2023 float dznorm = 0.f;
2024 for (int32_t k = 0; k < dzcorrcount; k++) {
2025 const float d = CAMath::Abs(dzcorr - 0.5f * k);
2026 if (d <= dzcorrlimit[k]) {
2027 dzcorrok = true;
2028 dznorm = d / dzcorrlimit[k];
2029 break;
2030 }
2031 }
2032 if (!dzcorrok) {
2033 // bs |= 4;
2034 continue;
2035 }
2036
2037 const float dtgl = param1.GetDzDs() - (param1.GetQPt() * param2.GetQPt() > 0 ? param2.GetDzDs() : -param2.GetDzDs());
2038 const float dqpt = (CAMath::Abs(param1.GetQPt()) - CAMath::Abs(param2.GetQPt())) / CAMath::Min(param1.GetQPt(), param2.GetQPt());
2039 float d = CAMath::Sum2(dtgl * (1.f / 0.03f), dqpt * (1.f / 0.04f)) + d2xy * (1.f / 4.f) + dznorm * (1.f / 0.3f);
2040 bool EQ = d < 6.f;
2041#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2042 const int64_t label1 = paramLabels[i];
2043 const int64_t label2 = paramLabels[j];
2044 bool labelEQ = label1 != -1 && label1 == label2;
2045 if (1 || EQ || labelEQ) {
2046 // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y);
2047 static auto& tup = GPUROOTDump<TNtuple>::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs");
2048 tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs);
2049 static auto tup2 = GPUROOTDump<TNtuple>::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr");
2050 tup2.Fill((float)labelEQ, params[i].refz, params[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr);
2051 }
2052 /*if (EQ) {
2053 dropped[j] = true;
2054 }
2055 if (labelEQ) {
2056 droppedMC[j] = true;
2057 histMatch[CAMath::Min<int32_t>(100, d * 10.f)]++;
2058 }
2059 if (d < 10.f && !labelEQ) {
2060 histFail[CAMath::Min<int32_t>(100, d * 10.f)]++;
2061 }*/
2062#endif
2063 if (EQ) {
2064 mMergedTracks[params[j].id].SetMergedLooper(true);
2065 if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) {
2066 mMergedTracks[params[i].id].SetMergedLooper(true);
2067 }
2068 }
2069 }
2070 }
2071 /*#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2072 int32_t total = 0, totalmc = 0, good = 0, missed = 0, fake = 0;
2073 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
2074 total += dropped[i];
2075 totalmc += droppedMC[i];
2076 good += dropped[i] && droppedMC[i];
2077 missed += droppedMC[i] && !dropped[i];
2078 fake += dropped[i] && !droppedMC[i];
2079 }
2080 if (good) {
2081 printf("%20s: %8d\n", "Total", total);
2082 printf("%20s: %8d\n", "TotalMC", totalmc);
2083 printf("%20s: %8d (%8.3f%% %8.3f%%)\n", "Good", good, 100.f * good / total, 100.f * good / totalmc);
2084 printf("%20s: %8d (%8.3f%%)\n", "Missed", missed, 100.f * missed / totalmc);
2085 printf("%20s: %8d (%8.3f%%)\n", "Fake", fake, 100.f * fake / total);
2086 }
2087 printf("Match histo\n");
2088 for (uint32_t i = 0; i < histMatch.size(); i++) {
2089 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histMatch[i]);
2090 }
2091 printf("Fake histo\n");
2092 for (uint32_t i = 0; i < histFail.size(); i++) {
2093 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histFail[i]);
2094 }
2095#endif*/
2096}