GPUTPCGMMerger.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12/// \file GPUTPCGMMerger.cxx
13/// \author Sergey Gorbunov, David Rohr
14
15#define GPUCA_CADEBUG 0
16#define GPUCA_MERGE_LOOPER_MC 0
17// #define GPUCA_CADEBUG_ENABLED
18
19#include "GPUCommonDef.h"
20
21#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
22#include "AliHLTTPCClusterMCData.h"
23#include "GPUROOTDump.h"
24#endif
25
26#ifndef GPUCA_GPUCODE_DEVICE
27#include <cstdio>
28#include <cstring>
29#include <cmath>
30#include "GPUReconstruction.h"
31#endif
32
33#include "GPUTPCTracker.h"
34#include "GPUTPCTrackParam.h"
35#include "GPUTPCGMMerger.h"
36#include "GPUO2DataTypes.h"
37#include "TPCFastTransform.h"
38#include "GPUTPCConvertImpl.h"
39#include "GPUTPCGeometry.h"
40#include "GPUDefParametersRuntime.h"
41
42#include "GPUCommonMath.h"
43#include "GPUCommonAlgorithm.h"
44#include "GPUCommonConstants.h"
45
46#include "GPUTPCTrackParam.h"
47#include "GPUTPCGMMergedTrack.h"
48#include "GPUParam.h"
49#include "GPUTPCGMMergedTrackHit.h"
50
51#include "GPUTPCGMTrackParam.h"
52#include "GPUTPCGMSectorTrack.h"
53#include "GPUTPCGMBorderTrack.h"
54
55#include "GPUTPCGMMergerTypes.h"
57#ifndef GPUCA_GPUCODE
60#endif
61
62using namespace o2::gpu;
63using namespace o2::tpc;
64using namespace gputpcgmmergertypes;
65
66namespace o2::gpu::internal
67{
68struct MergeLooperParam {
69 float refz;
70 float x;
71 float y;
72 uint32_t id;
73};
74
75struct MergeBorderTracks_compMax {
76 GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
77 {
78 return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
79 }
80};
81struct MergeBorderTracks_compMin {
82 GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
83 {
84 return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
85 }
86};
87
88struct GPUTPCGMMergerSortTracks_comp { // (header reconstructed)
89 const GPUTPCGMMergedTrack* const mCmp;
90 GPUd() GPUTPCGMMergerSortTracks_comp(const GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
91 GPUd() bool operator()(const int32_t aa, const int32_t bb)
92 {
93 const GPUTPCGMMergedTrack& a = mCmp[aa];
94 const GPUTPCGMMergedTrack& b = mCmp[bb];
95 if (a.OK() != b.OK()) {
96 return a.OK();
97 }
98 if (a.CCE() != b.CCE()) {
99 return a.CCE() > b.CCE();
100 }
101 GPUCA_DETERMINISTIC_CODE( // clang-format off
102 if (a.NClusters() != b.NClusters()) {
103 return a.NClusters() > b.NClusters();
104 } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
105 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
106 } if (a.GetParam().GetY() != b.GetParam().GetY()) {
107 return a.GetParam().GetY() > b.GetParam().GetY();
108 }
109 return aa > bb;
110 , // !GPUCA_DETERMINISTIC_CODE
111 return a.NClusters() > b.NClusters();
112 ) // clang-format on
113 }
114};
115
116struct GPUTPCGMMergerSortTracksQPt_comp { // (header reconstructed)
117 const GPUTPCGMMergedTrack* const mCmp;
118 GPUd() GPUTPCGMMergerSortTracksQPt_comp(const GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
119 GPUd() bool operator()(const int32_t aa, const int32_t bb)
120 {
121 const GPUTPCGMMergedTrack& a = mCmp[aa];
122 const GPUTPCGMMergedTrack& b = mCmp[bb];
123 GPUCA_DETERMINISTIC_CODE( // clang-format off
124 if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
125 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
126 } if (a.GetParam().GetY() != b.GetParam().GetY()) {
127 return a.GetParam().GetY() > b.GetParam().GetY();
128 }
129 return a.GetParam().GetZ() > b.GetParam().GetZ();
130 , // !GPUCA_DETERMINISTIC_CODE
131 return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
132 ) // clang-format on
133 }
134};
135
136struct MergeLooperParam_comp { // (name reconstructed)
137 GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
138 {
139 return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz));
140 }
141};
142
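// The comparators above rely on GPUCA_DETERMINISTIC_CODE to switch between a
// fast ordering and a fully deterministic one. A minimal standalone sketch of
// the idea (simplified, no CAMath::Abs; kept in #if 0 so it is not built):
// with only the primary key, elements that compare equal may be emitted in any
// order by an unstable parallel sort, so runs differ; adding the unique id as
// a secondary key makes the comparison a total order and the output unique.
#if 0
#include <algorithm>
#include <vector>
struct Param { float refz; unsigned id; };
static bool deterministicLess(const Param& a, const Param& b)
{
  return (a.refz != b.refz) ? (a.refz < b.refz) : (a.id < b.id); // unique id breaks ties
}
int main()
{
  std::vector<Param> v{{1.f, 2}, {1.f, 0}, {0.5f, 1}};
  std::sort(v.begin(), v.end(), deterministicLess); // ids come out 1, 0, 2 on every run
}
#endif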
143} // namespace o2::gpu::internal
144
145using namespace o2::gpu::internal;
146
147#ifndef GPUCA_GPUCODE
148
149#include "GPUQA.h"
150#include "GPUMemorySizeScalers.h"
151
152GPUTPCGMMerger::GPUTPCGMMerger()
153{
154 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
155 mNextSectorInd[iSector] = iSector + 1;
156 mPrevSectorInd[iSector] = iSector - 1;
157 }
158 int32_t mid = NSECTORS / 2 - 1;
159 int32_t last = NSECTORS - 1;
160 mNextSectorInd[mid] = 0;
161 mPrevSectorInd[0] = mid;
162 mNextSectorInd[last] = NSECTORS / 2;
163 mPrevSectorInd[NSECTORS / 2] = last;
164}
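// The constructor above closes the sector index chains into two independent
// rings, one per TPC side, so neighbour lookups never cross the endcap. A
// standalone sketch (assuming NSECTORS == 36; value chosen for illustration):
#if 0
#include <cstdio>
int main()
{
  constexpr int NSECTORS = 36;
  int nextSector[NSECTORS], prevSector[NSECTORS];
  for (int i = 0; i < NSECTORS; i++) {
    nextSector[i] = i + 1;
    prevSector[i] = i - 1;
  }
  const int mid = NSECTORS / 2 - 1, last = NSECTORS - 1;
  nextSector[mid] = 0;             // A-side ring: 17 wraps to 0
  prevSector[0] = mid;
  nextSector[last] = NSECTORS / 2; // C-side ring: 35 wraps to 18
  prevSector[NSECTORS / 2] = last;
  printf("%d %d\n", nextSector[17], nextSector[35]); // prints: 0 18
}
#endif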
165
166// DEBUG CODE
167#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
168#include "GPUQAHelper.h"
169
170template <class T>
171inline const auto* resolveMCLabels(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
172{
173 return a;
174}
175template <>
176inline const auto* resolveMCLabels<AliHLTTPCClusterMCLabel>(const o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>* a, const AliHLTTPCClusterMCLabel* b)
177{
178 return b;
179}
180
181template <class T, class S>
182int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const
183{
184 GPUTPCGMSectorTrack* sectorTrack = nullptr;
185 int32_t nClusters = 0;
186 if constexpr (std::is_same_v<S, GPUTPCGMBorderTrack&>) {
187 sectorTrack = &mSectorTrackInfos[trk.TrackID()];
188 nClusters = sectorTrack->OrigTrack()->NHits();
189 } else {
190 nClusters = trk.NClusters();
191 }
192 auto acc = GPUTPCTrkLbl<false, GPUTPCTrkLbl_ret>(resolveMCLabels<T>(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f);
193 for (int32_t i = 0; i < nClusters; i++) {
194 int32_t id;
195 if constexpr (std::is_same_v<S, GPUTPCGMBorderTrack&>) {
196 const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()];
197 const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i];
198 id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0];
199 } else {
200 id = mClusters[trk.FirstClusterRef() + i].num;
201 }
202 acc.addLabel(id);
203 }
204 return acc.computeLabel().id;
205}
206
207template <class S>
208int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const
209{
210#ifdef GPUCA_TPC_GEOMETRY_O2
211 if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) {
212 return GetTrackLabelA<o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>, S>(trk);
213 } else
214#endif
215 {
216 return GetTrackLabelA<AliHLTTPCClusterMCLabel, S>(trk);
217 }
218}
219
220#endif
221// END DEBUG CODE
222
223void GPUTPCGMMerger::CheckMergedTracks()
224{
225 uint32_t nErr = 0;
226 for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) {
227 const GPUTPCGMMergedTrack& trk = mMergedTracks[i];
228 if (trk.OK()) {
229 if (trk.NClusters() == 0) {
230 GPUError("FAILURE: Track marked ok but has 0 clusters");
231 nErr++;
232 }
233 if (!trk.CCE() && !trk.MergedLooper()) {
234 const GPUTPCGMMergedTrack* updTrk = &trk;
235 while (updTrk->PrevSegment() >= 0) {
236 auto next = &mMergedTracks[updTrk->PrevSegment()];
237 if (!next->MergedLooper()) {
238 GPUError("FAILURE: prev segment not marked as merged looper\n");
239 nErr++;
240 }
241 if (next == &trk) {
242 GPUError("FAILURE: segment cycle found\n");
243 break;
244 }
245 updTrk = next;
246 }
247 if (updTrk->NClusters() == 0) {
248 printf("FAILURE: segment leg has 0 clusters\n");
249 }
250 }
251 }
252 }
253
254 if (nErr == 0) {
255 GPUInfo("Merged Tracks OK");
256 } else {
257 throw std::runtime_error("Error during track merging");
258 }
259}
260
261void GPUTPCGMMerger::CheckMergeGraph()
262{
263 uint32_t nErr = 0;
264 std::vector<bool> trkUsed(SectorTrackInfoLocalTotal());
265 for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
266 trkUsed[i] = false;
267 }
268
269 for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) {
270 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
271 if (track.PrevSegmentNeighbour() >= 0 && mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour() != itr) {
272 GPUError("FAILURE: Invalid reciprocal segment link: %d PrevSegmentNeighbour %d NextSegmentNeighbour %d", itr, track.PrevSegmentNeighbour(), mSectorTrackInfos[track.PrevSegmentNeighbour()].NextSegmentNeighbour());
273 nErr++;
274 }
275 if (track.NextSegmentNeighbour() >= 0 && mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour() != itr) {
276 GPUError("FAILURE: Invalid reciprocal segment link: %d NextSegmentNeighbour %d PrevSegmentNeighbour %d", itr, track.NextSegmentNeighbour(), mSectorTrackInfos[track.NextSegmentNeighbour()].PrevSegmentNeighbour());
277 nErr++;
278 }
279 if (track.PrevNeighbour() >= 0 && mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour() != itr) {
280 GPUError("FAILURE: Invalid reciprocal link: %d PrevNeighbour %d NextNeighbour %d", itr, track.PrevNeighbour(), mSectorTrackInfos[track.PrevNeighbour()].NextNeighbour());
281 nErr++;
282 }
283 if (track.NextNeighbour() >= 0 && mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour() != itr) {
284 GPUError("FAILURE: Invalid reciprocal link: %d NextNeighbour %d PrevNeighbour %d", itr, track.NextNeighbour(), mSectorTrackInfos[track.NextNeighbour()].PrevNeighbour());
285 nErr++;
286 }
287 if (track.PrevSegmentNeighbour() >= 0) {
288 continue;
289 }
290 if (track.PrevNeighbour() >= 0) {
291 continue;
292 }
293 GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
294 while (true) {
295 int32_t iTrk = tr - mSectorTrackInfos;
296 if (trkUsed[iTrk]) {
297 GPUError("FAILURE: double use");
298 nErr++;
299 break;
300 }
301 trkUsed[iTrk] = true;
302
303 int32_t jtr = tr->NextSegmentNeighbour();
304 if (jtr >= 0) {
305 tr = &(mSectorTrackInfos[jtr]);
306 if (tr->PrevNeighbour() >= 0) {
307 GPUError("FAILURE: Non-base segment has previous leg");
308 nErr++;
309 }
310 continue;
311 }
312 jtr = trbase->NextNeighbour();
313 if (jtr >= 0) {
314 trbase = &(mSectorTrackInfos[jtr]);
315 tr = trbase;
316 if (tr->PrevSegmentNeighbour() >= 0) {
317 GPUError("FAILURE: Neighbour leg has previous segment neighbour");
318 nErr++;
319 break;
320 }
321 continue;
322 }
323 break;
324 }
325 }
326 for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
327 if (trkUsed[i] == false) {
328 GPUError("FAILURE: trk missed");
329 nErr++;
330 }
331 }
332 if (nErr == 0) {
333 GPUInfo("Merged Track Graph OK");
334 } else {
335 throw std::runtime_error("Invalid merge graph");
336 }
337}
338
339void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const
340{
341 const GPUTPCGMSectorTrack* orgTrack = trk;
342 while (trk->PrevSegmentNeighbour() >= 0) {
343 trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()];
344 }
345 const GPUTPCGMSectorTrack* orgTower = trk;
346 while (trk->PrevNeighbour() >= 0) {
347 trk = &mSectorTrackInfos[trk->PrevNeighbour()];
348 }
349
350 int32_t nextId = trk - mSectorTrackInfos;
351 out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n";
352 while (nextId >= 0) {
353 trk = &mSectorTrackInfos[nextId];
354 if (trk->PrevSegmentNeighbour() >= 0) {
355 out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n";
356 }
357 out << (trk == orgTower ? "--" : " ");
358 while (nextId >= 0) {
359 GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId];
360 if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) {
361 out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") ";
362 }
363 char tmp[128];
364 snprintf(tmp, 128, " %s%5d(%5.2f)", trk2 == orgTrack ? "!" : " ", nextId, trk2->QPt());
365 out << tmp;
366 nextId = trk2->NextSegmentNeighbour();
367 }
368 out << "\n";
369 nextId = trk->NextNeighbour();
370 }
371}
372
373void GPUTPCGMMerger::InitializeProcessor() {}
374
375void* GPUTPCGMMerger::SetPointersMerger(void* mem)
376{
377 computePointerWithAlignment(mem, mSectorTrackInfos, mNTotalSectorTracks);
378 computePointerWithAlignment(mem, mSectorTrackInfoIndex, NSECTORS * 2 + 1);
379 if (mRec->GetProcessingSettings().deterministicGPUReconstruction) {
380 computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSectorTracks, mNMaxTracks * 2));
381 }
382
383 void* memBase = mem;
384 computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSectorTracks); // MergeBorders & Resolve
385 computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSectorTracks);
386 int32_t nTracks = 0;
387 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
388 const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
389 mBorder[iSector] = mBorderMemory + 2 * nTracks;
390 mBorder[NSECTORS + iSector] = mBorderMemory + 2 * nTracks + n;
391 mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks;
392 nTracks += n;
393 }
394 computePointerWithAlignment(mem, mTrackLinks, mNTotalSectorTracks);
395 computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSectorTracks);
396 void* memMax = mem;
397 mem = memBase;
398 computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal
399 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
400 mem = memBase;
401 computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareForFit0 - SortTracksQPt - PrepareForFit1 - PrepareForFit1 / Finalize0 - Finalize2
402 computePointerWithAlignment(mem, mSharedCount, mNMaxClusters);
403 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
404 mem = memBase;
405 computePointerWithAlignment(mem, mLoopData, mNMaxTracks); // GPUTPCGMMergerTrackFit - GPUTPCGMMergerFollowLoopers
406 computePointerWithAlignment(mem, mRetryRefitIds, mNMaxTracks); // Reducing mNMaxTracks for mLoopData / mRetryRefitIds does not save memory, since the other parts are larger anyway
407 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
408 mem = memBase;
409 computePointerWithAlignment(mem, mLooperCandidates, mNMaxLooperMatches); // MergeLoopers 1-3
410 memMax = (void*)std::max((size_t)mem, (size_t)memMax);
411 return memMax;
412}
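// SetPointersMerger above repeatedly rewinds mem to memBase: buffer groups that
// are live in different processing phases are laid out over the same scratch
// region, and the returned size is the maximum over the groups rather than
// their sum. A standalone sketch of the pattern (hypothetical sizes):
#if 0
#include <algorithm>
#include <cstddef>
size_t overlappingScratchSize(size_t bytesPhase1, size_t bytesPhase2a, size_t bytesPhase2b)
{
  size_t base = 0, maxEnd = 0;
  size_t mem = base + bytesPhase1;          // group used only in phase 1
  maxEnd = std::max(maxEnd, mem);
  mem = base + bytesPhase2a + bytesPhase2b; // phase-2 group reuses the same region
  maxEnd = std::max(maxEnd, mem);
  return maxEnd; // allocate once, share across phases
}
#endif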
413
414void* GPUTPCGMMerger::SetPointersMemory(void* mem)
415{
416 computePointerWithAlignment(mem, mMemory);
417 return mem;
418}
419
420void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem)
421{
422 computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks);
423 const bool mergerSortTracks = mRec->GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging).par_SORT_BEFORE_FIT : mRec->GetProcessingSettings().mergerSortTracks;
424 if (mergerSortTracks) {
425 computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks);
426 }
427 return mem;
428}
429
430void* GPUTPCGMMerger::SetPointersOutput(void* mem)
431{
432 computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks);
433 if (mRec->GetParam().dodEdxEnabled) {
434 computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks);
435 if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
436 computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks);
437 }
438 }
439 computePointerWithAlignment(mem, mClusters, mNMaxMergedTrackClusters);
440 computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters);
441 return mem;
442}
443
444void* GPUTPCGMMerger::SetPointersOutputState(void* mem)
445{
446 if ((mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) || mRec->GetProcessingSettings().outputSharedClusterMap) {
447 computePointerWithAlignment(mem, mClusterStateExt, mNMaxClusters);
448 } else {
449 mClusterStateExt = nullptr;
450 }
451 return mem;
452}
453
454void* GPUTPCGMMerger::SetPointersOutputO2(void* mem)
455{
456 computePointerWithAlignment(mem, mOutputTracksTPCO2, HostProcessor(this).NOutputTracksTPCO2());
457 return mem;
458}
459
460void* GPUTPCGMMerger::SetPointersOutputO2Clus(void* mem)
461{
462 computePointerWithAlignment(mem, mOutputClusRefsTPCO2, HostProcessor(this).NOutputClusRefsTPCO2());
463 return mem;
464}
465
466void* GPUTPCGMMerger::SetPointersOutputO2MC(void* mem)
467{
468 computePointerWithAlignment(mem, mOutputTracksTPCO2MC, HostProcessor(this).NOutputTracksTPCO2());
469 return mem;
470}
471
472void* GPUTPCGMMerger::SetPointersOutputO2Scratch(void* mem)
473{
474 computePointerWithAlignment(mem, mTrackSortO2, mNMaxTracks);
475 computePointerWithAlignment(mem, mClusRefTmp, mNMaxTracks);
476 return mem;
477}
478
479void GPUTPCGMMerger::RegisterMemoryAllocation()
480{
486 if (mRec->GetProcessingSettings().createO2Output) {
490 if (mRec->GetProcessingSettings().runMC) {
492 }
493 }
495}
496
497void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io)
498{
499 mNTotalSectorTracks = 0;
500 mNClusters = 0;
501 mNMaxSingleSectorTracks = 0;
502 for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
503 uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks();
504 mNTotalSectorTracks += ntrk;
505 mNClusters += *mRec->GetConstantMem().tpcTrackers[iSector].NTrackHits();
506 if (mNMaxSingleSectorTracks < ntrk) {
507 mNMaxSingleSectorTracks = ntrk;
508 }
509 }
510 mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters);
511 if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) {
512 mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field
513 } else {
514 mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks);
515 }
516 if (io.clustersNative) {
517 mNMaxClusters = io.clustersNative->nClustersTotal;
518 } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) { // (condition reconstructed)
519 mNMaxClusters = 0;
520 for (int32_t i = 0; i < NSECTORS; i++) {
521 mNMaxClusters += mRec->GetConstantMem().tpcTrackers[i].NHitsTotal();
522 }
523 } else {
524 mNMaxClusters = mNClusters;
525 }
526 mNMaxLooperMatches = mNMaxClusters / 4; // We have that much scratch memory anyway
527}
528
529int32_t GPUTPCGMMerger::CheckSectors() // (name reconstructed)
530{
531 for (int32_t i = 0; i < NSECTORS; i++) {
532 if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSectorTracks) {
533 throw std::runtime_error("mNMaxSingleSectorTracks too small");
534 }
535 }
536 if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { // (condition reconstructed)
537 throw std::runtime_error("Must run also sector tracking");
538 }
539 return 0;
540}
541
542#endif // GPUCA_GPUCODE
543
544GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output)
545{
546 const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal();
547 for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) {
548 mTrackLinks[i] = -1;
549 }
550}
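// ClearTrackLinks uses the grid-stride pattern common to all kernels in this
// file: every one of the nBlocks * nThreads workers starts at its global index
// and advances by the total worker count, covering any n without assumptions
// about the launch geometry. A serial emulation of the access pattern:
#if 0
#include <cstdint>
void clearTrackLinksEmulated(int32_t* trackLinks, int32_t n, int32_t nBlocks, int32_t nThreads)
{
  for (int32_t iBlock = 0; iBlock < nBlocks; iBlock++) {
    for (int32_t iThread = 0; iThread < nThreads; iThread++) {
      for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) {
        trackLinks[i] = -1; // each index is visited by exactly one worker
      }
    }
  }
}
#endif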
551
552GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector)
553{
554 GPUTPCGMPropagator prop;
555 prop.SetMaterialTPC();
556 prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI);
557 prop.SetSeedingErrors(true); // Larger errors for seeds, better since we don't start with a good hypothesis
558 prop.SetFitInProjections(false);
559 prop.SetPolynomialField(&Param().polynomialField);
560 GPUTPCGMTrackParam trk;
561 trk.X() = inTrack->Param().GetX();
562 trk.Y() = inTrack->Param().GetY();
563 trk.Z() = inTrack->Param().GetZ();
564 trk.SinPhi() = inTrack->Param().GetSinPhi();
565 trk.DzDs() = inTrack->Param().GetDzDs();
566 trk.QPt() = inTrack->Param().GetQPt();
567 trk.TOffset() = Param().par.continuousTracking ? GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin) : 0;
568 const auto tmp = sectorTrack.ClusterTN() > sectorTrack.ClusterT0() ? std::array<float, 2>{sectorTrack.ClusterTN(), sectorTrack.ClusterT0()} : std::array<float, 2>{sectorTrack.ClusterT0(), sectorTrack.ClusterTN()};
569 trk.ShiftZ(this, sector, tmp[0], tmp[1], inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead
570 sectorTrack.SetX2(0.f);
571 for (int32_t way = 0; way < 2; way++) {
572 if (way) {
573 prop.SetFitInProjections(true);
574 prop.SetPropagateBzOnly(true);
575 }
576 trk.ResetCovariance();
577 prop.SetTrack(&trk, alpha);
578 int32_t start = way ? inTrack->NHits() - 1 : 0;
579 int32_t end = way ? 0 : (inTrack->NHits() - 1);
580 int32_t incr = way ? -1 : 1;
581 for (int32_t i = start; i != end; i += incr) {
582 float x, y, z;
583 int32_t row, flags;
584 const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector];
585 const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i];
586 int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex());
587 row = ic.RowIndex();
588 const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex];
589 flags = cl.getFlags();
590 GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TOffset());
591 if (prop.PropagateToXAlpha(x, alpha, way == 0)) {
592 return way == 0;
593 }
594 trk.ConstrainSinPhi();
595 if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge
596 return way == 0;
597 }
598 trk.ConstrainSinPhi();
599 }
600 if (way) {
601 sectorTrack.SetParam2(trk);
602 } else {
603 sectorTrack.Set(trk, inTrack, alpha, sector);
604 }
605 }
606 return 0;
607}
608
609GPUd() void GPUTPCGMMerger::SetTrackClusterT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr)
610{
611 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
612 const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()];
613 const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1];
614 int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex());
615 int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex());
616 const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0];
617 track.SetClusterT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime());
618}
619
620GPUd() void GPUTPCGMMerger::UnpackSaveNumber(int32_t id)
621{
622 mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks;
623}
624
625GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
626{
627 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
628 float alpha = Param().Alpha(iSector);
629 const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector];
630 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
631 uint32_t nTracks = *trk.NTracks();
632 for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) {
633 sectorTr = &trk.Tracks()[itr];
634 int32_t localId = mTrackIDs[((sectorTr->LocalTrackId() >> 24) & 0x3F) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)];
635 if (localId == -1) {
636 continue;
637 }
638 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
639 GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack];
640 SetTrackClusterT(track, iSector, sectorTr);
641 track.Set(this, sectorTr, alpha, iSector);
642 track.SetGlobalSectorTrackCov();
643 track.SetPrevNeighbour(-1);
644 track.SetNextNeighbour(-1);
645 track.SetNextSegmentNeighbour(-1);
646 track.SetPrevSegmentNeighbour(-1);
647 track.SetLocalTrackId(localId | (sectorTr->LocalTrackId() & 0x40000000));
648 }
649}
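// UnpackSectorGlobal decodes the packed LocalTrackId used above: as the masks
// suggest, bits 0-23 hold the track index within the sector, bits 24-29 the
// sector number, and bit 30 marks which end of the local track the
// extrapolation belongs to (see LinkExtrapolatedTracks below). A sketch of the
// decoding (helper name hypothetical):
#if 0
#include <cstdint>
struct UnpackedLocalId { uint32_t sector, track; bool up; };
UnpackedLocalId unpackLocalTrackId(uint32_t id)
{
  return UnpackedLocalId{(id >> 24) & 0x3Fu, id & 0xFFFFFFu, (id & 0x40000000u) != 0};
}
#endif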
650
651GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
652{
653 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
654 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
655 for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) {
656 mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1;
657 }
658}
659
660GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
661{
662 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
663 uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
664
665 float alpha = Param().Alpha(iSector);
666 const GPUTPCTrack* sectorTr = nullptr;
667
668 for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) {
669 sectorTr = &trk.Tracks()[itr];
670 GPUTPCGMSectorTrack track;
671 SetTrackClusterT(track, iSector, sectorTr);
672 if (RefitSectorTrack(track, sectorTr, alpha, iSector)) {
673 track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail? It shouldn't; this workaround should be removed
674 if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) {
675 continue;
676 }
677 }
678
679 CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs()));
680 track.SetPrevNeighbour(-1);
681 track.SetNextNeighbour(-1);
682 track.SetNextSegmentNeighbour(-1);
683 track.SetPrevSegmentNeighbour(-1);
684 track.SetExtrapolatedTrackId(0, -1);
685 track.SetExtrapolatedTrackId(1, -1);
686 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
687 mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack;
688 mSectorTrackInfos[myTrack] = track;
689 }
690}
691
692GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
693{
694 for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) {
695 GPUTPCGMSectorTrack& extrapolatedTrack = mSectorTrackInfos[itr];
696 GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId() & 0xFFFFFF];
697 int up = (extrapolatedTrack.LocalTrackId() & 0x40000000) ? 1 : 0;
698 localTrack.SetExtrapolatedTrackId(up, itr);
699 }
700}
701
702GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam)
703{
704 //* prepare sector tracks for merging with the next/previous/same sector:
705 //* each track is transported to the border line
706
707 float fieldBz = Param().bzCLight;
708
709 float dAlpha = Param().dAlpha / 2;
710 float x0 = 0;
711
712 if (iBorder == 0) { // transport to the left edge of the sector and rotate horizontally
713 dAlpha = dAlpha - CAMath::Pi() / 2;
714 } else if (iBorder == 1) { // transport to the right edge of the sector and rotate horizontally
715 dAlpha = -dAlpha - CAMath::Pi() / 2;
716 } else if (iBorder == 2) { // transport to the middle of the sector and rotate vertically to the border on the left
717 x0 = GPUTPCGeometry::Row2X(63);
718 } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right
719 dAlpha = -dAlpha;
720 x0 = GPUTPCGeometry::Row2X(63);
721 } else if (iBorder == 4) { // transport to the middle of the sector, w/o rotation
722 dAlpha = 0;
723 x0 = GPUTPCGeometry::Row2X(63);
724 }
725
726 const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
727 float cosAlpha = CAMath::Cos(dAlpha);
728 float sinAlpha = CAMath::Sin(dAlpha);
729
730 GPUTPCGMSectorTrack trackTmp;
731 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
732 const GPUTPCGMSectorTrack* track = &mSectorTrackInfos[itr];
733 int32_t iSector = track->Sector();
734
735 if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) {
736 continue;
737 }
738 if (useOrigTrackParam) { // TODO: Check how far this makes sense with sector track refit
739 if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) {
740 continue;
741 }
742 const GPUTPCGMSectorTrack* trackMin = track;
743 while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) {
744 track = &mSectorTrackInfos[track->NextSegmentNeighbour()];
745 if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) {
746 trackMin = track;
747 }
748 }
749 trackTmp = *trackMin;
750 track = &trackTmp;
751 if (trackTmp.X2() != 0.f) {
752 trackTmp.UseParam2();
753 } else {
754 trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector());
755 }
756 } else {
757 if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) {
758 if (iBorder == 0 && track->NextNeighbour() >= 0) {
759 continue;
760 }
761 if (iBorder == 1 && track->PrevNeighbour() >= 0) {
762 continue;
763 }
764 }
765 }
766 GPUTPCGMBorderTrack b;
767
768 if (track->TransportToXAlpha(this, x0, sinAlpha, cosAlpha, fieldBz, b, maxSin)) {
769 b.SetTrackID(itr);
770 b.SetNClusters(track->NClusters());
771 for (int32_t i = 0; i < 4; i++) {
772 if (CAMath::Abs(b.Cov()[i]) >= 5.0f) {
773 b.SetCov(i, 5.0f);
774 }
775 }
776 if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
777 b.SetCov(4, 0.5f);
778 }
779 uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u);
780 B[iSector][myTrack] = b;
781 }
782 }
783}
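// The AtomicAdd at the end of MergeSectorsPrepareStep2 implements lock-free
// output compaction: each thread that passes the cuts reserves the next free
// slot of the per-sector border array by bumping its counter, so concurrent
// writers never collide even though the number of accepted tracks is unknown
// in advance. A serial-friendly sketch (names hypothetical):
#if 0
#include <atomic>
#include <cstdint>
template <class T>
void pushCompacted(T* out, std::atomic<uint32_t>& counter, const T& value)
{
  const uint32_t slot = counter.fetch_add(1); // stand-in for CAMath::AtomicAdd
  out[slot] = value;                          // out must be sized for the worst case
}
#endif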
784
785template <>
786GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
787{
788 CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
789 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
790 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
791 bool sameSector = (iSector1 == iSector2);
792 for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
793 GPUTPCGMBorderTrack& b = B1[itr];
794 float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
795 if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
796 d *= 2;
797 } else if (d > 3) {
798 d = 3;
799 }
800 CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
801 GPUTPCGMBorderRange range;
802 range.fId = itr;
803 range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
804 range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
805 range1[itr] = range;
806 if (sameSector) {
807 range2[itr] = range;
808 }
809 }
810 if (!sameSector) {
811 for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
812 GPUTPCGMBorderTrack& b = B2[itr];
813 float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
814 if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
815 d *= 2;
816 } else if (d > 3) {
817 d = 3;
818 }
819 CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
820 GPUTPCGMBorderRange range;
821 range.fId = itr;
822 range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
823 range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
824 range2[itr] = range;
825 }
826 }
827}
828
829template <>
830GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
831{
832#if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
833 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
834 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
835
836 if (iThread == 0) {
837 if (iBlock == 0) {
838 GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); });
839 } else if (iBlock == 1) {
840 GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); });
841 }
842 }
843#else
844 printf("This sorting variant is disabled for RTC\n");
845#endif
846}
847
848template <>
849GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
850{
851#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
852 if (iThread == 0 && iBlock == 0) {
853 if (cmpMax) {
854 GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax());
855 } else {
856 GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin());
857 }
858 }
859#endif
860}
861
862template <>
863GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
864{
865 // int32_t statAll = 0, statMerged = 0;
866 float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
867 float factor2zt = Param().rec.tpc.trackMergerFactor2ZT;
868 float factor2k = Param().rec.tpc.trackMergerFactor2K;
869 float factor2General = Param().rec.tpc.trackMergerFactor2General;
870
871 factor2k = factor2General * factor2k;
872 factor2ys = factor2General * factor2ys;
873 factor2zt = factor2General * factor2zt;
874
875 int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits;
876 int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits;
877
878 bool sameSector = (iSector1 == iSector2);
879
880 GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
881 GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
882
883 int32_t i2 = 0;
884 for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) {
885 GPUTPCGMBorderRange r1 = range1[i1];
886 while (i2 < N2 && range2[i2].fMax < r1.fMin) {
887 i2++;
888 }
889
890 GPUTPCGMBorderTrack& b1 = B1[r1.fId];
891 if (b1.NClusters() < minNPartHits) {
892 continue;
893 }
894 int32_t iBest2 = -1;
895 int32_t lBest2 = 0;
896 // statAll++;
897 for (int32_t k2 = i2; k2 < N2; k2++) {
898 GPUTPCGMBorderRange r2 = range2[k2];
899 if (r2.fMin > r1.fMax) {
900 break;
901 }
902 if (sameSector && (r1.fId >= r2.fId)) {
903 continue;
904 }
905 // do check
906
907 GPUTPCGMBorderTrack& b2 = B2[r2.fId];
908#if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE)
909 int64_t label1 = GetTrackLabel(b1);
910 int64_t label2 = GetTrackLabel(b2);
911 if (label1 != label2 && label1 != -1) // DEBUG CODE, match by MC label
912#endif
913 {
914 CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {printf("Comparing track %3d to %3d: ", r1.fId, r2.fId); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Cov()[i]); } printf("\n%28s", ""); });
915 CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) {for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Cov()[i]); } printf(" - %5s - ", GetTrackLabel(b1) == GetTrackLabel(b2) ? "CLONE" : "FAKE"); });
916 if (b2.NClusters() < lBest2) {
917 CADEBUG2(continue, printf("!NCl1\n"));
918 }
919 if (mergeMode > 0) {
920 // Merging CE tracks
921 int32_t maxRowDiff = mergeMode == 2 ? 1 : 3; // TODO: check cut
922 if (CAMath::Abs(b1.Row() - b2.Row()) > maxRowDiff) {
923 CADEBUG2(continue, printf("!ROW\n"));
924 }
925 if (CAMath::Abs(b1.Par()[2] - b2.Par()[2]) > 0.5f || CAMath::Abs(b1.Par()[3] - b2.Par()[3]) > 0.5f) {
926 CADEBUG2(continue, printf("!CE SinPhi/Tgl\n")); // Crude cut to avoid totally wrong matches, TODO: check cut
927 }
928 }
929
930 GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); });
931
932 if (!b1.CheckChi2Y(b2, factor2ys)) {
933 CADEBUG2(continue, printf("!Y\n"));
934 }
935 // if( !b1.CheckChi2Z(b2, factor2zt ) ) CADEBUG2(continue, printf("!NCl1\n"));
936 if (!b1.CheckChi2QPt(b2, factor2k)) {
937 CADEBUG2(continue, printf("!QPt\n"));
938 }
939 float fys = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2ys : (2.f * factor2ys);
940 float fzt = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2zt : (2.f * factor2zt);
941 if (!b1.CheckChi2YS(b2, fys)) {
942 CADEBUG2(continue, printf("!YS\n"));
943 }
944 if (!b1.CheckChi2ZT(b2, fzt)) {
945 CADEBUG2(continue, printf("!ZT\n"));
946 }
947 if (CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20) {
948 if (b2.NClusters() < minNPartHits) {
949 CADEBUG2(continue, printf("!NCl2\n"));
950 }
951 if (b1.NClusters() + b2.NClusters() < minNTotalHits) {
952 CADEBUG2(continue, printf("!NCl3\n"));
953 }
954 }
955 CADEBUG(printf("OK: dZ %8.3f D1 %8.3f D2 %8.3f\n", CAMath::Abs(b1.Par()[1] - b2.Par()[1]), 3.5f * sqrt(b1.Cov()[1]), 3.5f * sqrt(b2.Cov()[1])));
956 } // DEBUG CODE, match by MC label
957 lBest2 = b2.NClusters();
958 iBest2 = b2.TrackID();
959 }
960
961 if (iBest2 < 0) {
962 continue;
963 }
964 GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); });
965
966 // statMerged++;
967
968 CADEBUG(GPUInfo("Found match %d %d", b1.TrackID(), iBest2));
969
970 mTrackLinks[b1.TrackID()] = iBest2;
971 if (mergeMode > 0) {
972 GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID());
973 }
974 }
975 // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged);
976}
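// The candidate search in MergeBorderTracks<2> is a sweep over two interval
// lists: list 1 is sorted by fMin, list 2 by fMax. The cursor i2 permanently
// skips entries whose window ends below r1.fMin, and the inner loop stops at
// the kernel's early-out once r2.fMin exceeds r1.fMax, so the detailed cuts
// run only on overlapping z-windows. A standalone sketch of the sweep:
#if 0
#include <cstdint>
#include <utility>
#include <vector>
struct Range { float fMin, fMax; int32_t fId; };
// list1 sorted ascending by fMin, list2 ascending by fMax
std::vector<std::pair<int32_t, int32_t>> overlapCandidates(const std::vector<Range>& list1, const std::vector<Range>& list2)
{
  std::vector<std::pair<int32_t, int32_t>> out;
  size_t i2 = 0;
  for (const Range& r1 : list1) {
    while (i2 < list2.size() && list2[i2].fMax < r1.fMin) {
      i2++; // ends below the current window; never needed again
    }
    for (size_t k2 = i2; k2 < list2.size(); k2++) {
      if (list2[k2].fMin > r1.fMax) {
        break; // mirrors the kernel's early-out
      }
      out.emplace_back(r1.fId, list2[k2].fId); // candidate pair for the chi2 cuts
    }
  }
  return out;
}
#endif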
977
978GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const
979{
980 if (withinSector == 1) { // Merge tracks within the same sector
981 jSector = iSector;
982 n1 = n2 = mMemory->tmpCounter[iSector];
983 b1 = b2 = mBorder[iSector];
984 } else if (withinSector == -1) { // Merge tracks across the central electrode
985 jSector = (iSector + NSECTORS / 2);
986 const int32_t offset = mergeMode == 2 ? NSECTORS : 0;
987 n1 = mMemory->tmpCounter[iSector + offset];
988 n2 = mMemory->tmpCounter[jSector + offset];
989 b1 = mBorder[iSector + offset];
990 b2 = mBorder[jSector + offset];
991 } else { // Merge tracks of adjacent sectors
992 jSector = mNextSectorInd[iSector];
993 n1 = mMemory->tmpCounter[iSector];
994 n2 = mMemory->tmpCounter[NSECTORS + jSector];
995 b1 = mBorder[iSector];
996 b2 = mBorder[NSECTORS + jSector];
997 }
998}
999
1000template <int32_t I>
1001GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode)
1002{
1003 int32_t n1, n2;
1004 GPUTPCGMBorderTrack *b1, *b2;
1005 int32_t jSector;
1006 MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode);
1007 MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode);
1008}
1009
1010#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
1011template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1012template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1013template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1014#endif
1015
1016GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1017{
1018 float x0 = GPUTPCGeometry::Row2X(63);
1019 const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
1020
1021 for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
1022 GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
1023 int32_t iSector = track.Sector();
1024 GPUTPCGMBorderTrack b;
1025 if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) {
1026 b.SetTrackID(itr);
1027 CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n"));
1028 b.SetNClusters(track.NClusters());
1029 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u);
1030 mBorder[iSector][myTrack] = b;
1031 }
1032 }
1033}
1034
1035GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam)
1036{
1037 bool part2 = iBlock & 1;
1038 int32_t border = part2 ? border1 : border0;
1039 GPUAtomic(uint32_t)* n = mMemory->tmpCounter;
1040 GPUTPCGMBorderTrack** b = mBorder;
1041 if (part2) {
1042 n += NSECTORS;
1043 b += NSECTORS;
1044 }
1045 MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam);
1046}
1047
1048GPUdi() void GPUTPCGMMerger::setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end)
1049{
1050 start = (elems + nBlocks - 1) / nBlocks * iBlock;
1051 end = (elems + nBlocks - 1) / nBlocks * (iBlock + 1);
1052 end = CAMath::Min(elems, end);
1053}
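// setBlockRange gives each block one contiguous chunk of ceil(elems / nBlocks)
// elements, clamping the last chunk, e.g. 10 elements over 4 blocks yield
// [0,3) [3,6) [6,9) [9,10). A standalone check of that example:
#if 0
#include <algorithm>
#include <cassert>
#include <cstdint>
int main()
{
  const int32_t elems = 10, nBlocks = 4;
  for (int32_t iBlock = 0; iBlock < nBlocks; iBlock++) {
    int32_t start = (elems + nBlocks - 1) / nBlocks * iBlock;
    int32_t end = std::min(elems, (elems + nBlocks - 1) / nBlocks * (iBlock + 1));
    assert(end - start == (iBlock < 3 ? 3 : 1)); // chunks of 3, remainder of 1
  }
}
#endif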
1054
1055GPUd() void GPUTPCGMMerger::hookEdge(int32_t u, int32_t v)
1056{
1057 if (v < 0) {
1058 return;
1059 }
1060 while (true) {
1061 u = mTrackCCRoots[u];
1062 v = mTrackCCRoots[v];
1063 if (u == v) {
1064 break;
1065 }
1066 int32_t h = CAMath::Max(u, v);
1067 int32_t l = CAMath::Min(u, v);
1068
1069 int32_t old = CAMath::AtomicCAS(&mTrackCCRoots[h], h, l);
1070 if (old == h) {
1071 break;
1072 }
1073
1074 u = mTrackCCRoots[h];
1075 v = l;
1076 }
1077}
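// hookEdge is the lock-free "hook" step of the union-find scheme cited below:
// both endpoints are chased to their current roots and the larger root is
// pointed at the smaller one; the AtomicCAS retries when another thread moved
// the root first. A serial sketch where the CAS trivially succeeds or retries:
#if 0
#include <algorithm>
#include <cstdint>
#include <vector>
void hookEdgeSerial(std::vector<int32_t>& roots, int32_t u, int32_t v)
{
  if (v < 0) {
    return;
  }
  while (true) {
    u = roots[u];
    v = roots[v];
    if (u == v) {
      return; // already in the same component
    }
    const int32_t h = std::max(u, v), l = std::min(u, v);
    if (roots[h] == h) { // serial stand-in for CAMath::AtomicCAS(&roots[h], h, l)
      roots[h] = l;
      return;
    }
    u = roots[h]; // h was no longer a root; retry from the new parents
    v = l;
  }
}
#endif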
1078
1079GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1080{
1081 int32_t start, end;
1082 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1083 for (int32_t i = start + iThread; i < end; i += nThreads) {
1084 mTrackCCRoots[i] = i;
1085 }
1086}
1087
1088GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1089{
1090 // Compute connected components in parallel, step 1.
1091 // Source: Adaptive Work-Efficient Connected Components on the GPU, Sutton et al, 2016 (https://arxiv.org/pdf/1612.01178.pdf)
1092 int32_t start, end;
1093 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1094 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1095 hookEdge(itr, mTrackLinks[itr]);
1096 }
1097}
1098
1099GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1100{
1101 // Compute connected components in parallel, step 1 - Part 2.
1102 nBlocks = nBlocks / 4 * 4;
1103 if (iBlock >= nBlocks) {
1104 return;
1105 }
1106
1107 int32_t start, end;
1108 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end);
1109
1110 int32_t myNeighbor = iBlock % 4;
1111
1112 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1113 int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor);
1114 hookEdge(itr, v);
1115 }
1116}
1117
1118GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1119{
1120 // Compute connected components in parallel, step 2.
1121 int32_t start, end;
1122 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1123 for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
1124 int32_t root = itr;
1125 int32_t next = mTrackCCRoots[root];
1126 if (root == next) {
1127 continue;
1128 }
1129 do {
1130 root = next;
1131 next = mTrackCCRoots[next];
1132 } while (root != next);
1133 mTrackCCRoots[itr] = root;
1134 }
1135}
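// The multi-jump above is pointer jumping (path compression): every entry
// chases its parent chain to the final root and then stores that root
// directly, flattening the union-find forest to depth 1. A serial equivalent:
#if 0
#include <cstdint>
#include <vector>
void multiJumpSerial(std::vector<int32_t>& roots)
{
  for (int32_t i = 0; i < (int32_t)roots.size(); i++) {
    int32_t root = i;
    while (roots[root] != root) {
      root = roots[root]; // follow parents to the root
    }
    roots[i] = root; // link directly to the root
  }
}
#endif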
1136
1137GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll)
1138{
1139 if (!mergeAll) {
1140 /*int32_t neighborType = useOrigTrackParam ? 1 : 0;
1141 int32_t old1 = newTrack2.PrevNeighbour(0);
1142 int32_t old2 = newTrack1.NextNeighbour(0);
1143 if (old1 < 0 && old2 < 0) neighborType = 0;
1144 if (old1 == itr) continue;
1145 if (neighborType) old1 = newTrack2.PrevNeighbour(1);
1146 if ( old1 >= 0 )
1147 {
1148 GPUTPCGMSectorTrack &oldTrack1 = mSectorTrackInfos[old1];
1149 if ( oldTrack1.NClusters() < newTrack1.NClusters() ) {
1150 newTrack2.SetPrevNeighbour( -1, neighborType );
1151 oldTrack1.SetNextNeighbour( -1, neighborType );
1152 } else continue;
1153 }
1154
1155 if (old2 == itr2) continue;
1156 if (neighborType) old2 = newTrack1.NextNeighbour(1);
1157 if ( old2 >= 0 )
1158 {
1159 GPUTPCGMSectorTrack &oldTrack2 = mSectorTrackInfos[old2];
1160 if ( oldTrack2.NClusters() < newTrack2.NClusters() )
1161 {
1162 oldTrack2.SetPrevNeighbour( -1, neighborType );
1163 } else continue;
1164 }
1165 newTrack1.SetNextNeighbour( itr2, neighborType );
1166 newTrack2.SetPrevNeighbour( itr, neighborType );*/
1167 }
1168
1169 int32_t start, end;
1170 setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
1171
1172 for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) {
1173 int32_t itr = baseIdx + iThread;
1174 bool inRange = itr < SectorTrackInfoLocalTotal();
1175
1176 int32_t itr2 = -1;
1177 if (inRange) {
1178 itr2 = mTrackLinks[itr];
1179 }
1180
1181 bool resolveSector = (itr2 > -1);
1182 if (resolveSector) {
1183 int32_t root = mTrackCCRoots[itr];
1184 resolveSector &= (start <= root) && (root < end);
1185 }
1186
1187 int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector));
1188
1189 if (resolveSector) {
1190 smem.iTrack1[smemIdx - 1] = itr;
1191 smem.iTrack2[smemIdx - 1] = itr2;
1192 }
1193 GPUbarrier();
1194
1195 if (iThread < nThreads - 1) {
1196 continue;
1197 }
1198
1199 const int32_t nSectors = smemIdx;
1200
1201 for (int32_t i = 0; i < nSectors; i++) {
1202 itr = smem.iTrack1[i];
1203 itr2 = smem.iTrack2[i];
1204
1205 GPUTPCGMSectorTrack* track1 = &mSectorTrackInfos[itr];
1206 GPUTPCGMSectorTrack* track2 = &mSectorTrackInfos[itr2];
1207 GPUTPCGMSectorTrack* track1Base = track1;
1208 GPUTPCGMSectorTrack* track2Base = track2;
1209
1210 bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0;
1211 // GPUInfo("\nMerge %d with %d - same segment %d", itr, itr2, (int32_t) sameSegment);
1212 // PrintMergeGraph(track1, std::cout);
1213 // PrintMergeGraph(track2, std::cout);
1214
1215 while (track2->PrevSegmentNeighbour() >= 0) {
1216 track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()];
1217 }
1218 if (sameSegment) {
1219 if (track1 == track2) {
1220 continue;
1221 }
1222 while (track1->PrevSegmentNeighbour() >= 0) {
1223 track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
1224 if (track1 == track2) {
1225 goto NextTrack;
1226 }
1227 }
1228 GPUCommonAlgorithm::swap(track1, track1Base);
1229 for (int32_t k = 0; k < 2; k++) {
1230 GPUTPCGMSectorTrack* tmp = track1Base;
1231 while (tmp->Neighbour(k) >= 0) {
1232 tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
1233 if (tmp == track2) {
1234 goto NextTrack;
1235 }
1236 }
1237 }
1238
1239 while (track1->NextSegmentNeighbour() >= 0) {
1240 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1241 if (track1 == track2) {
1242 goto NextTrack;
1243 }
1244 }
1245 } else {
1246 while (track1->PrevSegmentNeighbour() >= 0) {
1247 track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
1248 }
1249
1250 if (track1 == track2) {
1251 continue;
1252 }
1253 for (int32_t k = 0; k < 2; k++) {
1254 GPUTPCGMSectorTrack* tmp = track1;
1255 while (tmp->Neighbour(k) >= 0) {
1256 tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
1257 if (tmp == track2) {
1258 goto NextTrack;
1259 }
1260 }
1261 }
1262
1263 float z1min, z1max, z2min, z2max;
1264 z1min = track1->MinClusterT();
1265 z1max = track1->MaxClusterT();
1266 z2min = track2->MinClusterT();
1267 z2max = track2->MaxClusterT();
1268 if (track1 != track1Base) {
1269 z1min = CAMath::Min(z1min, track1Base->MinClusterT());
1270 z1max = CAMath::Max(z1max, track1Base->MaxClusterT());
1271 }
1272 if (track2 != track2Base) {
1273 z2min = CAMath::Min(z2min, track2Base->MinClusterT());
1274 z2max = CAMath::Max(z2max, track2Base->MaxClusterT());
1275 }
1276 bool goUp = z2max - z1min > z1max - z2min;
1277
1278 if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) {
1279 track1->SetNeighbor(track2 - mSectorTrackInfos, goUp);
1280 track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp);
1281 // GPUInfo("Result (simple neighbor)");
1282 // PrintMergeGraph(track1, std::cout);
1283 continue;
1284 } else if (track1->Neighbour(goUp) < 0) {
1285 track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)];
1286 GPUCommonAlgorithm::swap(track1, track2);
1287 } else if (track2->Neighbour(!goUp) < 0) {
1288 track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
1289 } else { // Both would work, but we use the simpler one
1290 track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
1291 }
1292 track1Base = track1;
1293 }
1294
1295 track2Base = track2;
1296 if (!sameSegment) {
1297 while (track1->NextSegmentNeighbour() >= 0) {
1298 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1299 }
1300 }
1301 track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos);
1302 track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
1303 // k = 0: Merge right side
1304 // k = 1: Merge left side
1305 for (int32_t k = 0; k < 2; k++) {
1306 track1 = track1Base;
1307 track2 = track2Base;
1308 while (track2->Neighbour(k) >= 0) {
1309 if (track1->Neighbour(k) >= 0) {
1310 GPUTPCGMSectorTrack* track1new = &mSectorTrackInfos[track1->Neighbour(k)];
1311 GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
1312 track2->SetNeighbor(-1, k);
1313 track2new->SetNeighbor(-1, k ^ 1);
1314 track1 = track1new;
1315 while (track1->NextSegmentNeighbour() >= 0) {
1316 track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
1317 }
1318 track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos);
1319 track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
1320 track1 = track1new;
1321 track2 = track2new;
1322 } else {
1323 GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)];
1324 track1->SetNeighbor(track2->Neighbour(k), k);
1325 track2->SetNeighbor(-1, k);
1326 track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1);
1327 }
1328 }
1329 }
1330 // GPUInfo("Result");
1331 // PrintMergeGraph(track1, std::cout);
1332 NextTrack:;
1333 }
1334 }
1335}
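// ResolveMergeSectors above uses work_group_scan_inclusive_add to compact the
// sparse set of links owned by one block into a dense shared-memory list: each
// thread contributes 1 if it holds a link, the inclusive prefix sum hands it a
// unique slot (scan value - 1), and the last thread then walks the dense list
// serially. A serial sketch of scan-based compaction:
#if 0
#include <cstdint>
void compactByScan(const int32_t* items, const bool* predicate, int32_t n, int32_t* dense)
{
  int32_t scan = 0;
  for (int32_t t = 0; t < n; t++) { // one iteration per "thread" of the block
    scan += predicate[t] ? 1 : 0;   // inclusive prefix sum over the predicate
    if (predicate[t]) {
      dense[scan - 1] = items[t];   // scan - 1 is this thread's dense slot
    }
  }
}
#endif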
1336
1337GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr)
1338{
1339 if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) {
1340 return;
1341 }
1342
1343 float z = 0;
1344 {
1345 float x, y;
1346 auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num];
1347 GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z);
1348 }
1349
1350 if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) {
1351 return;
1352 }
1353 int32_t sector = track->Sector();
1354 for (int32_t attempt = 0; attempt < 2; attempt++) {
1355 GPUTPCGMBorderTrack b;
1356 const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row);
1357 if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) {
1358 b.SetTrackID(itr);
1359 b.SetNClusters(mMergedTracks[itr].NClusters());
1360 if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
1361 b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack?
1362 }
1363 if (track->CSide()) {
1364 b.SetPar(1, b.Par()[1] - 2 * (z - b.ZOffsetLinear()));
1365 b.SetZOffsetLinear(-b.ZOffsetLinear());
1366 }
1367 b.SetRow(cls.row);
1368 uint32_t id = sector + attempt * NSECTORS;
1369 uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u);
1370 mBorder[id][myTrack] = b;
1371 break;
1372 }
1373 }
1374}
1375
1376GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1377{
1378 const ClusterNative* cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
1379 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1380 if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) {
1381 if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) {
1382 continue;
1383 }
1384 GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]};
1385
1386 if (!trk[1]->OK() || trk[1]->CCE()) {
1387 continue;
1388 }
1389 bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0);
1390 celooper |= trk[0]->PrevSegment() != -1 && trk[1]->PrevSegment() != -1;
1391 if (!celooper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) {
1392 continue;
1393 }
1394
1395 bool needswap = false;
1396 if (trk[0]->PrevSegment() == -1 && trk[1]->PrevSegment() >= 0) {
1397 needswap = true;
1398 } else if (celooper) {
1399 const float z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime());
1400 const float z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime());
1401 if (z1max < z0max) {
1402 needswap = true;
1403 }
1404 } else {
1405 if (mClusters[trk[0]->FirstClusterRef()].row > mClusters[trk[1]->FirstClusterRef()].row) {
1406 needswap = true;
1407 }
1408 }
1409 if (needswap) {
1410 GPUCommonAlgorithm::swap(trk[0], trk[1]);
1411 }
1412
1413 if (Param().par.continuousTracking) {
1414 GPUTPCGMMergedTrackHit* clsmax;
1415 const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(),
1416 cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(),
1417 &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1],
1418 &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax);
1419 const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f);
1420 trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TOffset() - offset);
1421 trk[1]->Param().TOffset() = offset;
1422 if (celooper) {
1423 trk[0]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[0]->CSide() * NSECTORS / 2, trk[0]->Param().TOffset() - offset);
1424 trk[0]->Param().TOffset() = offset;
1425 }
1426 }
1427
1428 if (celooper) { // TODO: Needs proper handling; avoid falsely flagging the primary leg as looper
1429 trk[0]->SetMergedLooperConnected(true);
1430 trk[0]->SetCCE(true);
1431 trk[0]->SetLooper(true);
1432 trk[1]->SetMergedLooperConnected(true);
1433 trk[1]->SetCCE(true);
1434 trk[1]->SetLooper(true);
1435 continue;
1436 }
1437
1438 uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
1439 if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) {
1440 raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters);
1441 for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) {
1442 mClusters[k].num = 0;
1443 mClusters[k].state = 0;
1444 }
1445 CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
1446 return;
1447 }
1448
1449 int32_t pos = newRef;
1450#pragma unroll
1451 for (int32_t k = 1; k >= 0; k--) {
1452 for (uint32_t j = 0; j != trk[k]->NClusters(); j++) {
1453 mClusters[pos++] = mClusters[trk[k]->FirstClusterRef() + j];
1454 }
1455 }
1456 trk[1]->SetFirstClusterRef(newRef);
1457 trk[1]->SetNClusters(trk[0]->NClusters() + trk[1]->NClusters());
1458 if (trk[1]->NClusters() > GPUCA_MERGER_MAX_TRACK_CLUSTERS) {
1459 trk[1]->SetFirstClusterRef(trk[1]->FirstClusterRef() + trk[1]->NClusters() - GPUCA_MERGER_MAX_TRACK_CLUSTERS);
1460 trk[1]->SetNClusters(GPUCA_MERGER_MAX_TRACK_CLUSTERS);
1461 }
1462 trk[1]->SetCCE(true);
1463 trk[0]->SetNClusters(0);
1464 trk[0]->SetOK(false);
1465 }
1466 }
1467
1468 // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks
1469}
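// Illustration (commented out, not part of the compiled file): every merger kernel in
// this file distributes its work with the same grid-stride idiom, so the code is correct
// for any launch geometry, including a single host thread (nBlocks == nThreads == 1).
/*
static void gridStrideLoopSketch(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, uint32_t nItems)
{
  for (uint32_t i = iBlock * nThreads + iThread; i < nItems; i += nThreads * nBlocks) {
    // each (iBlock, iThread) pair visits a disjoint strided subset of [0, nItems)
  }
}
*/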
1470
1471namespace o2::gpu::internal
1472{
1473namespace // anonymous
1474{
1475struct GPUTPCGMMerger_CompareClusterIds {
1476 const GPUTPCGMMerger::trackCluster* const mCmp;
1477 const bool revert;
1478 GPUd() GPUTPCGMMerger_CompareClusterIds(const GPUTPCGMMerger::trackCluster* cmp, bool r) : mCmp(cmp), revert(r) {}
1479 GPUd() bool operator()(const int16_t aa, const int16_t bb)
1480 {
1481 const GPUTPCGMMerger::trackCluster& a = mCmp[aa];
1482 const GPUTPCGMMerger::trackCluster& b = mCmp[bb];
1483 if (a.row != b.row) {
1484 return (a.row > b.row) ^ revert;
1485 }
1486 return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? (a.id > b.id) : (aa > bb), a.id > b.id);
1487 }
1488};
1489} // anonymous namespace
1490} // namespace o2::gpu::internal
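// Illustration (commented out): GPUTPCGMMerger_CompareClusterIds orders *indices* into a
// trackCluster array rather than the clusters themselves, so CollectMergedTracks below can
// still address the unsorted array when filtering duplicates. A hypothetical host-side use
// (array name and contents assumed for the example):
/*
GPUTPCGMMerger::trackCluster clusters[3] = {{10u, 5, 0}, {11u, 7, 0}, {10u, 5, 0}};
int16_t idx[3] = {0, 1, 2};
// descending row order for revert == false, ascending for revert == true;
// equal rows fall back to the cluster id (and to the index in deterministic mode)
GPUCommonAlgorithm::sort(idx, idx + 3, GPUTPCGMMerger_CompareClusterIds(clusters, false));
*/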
1491
1492GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1493{
1494 static constexpr int32_t kMaxParts = 16;
1495 static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;
1496
1497 GPUTPCGMSectorTrack* trackParts[kMaxParts];
1498
1499 int32_t itr = iBlock * nThreads + iThread;
1500 GPUTPCGMSectorTrack* trbase = nullptr;
1501 int32_t leg = 0;
1502 int32_t lastMergedSegment = -1;
1503 bool revertSegments = false;
1504 bool revertInSegment = false;
1505 while (true) {
1506 if (trbase && !Param().rec.tpc.dropLoopers) {
1507 int32_t jtr = trbase->NextNeighbour();
1508 if (jtr >= 0) {
1509 trbase = &(mSectorTrackInfos[jtr]);
1510 if (trbase->PrevSegmentNeighbour() >= 0) {
1511 trbase = nullptr;
1512 } else {
1513 if (Param().rec.enableCyclicGraphWorkarounds) {
1514 trbase->SetPrevSegmentNeighbour(1000000001);
1515 }
1516 leg += revertSegments ? 1 : -1;
1517 }
1518 } else {
1519 trbase = nullptr;
1520 }
1521 }
1522
1523 if (trbase == nullptr) {
1524 while (itr < SectorTrackInfoLocalTotal()) {
1525 trbase = &mSectorTrackInfos[itr];
1526 if (trbase->PrevSegmentNeighbour() >= 0 || trbase->PrevNeighbour() >= 0) {
1527 itr += nThreads * nBlocks;
1528 continue;
1529 }
1530 break;
1531 }
1532 if (itr >= SectorTrackInfoLocalTotal()) {
1533 break;
1534 }
1535 revertSegments = false;
1536 revertInSegment = false;
1537 if (Param().rec.enableCyclicGraphWorkarounds) {
1538 trbase->SetPrevSegmentNeighbour(1000000000);
1539 }
1540 int32_t jtr = trbase->NextNeighbour();
1541 leg = 0;
1542 if (jtr >= 0) {
1543 int32_t lasttr = itr;
1544 while (jtr >= 0) { // --------------- count segments ---------------
1545 if (Param().rec.enableCyclicGraphWorkarounds && &mSectorTrackInfos[jtr] == trbase) {
1546 break; // Break cyclic graph
1547 }
1548 lasttr = jtr;
1549 leg++;
1550 jtr = mSectorTrackInfos[jtr].NextNeighbour();
1551 }
1552
1553 float mainT = 1e9;
1554 revertSegments = true;
1555 for (uint32_t k = 0; k < 2; k++) { // --------------- check if first or last segment is primary ---------------
1556 int32_t ichk = k ? lasttr : itr;
1557 const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk];
1558 while (true) {
1559 float t = -trchk->MinClusterT();
1560 if (t < mainT) {
1561 if (k) {
1562 revertSegments = false;
1563 break;
1564 }
1565 mainT = t;
1566 }
1567 int32_t next = trchk->NextSegmentNeighbour();
1568 if (next < 0 || (Param().rec.enableCyclicGraphWorkarounds && next == ichk)) {
1569 break; // Breaks also cycles
1570 }
1571 trchk = &mSectorTrackInfos[next];
1572 }
1573 }
1574 if (revertSegments) {
1575 leg = 0;
1576 }
1577
1578 { // --------------- find longest sector track of main segment ---------------
1579 int32_t length = 0;
1580 int32_t ichk = revertSegments ? itr : lasttr;
1581 const GPUTPCGMSectorTrack* trchk = &mSectorTrackInfos[ichk];
1582 const GPUTPCGMSectorTrack* longest = trchk;
1583 while (true) {
1584 if (trchk->OrigTrack()->NHits() > length) {
1585 longest = trchk;
1586 length = trchk->OrigTrack()->NHits();
1587 }
1588 int32_t next = trchk->NextSegmentNeighbour();
1589 if (next < 0 || (Param().rec.enableCyclicGraphWorkarounds && next == ichk)) {
1590 break; // Breaks also cycles
1591 }
1592 trchk = &mSectorTrackInfos[next];
1593 }
1594 revertInSegment = longest->ClusterT0() < longest->ClusterTN();
1595 }
1596 }
1597 lastMergedSegment = -1;
1598 itr += nThreads * nBlocks;
1599 }
1600
1601 do {
1602 int32_t nParts = 0;
1603 int32_t nHits = 0;
1604
1605 GPUTPCGMSectorTrack* tr = trbase;
1606 while (true) {
1607 if (nParts >= kMaxParts) {
1608 break;
1609 }
1610 if (nHits + tr->NClusters() > kMaxClusters) {
1611 break;
1612 }
1613 nHits += tr->NClusters();
1614
1615 trackParts[nParts++] = tr;
1616 for (int32_t i = 0; i < 2; i++) {
1617 if (tr->ExtrapolatedTrackId(i) != -1) {
1618 if (nParts >= kMaxParts) {
1619 break;
1620 }
1621 if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) {
1622 break;
1623 }
1624 trackParts[nParts++] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)];
1625 nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters();
1626 }
1627 }
1628 int32_t jtr = tr->NextSegmentNeighbour();
1629 if (jtr >= 0) {
1630 tr = &(mSectorTrackInfos[jtr]);
1631 if (Param().rec.enableCyclicGraphWorkarounds) {
1632 tr->SetPrevSegmentNeighbour(1000000002);
1633 }
1634 continue;
1635 }
1636 break;
1637 }
1638
1639 // unpack and sort clusters
1640 if (nParts > 1 && (!revertInSegment ^ (leg & 1))) {
1641 GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
1642 GPUCA_DETERMINISTIC_CODE( // clang-format off
1643 if (a->X() != b->X()) {
1644 return (a->X() > b->X());
1645 }
1646 if (a->Y() != b->Y()) {
1647 return (a->Y() > b->Y());
1648 }
1649 if (a->Z() != b->Z()) {
1650 return (a->Z() > b->Z());
1651 }
1652 return a->QPt() > b->QPt();
1653 , // !GPUCA_DETERMINISTIC_CODE
1654 return (a->X() > b->X());
1655 ) // clang-format on
1656 });
1657 }
1658
1659 trackCluster trackClusters[kMaxClusters];
1660 nHits = 0;
1661 for (int32_t ipart = 0; ipart < nParts; ipart++) {
1662 const GPUTPCGMSectorTrack* t = trackParts[ipart];
1663 CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs()));
1664 int32_t nTrackHits = t->NClusters();
1665 trackCluster* c2 = trackClusters + nHits + nTrackHits - 1;
1666 for (int32_t i = 0; i < nTrackHits; i++, c2--) {
1667 const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()];
1668 const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i];
1669 uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0];
1670 *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector()};
1671 }
1672 nHits += nTrackHits;
1673 }
1674 if (nHits < GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(trbase->QPt() * Param().qptB5Scaler)) {
1675 break;
1676 }
1677
1678 const bool mustReverse = revertInSegment ^ (leg & 1);
1679 bool ordered = !mustReverse;
1680 if (ordered) {
1681 for (int32_t i = 1; i < nHits; i++) {
1682 if ((trackClusters[i].row > trackClusters[i - 1].row) ^ mustReverse || trackClusters[i].id == trackClusters[i - 1].id) {
1683 ordered = false;
1684 break;
1685 }
1686 }
1687 }
1688 int32_t firstTrackIndex = 0;
1689 int32_t lastTrackIndex = nParts - 1;
1690 if (ordered == 0) {
1691 int32_t nTmpHits = 0;
1692 trackCluster trackClustersUnsorted[kMaxClusters];
1693 int16_t clusterIndices[kMaxClusters];
1694 for (int32_t i = 0; i < nHits; i++) {
1695 trackClustersUnsorted[i] = trackClusters[i];
1696 clusterIndices[i] = i;
1697 }
1698
1699 GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters, mustReverse));
1700
1701 nTmpHits = 0;
1702 firstTrackIndex = lastTrackIndex = -1;
1703 for (int32_t i = 0; i < nParts; i++) {
1704 nTmpHits += trackParts[i]->NClusters();
1705 if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) {
1706 firstTrackIndex = i;
1707 }
1708 if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) {
1709 lastTrackIndex = i;
1710 }
1711 }
1712
1713 int32_t nFilteredHits = 0;
1714 int32_t indPrev = -1;
1715 for (int32_t i = 0; i < nHits; i++) {
1716 int32_t ind = clusterIndices[i];
1717 if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) {
1718 continue;
1719 }
1720 indPrev = ind;
1721 trackClusters[nFilteredHits] = trackClustersUnsorted[ind];
1722 nFilteredHits++;
1723 }
1724 nHits = nFilteredHits;
1725 }
1726
1727 const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits);
1728 if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) {
1729 raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters);
1730 CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
1731 break;
1732 }
1733
1734 GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster;
1735
1736 for (int32_t i = 0; i < nHits; i++) {
1737 uint8_t state;
1738 const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id];
1739 state = c.getFlags();
1740 cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags
1741 cl[i].row = trackClusters[i].row;
1742 cl[i].num = trackClusters[i].id;
1743 cl[i].sector = trackClusters[i].sector;
1744 }
1745
1746 uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u);
1747 if (iOutputTrack >= mNMaxTracks) {
1748 raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks);
1749 CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks);
1750 break;
1751 }
1752
1753 GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack];
1754 GPUTPCGMTrackParam& p1 = mergedTrack.Param();
1755 const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex];
1756 mergedTrack.SetFlags(0);
1757 mergedTrack.SetOK(true);
1758 mergedTrack.SetLeg(leg);
1759 mergedTrack.SetLooper(leg > 0);
1760 mergedTrack.SetNClusters(nHits);
1761 mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster);
1762 mergedTrack.SetCSide(p2.CSide());
1763 mergedTrack.SetMergedLooperConnected(leg > 0);
1764 if (revertSegments) {
1765 mergedTrack.SetPrevSegment(-1);
1766 if (lastMergedSegment >= 0) {
1767 mMergedTracks[lastMergedSegment].SetPrevSegment(iOutputTrack);
1768 }
1769 } else {
1770 mergedTrack.SetPrevSegment(lastMergedSegment);
1771 }
1772 lastMergedSegment = iOutputTrack;
1773
1773
1774 GPUTPCGMBorderTrack b;
1775 const float toX = GPUTPCGeometry::Row2X(cl[0].row);
1776 if (p2.TransportToX<2>(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) {
1777 p1.X() = toX;
1778 p1.Y() = b.Par()[0];
1779 p1.Z() = b.Par()[1];
1780 p1.SinPhi() = b.Par()[2];
1781 } else {
1782 p1.X() = p2.X();
1783 p1.Y() = p2.Y();
1784 p1.Z() = p2.Z();
1785 p1.SinPhi() = p2.SinPhi();
1786 }
1787 p1.TOffset() = p2.TOffset();
1788 p1.DzDs() = p2.DzDs();
1789 p1.QPt() = p2.QPt();
1790 mergedTrack.SetAlpha(p2.Alpha());
1791 if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) {
1792 p1.QPt() = 100.f / Param().rec.bz0Pt10MeV;
1793 }
1794
1795 // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits);
1796
1797 /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks))
1798 {
1799 mergedTrack.SetOK(0);
1800 mergedTrack.SetNClusters(0);
1801 }
1802 if (mergedTrack.NClusters() && mergedTrack.OK()) */
1803 if (leg == 0 && Param().rec.tpc.mergeCE) {
1804 auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
1805 bool CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
1806 MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], iOutputTrack);
1807 }
1808 } while (false);
1809 }
1810}
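// Illustration (commented out): both overflow-guarded allocations above follow one
// lock-free pattern, also used for the CE cluster copy earlier in this file: an atomic
// add reserves a contiguous slot range, and on overflow the counter is clamped back to
// the capacity after raising an error, so later kernels still see a valid size.
/*
uint32_t ref = CAMath::AtomicAdd(&counter, n);  // reserve n output slots
if (ref + n > capacity) {
  raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, ref, capacity); // example error code
  CAMath::AtomicExch(&counter, capacity);       // clamp the shared counter
} else {
  // slots [ref, ref + n) now belong exclusively to this thread
}
*/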
1811
1812GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1813{
1814 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1815 mTrackOrderProcess[i] = i;
1816 }
1817}
1818
1819GPUd() void GPUTPCGMMerger::PrepareForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1820{
1821 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) {
1822 mTrackSort[i] = i;
1823 }
1824}
1825
1826GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1827{
1828#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1829 if (iThread == 0 && iBlock == 0) {
1830 GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks));
1831 }
1832#endif
1833}
1834
1835GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1836{
1837#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1838 if (iThread == 0 && iBlock == 0) {
1839 GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks));
1840 }
1841#endif
1842}
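// Illustration (commented out): when GPUCA_SPECIALIZE_THRUST_SORTS is defined the two
// kernels above compile to empty bodies and the sort is expected to run through a
// specialized (e.g. Thrust-based) path instead; otherwise a single thread performs the
// whole sort, which on the host would be equivalent to:
/*
std::sort(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks));
*/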
1843
1844GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1845{
1846 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) {
1847 mTrackOrderAttach[mTrackSort[i]] = i;
1848 GPUTPCGMMergedTrack& trk = mMergedTracks[i];
1849 if (trk.OK()) {
1850 for (uint32_t j = 0; j < trk.NClusters(); j++) {
1851 uint32_t weight = attachAttached | attachGood;
1852 if (CAMath::Abs(trk.GetParam().GetQPt() * Param().qptB5Scaler) <= Param().rec.tpc.rejectQPtB5 && !trk.MergedLooper() && trk.Leg() == 0) {
1853 weight |= attachTube;
1854 }
1855 mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = weight;
1856 CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
1857 }
1858 if (!trk.CCE() && !trk.MergedLooper()) {
1859 GPUTPCGMMergedTrack* updTrk = trk.GetFirstSegment(mMergedTracks, Param().rec.enableCyclicGraphWorkarounds);
1860 const auto &cl0 = mClusters[trk.FirstClusterRef()], &cln = mClusters[updTrk->FirstClusterRef() + updTrk->NClusters() - 1];
1861 const auto& GPUrestrict() cls = GetConstantMem()->ioPtrs.clustersNative->clustersLinear;
1862 float z0 = cls[cl0.num].getTime(), zn = cls[cln.num].getTime();
1863 const auto tmp = zn > z0 ? std::array<float, 3>{zn, z0, GPUTPCGeometry::Row2X(cln.row)} : std::array<float, 3>{z0, zn, GPUTPCGeometry::Row2X(cl0.row)};
1864 trk.Param().ShiftZ(this, cl0.sector, tmp[0], tmp[1], tmp[2]);
1865 updTrk = &trk;
1866 while (updTrk->PrevSegment() >= 0) {
1867 auto next = &mMergedTracks[updTrk->PrevSegment()];
1868 if (Param().rec.enableCyclicGraphWorkarounds && next == &trk) {
1869 break;
1870 }
1871 updTrk = next;
1872 updTrk->Param().TOffset() = trk.Param().TOffset();
1873 }
1874 }
1875 }
1876 }
1877}
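// Note on the loop above: ShiftZ re-anchors the z position / time offset of the track
// from its extreme cluster times, and the resulting TOffset is then copied along the
// PrevSegment chain (with the cyclic-graph guard), so all segments of one physical
// track are later refitted with a consistent common time offset.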
1878
1879GPUd() void GPUTPCGMMerger::PrepareForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1880{
1881 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) {
1882 if (mSharedCount[mClusters[i].num] > 1) {
1883 mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared;
1884 }
1885 }
1886 if (mClusterStateExt) {
1887 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nBlocks * nThreads) {
1888 uint8_t state = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[i].getFlags();
1889 if (mSharedCount[i] > 1) {
1890 state |= GPUTPCGMMergedTrackHit::flagShared;
1891 }
1892 mClusterStateExt[i] = state;
1893 }
1894 }
1895}
1896
1897GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1898{
1899 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1900 mTrackSort[mTrackOrderAttach[i]] = i;
1901 }
1902 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) {
1903 if (!(mClusterAttachment[mClusters[i].num] & attachProtect)) {
1904 mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below
1905 }
1906 }
1907}
1908
1909GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1910{
1911 for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
1912 const GPUTPCGMMergedTrack& trk = mMergedTracks[i];
1913 if (!trk.OK() || trk.NClusters() == 0) {
1914 continue;
1915 }
1916 for (uint32_t j = 0; j < trk.NClusters(); j++) {
1917 int32_t id = mClusters[trk.FirstClusterRef() + j].num;
1918 uint32_t weight = mTrackOrderAttach[i] | attachAttached;
1919 uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state;
1920 if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) {
1921 weight |= attachGood;
1922 } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) {
1923 weight |= attachHighIncl;
1924 }
1925 if (trk.Leg() == 0) {
1926 weight |= attachGoodLeg;
1927 }
1928 if (CAMath::Abs(trk.GetParam().GetQPt() * Param().qptB5Scaler) <= Param().rec.tpc.rejectQPtB5 && !trk.MergedLooper() && trk.Leg() == 0) {
1929 weight |= attachTube;
1930 }
1931 CAMath::AtomicMax(&mClusterAttachment[id], weight);
1932 }
1933 }
1934}
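// Illustration (commented out): the attachment weight packs quality flags into the high
// bits and the attachment order into the low bits, so one CAMath::AtomicMax per cluster
// resolves the competition between tracks without locks: flags dominate the comparison,
// and among equal flags the track with the higher attachment order wins.
/*
uint32_t weight = mTrackOrderAttach[i] | attachAttached; // low bits: per-track order
weight |= attachGood;                                    // high bits: quality flags
CAMath::AtomicMax(&mClusterAttachment[id], weight);      // best candidate survives
*/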
1935
1936GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1937{
1938 for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nThreads * nBlocks) {
1939 if (mClusterAttachment[i] != 0) {
1940 mClusterAttachment[i] = (mClusterAttachment[i] & attachFlagMask) | mTrackSort[mClusterAttachment[i] & attachTrackMask];
1941 }
1942 }
1943}
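// Illustration (commented out): after Finalize2 the low bits hold the final merged-track
// index rather than the attachment order. Assuming the attach* constants from
// gputpcgmmergertypes, a consumer would decode the word as:
/*
const uint32_t attach = mClusterAttachment[clusterId];
if (attach & attachAttached) {                     // cluster belongs to a track
  const uint32_t track = attach & attachTrackMask; // final merged-track index
  const bool good = attach & attachGood;           // quality flag survives the rewrite
}
*/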
1944
1945GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1946{
1947 const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the rejection threshold
1948 for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) {
1949 const auto& trk = mMergedTracks[i];
1950 const auto& p = trk.GetParam();
1951 const float qptabs = CAMath::Abs(p.GetQPt());
1952 if (trk.OK() && trk.NClusters() && trk.Leg() == 0 && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) {
1953 const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector;
1954 const float refz = p.GetZ() + (Param().par.continuousTracking ? GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTOffset(), Param().continuousMaxTimeBin) : 0) + (trk.CSide() ? -100 : 100);
1955 float sinA, cosA;
1956 CAMath::SinCos(trk.GetAlpha(), sinA, cosA);
1957 float gx = cosA * p.GetX() - sinA * p.GetY();
1958 float gy = cosA * p.GetY() + sinA * p.GetX();
1959 float bz = Param().polynomialField.GetFieldBz(gx, gy, p.GetZ());
1960 const float r1 = p.GetQPt() * bz;
1961 const float r = CAMath::Abs(r1) > 0.0001f ? (1.f / r1) : 10000;
1962 const float mx = p.GetX() + r * p.GetSinPhi();
1963 const float my = p.GetY() - r * CAMath::Sqrt(1 - p.GetSinPhi() * p.GetSinPhi());
1964 const float gmx = cosA * mx - sinA * my;
1965 const float gmy = cosA * my + sinA * mx;
1966 uint32_t myId = CAMath::AtomicAdd(&mMemory->nLooperMatchCandidates, 1u);
1967 if (myId >= mNMaxLooperMatches) {
1968 raiseError(GPUErrors::ERROR_LOOPER_MATCH_OVERFLOW, myId, mNMaxLooperMatches);
1969 CAMath::AtomicExch(&mMemory->nLooperMatchCandidates, mNMaxLooperMatches);
1970 return;
1971 }
1972 mLooperCandidates[myId] = MergeLooperParam{refz, gmx, gmy, i};
1973
1974 /*printf("Track %u Sanity qpt %f snp %f bz %f\n", mMemory->nLooperMatchCandidates, p.GetQPt(), p.GetSinPhi(), bz);
1975 for (uint32_t k = 0;k < trk.NClusters();k++) {
1976 float xx, yy, zz;
1977 const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num];
1978 GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTOffset());
1979 float sa2, ca2;
1980 CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2);
1981 float cx = ca2 * xx - sa2 * yy;
1982 float cy = ca2 * yy + sa2 * xx;
1983 float dist = CAMath::Sqrt((cx - gmx) * (cx - gmx) + (cy - gmy) * (cy - gmy));
1984 printf("Hit %3d/%3d sector %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].sector, cx, cy, dist);
1985 }*/
1986 }
1987 }
1988}
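// Note on the candidate above: (gmx, gmy) is the center of the track's circular
// trajectory in the global xy plane. With curvature radius r = 1 / (q/pT * Bz), the
// center in the sector frame is (x + r * sinPhi, y - r * cos(asin(sinPhi))), rotated by
// the sector angle alpha into global coordinates. Both legs of a looping particle share
// this center, which is what MergeLoopersMain matches on, together with refz.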
1989
1990GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
1991{
1992#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1993 if (iThread == 0 && iBlock == 0) {
1994 GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
1995 }
1996#endif
1997}
1998
1999GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
2000{
2001 const MergeLooperParam* candidates = mLooperCandidates;
2002
2003#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2004 std::vector<int64_t> paramLabels(mMemory->nLooperMatchCandidates);
2005 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
2006 paramLabels[i] = GetTrackLabel(mMergedTracks[candidates[i].id]);
2007 }
2008 /*std::vector<bool> dropped(mMemory->nLooperMatchCandidates);
2009 std::vector<bool> droppedMC(mMemory->nLooperMatchCandidates);
2010 std::vector<int32_t> histMatch(101);
2011 std::vector<int32_t> histFail(101);*/
2012 if (!mRec->GetProcessingSettings().runQA) {
2013 throw std::runtime_error("Need QA enabled for the Merge Loopers MC QA");
2014 }
2015#endif
2016
2017 for (uint32_t i = get_global_id(0); i < mMemory->nLooperMatchCandidates; i += get_global_size(0)) {
2018 for (uint32_t j = i + 1; j < mMemory->nLooperMatchCandidates; j++) {
2019 int32_t bs = 0; // cut bitmask; stays 0 unless the commented "bs |=" lines below are re-enabled
2020 assert(CAMath::Abs(candidates[i].refz) <= CAMath::Abs(candidates[j].refz));
2021 if (CAMath::Abs(candidates[j].refz) > CAMath::Abs(candidates[i].refz) + 100.f) {
2022 break;
2023 }
2024 const float d2xy = CAMath::Sum2(candidates[i].x - candidates[j].x, candidates[i].y - candidates[j].y);
2025 if (d2xy > 15.f) {
2026 // bs |= 1;
2027 continue;
2028 }
2029
2030 const GPUTPCGMMergedTrack* trkI = &mMergedTracks[candidates[i].id];
2031 float refZI = candidates[i].refz;
2032 {
2033 const auto* tmp = trkI->GetFirstSegment(mMergedTracks, Param().rec.enableCyclicGraphWorkarounds);
2034 if (tmp != trkI && tmp->CSide() == trkI->CSide() && CAMath::Abs(tmp->GetParam().GetZ()) > CAMath::Abs(trkI->GetParam().GetZ())) {
2035 float tmpRefZ = refZI + tmp->GetParam().GetZ() - trkI->GetParam().GetZ();
2036 if (CAMath::Abs(tmpRefZ) < CAMath::Abs(candidates[j].refz) && CAMath::Abs(tmpRefZ) > CAMath::Abs(refZI)) {
2037 trkI = tmp;
2038 refZI = tmpRefZ;
2039 }
2040 }
2041 };
2042 const auto& trk1 = *trkI;
2043 const auto& trk2 = mMergedTracks[candidates[j].id];
2044 const auto& param1 = trk1.GetParam();
2045 const auto& param2 = trk2.GetParam();
2046 if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) {
2047 // bs |= 2;
2048 continue;
2049 }
2050
2051 const float dznormalized = (CAMath::Abs(candidates[j].refz) - CAMath::Abs(refZI)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz())));
2052 const float phasecorr = CAMath::Modf((CAMath::ASin(param1.GetSinPhi()) + trk1.GetAlpha() - CAMath::ASin(param2.GetSinPhi()) - trk2.GetAlpha()) / CAMath::TwoPi() + 5.5f, 1.f) - 0.5f;
2053 const float phasecorrdirection = (candidates[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1;
2054 const float dzcorr = dznormalized + phasecorr * phasecorrdirection;
2055 const bool sameside = !(trk1.CSide() ^ trk2.CSide());
2056 const float dzcorrlimit[4] = {sameside ? 0.018f : 0.012f, sameside ? 0.12f : 0.025f, 0.14f, 0.15f};
2057 const int32_t dzcorrcount = sameside ? 4 : 2;
2058 bool dzcorrok = false;
2059 float dznorm = 0.f;
2060 for (int32_t k = 0; k < dzcorrcount; k++) {
2061 const float d = CAMath::Abs(dzcorr - 0.5f * k);
2062 if (d <= dzcorrlimit[k]) {
2063 dzcorrok = true;
2064 dznorm = d / dzcorrlimit[k];
2065 break;
2066 }
2067 }
2068 if (!dzcorrok) {
2069 // bs |= 4;
2070 continue;
2071 }
2072
2073 const float dtgl = param1.GetDzDs() - (param1.GetQPt() * param2.GetQPt() > 0 ? param2.GetDzDs() : -param2.GetDzDs());
2074 const float dqpt = (CAMath::Abs(param1.GetQPt()) - CAMath::Abs(param2.GetQPt())) / CAMath::Min(param1.GetQPt(), param2.GetQPt());
2075 float d = CAMath::Sum2(dtgl * (1.f / 0.03f), dqpt * (1.f / 0.04f)) + d2xy * (1.f / 4.f) + dznorm * (1.f / 0.3f);
2076 bool EQ = d < 6.f;
2077#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2078 const int64_t label1 = paramLabels[i];
2079 const int64_t label2 = paramLabels[j];
2080 bool labelEQ = label1 != -1 && label1 == label2;
2081 if (1 || EQ || labelEQ) {
2082 // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[candidates[i].id].CSide(), (int32_t)mMergedTracks[candidates[j].id].CSide(), candidates[i].tgl, candidates[j].tgl, candidates[i].qpt, candidates[j].qpt, candidates[i].x, candidates[j].x, candidates[i].y, candidates[j].y);
2083 static auto& tup = GPUROOTDump<TNtuple>::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs");
2084 tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(candidates[j].refz) - CAMath::Abs(refZI), dzcorr, dtgl, dqpt, dznorm, bs);
2085 static auto tup2 = GPUROOTDump<TNtuple>::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr");
2086 tup2.Fill((float)labelEQ, refZI, candidates[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr);
2087 }
2088 /*if (EQ) {
2089 dropped[j] = true;
2090 }
2091 if (labelEQ) {
2092 droppedMC[j] = true;
2093 histMatch[CAMath::Min<int32_t>(100, d * 10.f)]++;
2094 }
2095 if (d < 10.f && !labelEQ) {
2096 histFail[CAMath::Min<int32_t>(100, d * 10.f)]++;
2097 }*/
2098#endif
2099 if (EQ) {
2100 mMergedTracks[candidates[j].id].SetMergedLooperUnconnected(true);
2101 if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) {
2102 mMergedTracks[candidates[i].id].SetMergedLooperUnconnected(true);
2103 }
2104 }
2105 }
2106 }
2107 /*#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
2108 int32_t total = 0, totalmc = 0, good = 0, missed = 0, fake = 0;
2109 for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
2110 total += dropped[i];
2111 totalmc += droppedMC[i];
2112 good += dropped[i] && droppedMC[i];
2113 missed += droppedMC[i] && !dropped[i];
2114 fake += dropped[i] && !droppedMC[i];
2115 }
2116 if (good) {
2117 printf("%20s: %8d\n", "Total", total);
2118 printf("%20s: %8d\n", "TotalMC", totalmc);
2119 printf("%20s: %8d (%8.3f%% %8.3f%%)\n", "Good", good, 100.f * good / total, 100.f * good / totalmc);
2120 printf("%20s: %8d (%8.3f%%)\n", "Missed", missed, 100.f * missed / totalmc);
2121 printf("%20s: %8d (%8.3f%%)\n", "Fake", fake, 100.f * fake / total);
2122 }
2123 printf("Match histo\n");
2124 for (uint32_t i = 0; i < histMatch.size(); i++) {
2125 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histMatch[i]);
2126 }
2127 printf("Fake histo\n");
2128 for (uint32_t i = 0; i < histFail.size(); i++) {
2129 printf("%8.3f: %3d\n", i / 10.f + 0.05f, histFail[i]);
2130 }
2131#endif*/
2132}
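// Note on the decision above: candidates first pass a window in |refz| (100 cm), a cut
// on the squared center distance d2xy (15 cm^2), and the dz/phase consistency checks;
// the final match requires the scaled quadratic distance
//   d = (dtgl / 0.03)^2 + (dqpt / 0.04)^2 + d2xy / 4 + dznorm / 0.3
// to be below 6. On a match the leg farther in |refz| is flagged as an unconnected
// merged looper, and if that leg falls below the pT rejection threshold the nearer leg
// is flagged as well.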