#define GPUCA_CADEBUG 0
#define GPUCA_MERGE_LOOPER_MC 0

#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
// ... (MC-label / debug-only includes elided)
#endif
#ifndef GPUCA_GPUCODE_DEVICE
// ... (host-only includes elided)
#endif

using namespace gputpcgmmergertypes;

static constexpr int32_t kMaxParts = 400;

// ... (sector ring setup in the constructor)
mNextSectorInd[mid] = 0;
mPrevSectorInd[0] = mid;

#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
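// CheckMergedTracks (host debug build only): walks every merge chain of the
// segment/neighbour graph and verifies that each sector track is visited
// exactly once; presumably a consistency check on the structures built by
// the merging kernels below.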
void GPUTPCGMMerger::CheckMergedTracks()
{
  std::vector<bool> trkUsed(SectorTrackInfoLocalTotal());
  for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
    trkUsed[i] = false;
  }

  for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) {
    GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
    if (track.PrevSegmentNeighbour() >= 0) {
      continue;
    }
    if (track.PrevNeighbour() >= 0) {
      continue;
    }
    GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
    while (true) {
      int32_t iTrk = tr - mSectorTrackInfos;
      if (trkUsed[iTrk]) {
        GPUError("FAILURE: double use");
      }
      trkUsed[iTrk] = true;
      int32_t jtr = tr->NextSegmentNeighbour();
      if (jtr >= 0) {
        tr = &(mSectorTrackInfos[jtr]);
        continue;
      }
      jtr = trbase->NextNeighbour();
      if (jtr >= 0) {
        trbase = &(mSectorTrackInfos[jtr]);
        // ...
        if (tr->PrevSegmentNeighbour() >= 0) {
          break;
        }
        continue;
      }
      break;
    }
  }

  for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) {
    if (trkUsed[i] == false) {
      GPUError("FAILURE: trk missed");
    }
  }
}
template <class T, class S>
int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const
{
  // ...
  if constexpr (std::is_same<S, GPUTPCGMBorderTrack&>::value) {
    sectorTrack = &mSectorTrackInfos[trk.TrackID()];
    nClusters = sectorTrack->OrigTrack()->NHits();
  }
  // ...
  auto acc = GPUTPCTrkLbl<false, GPUTPCTrkLbl_ret>(resolveMCLabels<T>(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f);
  // ... (loop over the track clusters)
  if constexpr (std::is_same<S, GPUTPCGMBorderTrack&>::value) {
    const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()];
    const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i];
    id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0];
  } else {
    id = mClusters[trk.FirstClusterRef() + i].num;
  }
  // ...
  return acc.computeLabel().id;
}
template <class S>
int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  if (GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) {
    return GetTrackLabelA<o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel>, S>(trk);
  }
#endif
  return GetTrackLabelA<AliHLTTPCClusterMCLabel, S>(trk);
}
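// PrintMergeGraph: debug dump of the two-level merge structure. It rewinds
// to the first segment and then to the first neighbour (the "tower"), and
// prints one row per neighbour chain; "!" marks the track of interest and
// "--" the tower row.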
void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const
{
  const GPUTPCGMSectorTrack* orgTrack = trk;
  while (trk->PrevSegmentNeighbour() >= 0) {
    trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()];
  }
  const GPUTPCGMSectorTrack* orgTower = trk;
  while (trk->PrevNeighbour() >= 0) {
    trk = &mSectorTrackInfos[trk->PrevNeighbour()];
  }
  int32_t nextId = trk - mSectorTrackInfos;
  out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n";
  while (nextId >= 0) {
    trk = &mSectorTrackInfos[nextId];
    if (trk->PrevSegmentNeighbour() >= 0) {
      out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n";
    }
    out << (trk == orgTower ? "--" : "  ");
    while (nextId >= 0) {
      GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId];
      if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) {
        out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") ";
      }
      char tmp[128];
      snprintf(tmp, 128, " %s%5d(%5.2f)", trk2 == orgTrack ? "!" : " ", nextId, trk2->QPt());
      out << tmp;
      nextId = trk2->NextSegmentNeighbour();
    }
    out << "\n";
    nextId = trk->NextNeighbour();
  }
}
mBorder[iSector] = mBorderMemory + 2 * nTracks;
// ...
mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks;

// ... (several pointer layouts are probed; the largest end address wins)
memMax = (void*)std::max((size_t)mem, (size_t)memMax);
// ...
memMax = (void*)std::max((size_t)mem, (size_t)memMax);
// ...
memMax = (void*)std::max((size_t)mem, (size_t)memMax);
// ...
memMax = (void*)std::max((size_t)mem, (size_t)memMax);

// ...
mClusterStateExt = nullptr;
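// SetMaxData (excerpt below): derives the buffer sizes from the sector
// tracker output; the no-field case (nominal Bz below 0.01 * kCLight) uses
// different estimates than the standard TPC-tracking path.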
mNTotalSectorTracks = 0;
// ...
mNMaxSingleSectorTracks = 0;
// ... (loop over sectors)
mNTotalSectorTracks += ntrk;
// ...
if (mNMaxSingleSectorTracks < ntrk) {
  mNMaxSingleSectorTracks = ntrk;
}
// ...
if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) {
  // ...
} else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) {
  // ...
  mNMaxClusters = mNClusters;
}
// ...
mNMaxLooperMatches = mNMaxClusters / 4;
// ...
throw std::runtime_error("mNMaxSingleSectorTracks too small");
// ...
throw std::runtime_error("Must run also sector tracking");
const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal();
for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) {
  // ...
}
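// RefitSectorTrack (body below, signature elided in this excerpt): refits a
// single sector track with the GM propagator, once inward and once outward
// (the two "way" passes), to obtain an independent parameter/covariance set
// for merging.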
prop.SetMaterialTPC();
prop.SetToyMCEventsFlag(false);
prop.SetSeedingErrors(true);
prop.SetFitInProjections(false);
prop.SetPolynomialField(&Param().polynomialField);
// ...
trk.X() = inTrack->Param().GetX();
trk.Y() = inTrack->Param().GetY();
trk.Z() = inTrack->Param().GetZ();
trk.SinPhi() = inTrack->Param().GetSinPhi();
trk.DzDs() = inTrack->Param().GetDzDs();
trk.QPt() = inTrack->Param().GetQPt();
trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin);
trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX());
sectorTrack.SetX2(0.f);
for (int32_t way = 0; way < 2; way++) {
  if (way) {
    prop.SetFitInProjections(true);
    prop.SetPropagateBzOnly(true);
  }
  trk.ResetCovariance();
  prop.SetTrack(&trk, alpha);
  int32_t start = way ? inTrack->NHits() - 1 : 0;
  int32_t end = way ? 0 : (inTrack->NHits() - 1);
  int32_t incr = way ? -1 : 1;
  // ... (iterate over the hits from start to end with step incr)
    const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector];
    const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i];
    int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex());
    // ...
    const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex];
    flags = cl.getFlags();
    if (Param().par.earlyTpcTransform) {
      x = tracker.Data().ClusterData()[clusterIndex].x;
      y = tracker.Data().ClusterData()[clusterIndex].y;
      z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset();
    } else {
      GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset());
    }
    // ...
    if (prop.PropagateToXAlpha(x, alpha, true)) {
      // ...
    }
    trk.ConstrainSinPhi();
    if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) {
      // ...
    }
    trk.ConstrainSinPhi();
  // ... (end of hit loop)
  if (way) {
    sectorTrack.SetParam2(trk);
  } else {
    sectorTrack.Set(trk, inTrack, alpha, sector);
  }
}
// ... (body of SetTrackClusterZT:)
const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()];
const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1];
int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex());
int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex());
if (Param().par.earlyTpcTransform) {
  track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z);
} else {
  const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0];
  track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime());
}

// ... (separate helper storing the per-sector unpack offset:)
mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks;
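// UnpackSectorGlobal: copies the extrapolated (cross-sector) tracks of one
// sector tracker into the merger's sector-track array; LocalTrackId packs
// the source sector in the high byte and the track index in the low 24 bits.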
GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  // ...
  const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
  uint32_t nTracks = *trk.NTracks();
  for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) {
    sectorTr = &trk.Tracks()[itr];
    int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)];
    // ...
    uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
    // ...
    SetTrackClusterZT(track, iSector, sectorTr);
    track.Set(this, sectorTr, alpha, iSector);
    track.SetGlobalSectorTrackCov();
    track.SetPrevNeighbour(-1);
    track.SetNextNeighbour(-1);
    track.SetNextSegmentNeighbour(-1);
    track.SetPrevSegmentNeighbour(-1);
    track.SetLocalTrackId(localId);
  }
}
GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
  for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) {
    mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1;
  }
}
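// RefitSectorTracks: per-sector kernel building the merger's internal track
// objects; rec.tpc.mergerCovSource selects where the covariance comes from
// (0: sector tracker, 1: base covariance copy, 2: full refit, falling back
// to the sector parameters if the refit fails).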
GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector)
{
  const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector];
  uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks;
  // ...
  for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) {
    sectorTr = &trk.Tracks()[itr];
    // ...
    SetTrackClusterZT(track, iSector, sectorTr);
    if (Param().rec.tpc.mergerCovSource == 0) {
      track.Set(this, sectorTr, alpha, iSector);
      // ...
    } else if (Param().rec.tpc.mergerCovSource == 1) {
      track.Set(this, sectorTr, alpha, iSector);
      track.CopyBaseTrackCov();
    } else if (Param().rec.tpc.mergerCovSource == 2) {
      if (RefitSectorTrack(track, sectorTr, alpha, iSector)) {
        track.Set(this, sectorTr, alpha, iSector);
        // ...
      }
    }
    // ...
    CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs()));
    track.SetPrevNeighbour(-1);
    track.SetNextNeighbour(-1);
    track.SetNextSegmentNeighbour(-1);
    track.SetPrevSegmentNeighbour(-1);
    track.SetExtrapolatedTrackId(0, -1);
    track.SetExtrapolatedTrackId(1, -1);
    uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u);
    mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack;
    mSectorTrackInfos[myTrack] = track;
  }
}
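// LinkExtrapolatedTracks: attaches each extrapolated track to its local
// source track. The AtomicCAS claims extrapolation slot 0; a second
// extrapolation for the same local track goes to slot 1.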
GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) {
    // ...
    if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) {
      localTrack.SetExtrapolatedTrackId(1, itr);
    }
  }
}
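// MergeSectorsPrepareStep2 (body below): transports each sector track to the
// requested border (iBorder 0/1: rotated sector edges, 2-4: the x of row 63)
// and stores a GPUTPCGMBorderTrack candidate if the covariance looks sane.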
float fieldBz = Param().bzCLight;
// ...
float dAlpha = Param().par.dAlpha / 2;
// ...
if (iBorder == 0) {
  dAlpha = dAlpha - CAMath::Pi() / 2;
} else if (iBorder == 1) {
  dAlpha = -dAlpha - CAMath::Pi() / 2;
} else if (iBorder == 2) {
  // ...
  x0 = Param().tpcGeometry.Row2X(63);
} else if (iBorder == 3) {
  // ...
  x0 = Param().tpcGeometry.Row2X(63);
} else if (iBorder == 4) {
  // ...
  x0 = Param().tpcGeometry.Row2X(63);
}
// ...
const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
float cosAlpha = CAMath::Cos(dAlpha);
float sinAlpha = CAMath::Sin(dAlpha);
// ...
for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
  // ...
  int32_t iSector = track->Sector();
  // ...
  if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) {
    // ...
  }
  // ...
  if (useOrigTrackParam) {
    if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) {
      // ...
    }
    while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) {
      track = &mSectorTrackInfos[track->NextSegmentNeighbour()];
      if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) {
        trackMin = track;
      }
    }
    // ...
    trackTmp = *trackMin;
    // ...
    if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) {
      trackTmp.UseParam2();
    } else {
      trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector());
    }
  } else {
    if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) {
      if (iBorder == 0 && track->NextNeighbour() >= 0) {
        // ...
      }
      if (iBorder == 1 && track->PrevNeighbour() >= 0) {
        // ...
      }
    }
  }
  // ...
  if (track->TransportToXAlpha(this, x0, sinAlpha, cosAlpha, fieldBz, b, maxSin)) {
    // ...
    b.SetNClusters(track->NClusters());
    for (int32_t i = 0; i < 4; i++) {
      if (CAMath::Abs(b.Cov()[i]) >= 5.0f) {
        // ...
      }
    }
    if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
      // ...
    }
    uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u);
    B[iSector][myTrack] = b;
  }
}
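// MergeBorderTracks<0>: step 0 of border merging. For each candidate it
// derives a z (or time) interval [fMin, fMax] from Par()[1], the linear Z
// offset, and a 3.5-sigma band, which step 1 then sorts for the sweep.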
template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
  CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
  // ...
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
  bool sameSector = (iSector1 == iSector2);
  for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
    GPUTPCGMBorderTrack& b = B1[itr];
    float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
    if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
      // ...
    }
    CADEBUG(printf("  Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
    // ...
    range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
    range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
    // ...
  }
  // ...
  for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
    GPUTPCGMBorderTrack& b = B2[itr];
    float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
    if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
      // ...
    }
    CADEBUG(printf("  Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
    // ...
    range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
    range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
    // ...
  }
}
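// MergeBorderTracks<1>: step 1 sorts the two range lists (list 1 by fMin,
// list 2 by fMax); with GPUCA_NO_FAST_MATH the comparators tie-break on fId
// to keep the sort fully deterministic.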
template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
#if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
  // ...
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
  if (iBlock == 0) {
    // ... (sort range1 by fMin; under GPUCA_NO_FAST_MATH with fId tie-break)
  } else if (iBlock == 1) {
    // ... (sort range2 by fMax, same GPUCA_NO_FAST_MATH distinction)
  }
#else
  printf("This sorting variant is disabled for RTC");
#endif
}
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS)
// ...
struct MergeBorderTracks_compMax {
  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
  {
#ifdef GPUCA_NO_FAST_MATH
    return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
#else
    return a.fMax < b.fMax;
#endif
  }
};

struct MergeBorderTracks_compMin {
  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
  {
#ifdef GPUCA_NO_FAST_MATH
    return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId);
#else
    return a.fMin < b.fMin;
#endif
  }
};
template <>
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
{
  thrust::device_ptr<GPUTPCGMBorderRange> p(range);
  // ... (thrust allocator "alloc" set up here)
  if (cmpMax) {
    thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax());
  } else {
    thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMin());
  }
}
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
// ...
#endif
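// MergeBorderTracks<2>: step 2 does the actual pairwise matching. For each
// track of list 1 it sweeps the overlapping z/time window of list 2 and
// applies chi2 cuts (Y, YS, ZT, q/pt), row-distance and cluster-count cuts;
// the best surviving candidate is recorded in mTrackLinks.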
template <>
GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
{
  // ...
  float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
  float factor2zt = Param().rec.tpc.trackMergerFactor2ZT;
  float factor2k = Param().rec.tpc.trackMergerFactor2K;
  float factor2General = Param().rec.tpc.trackMergerFactor2General;
  // ...
  factor2k = factor2General * factor2k;
  factor2ys = factor2General * factor2ys;
  factor2zt = factor2General * factor2zt;
  // ...
  int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits;
  int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits;
  // ...
  bool sameSector = (iSector1 == iSector2);
  // ...
  GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
  // ...
  for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) {
    // ...
    while (i2 < N2 && range2[i2].fMax < r1.fMin) {
      i2++;
    }
    // ...
    if (b1.NClusters() < minNPartHits) {
      continue;
    }
    // ...
    for (int32_t k2 = i2; k2 < N2; k2++) {
      // ...
      if (sameSector && (r1.fId >= r2.fId)) {
        continue;
      }
      // ...
#if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE)
      int64_t label1 = GetTrackLabel(b1);
      int64_t label2 = GetTrackLabel(b2);
      if (label1 != label2 && label1 != -1)
#endif
      {
        CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) { printf("Comparing track %3d to %3d: ", r1.fId, r2.fId); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b1.Cov()[i]); } printf("\n%28s", ""); });
        CADEBUG(if (GetConstantMem()->ioPtrs.mcLabelsTPC) { for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b2.Cov()[i]); } printf(" - %5s - ", GetTrackLabel(b1) == GetTrackLabel(b2) ? "CLONE" : "FAKE"); });
        if (b2.NClusters() < lBest2) {
          CADEBUG2(continue, printf("!NCl1\n"));
        }
        // ...
        int32_t maxRowDiff = mergeMode == 2 ? 1 : 3;
        if (CAMath::Abs(b1.Row() - b2.Row()) > maxRowDiff) {
          CADEBUG2(continue, printf("!ROW\n"));
        }
        if (CAMath::Abs(b1.Par()[2] - b2.Par()[2]) > 0.5f || CAMath::Abs(b1.Par()[3] - b2.Par()[3]) > 0.5f) {
          CADEBUG2(continue, printf("!CE SinPhi/Tgl\n"));
        }
        // ...
        if (!b1.CheckChi2Y(b2, factor2ys)) {
          // ...
        }
        // ...
        if (!b1.CheckChi2QPt(b2, factor2k)) {
          CADEBUG2(continue, printf("!QPt\n"));
        }
        float fys = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2ys : (2.f * factor2ys);
        float fzt = CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20 ? factor2zt : (2.f * factor2zt);
        if (!b1.CheckChi2YS(b2, fys)) {
          CADEBUG2(continue, printf("!YS\n"));
        }
        if (!b1.CheckChi2ZT(b2, fzt)) {
          CADEBUG2(continue, printf("!ZT\n"));
        }
        if (CAMath::Abs(b1.Par()[4]) * Param().qptB5Scaler < 20) {
          if (b2.NClusters() < minNPartHits) {
            CADEBUG2(continue, printf("!NCl2\n"));
          }
          if (b1.NClusters() + b2.NClusters() < minNTotalHits) {
            CADEBUG2(continue, printf("!NCl3\n"));
          }
        }
        CADEBUG(printf("OK: dZ %8.3f D1 %8.3f D2 %8.3f\n", CAMath::Abs(b1.Par()[1] - b2.Par()[1]), 3.5f * sqrt(b1.Cov()[1]), 3.5f * sqrt(b2.Cov()[1])));
      }
      lBest2 = b2.NClusters();
      iBest2 = b2.TrackID();
    }
    // ...
    GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); });
    // ...
    CADEBUG(GPUInfo("Found match %d %d", b1.TrackID(), iBest2));
    // ...
    mTrackLinks[b1.TrackID()] = iBest2;
    // ...
#if defined(GPUCA_NO_FAST_MATH)
    CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID());
#else
    mTrackLinks[iBest2] = b1.TrackID();
#endif
  }
}
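// MergeBorderTracksSetup: selects the border buffers and counters feeding
// the merge, depending on whether tracks are matched within one sector (+1),
// across the central electrode (-1, i.e. the opposite sector), or between
// adjacent sectors (0).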
if (withinSector == 1) {
  // ...
  n1 = n2 = mMemory->tmpCounter[iSector];
  b1 = b2 = mBorder[iSector];
} else if (withinSector == -1) {
  jSector = (iSector + NSECTORS / 2);
  const int32_t offset = mergeMode == 2 ? NSECTORS : 0;
  n1 = mMemory->tmpCounter[iSector + offset];
  n2 = mMemory->tmpCounter[jSector + offset];
  // ...
  b2 = mBorder[jSector + offset];
} else {
  jSector = mNextSectorInd[iSector];
  n1 = mMemory->tmpCounter[iSector];
  n2 = mMemory->tmpCounter[NSECTORS + jSector];
  b1 = mBorder[iSector];
  b2 = mBorder[NSECTORS + jSector];
}
template <int32_t I>
GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode)
{
  // ...
  MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode);
  MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode);
}
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE)
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
#endif
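// MergeWithinSectorsPrepare: transports every sector track to the reference
// x of row 63 and fills the per-sector border arrays used for intra-sector
// merging.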
GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  float x0 = Param().tpcGeometry.Row2X(63);
  const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
  // ...
  for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
    // ...
    int32_t iSector = track.Sector();
    // ...
    if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) {
      CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n"));
      b.SetNClusters(track.NClusters());
      uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u);
      mBorder[iSector][myTrack] = b;
    }
  }
}
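// MergeSectorsPrepare: even blocks handle border0 and odd blocks border1, so
// both borders are prepared concurrently by halving the grid.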
GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam)
{
  bool part2 = iBlock & 1;
  int32_t border = part2 ? border1 : border0;
  // ...
  MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam);
}
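// setBlockRange / hookEdge: helpers for the connected-component resolution.
// hookEdge implements the lock-free union-find hooking step: the larger of
// the two roots is attached to the smaller one via AtomicCAS, retrying after
// re-reading the roots.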
start = (elems + nBlocks - 1) / nBlocks * iBlock;
end = (elems + nBlocks - 1) / nBlocks * (iBlock + 1);
end = CAMath::Min(elems, end);
// ...

u = mTrackCCRoots[u];
v = mTrackCCRoots[v];
// ...
int32_t h = CAMath::Max(u, v);
int32_t l = CAMath::Min(u, v);
// ...
int32_t old = CAMath::AtomicCAS(&mTrackCCRoots[h], h, l);
// ...
u = mTrackCCRoots[h];
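// The Resolve* kernels below find connected components of the track-link
// graph: Setup makes each track its own root, HookLinks / HookNeighbors
// union the link and neighbour edges, and MultiJump compresses the root
// pointers until they reach a fixed point.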
GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t i = start + iThread; i < end; i += nThreads) {
    mTrackCCRoots[i] = i;
  }
}
GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    hookEdge(itr, mTrackLinks[itr]);
  }
}
GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
  nBlocks = nBlocks / 4 * 4;
  if (iBlock >= nBlocks) {
    return;
  }
  // ...
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end);
  // ...
  int32_t myNeighbor = iBlock % 4;
  // ...
  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor);
    // ...
  }
}
GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
  setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
  for (int32_t itr = start + iThread; itr < end; itr += nThreads) {
    // ...
    int32_t next = mTrackCCRoots[root];
    // ...
    do {
      root = next;
      next = mTrackCCRoots[next];
    } while (root != next);
    mTrackCCRoots[itr] = root;
  }
}
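// ResolveMergeSectors: processes the links batch-wise. A work-group
// inclusive scan compacts the links whose component root falls into this
// block's range into shared memory, and a single thread then merges the
// collected pairs, splicing the segment and neighbour chains.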
setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end);
// ...
for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) {
  int32_t itr = baseIdx + iThread;
  bool inRange = itr < SectorTrackInfoLocalTotal();
  // ...
  itr2 = mTrackLinks[itr];
  // ...
  bool resolveSector = (itr2 > -1);
  if (resolveSector) {
    int32_t root = mTrackCCRoots[itr];
    resolveSector &= (start <= root) && (root < end);
  }
  // ...
  int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector));
  // ...
  if (resolveSector) {
    smem.iTrack1[smemIdx - 1] = itr;
    smem.iTrack2[smemIdx - 1] = itr2;
  }
  // ...
  if (iThread < nThreads - 1) {
    // ...
  }
  // ...
  const int32_t nSectors = smemIdx;
  // ...
  for (int32_t i = 0; i < nSectors; i++) {
    itr = smem.iTrack1[i];
    itr2 = smem.iTrack2[i];
    // ...
    bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0;
    // ...
    while (track2->PrevSegmentNeighbour() >= 0) {
      track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()];
    }
    // ...
    if (track1 == track2) {
      // ...
    }
    while (track1->PrevSegmentNeighbour() >= 0) {
      track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
      if (track1 == track2) {
        // ...
      }
    }
    // ...
    GPUCommonAlgorithm::swap(track1, track1Base);
    for (int32_t k = 0; k < 2; k++) {
      // ...
      while (tmp->Neighbour(k) >= 0) {
        tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
        if (tmp == track2) {
          // ...
        }
      }
    }
    // ...
    while (track1->NextSegmentNeighbour() >= 0) {
      track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
      if (track1 == track2) {
        // ...
      }
    }
    // ...
    while (track1->PrevSegmentNeighbour() >= 0) {
      track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()];
    }
    // ...
    if (track1 == track2) {
      // ...
    }
    for (int32_t k = 0; k < 2; k++) {
      // ...
      while (tmp->Neighbour(k) >= 0) {
        tmp = &mSectorTrackInfos[tmp->Neighbour(k)];
        if (tmp == track2) {
          // ...
        }
      }
    }
    // ...
    float z1min, z1max, z2min, z2max;
    z1min = track1->MinClusterZT();
    z1max = track1->MaxClusterZT();
    z2min = track2->MinClusterZT();
    z2max = track2->MaxClusterZT();
    if (track1 != track1Base) {
      z1min = CAMath::Min(z1min, track1Base->MinClusterZT());
      z1max = CAMath::Max(z1max, track1Base->MaxClusterZT());
    }
    if (track2 != track2Base) {
      z2min = CAMath::Min(z2min, track2Base->MinClusterZT());
      z2max = CAMath::Max(z2max, track2Base->MaxClusterZT());
    }
    bool goUp = z2max - z1min > z1max - z2min;
    // ...
    if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) {
      track1->SetNeighbor(track2 - mSectorTrackInfos, goUp);
      track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp);
      // ...
    } else if (track1->Neighbour(goUp) < 0) {
      track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)];
      GPUCommonAlgorithm::swap(track1, track2);
    } else if (track2->Neighbour(!goUp) < 0) {
      track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
    } else {
      track1 = &mSectorTrackInfos[track1->Neighbour(goUp)];
      // ...
    }
    track1Base = track1;
    // ...
    track2Base = track2;
    // ...
    while (track1->NextSegmentNeighbour() >= 0) {
      track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
    }
    // ...
    track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos);
    track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
    // ...
    for (int32_t k = 0; k < 2; k++) {
      track1 = track1Base;
      track2 = track2Base;
      while (track2->Neighbour(k) >= 0) {
        // ...
        if (track1->Neighbour(k) >= 0) {
          // ...
          track2->SetNeighbor(-1, k);
          track2new->SetNeighbor(-1, k ^ 1);
          // ...
          while (track1->NextSegmentNeighbour() >= 0) {
            track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()];
          }
          // ...
          track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos);
          track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos);
          // ...
        } else {
          // ...
          track1->SetNeighbor(track2->Neighbour(k), k);
          track2->SetNeighbor(-1, k);
          track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1);
        }
      }
    }
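// MergeCEFill: for tracks ending near the central electrode, creates border
// track candidates (two attempts at different reference x) so the two halves
// of a CE-crossing track can be matched; C-side parameters are mirrored in z.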
if (Param().par.earlyTpcTransform) {
  // ...
} else {
  auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num];
  GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z);
}
// ...
if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) {
  // ...
}
int32_t sector = track->Sector();
for (int32_t attempt = 0; attempt < 2; attempt++) {
  // ...
  const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 63 : cls.row);
  // ...
  b.SetNClusters(mOutputTracks[itr].NClusters());
  if (CAMath::Abs(b.Cov()[4]) >= 0.5f) {
    // ...
  }
  if (track->CSide()) {
    b.SetPar(1, b.Par()[1] - 2 * (z - b.ZOffsetLinear()));
    b.SetZOffsetLinear(-b.ZOffsetLinear());
  }
  // ...
  uint32_t id = sector + attempt * NSECTORS;
  uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u);
  mBorder[id][myTrack] = b;
}
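// MergeCE: merges the two halves of tracks crossing the central electrode.
// Only mutual links are accepted; the cluster ordering and the z/time offset
// of the second half are fixed up, and track 1 absorbs the clusters of
// track 0.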
const ClusterNative* cls = Param().par.earlyTpcTransform ? nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear;
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
  if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) {
    if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) {
      continue;
    }
    // ...
    if (!trk[1]->OK() || trk[1]->CCE()) {
      continue;
    }
    // ...
    bool celooper = (trk[0]->GetParam().GetQPt() * Param().qptB5Scaler > 1 && trk[0]->GetParam().GetQPt() * trk[1]->GetParam().GetQPt() < 0);
    bool looper = trk[0]->Looper() || trk[1]->Looper() || celooper;
    if (!looper && trk[0]->GetParam().GetPar(3) * trk[1]->GetParam().GetPar(3) < 0) {
      continue;
    }
    // ...
    uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
    if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) {
      raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters);
      for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) {
        mClusters[k].num = 0;
        mClusters[k].state = 0;
      }
      CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
      return;
    }
    bool needswap = false;
    // ...
    if (Param().par.earlyTpcTransform) {
      z0max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z));
      z1max = CAMath::Max(CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef()].z), CAMath::Abs(mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z));
    } else {
      z0max = -CAMath::Min(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime());
      z1max = -CAMath::Min(cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime());
    }
    if (z1max < z0max) {
      needswap = true;
    }
    // ...
    if (mClusters[trk[0]->FirstClusterRef()].row > mClusters[trk[1]->FirstClusterRef()].row) {
      needswap = true;
    }
    // ...
    if (needswap) {
      GPUCommonAlgorithm::swap(trk[0], trk[1]);
    }

    bool reverse[2] = {false, false};
    if (Param().par.earlyTpcTransform) {
      reverse[0] = (mClustersXYZ[trk[0]->FirstClusterRef()].z > mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) ^ (trk[0]->CSide() > 0);
      reverse[1] = (mClustersXYZ[trk[1]->FirstClusterRef()].z < mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) ^ (trk[1]->CSide() > 0);
    } else {
      reverse[0] = cls[mClusters[trk[0]->FirstClusterRef()].num].getTime() < cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime();
      reverse[1] = cls[mClusters[trk[1]->FirstClusterRef()].num].getTime() > cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime();
    }

    if (Param().par.continuousTracking) {
      if (Param().par.earlyTpcTransform) {
        const float z0 = trk[0]->CSide() ? CAMath::Max(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[0]->FirstClusterRef()].z, mClustersXYZ[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].z);
        const float z1 = trk[1]->CSide() ? CAMath::Max(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z) : CAMath::Min(mClustersXYZ[trk[1]->FirstClusterRef()].z, mClustersXYZ[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].z);
        const float offset = CAMath::Abs(z1) > CAMath::Abs(z0) ? -z0 : z1;
        trk[1]->Param().Z() += trk[1]->Param().TZOffset() - offset;
        trk[1]->Param().TZOffset() = offset;
      } else {
        // ...
        const float tmax = CAMath::MaxWithRef(cls[mClusters[trk[0]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1].num].getTime(),
                                              cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(),
                                              &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1],
                                              &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax);
        const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f);
        trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset);
        trk[1]->Param().TZOffset() = offset;
      }
    }

    int32_t pos = newRef;
    // ...
    int32_t lastLeg = -1;
    // ...
    for (int32_t k = 1; k >= 0; k--) {
      int32_t loopstart = reverse[k] ? (trk[k]->NClusters() - 1) : 0;
      int32_t loopend = reverse[k] ? -1 : (int32_t)trk[k]->NClusters();
      int32_t loopinc = reverse[k] ? -1 : 1;
      for (int32_t j = loopstart; j != loopend; j += loopinc) {
        if (Param().par.earlyTpcTransform) {
          mClustersXYZ[pos] = mClustersXYZ[trk[k]->FirstClusterRef() + j];
        }
        mClusters[pos] = mClusters[trk[k]->FirstClusterRef() + j];
        // ...
        if (mClusters[trk[k]->FirstClusterRef() + j].leg != lastLeg) {
          // ...
          lastLeg = mClusters[trk[k]->FirstClusterRef() + j].leg;
        }
        mClusters[pos].leg = leg;
        // ...
      }
    }
    trk[1]->SetFirstClusterRef(newRef);
    trk[1]->SetNClusters(trk[0]->NClusters() + trk[1]->NClusters());
    // ...
    trk[1]->SetCCE(true);
    if (looper) {
      trk[1]->SetLooper(true);
      trk[1]->SetLegs(leg + 1);
    }
    trk[0]->SetNClusters(0);
    trk[0]->SetOK(false);
  }
}
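// The comparators below order cluster indices of a merged track:
// CompareClusterIdsLooper orders by leg (outwards or inwards depending on
// the base leg) before row/id, CompareClusterIds by row and then id.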
struct GPUTPCGMMerger_CompareClusterIdsLooper {
  struct clcomparestruct {
    uint8_t leg;
  };
  // ...
  GPUd() bool operator()(const int16_t aa, const int16_t bb)
  {
    // ...
    const clcomparestruct& a = cmp2[aa];
    const clcomparestruct& b = cmp2[bb];
    // ...
    if (a.leg != b.leg) {
      return ((leg > 0) ^ (a.leg > b.leg));
    }
    // ...
#ifdef GPUCA_NO_FAST_MATH
    // ... (deterministic tie-break)
#endif
  }
};
struct GPUTPCGMMerger_CompareClusterIds {
  // ...
  GPUd() bool operator()(const int16_t aa, const int16_t bb)
  {
    // ...
    if (a.row != b.row) {
      return (a.row > b.row);
    }
#ifdef GPUCA_NO_FAST_MATH
    // ...
    return (a.id > b.id);
#else
    return (a.id > b.id);
#endif
  }
};
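// CollectMergedTracks: walks each resolved chain, gathers up to kMaxParts
// sector-track segments (marking visited entries with the 1000000000+x
// sentinels), sorts and de-duplicates their clusters, and emits one merged
// output track per chain.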
GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
  for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
    GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
    if (track.PrevSegmentNeighbour() >= 0) {
      continue;
    }
    if (track.PrevNeighbour() >= 0) {
      continue;
    }
    int32_t nParts = 0;
    int32_t nHits = 0;
    int32_t leg = 0;
    GPUTPCGMSectorTrack *trbase = &track, *tr = &track;
    tr->SetPrevSegmentNeighbour(1000000000);
    while (true) {
      if (nParts >= kMaxParts) {
        break;
      }
      if (nHits + tr->NClusters() > kMaxClusters) {
        break;
      }
      nHits += tr->NClusters();
      // ...
      trackParts[nParts++] = tr;
      for (int32_t i = 0; i < 2; i++) {
        if (tr->ExtrapolatedTrackId(i) != -1) {
          if (nParts >= kMaxParts) {
            break;
          }
          if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) {
            break;
          }
          trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)];
          trackParts[nParts++]->SetLeg(leg);
          nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters();
        }
      }
      int32_t jtr = tr->NextSegmentNeighbour();
      if (jtr >= 0) {
        tr = &(mSectorTrackInfos[jtr]);
        tr->SetPrevSegmentNeighbour(1000000002);
        continue;
      }
      jtr = trbase->NextNeighbour();
      if (jtr >= 0) {
        trbase = &(mSectorTrackInfos[jtr]);
        // ...
        if (tr->PrevSegmentNeighbour() >= 0) {
          break;
        }
        tr->SetPrevSegmentNeighbour(1000000001);
        // ...
        continue;
      }
      break;
    }
    if (nParts > 1 && leg == 0) {
      GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
#ifdef GPUCA_NO_FAST_MATH
        if (a->X() != b->X()) {
          return (a->X() > b->X());
        }
        if (a->Y() != b->Y()) {
          return (a->Y() > b->Y());
        }
        if (a->Z() != b->Z()) {
          return (a->Z() > b->Z());
        }
        return a->QPt() > b->QPt();
#else
        return (a->X() > b->X());
#endif
      });
    }
    // ...
    if (Param().rec.tpc.dropLoopers && leg > 0) {
      // ...
    }
    trackCluster trackClusters[kMaxClusters];
    // ...
    for (int32_t ipart = 0; ipart < nParts; ipart++) {
      // ...
      CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs()));
      int32_t nTrackHits = t->NClusters();
      trackCluster* c2 = trackClusters + nHits + nTrackHits - 1;
      for (int32_t i = 0; i < nTrackHits; i++, c2--) {
        const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()];
        const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i];
        uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0];
        *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()};
      }
      nHits += nTrackHits;
    }
    // ... (check whether the collected clusters are already ordered)
    for (int32_t i = 1; i < nHits; i++) {
      if (trackClusters[i].row > trackClusters[i - 1].row || trackClusters[i].id == trackClusters[i - 1].id) {
        // ...
      }
    }
    // ...
    int32_t firstTrackIndex = 0;
    int32_t lastTrackIndex = nParts - 1;
    // ... (only if the order is broken:)
    int32_t nTmpHits = 0;
    trackCluster trackClustersUnsorted[kMaxClusters];
    int16_t clusterIndices[kMaxClusters];
    for (int32_t i = 0; i < nHits; i++) {
      trackClustersUnsorted[i] = trackClusters[i];
      clusterIndices[i] = i;
    }
    // ... (looper case: find the base leg via the smallest |z| / largest time)
    for (int32_t i = 0; i < nParts; i++) {
      if (trackParts[i]->Leg() == 0 || trackParts[i]->Leg() == leg) {
        // ...
        if (Param().par.earlyTpcTransform) {
          zt = CAMath::Min(CAMath::Abs(trackParts[i]->ClusterZT0()), CAMath::Abs(trackParts[i]->ClusterZTN()));
        } else {
          zt = -trackParts[i]->MinClusterZT();
        }
        // ...
        baseLeg = trackParts[i]->Leg();
        // ...
      }
    }
    // ...
    int32_t iLongest = 1e9;
    // ...
    for (int32_t i = (baseLeg ? (nParts - 1) : 0); baseLeg ? (i >= 0) : (i < nParts); baseLeg ? i-- : i++) {
      if (trackParts[i]->Leg() != baseLeg) {
        break;
      }
      if (trackParts[i]->OrigTrack()->NHits() > length) {
        iLongest = i;
        length = trackParts[i]->OrigTrack()->NHits();
      }
    }
    // ...
    if (Param().par.earlyTpcTransform) {
      outwards = (trackParts[iLongest]->ClusterZT0() > trackParts[iLongest]->ClusterZTN()) ^ trackParts[iLongest]->CSide();
    } else {
      outwards = trackParts[iLongest]->ClusterZT0() < trackParts[iLongest]->ClusterZTN();
    }
    GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters];
    for (int32_t iPart = 0; iPart < nParts; iPart++) {
      // ...
      int32_t nTrackHits = t->NClusters();
      for (int32_t j = 0; j < nTrackHits; j++) {
        int32_t i = nTmpHits + j;
        clusterSort[i].leg = t->Leg();
        // ...
      }
      nTmpHits += nTrackHits;
    }
    // ...
    GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIdsLooper(baseLeg, outwards, trackClusters, clusterSort));
    // ... (non-looper case:)
    GPUCommonAlgorithm::sort(clusterIndices, clusterIndices + nHits, GPUTPCGMMerger_CompareClusterIds(trackClusters));
    firstTrackIndex = lastTrackIndex = -1;
    for (int32_t i = 0; i < nParts; i++) {
      nTmpHits += trackParts[i]->NClusters();
      if (nTmpHits > clusterIndices[0] && firstTrackIndex == -1) {
        firstTrackIndex = i;
      }
      if (nTmpHits > clusterIndices[nHits - 1] && lastTrackIndex == -1) {
        lastTrackIndex = i;
      }
    }
    // ...
    int32_t nFilteredHits = 0;
    int32_t indPrev = -1;
    for (int32_t i = 0; i < nHits; i++) {
      int32_t ind = clusterIndices[i];
      if (indPrev >= 0 && trackClustersUnsorted[ind].id == trackClustersUnsorted[indPrev].id) {
        continue;
      }
      // ...
      trackClusters[nFilteredHits] = trackClustersUnsorted[ind];
      // ...
    }
    nHits = nFilteredHits;
    uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits);
    if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) {
      raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters);
      CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
      // ...
    }
    // ...
    for (int32_t i = 0; i < nHits; i++) {
      // ...
      if (Param().par.earlyTpcTransform) {
        const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()];
        // ...
        clXYZ[i].amp = c.amp;
#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME
        clXYZ[i].pad = c.mPad;
        clXYZ[i].time = c.mTime;
#endif
      } else {
        const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id];
        // ...
      }
      cl[i].row = trackClusters[i].row;
      cl[i].num = trackClusters[i].id;
      cl[i].sector = trackClusters[i].sector;
      cl[i].leg = trackClusters[i].leg;
    }
    uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u);
    if (iOutputTrack >= mNMaxTracks) {
      raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks);
      CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks);
      // ...
    }
    // ...
    mergedTrack.SetFlags(0);
    mergedTrack.SetOK(1);
    mergedTrack.SetLooper(leg > 0);
    mergedTrack.SetLegs(leg);
    mergedTrack.SetNClusters(nHits);
    mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster);
    // ...
    mergedTrack.SetCSide(p2.CSide());
    // ...
    const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : Param().tpcGeometry.Row2X(cl[0].row);
    // ... (if the track can be transported to toX:)
    p1.Y() = b.Par()[0];
    p1.Z() = b.Par()[1];
    p1.SinPhi() = b.Par()[2];
    // ... (otherwise copy the parameters from p2:)
    p1.SinPhi() = p2.SinPhi();
    // ...
    p1.TZOffset() = p2.TZOffset();
    p1.DzDs() = p2.DzDs();
    p1.QPt() = p2.QPt();
    mergedTrack.SetAlpha(p2.Alpha());
    if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) {
      p1.QPt() = 100.f / Param().rec.bz0Pt10MeV;
    }
    // ...
    if (Param().par.earlyTpcTransform) {
      CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z);
    } else {
      auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
      CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
    }
    // ...
    MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack);
  }
}
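// SortTracksPrepare / PrepareClustersForFit0: initialize the processing
// order and per-cluster bookkeeping before the final fit.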
GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
    mTrackOrderProcess[i] = i;
  }
}
GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) {
    // ...
  }
}
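// Track sort comparators: tracks are ordered CCE first, then by number of
// legs, then by cluster count (the QPt variant orders by |q/pt|); under
// GPUCA_NO_FAST_MATH extra keys make the order fully deterministic. The
// thrust specializations further below run the same comparators on device.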
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS)
// ...
struct GPUTPCGMMergerSortTracks_comp {
  // ... (holds a const GPUTPCGMMergedTrack* mCmp, set in the constructor)
  GPUd() bool operator()(const int32_t aa, const int32_t bb)
  {
    const GPUTPCGMMergedTrack& a = mCmp[aa];
    const GPUTPCGMMergedTrack& b = mCmp[bb];
    if (a.CCE() != b.CCE()) {
      return a.CCE() > b.CCE();
    }
    if (a.Legs() != b.Legs()) {
      return a.Legs() > b.Legs();
    }
#ifdef GPUCA_NO_FAST_MATH
    if (a.NClusters() != b.NClusters()) {
      return a.NClusters() > b.NClusters();
    }
    if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    }
    if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
    return a.NClusters() > b.NClusters();
#endif
  }
};

struct GPUTPCGMMergerSortTracksQPt_comp {
  // ...
  GPUd() bool operator()(const int32_t aa, const int32_t bb)
  {
    const GPUTPCGMMergedTrack& a = mCmp[aa];
    const GPUTPCGMMergedTrack& b = mCmp[bb];
#ifdef GPUCA_NO_FAST_MATH
    if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    }
    if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
    return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
#endif
  }
};
template <>
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
{
  thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess());
  // ...
  thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
}

template <>
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
{
  thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort());
  // ...
  thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
}
GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
  if (iThread || iBlock) {
    return;
  }
  // ...
  auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
    const GPUTPCGMMergedTrack& a = cmp[aa];
    const GPUTPCGMMergedTrack& b = cmp[bb];
    if (a.CCE() != b.CCE()) {
      return a.CCE() > b.CCE();
    }
    if (a.Legs() != b.Legs()) {
      return a.Legs() > b.Legs();
    }
#ifdef GPUCA_NO_FAST_MATH
    if (a.NClusters() != b.NClusters()) {
      return a.NClusters() > b.NClusters();
    }
    if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    }
    if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
    return a.NClusters() > b.NClusters();
#endif
  };
  GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp);
#endif
}
GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
  if (iThread || iBlock) {
    return;
  }
  // ...
  auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
    const GPUTPCGMMergedTrack& a = cmp[aa];
    const GPUTPCGMMergedTrack& b = cmp[bb];
#ifdef GPUCA_NO_FAST_MATH
    if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
    }
    if (a.GetParam().GetY() != b.GetParam().GetY()) {
      return a.GetParam().GetY() > b.GetParam().GetY();
    }
    return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
    return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
#endif
  };
  GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp);
#endif
}
GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) {
    mTrackOrderAttach[mTrackSort[i]] = i;
    // ...
    for (uint32_t j = 0; j < trk.NClusters(); j++) {
      // ...
      CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
    }
  }
}
GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) {
    if (mSharedCount[mClusters[i].num] > 1) {
      // ...
    }
  }
  if (mClusterStateExt) {
    for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nBlocks * nThreads) {
      uint8_t state = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[i].getFlags();
      if (mSharedCount[i] > 1) {
        // ...
      }
      mClusterStateExt[i] = state;
    }
  }
}
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
  mTrackSort[mTrackOrderAttach[i]] = i;
}
// ...
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) {
  mClusterAttachment[mClusters[i].num] = 0;
}
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
  // ...
  if (!trk.OK() || trk.NClusters() == 0) {
    continue;
  }
  // ...
  uint8_t goodLeg = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].leg;
  for (uint32_t j = 0; j < trk.NClusters(); j++) {
    int32_t id = mClusters[trk.FirstClusterRef() + j].num;
    // ...
    uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state;
    // ...
    if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) {
      // ...
    }
    CAMath::AtomicMax(&mClusterAttachment[id], weight);
  }
}
for (uint32_t i = iBlock * nThreads + iThread; i < mNMaxClusters; i += nThreads * nBlocks) {
  if (mClusterAttachment[i] != 0) {
    // ...
  }
}
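// MergeLoopersInit: selects low-pt candidates (q/pt between 5 and 1.1x the
// rejection threshold on the B5 scale), computes a reference z and the
// global circle center (gmx, gmy) of the helix, and stores them as looper
// match candidates.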
GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f;
  for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) {
    const auto& trk = mOutputTracks[i];
    const auto& p = trk.GetParam();
    const float qptabs = CAMath::Abs(p.GetQPt());
    if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) {
      const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector;
      const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100);
      float sinA, cosA;
      CAMath::SinCos(trk.GetAlpha(), sinA, cosA);
      float gx = cosA * p.GetX() - sinA * p.GetY();
      float gy = cosA * p.GetY() + sinA * p.GetX();
      float bz = Param().polynomialField.GetFieldBz(gx, gy, p.GetZ());
      const float r1 = p.GetQPt() * bz;
      const float r = CAMath::Abs(r1) > 0.0001f ? (1.f / r1) : 10000;
      const float mx = p.GetX() + r * p.GetSinPhi();
      const float my = p.GetY() - r * CAMath::Sqrt(1 - p.GetSinPhi() * p.GetSinPhi());
      const float gmx = cosA * mx - sinA * my;
      const float gmy = cosA * my + sinA * mx;
      uint32_t myId = CAMath::AtomicAdd(&mMemory->nLooperMatchCandidates, 1u);
      if (myId >= mNMaxLooperMatches) {
        raiseError(GPUErrors::ERROR_LOOPER_MATCH_OVERFLOW, myId, mNMaxLooperMatches);
        CAMath::AtomicExch(&mMemory->nLooperMatchCandidates, mNMaxLooperMatches);
        return;
      }
      // ...
    }
  }
}
GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
  if (iThread || iBlock) {
    return;
  }
  auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); };
  GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp);
#endif
}
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS)
// ...
struct GPUTPCGMMergerMergeLoopers_comp {
  GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
  {
    return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
  }
};

template <>
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
{
  thrust::device_ptr<MergeLooperParam> params(mProcessorsShadow->tpcMerger.LooperCandidates());
  // ...
  thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
}
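// MergeLoopersMain: pairs candidates whose |refz| differ by less than 100,
// forms a normalized dz plus a helix phase correction, and flags the second
// (and possibly the first) track as a merged looper; the MC-label branch is
// debug-only QA.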
GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
  // ...
#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
  std::vector<int64_t> paramLabels(mMemory->nLooperMatchCandidates);
  for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) {
    paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]);
  }
  // ...
  if (!mRec->GetProcessingSettings().runQA) {
    throw std::runtime_error("Need QA enabled for the Merge Loopers MC QA");
  }
#endif
  // ... (loop over candidate i)
  for (uint32_t j = i + 1; j < mMemory->nLooperMatchCandidates; j++) {
    // ...
    if (CAMath::Abs(params[j].refz) > CAMath::Abs(params[i].refz) + 100.f) {
      break;
    }
    // ...
    const auto& trk1 = mOutputTracks[params[i].id];
    const auto& trk2 = mOutputTracks[params[j].id];
    const auto& param1 = trk1.GetParam();
    const auto& param2 = trk2.GetParam();
    if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) {
      continue;
    }
    // ...
    const float dznormalized = (CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz)) / (CAMath::TwoPi() * 0.5f * (CAMath::Abs(param1.GetDzDs()) + CAMath::Abs(param2.GetDzDs())) * 1.f / (0.5f * (CAMath::Abs(param1.GetQPt()) + CAMath::Abs(param2.GetQPt())) * CAMath::Abs(Param().polynomialField.GetNominalBz())));
    const float phasecorr = CAMath::Modf((CAMath::ASin(param1.GetSinPhi()) + trk1.GetAlpha() - CAMath::ASin(param2.GetSinPhi()) - trk2.GetAlpha()) / CAMath::TwoPi() + 5.5f, 1.f) - 0.5f;
    const float phasecorrdirection = (params[j].refz * param1.GetQPt() * param1.GetDzDs()) > 0 ? 1 : -1;
    const float dzcorr = dznormalized + phasecorr * phasecorrdirection;
    const bool sameside = !(trk1.CSide() ^ trk2.CSide());
    const float dzcorrlimit[4] = {sameside ? 0.018f : 0.012f, sameside ? 0.12f : 0.025f, 0.14f, 0.15f};
    const int32_t dzcorrcount = sameside ? 4 : 2;
    bool dzcorrok = false;
    // ...
    for (int32_t k = 0; k < dzcorrcount; k++) {
      const float d = CAMath::Abs(dzcorr - 0.5f * k);
      if (d <= dzcorrlimit[k]) {
        dzcorrok = true;
        dznorm = d / dzcorrlimit[k];
        break;
      }
    }
    if (!dzcorrok) {
      continue;
    }
    // ...
    const float dtgl = param1.GetDzDs() - (param1.GetQPt() * param2.GetQPt() > 0 ? param2.GetDzDs() : -param2.GetDzDs());
    const float dqpt = (CAMath::Abs(param1.GetQPt()) - CAMath::Abs(param2.GetQPt())) / CAMath::Min(param1.GetQPt(), param2.GetQPt());
    float d = CAMath::Sum2(dtgl * (1.f / 0.03f), dqpt * (1.f / 0.04f)) + d2xy * (1.f / 4.f) + dznorm * (1.f / 0.3f);
    // ...
#if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE)
    const int64_t label1 = paramLabels[i];
    const int64_t label2 = paramLabels[j];
    bool labelEQ = label1 != -1 && label1 == label2;
    if (1 || EQ || labelEQ) {
      // ...
      static auto& tup = GPUROOTDump<TNtuple>::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs");
      tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs);
      static auto tup2 = GPUROOTDump<TNtuple>::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr");
      tup2.Fill((float)labelEQ, params[i].refz, params[j].refz, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), param1.GetSinPhi(), param2.GetSinPhi(), trk1.GetAlpha(), trk2.GetAlpha(), dznormalized, phasecorr, phasecorrdirection, dzcorr);
    }
#endif
    // ...
    mOutputTracks[params[j].id].SetMergedLooper(true);
    if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) {
      mOutputTracks[params[i].id].SetMergedLooper(true);
    }
  }
}