// Start of the merger stage as visible in this excerpt.
// NOTE(review): the leading integers look like original line numbers carried into this
// excerpt; gaps between them mean intervening statements are not shown here.
// Emit an informational log message announcing the merger run.
93 GPUInfo(
"Running TPC Merger");
// Zero-fill the object returned by Merger.Memory(); the byte count is the size of the pointee.
106 memset(Merger.Memory(), 0,
sizeof(*Merger.Memory()));
// Launch the clearIds variant of GPUTPCGlobalDebugSortKernels on the grid returned by
// GetGridAuto(0, deviceType), passing 1 as the kernel argument.
// NOTE(review): the meaning of the trailing 1 is not visible here — confirm against the kernel.
113 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(
GetGridAuto(0, deviceType), 1);
// Unpack-related kernel launches.
// NOTE(review): `i` is not defined anywhere in this excerpt; presumably it is the index of an
// enclosing loop that is not shown — confirm against the full function.
// UnpackSaveNumber is launched four times in this phase, with arguments i, NSECTORS,
// NSECTORS + i and 2 * NSECTORS, always with the explicit {1, -WarpSize(), 0, deviceType}
// launch configuration (presumably one block of one warp — confirm against the grid type).
116 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
i);
// UnpackResetIds and SectorRefit both receive i and use the GetGridAuto(0, deviceType) grid.
117 runKernel<GPUTPCGMMergerUnpackResetIds>(
GetGridAuto(0, deviceType),
i);
118 runKernel<GPUTPCGMMergerSectorRefit>(
GetGridAuto(0, deviceType),
i);
// Same kernel as at 116, now with NSECTORS as the argument.
121 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS);
// Debug sort kernel, sectorTracks variant: GPUCA_NSECTORS as the first launch field, argument 0.
122 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 0);
// UnpackSaveNumber with NSECTORS + i, followed by UnpackGlobal with i.
125 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS +
i);
126 runKernel<GPUTPCGMMergerUnpackGlobal>(
GetGridAuto(0, deviceType),
i);
// Final UnpackSaveNumber of this phase, with 2 * NSECTORS.
128 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}}, 2 *
NSECTORS);
// Second sectorTracks debug sort launch; identical to the one at 122 except the argument is 1.
130 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 1);
// MergeWithinPrepare pass: clear links, clear the temporary counters, prepare, then run the
// border-track merge and resolve helpers.
// ClearLinks is launched with `false` here (a later launch in this function passes `true`).
134 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
// GPUMemClean16 over MergerShadowAll.TmpCounter(); the size argument is NSECTORS elements
// expressed in bytes (NSECTORS * sizeof of one counter).
135 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(),
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
136 runKernel<GPUTPCGMMergerMergeWithinPrepare>(
GetGridAuto(0, deviceType));
// Helper calls with arguments (1, 0) and (0, 1) respectively, plus deviceType.
// NOTE(review): the meaning of these integer arguments is defined in the helpers, not shown here.
137 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
138 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
// MergeSectorsPrepare passes. The sequence "clear TmpCounter -> MergeSectorsPrepare ->
// MergeBorderTracks -> Resolve" is executed three times with different prepare arguments:
// (2, 3, 0), (0, 1, 0) and (0, 1, 1).
// NOTE(review): the semantics of those argument triples live in the kernel, not in this excerpt.
// Links are cleared once up front, again with `false`.
141 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
// Every GPUMemClean16 in this phase covers 2 * NSECTORS counters (in bytes), i.e. twice the
// span cleared before MergeWithinPrepare.
142 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
// First pass: prepare arguments (2, 3, 0). The grid comes from GetGridBlk with at least 2 as
// its first argument (std::max(2u, numBlocks)).
143 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
144 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
145 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
// Second pass: counters cleared again, prepare arguments (0, 1, 0).
146 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
147 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
148 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
149 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
// Third pass: prepare arguments (0, 1, 1); note MergeBorderTracks receives -1 as its second
// argument here instead of 0.
150 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
151 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
152 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
153 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
// Clear the same 2 * NSECTORS counters once more after the last pass.
156 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
// Launch LinkExtrapolatedTracks and then Collect, both on the GetGridAuto(0, deviceType) grid
// and without extra kernel arguments.
158 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(
GetGridAuto(0, deviceType));
159 runKernel<GPUTPCGMMergerCollect>(
GetGridAuto(0, deviceType));
// Two debug sort launches (extrapolatedTracks1 then extrapolatedTracks2), each with the
// {1, -WarpSize(), 0, deviceType} launch configuration and argument 1.
161 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -
WarpSize(), 0, deviceType}}, 1);
162 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -
WarpSize(), 0, deviceType}}, 1);
// ClearLinks is launched with `true` here; the earlier launches in this function pass `false`.
167 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
true);
// Two MergeBorderTracks calls with first argument -1 and second argument 1, then 2.
// NOTE(review): the meaning of -1 / 1 / 2 is defined in the helper, which is not shown here.
168 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
169 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
// Launch the MergeCE kernel on the GetGridAuto(0, deviceType) grid.
170 runKernel<GPUTPCGMMergerMergeCE>(
GetGridAuto(0, deviceType));
// Flag tested later in this function; it starts at 0. Any assignment between here and that
// test is not visible in this excerpt.
173 int32_t waitForTransfer = 0;
// Launch SortTracksPrepare followed by SortTracks.
180 runKernel<GPUTPCGMMergerSortTracksPrepare>(
GetGridAuto(0, deviceType));
182 runKernel<GPUTPCGMMergerSortTracks>(
GetGridAuto(0, deviceType));
// Element count used for the two clears below.
185 uint32_t maxId = Merger.NMaxClusters();
// NOTE(review): maxId was just initialised from Merger.NMaxClusters(), so as written this
// comparison cannot be true and the throw is unreachable unless NMaxClusters() changes between
// the two calls. Looks like a leftover guard — confirm whether it can be removed.
186 if (maxId > Merger.NMaxClusters()) {
187 throw std::runtime_error(
"mNMaxClusters too small");
// Clear maxId elements (in bytes) of SharedCount and of ClusterAttachment on the shadow
// object, using a {numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging} launch.
189 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId *
sizeof(*MergerShadowAll.SharedCount()));
190 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId *
sizeof(*MergerShadowAll.ClusterAttachment()));
// PrepareClusters is launched in three template variants (0, 1, 2); SortTracksQPt runs
// between variant 0 and variant 1.
191 runKernel<GPUTPCGMMergerPrepareClusters, 0>(
GetGridAuto(0, deviceType));
193 runKernel<GPUTPCGMMergerSortTracksQPt>(
GetGridAuto(0, deviceType));
194 runKernel<GPUTPCGMMergerPrepareClusters, 1>(
GetGridAuto(0, deviceType));
195 runKernel<GPUTPCGMMergerPrepareClusters, 2>(
GetGridAuto(0, deviceType));
// Branch on the waitForTransfer flag; the body of this branch is not visible in this excerpt.
201 if (waitForTransfer) {
// When rec.tpc.retryRefit equals 1, launch TrackFit with argument -1.
// NOTE(review): this GetGridAuto call (and the one for FollowLoopers below) passes no
// deviceType, unlike the other launches in this excerpt — confirm that is intended.
216 if (
param().
rec.tpc.retryRefit == 1) {
217 runKernel<GPUTPCGMMergerTrackFit>(
GetGridAuto(0), -1);
// When rec.tpc.looperInterpolationInExtraPass is set, launch FollowLoopers.
219 if (
param().
rec.tpc.looperInterpolationInExtraPass) {
220 runKernel<GPUTPCGMMergerFollowLoopers>(
GetGridAuto(0));
// Finalize is launched in three template variants, 0, 1 and 2, in that order.
224 runKernel<GPUTPCGMMergerFinalize, 0>(
GetGridAuto(0, deviceType));
225 runKernel<GPUTPCGMMergerFinalize, 1>(
GetGridAuto(0, deviceType));
226 runKernel<GPUTPCGMMergerFinalize, 2>(
GetGridAuto(0, deviceType));
// When rec.tpc.mergeLoopersAfterburner is set, launch MergeLoopers variants 0, 1 and 2.
227 if (
param().
rec.tpc.mergeLoopersAfterburner) {
// Variant 0: with doGPU the grid is built by GetGrid from Merger.NOutputTracks();
// otherwise the GetGridAuto grid is used.
228 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ?
GetGrid(Merger.NOutputTracks(), 0, deviceType) :
GetGridAuto(0, deviceType));
233 runKernel<GPUTPCGMMergerMergeLoopers, 1>(
GetGridAuto(0, deviceType));
// Variant 2: with doGPU the grid is built from Merger.Memory()->nLooperMatchCandidates.
234 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ?
GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) :
GetGridAuto(0, deviceType));
// Guard on the scratch buffer: throw if the byte distance from `buffer` to `bufferEnd`
// exceeds `size`. The definitions of buffer, bufferEnd and size are not shown in this excerpt.
246 if ((
size_t)((
char*)bufferEnd - (
char*)
buffer) >
size) {
247 throw std::runtime_error(
"QA Scratch buffer exceeded");
// A series of GPUMemCpy calls, each pairing a Merger.X() pointer with the matching
// MergerShadowAll.X() pointer and a byte count of (element count * element size).
// NOTE(review): which of the two pointers is the destination is determined by GPUMemCpy's
// signature, which is not visible here — confirm before relying on the direction.
// Output tracks: NOutputTracks() elements. This is the only copy that passes the extra
// trailing nullptr and waitEvent arguments.
250 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() *
sizeof(*Merger.OutputTracks()), outputStream, 0,
nullptr, waitEvent);
// dE/dx output, copied only when param().dodEdxDownscaled is set; NOutputTracks() elements.
252 if (
param().dodEdxDownscaled) {
253 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() *
sizeof(*Merger.OutputTracksdEdx()), outputStream, 0);
// Clusters: NOutputTrackClusters() elements.
255 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() *
sizeof(*Merger.Clusters()), outputStream, 0);
// ClustersXYZ, copied only when param().par.earlyTpcTransform is set.
256 if (
param().par.earlyTpcTransform) {
257 GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() *
sizeof(*Merger.ClustersXYZ()), outputStream, 0);
// Cluster attachment: sized by NMaxClusters() rather than by the output cluster count.
259 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() *
sizeof(*Merger.ClusterAttachment()), outputStream, 0);
// The following launches sit after a GPUCA_TPC_GEOMETRY_O2 preprocessor condition.
// NOTE(review): the matching #endif is not visible in this excerpt, so the exact extent of
// the conditional region cannot be determined from here.
276#ifdef GPUCA_TPC_GEOMETRY_O2
// GPUTPCGMO2Output is launched in three variants, in order: prepare, sort, output.
286 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(
GetGridAuto(0, deviceType));
288 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(
GetGridAuto(0, deviceType));
298 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(
GetGridAuto(0, deviceType));
// Final log line reporting Merger.NOutputTrackClusters() and Merger.NClusters() as the
// output and input cluster counts.
349 GPUInfo(
"TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());