99 GPUInfo(
"Running TPC Merger");
112 memset(Merger.Memory(), 0,
sizeof(*Merger.Memory()));
119 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(
GetGridAuto(0, deviceType), 1);
122 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
i);
123 runKernel<GPUTPCGMMergerUnpackResetIds>(
GetGridAuto(0, deviceType),
i);
124 runKernel<GPUTPCGMMergerSectorRefit>(
GetGridAuto(0, deviceType),
i);
127 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS);
128 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 0);
131 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS +
i);
132 runKernel<GPUTPCGMMergerUnpackGlobal>(
GetGridAuto(0, deviceType),
i);
134 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}}, 2 *
NSECTORS);
136 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 1);
140 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
141 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(),
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
142 runKernel<GPUTPCGMMergerMergeWithinPrepare>(
GetGridAuto(0, deviceType));
143 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
144 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
147 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
148 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
149 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
150 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
151 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
152 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
153 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
154 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
155 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
156 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
157 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
158 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
159 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
162 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
164 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(
GetGridAuto(0, deviceType));
165 runKernel<GPUTPCGMMergerCollect>(
GetGridAuto(0, deviceType));
167 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -
WarpSize(), 0, deviceType}}, 1);
168 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -
WarpSize(), 0, deviceType}}, 1);
173 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
true);
174 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
175 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
176 runKernel<GPUTPCGMMergerMergeCE>(
GetGridAuto(0, deviceType));
179 int32_t waitForTransfer = 0;
186 runKernel<GPUTPCGMMergerSortTracksPrepare>(
GetGridAuto(0, deviceType));
188 runKernel<GPUTPCGMMergerSortTracks>(
GetGridAuto(0, deviceType));
191 uint32_t maxId = Merger.NMaxClusters();
192 if (maxId > Merger.NMaxClusters()) {
193 throw std::runtime_error(
"mNMaxClusters too small");
195 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId *
sizeof(*MergerShadowAll.SharedCount()));
196 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId *
sizeof(*MergerShadowAll.ClusterAttachment()));
197 runKernel<GPUTPCGMMergerPrepareClusters, 0>(
GetGridAuto(0, deviceType));
199 runKernel<GPUTPCGMMergerSortTracksQPt>(
GetGridAuto(0, deviceType));
200 runKernel<GPUTPCGMMergerPrepareClusters, 1>(
GetGridAuto(0, deviceType));
201 runKernel<GPUTPCGMMergerPrepareClusters, 2>(
GetGridAuto(0, deviceType));
207 if (waitForTransfer) {
222 if (
param().
rec.tpc.retryRefit == 1) {
223 runKernel<GPUTPCGMMergerTrackFit>(
GetGridAuto(0), -1);
225 if (
param().
rec.tpc.looperInterpolationInExtraPass) {
226 runKernel<GPUTPCGMMergerFollowLoopers>(
GetGridAuto(0));
230 runKernel<GPUTPCGMMergerFinalize, 0>(
GetGridAuto(0, deviceType));
231 runKernel<GPUTPCGMMergerFinalize, 1>(
GetGridAuto(0, deviceType));
232 runKernel<GPUTPCGMMergerFinalize, 2>(
GetGridAuto(0, deviceType));
233 if (
param().
rec.tpc.mergeLoopersAfterburner) {
234 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ?
GetGrid(Merger.NOutputTracks(), 0, deviceType) :
GetGridAuto(0, deviceType));
239 runKernel<GPUTPCGMMergerMergeLoopers, 1>(
GetGridAuto(0, deviceType));
240 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ?
GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) :
GetGridAuto(0, deviceType));
252 if ((
size_t)((
char*)bufferEnd - (
char*)
buffer) >
size) {
253 throw std::runtime_error(
"QA Scratch buffer exceeded");
256 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() *
sizeof(*Merger.OutputTracks()), outputStream, 0,
nullptr, waitEvent);
258 if (
param().dodEdxDownscaled) {
259 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() *
sizeof(*Merger.OutputTracksdEdx()), outputStream, 0);
261 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() *
sizeof(*Merger.Clusters()), outputStream, 0);
262 if (
param().par.earlyTpcTransform) {
263 GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() *
sizeof(*Merger.ClustersXYZ()), outputStream, 0);
265 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() *
sizeof(*Merger.ClusterAttachment()), outputStream, 0);
282#ifdef GPUCA_TPC_GEOMETRY_O2
292 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(
GetGridAuto(0, deviceType));
294 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(
GetGridAuto(0, deviceType));
304 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(
GetGridAuto(0, deviceType));
355 GPUInfo(
"TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());