98 const int32_t outputStream = OutputStream();
100 GPUInfo(
"Running TPC Merger");
113 memset(Merger.Memory(), 0,
sizeof(*Merger.Memory()));
120 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(
GetGridAuto(0, deviceType), 1);
123 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
i);
124 runKernel<GPUTPCGMMergerUnpackResetIds>(
GetGridAuto(0, deviceType),
i);
125 runKernel<GPUTPCGMMergerSectorRefit>(
GetGridAuto(0, deviceType),
i);
128 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS);
129 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 0);
132 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS +
i);
133 runKernel<GPUTPCGMMergerUnpackGlobal>(
GetGridAuto(0, deviceType),
i);
135 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}}, 2 *
NSECTORS);
137 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 1);
141 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
142 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(),
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
143 runKernel<GPUTPCGMMergerMergeWithinPrepare>(
GetGridAuto(0, deviceType));
144 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
148 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
149 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
150 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
151 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
152 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
153 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
154 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
155 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
156 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
157 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
158 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
160 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
163 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
165 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(
GetGridAuto(0, deviceType));
169 runKernel<GPUTPCGMMergerCollect>(
GetGridAuto(0, deviceType));
171 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks1>({{1, -
WarpSize(), 0, deviceType}}, 1);
172 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks2>({{1, -
WarpSize(), 0, deviceType}}, 1);
177 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
true);
178 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
179 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
180 runKernel<GPUTPCGMMergerMergeCE>(
GetGridAuto(0, deviceType));
183 int32_t waitForTransfer = 0;
190 if (mergerSortTracks) {
191 runKernel<GPUTPCGMMergerSortTracksPrepare>(
GetGridAuto(0, deviceType));
193 runKernel<GPUTPCGMMergerSortTracks>(
GetGridAuto(0, deviceType));
199 uint32_t maxId = Merger.NMaxClusters();
200 if (maxId > Merger.NMaxClusters()) {
201 throw std::runtime_error(
"mNMaxClusters too small");
203 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId *
sizeof(*MergerShadowAll.SharedCount()));
204 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId *
sizeof(*MergerShadowAll.ClusterAttachment()));
205 runKernel<GPUTPCGMMergerPrepareForFit, 0>(
GetGridAuto(0, deviceType));
207 runKernel<GPUTPCGMMergerSortTracksQPt>(
GetGridAuto(0, deviceType));
208 runKernel<GPUTPCGMMergerPrepareForFit, 1>(
GetGridAuto(0, deviceType));
209 runKernel<GPUTPCGMMergerPrepareForFit, 2>(
GetGridAuto(0, deviceType));
215 if (waitForTransfer) {
229 runKernel<GPUTPCGMMergerTrackFit>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0) :
GetGridAuto(0), mergerSortTracks ? 1 : 0);
230 if (
param().
rec.tpc.retryRefit == 1) {
231 runKernel<GPUTPCGMMergerTrackFit>(
GetGridAuto(0), -1);
233 runKernel<GPUTPCGMMergerFollowLoopers>(
GetGridAuto(0));
236 runKernel<GPUTPCGMMergerFinalize, 0>(
GetGridAuto(0, deviceType));
237 runKernel<GPUTPCGMMergerFinalize, 1>(
GetGridAuto(0, deviceType));
238 runKernel<GPUTPCGMMergerFinalize, 2>(
GetGridAuto(0, deviceType));
239 if (
param().
rec.tpc.mergeLoopersAfterburner) {
240 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0, deviceType) :
GetGridAuto(0, deviceType));
245 runKernel<GPUTPCGMMergerMergeLoopers, 1>(
GetGridAuto(0, deviceType));
246 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ?
GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) :
GetGridAuto(0, deviceType));
259 if ((
size_t)((
char*)bufferEnd - (
char*)
buffer) >
size) {
260 throw std::runtime_error(
"QA Scratch buffer exceeded");
263 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracks()), outputStream, 0,
nullptr, waitEvent);
265 if (
param().dodEdxEnabled) {
266 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
268 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() *
sizeof(*Merger.Clusters()), outputStream, 0);
269 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() *
sizeof(*Merger.ClusterAttachment()), outputStream, 0);
286#ifdef GPUCA_TPC_GEOMETRY_O2
297 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(
GetGridAuto(0, deviceType));
300 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(
GetGridAuto(0, deviceType));
310 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(
GetGridAuto(0, deviceType));
359 GPUInfo(
"TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters());