100 const int32_t outputStream = OutputStream();
102 GPUInfo(
"Running TPC Merger");
115 memset(Merger.Memory(), 0,
sizeof(*Merger.Memory()));
122 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(
GetGridAuto(0, deviceType), 1);
125 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
i);
126 runKernel<GPUTPCGMMergerUnpackResetIds>(
GetGridAuto(0, deviceType),
i);
127 runKernel<GPUTPCGMMergerSectorRefit>(
GetGridAuto(0, deviceType),
i);
130 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS);
134 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS +
i);
135 runKernel<GPUTPCGMMergerUnpackGlobal>(
GetGridAuto(0, deviceType),
i);
137 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}}, 2 *
NSECTORS);
144 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
145 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(),
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
146 runKernel<GPUTPCGMMergerMergeWithinPrepare>(
GetGridAuto(0, deviceType));
148 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
152 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
153 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
154 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
156 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
158 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
159 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
161 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
163 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
164 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
166 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
169 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
171 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(
GetGridAuto(0, deviceType));
175 runKernel<GPUTPCGMMergerCollect>(
GetGridAuto(0, deviceType));
177 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks1>({{1, -
WarpSize(), 0, deviceType}}, 1);
178 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks2>({{1, -
WarpSize(), 0, deviceType}}, 1);
183 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
true);
188 runKernel<GPUTPCGMMergerMergeCE>(
GetGridAuto(0, deviceType));
191 int32_t waitForTransfer = 0;
198 if (mergerSortTracks) {
199 runKernel<GPUTPCGMMergerSortTracksPrepare>(
GetGridAuto(0, deviceType));
201 runKernel<GPUTPCGMMergerSortTracks>(
GetGridAuto(0, deviceType));
207 uint32_t maxId = Merger.NMaxClusters();
208 if (maxId > Merger.NMaxClusters()) {
209 throw std::runtime_error(
"mNMaxClusters too small");
211 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId *
sizeof(*MergerShadowAll.SharedCount()));
212 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId *
sizeof(*MergerShadowAll.ClusterAttachment()));
213 runKernel<GPUTPCGMMergerPrepareForFit, 0>(
GetGridAuto(0, deviceType));
215 runKernel<GPUTPCGMMergerSortTracksQPt>(
GetGridAuto(0, deviceType));
216 runKernel<GPUTPCGMMergerPrepareForFit, 1>(
GetGridAuto(0, deviceType));
217 runKernel<GPUTPCGMMergerPrepareForFit, 2>(
GetGridAuto(0, deviceType));
223 if (waitForTransfer) {
237 runKernel<GPUTPCGMMergerTrackFit>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0) :
GetGridAuto(0), mergerSortTracks ? 1 : 0);
238 if (
param().
rec.tpc.retryRefit == 1) {
239 runKernel<GPUTPCGMMergerTrackFit>(
GetGridAuto(0), -1);
241 runKernel<GPUTPCGMMergerFollowLoopers>(
GetGridAuto(0));
244 runKernel<GPUTPCGMMergerFinalize, 0>(
GetGridAuto(0, deviceType));
245 runKernel<GPUTPCGMMergerFinalize, 1>(
GetGridAuto(0, deviceType));
246 runKernel<GPUTPCGMMergerFinalize, 2>(
GetGridAuto(0, deviceType));
247 if (
param().
rec.tpc.mergeLoopersAfterburner) {
248 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0, deviceType) :
GetGridAuto(0, deviceType));
253 runKernel<GPUTPCGMMergerMergeLoopers, 1>(
GetGridAuto(0, deviceType));
254 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ?
GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) :
GetGridAuto(0, deviceType));
264 size_t size =
mRec->
Res(Merger.MemoryResOutput()).
Size() + constants::GPU_MEMALIGN;
267 if ((
size_t)((
char*)bufferEnd - (
char*)
buffer) >
size) {
268 throw std::runtime_error(
"QA Scratch buffer exceeded");
271 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracks()), outputStream, 0,
nullptr, waitEvent);
273 if (
param().dodEdxEnabled) {
274 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
276 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() *
sizeof(*Merger.Clusters()), outputStream, 0);
277 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() *
sizeof(*Merger.ClusterAttachment()), outputStream, 0);
305 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(
GetGridAuto(0, deviceType));
308 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(
GetGridAuto(0, deviceType));
318 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(
GetGridAuto(0, deviceType));
367 GPUInfo(
"TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters());