100 GPUInfo(
"Running TPC Merger");
113 memset(Merger.Memory(), 0,
sizeof(*Merger.Memory()));
120 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(
GetGridAuto(0, deviceType), 1);
123 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
i);
124 runKernel<GPUTPCGMMergerUnpackResetIds>(
GetGridAuto(0, deviceType),
i);
125 runKernel<GPUTPCGMMergerSectorRefit>(
GetGridAuto(0, deviceType),
i);
128 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS);
129 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 0);
132 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}},
NSECTORS +
i);
133 runKernel<GPUTPCGMMergerUnpackGlobal>(
GetGridAuto(0, deviceType),
i);
135 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -
WarpSize(), 0, deviceType}}, 2 *
NSECTORS);
137 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{
GPUCA_NSECTORS, -
WarpSize(), 0, deviceType}}, 1);
141 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
142 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(),
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
143 runKernel<GPUTPCGMMergerMergeWithinPrepare>(
GetGridAuto(0, deviceType));
144 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
148 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
false);
149 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
150 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
151 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
152 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
153 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
154 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
155 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
156 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
157 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
158 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(
GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
160 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
163 runKernel<GPUMemClean16>({{1, -
WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 *
NSECTORS *
sizeof(*MergerShadowAll.TmpCounter()));
165 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(
GetGridAuto(0, deviceType));
166 runKernel<GPUTPCGMMergerCollect>(
GetGridAuto(0, deviceType));
168 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -
WarpSize(), 0, deviceType}}, 1);
169 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -
WarpSize(), 0, deviceType}}, 1);
174 runKernel<GPUTPCGMMergerClearLinks>(
GetGridAuto(0, deviceType),
true);
175 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
176 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
177 runKernel<GPUTPCGMMergerMergeCE>(
GetGridAuto(0, deviceType));
180 int32_t waitForTransfer = 0;
187 if (mergerSortTracks) {
188 runKernel<GPUTPCGMMergerSortTracksPrepare>(
GetGridAuto(0, deviceType));
190 runKernel<GPUTPCGMMergerSortTracks>(
GetGridAuto(0, deviceType));
193 uint32_t maxId = Merger.NMaxClusters();
194 if (maxId > Merger.NMaxClusters()) {
195 throw std::runtime_error(
"mNMaxClusters too small");
197 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId *
sizeof(*MergerShadowAll.SharedCount()));
198 runKernel<GPUMemClean16>({{numBlocks, -
ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId *
sizeof(*MergerShadowAll.ClusterAttachment()));
199 runKernel<GPUTPCGMMergerPrepareClusters, 0>(
GetGridAuto(0, deviceType));
201 runKernel<GPUTPCGMMergerSortTracksQPt>(
GetGridAuto(0, deviceType));
202 runKernel<GPUTPCGMMergerPrepareClusters, 1>(
GetGridAuto(0, deviceType));
203 runKernel<GPUTPCGMMergerPrepareClusters, 2>(
GetGridAuto(0, deviceType));
209 if (waitForTransfer) {
223 runKernel<GPUTPCGMMergerTrackFit>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0) :
GetGridAuto(0), mergerSortTracks ? 1 : 0);
224 if (
param().
rec.tpc.retryRefit == 1) {
225 runKernel<GPUTPCGMMergerTrackFit>(
GetGridAuto(0), -1);
227 if (
param().
rec.tpc.looperInterpolationInExtraPass == -1 ?
mRec->
getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION :
param().
rec.tpc.looperInterpolationInExtraPass) {
228 runKernel<GPUTPCGMMergerFollowLoopers>(
GetGridAuto(0));
232 runKernel<GPUTPCGMMergerFinalize, 0>(
GetGridAuto(0, deviceType));
233 runKernel<GPUTPCGMMergerFinalize, 1>(
GetGridAuto(0, deviceType));
234 runKernel<GPUTPCGMMergerFinalize, 2>(
GetGridAuto(0, deviceType));
235 if (
param().
rec.tpc.mergeLoopersAfterburner) {
236 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ?
GetGrid(Merger.NMergedTracks(), 0, deviceType) :
GetGridAuto(0, deviceType));
241 runKernel<GPUTPCGMMergerMergeLoopers, 1>(
GetGridAuto(0, deviceType));
242 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ?
GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) :
GetGridAuto(0, deviceType));
255 if ((
size_t)((
char*)bufferEnd - (
char*)
buffer) >
size) {
256 throw std::runtime_error(
"QA Scratch buffer exceeded");
259 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracks()), outputStream, 0,
nullptr, waitEvent);
261 if (
param().dodEdxEnabled) {
262 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() *
sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
264 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() *
sizeof(*Merger.Clusters()), outputStream, 0);
265 if (
param().par.earlyTpcTransform) {
266 GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() *
sizeof(*Merger.ClustersXYZ()), outputStream, 0);
268 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() *
sizeof(*Merger.ClusterAttachment()), outputStream, 0);
285#ifdef GPUCA_TPC_GEOMETRY_O2
295 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(
GetGridAuto(0, deviceType));
297 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(
GetGridAuto(0, deviceType));
307 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(
GetGridAuto(0, deviceType));
358 GPUInfo(
"TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());