GPUChainTrackingMerger.cxx — Doxygen source-listing page.
(The original page chrome "Project / Loading... / Searching... / No Matches" and the
"Go to the documentation of this file." link are navigation placeholders, not file content.
Each code line below carries its original source line number fused to its first token.)
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
17#include "GPULogging.h"
18#include "GPUDefParametersRuntime.h"
19#include "GPUO2DataTypes.h"
20#include "GPUQA.h"
21#include "GPUTPCGMMerger.h"
22#include "GPUConstantMem.h"
23#include "GPUTPCGMMergerGPU.h"
24#include "GPUTPCGMO2Output.h"
26#include "utils/strtag.h"
27#include <fstream>
28
29using namespace o2::gpu;
30
// Merges TPC track segments across sector borders.
// withinSector selects the pass: 1 = borders within one sector, 0 = borders between
// neighboring sectors, -1 = across the central electrode (only NSECTORS/2 pairs, see line 40).
// mergeMode further qualifies the pass and is forwarded to the merge kernels.
// deviceType selects CPU/GPU execution for all kernels launched here.
// NOTE(review): this is a scraped listing with embedded original line numbers; original
// lines 33, 42, 48-49, 53, 66 and 77 are missing (e.g. the declarations of b1/b2 used at
// line 55, and the inter-stream synchronization around line 48) — code kept verbatim.
31void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
32{
// doGPU: whether the TPCMerging reco step actually runs on the GPU; MergerShadow then
// refers to the device-side processor copy, otherwise to the host Merger.
34 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
35 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
// Deterministic mode: pre-sort border tracks so results are reproducible across runs.
36 if (GetProcessingSettings().deterministicGPUReconstruction) {
37 uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
38 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
39 }
// CE pass (-1) processes sector pairs, so only half as many iterations.
40 uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS;
// alternateBorderSort == -1 defers to the tuned per-GPU default parameter.
41 if (GetProcessingSettings().alternateBorderSort == -1 ? mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) {
// Alternate path: distribute the border-prepare kernels round-robin over streams,
// then sort both border ranges per pair on small single-block launches.
43 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
44 for (uint32_t i = 0; i < n; i++) {
45 int32_t stream = i % mRec->NStreams();
// Only the first pass over each stream (i < NStreams) waits on the single event.
46 runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
47 }
50 for (uint32_t i = 0; i < n; i++) {
51 int32_t stream = i % mRec->NStreams();
52 int32_t n1, n2;
54 int32_t jSector;
// Fills n1/n2, b1/b2 (declared on a stripped line) and the partner sector jSector
// for border pair i.
55 Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
56 gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
// range2 is offset past the partner sector's own tracks.
57 gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
// Variant 3 sorts a border range; variant 2 performs the actual merge for pair i.
58 runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
59 runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
60 runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode);
61 }
// Record one marker per auxiliary stream so stream 0 can wait for all of them.
62 int32_t ne = std::min<int32_t>(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1
63 for (int32_t j = 0; j < ne; j++) {
64 RecordMarker(&mEvents->sector[j], j + 1);
65 }
67 } else {
// Default path: everything sequential on stream 0 — prepare (0), sort all ranges
// in one launch (1), then merge (2).
68 for (uint32_t i = 0; i < n; i++) {
69 runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
70 }
71 runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode);
72 for (uint32_t i = 0; i < n; i++) {
73 runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
74 }
75 }
76 DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode);
78}
79
// Resolves duplicate/conflicting track-merge candidates in five fixed kernel stages
// (template indices 0-4 of GPUTPCGMMergerResolve), all on stream 0. The stage order is
// part of the algorithm and must not be changed. Only the final stage receives the
// useOrigTrackParam and mergeAll flags, which are forwarded verbatim from the caller.
80void GPUChainTracking::RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType)
81{
82 runKernel<GPUTPCGMMergerResolve, 0>(GetGridAuto(0, deviceType));
83 runKernel<GPUTPCGMMergerResolve, 1>(GetGridAuto(0, deviceType));
84 runKernel<GPUTPCGMMergerResolve, 2>(GetGridAuto(0, deviceType));
85 runKernel<GPUTPCGMMergerResolve, 3>(GetGridAuto(0, deviceType));
// Final stage consumes the resolve flags.
86 runKernel<GPUTPCGMMergerResolve, 4>(GetGridAuto(0, deviceType), useOrigTrackParam, mergeAll);
87}
88
// Runs the full TPC track-merger step: unpacks sector tracks, merges within sectors,
// between neighboring sectors, and across the central electrode, then fits, finalizes,
// and publishes the merged tracks (optionally as O2-format output).
// synchronizeOutput: when true, waits for the output stream before returning.
// Returns 0 on success, 1 if Merger.CheckSectors() reports inconsistent sector data.
// NOTE(review): scraped listing with embedded original line numbers; many original lines
// are stripped (e.g. 93 declaring deviceType, 253 declaring buffer, event releases at
// 210/282, memory push at 91/287) — code below is kept verbatim, gaps and all.
89int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
90{
92 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
// When the chain runs on GPU but this step does not, use a single block on the host.
94 uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
// Shadow references point at the device-side processor copy when running on GPU.
96 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
97 GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
98 const int32_t outputStream = OutputStream();
99 if (GetProcessingSettings().debugLevel >= 2) {
100 GPUInfo("Running TPC Merger");
101 }
102 const auto& threadContext = GetThreadContext();
103
104 SynchronizeGPU(); // Need to know the full number of sector tracks
105 SetupGPUProcessor(&Merger, true);
108
// Abort early if the sector input is inconsistent.
109 if (Merger.CheckSectors()) {
110 return 1;
111 }
112
// Zero the merger's bookkeeping memory and upload the processor to constant memory.
113 memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
114 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
115 if (doGPU) {
116 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
117 }
118
// --- Unpack sector tracks and refit them (all on stream 0) ---
119 if (GetProcessingSettings().deterministicGPUReconstruction) {
120 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(GetGridAuto(0, deviceType), 1);
121 }
122 for (uint32_t i = 0; i < NSECTORS; i++) {
123 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, i);
124 runKernel<GPUTPCGMMergerUnpackResetIds>(GetGridAuto(0, deviceType), i);
125 runKernel<GPUTPCGMMergerSectorRefit>(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0?
126 }
127 if (GetProcessingSettings().deterministicGPUReconstruction) {
128 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS);
129 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0);
130 }
131 for (uint32_t i = 0; i < NSECTORS; i++) {
132 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i);
133 runKernel<GPUTPCGMMergerUnpackGlobal>(GetGridAuto(0, deviceType), i);
134 }
135 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS);
136 if (GetProcessingSettings().deterministicGPUReconstruction) {
137 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1);
138 }
140
// --- Merge pass 1: within sectors ---
141 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
142 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
143 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
144 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
147
// --- Merge pass 2: between neighboring sectors, three prepare variants ---
148 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
149 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
150 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
151 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
152 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
153 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
154 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
155 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
156 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
157 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
158 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
160 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
162
163 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
164
// --- Link extrapolated tracks and collect merged tracks ---
165 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
166 runKernel<GPUTPCGMMergerCollect>(GetGridAuto(0, deviceType));
167 if (GetProcessingSettings().deterministicGPUReconstruction) {
168 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
169 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
170 }
172
// --- Optional merge across the central electrode (CE) ---
173 if (param().rec.tpc.mergeCE) {
174 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
175 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
176 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
177 runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
179 }
// waitForTransfer gates the CondWaitEvent calls below on the async download of the
// merger bookkeeping memory started here.
180 int32_t waitForTransfer = 0;
181 if (doGPU) {
182 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
183 waitForTransfer = 1;
184 }
185
// --- Optional track sort before fit (default taken from per-GPU tuned parameters) ---
186 const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks;
187 if (mergerSortTracks) {
188 runKernel<GPUTPCGMMergerSortTracksPrepare>(GetGridAuto(0, deviceType));
189 CondWaitEvent(waitForTransfer, &mEvents->single);
190 runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
191 }
192
// --- Prepare per-cluster attachment state ---
// NOTE(review): as written, maxId == NMaxClusters() so this check can never fire;
// presumably a stripped line originally assigned maxId differently — confirm upstream.
193 uint32_t maxId = Merger.NMaxClusters();
194 if (maxId > Merger.NMaxClusters()) {
195 throw std::runtime_error("mNMaxClusters too small");
196 }
197 runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
198 runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
199 runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
200 CondWaitEvent(waitForTransfer, &mEvents->single);
201 runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
202 runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
203 runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));
204
206
// NOTE(review): the `else if (doGPU)` branch is unreachable as shown (both branches
// test doGPU); a stripped line likely made the first condition different — verify.
207 if (doGPU) {
208 CondWaitEvent(waitForTransfer, &mEvents->single);
209 if (waitForTransfer) {
211 }
212 } else if (doGPU) {
213 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
214 }
215
// Flush any queued delayed output copies on the output stream.
216 if (GetProcessingSettings().delayedOutput) {
217 for (uint32_t i = 0; i < mOutputQueue.size(); i++) {
218 GPUMemCpy(mOutputQueue[i].step, mOutputQueue[i].dst, mOutputQueue[i].src, mOutputQueue[i].size, outputStream, false);
219 }
220 mOutputQueue.clear();
221 }
222
// --- Track fit (second argument signals whether tracks were pre-sorted) ---
223 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
224 if (param().rec.tpc.retryRefit == 1) {
225 runKernel<GPUTPCGMMergerTrackFit>(GetGridAuto(0), -1);
226 }
227 if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) {
228 runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
229 }
230
// --- Finalize and optional looper-merging afterburner ---
232 runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
233 runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
234 runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
235 if (param().rec.tpc.mergeLoopersAfterburner) {
236 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
237 if (doGPU) {
238 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
239 SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
240 }
241 runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
// Stage 2 grid is sized by the candidate count downloaded just above.
242 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
244 }
246
// --- Copy results back to the host (GPU) or up to the device (CPU step on GPU chain) ---
247 if (doGPU) {
249 auto* waitEvent = &mEvents->single;
250 if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
// QA-only case: route output into a scratch buffer (allocated on a stripped line)
// and guard against overrunning it.
251 if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
252 size_t size = mRec->Res(Merger.MemoryResOutput()).Size() + GPUCA_MEMALIGN;
254 void* bufferEnd = Merger.SetPointersOutput(buffer);
255 if ((size_t)((char*)bufferEnd - (char*)buffer) > size) {
256 throw std::runtime_error("QA Scratch buffer exceeded");
257 }
258 }
// First copy waits on the pending event; subsequent copies are ordered by stream.
259 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
260 waitEvent = nullptr;
261 if (param().dodEdxEnabled) {
262 GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
263 }
264 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
265 if (param().par.earlyTpcTransform) {
266 GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
267 }
268 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
269 }
270 if (GetProcessingSettings().outputSharedClusterMap) {
271 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
272 waitEvent = nullptr;
273 }
275 } else {
276 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
277 }
278 if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
279 TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
280 }
281
// Release the merger's non-persistent memory now that results are copied out.
283 mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERGE"));
284
// --- Optional O2-format output (prepare / sort / output / MC-label kernels) ---
285#ifdef GPUCA_TPC_GEOMETRY_O2
286 if (GetProcessingSettings().createO2Output) {
288 mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early
290 }
291
293 AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
294 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
295 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
296 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
297 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));
300
301 if (GetProcessingSettings().clearO2OutputFromGPU) {
303 }
306 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
307 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(GetGridAuto(0, deviceType));
308
// MC labels are produced on the CPU after a full host transfer.
311 TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
312 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
313 } else if (doGPU) {
315 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
316 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
318 }
319 mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
320 }
321#endif
322 if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
323 SynchronizeStream(outputStream);
324 }
325 if (GetProcessingSettings().clearO2OutputFromGPU) {
327 }
328
// --- Publish results to the I/O pointer struct (host side) ---
329 mIOPtrs.mergedTracks = Merger.MergedTracks();
330 mIOPtrs.nMergedTracks = Merger.NMergedTracks();
331 mIOPtrs.mergedTrackHits = Merger.Clusters();
332 mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ();
333 mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
334 mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
335 mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
336 mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
337 mIOPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
338 mIOPtrs.outputClusRefsTPCO2 = Merger.OutputClusRefsTPCO2();
339 mIOPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
340 mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC();
341
// Mirror the pointers (device addresses from MergerShadow, counts from host Merger)
// into the device-side ioPtrs and upload them to constant memory.
342 if (doGPU) {
343 processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks();
344 processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks();
345 processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
346 processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ();
347 processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
348 processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
349 processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
350 processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
351 processorsShadow()->ioPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
352 processorsShadow()->ioPtrs.outputClusRefsTPCO2 = MergerShadow.OutputClusRefsTPCO2();
353 processorsShadow()->ioPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
354 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
355 }
356
357 if (GetProcessingSettings().debugLevel >= 2) {
358 GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());
359 }
360 return 0;
361}
int32_t i
#define GPUCA_MEMALIGN
const GPUTPCGMMerger::trackCluster & b1
#define GPUCA_NSECTORS
uint32_t j
Definition RawData.h:0
int32_t RunTPCTrackingMerger(bool synchronizeOutput=true)
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
std::vector< outputQueueEntry > mOutputQueue
const GPUQA * GetQA() const
GPUTrackingInOutPointers & mIOPtrs
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:108
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:128
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:230
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:100
void SynchronizeGPU()
Definition GPUChain.h:110
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:72
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:127
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:111
uint32_t WarpSize() const
Definition GPUChain.h:214
uint32_t ThreadCount() const
Definition GPUChain.h:215
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:217
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:109
GPUConstantMem * processors()
Definition GPUChain.h:84
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:41
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:117
GPUParam & param()
Definition GPUChain.h:87
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:220
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:76
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:89
GPUReconstructionCPU * mRec
Definition GPUChain.h:79
GPUConstantMem * processorsShadow()
Definition GPUChain.h:85
static constexpr int32_t NSECTORS
Definition GPUChain.h:58
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:125
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:123
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
uint32_t BlockCount() const
Definition GPUChain.h:213
GPUReconstruction * rec()
Definition GPUChain.h:66
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:92
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
void * AllocateScratchBuffer(size_t nBytes)
Definition GPUQA.h:55
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
GPUMemoryResource & Res(int16_t num)
void PushNonPersistentMemory(uint64_t tag)
void DumpMergeCE(std::ostream &out) const
void DumpMergedBetweenSectors(std::ostream &out) const
void DumpFitPrepare(std::ostream &out) const
void DumpFinal(std::ostream &out) const
void * SetPointersOutput(void *mem)
void DumpRefit(std::ostream &out) const
void DumpMergedWithinSectors(std::ostream &out) const
void DumpCollected(std::ostream &out) const
void DumpLoopers(std::ostream &out) const
void DumpMergeRanges(std::ostream &out, int32_t withinSector, int32_t mergeMode) const
void DumpSectorTracks(std::ostream &out) const
GLdouble n
Definition glcorearb.h:1982
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint GLuint stream
Definition glcorearb.h:1806
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
GPUTrackingInOutPointers ioPtrs
const GPUTPCGMMergedTrackHitXYZ * mergedTrackHitsXYZ
const o2::tpc::ClusterNativeAccess * clustersNative
const uint32_t * mergedTrackHitAttachment
const o2::MCCompLabel * outputTracksTPCO2MC
const o2::tpc::TrackTPC * outputTracksTPCO2
const GPUTPCGMMergedTrackHit * mergedTrackHits
const GPUTPCGMMergedTrack * mergedTracks
GPUOutputControl tpcTracksO2Labels
GPUOutputControl tpcTracksO2ClusRefs
size_t getIndex(const GPUOutputControl &v)
GPUOutputControl sharedClusterMap
const o2::dataformats::ConstMCTruthContainerView< o2::MCCompLabel > * clustersMCTruth