GPUChainTrackingMerger.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#include "GPUChainTracking.h"
#include "GPULogging.h"
#include "GPUDefParametersRuntime.h"
#include "GPUO2DataTypes.h"
#include "GPUQA.h"
#include "GPUTPCGMMerger.h"
#include "GPUConstantMem.h"
#include "GPUTPCGMMergerGPU.h"
#include "GPUTPCGMO2Output.h"
#include "utils/strtag.h"
#include <fstream>

using namespace o2::gpu;

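// Merge track segments across sector borders, either with the alternate border sort
// (per-sector streams and an explicit sort of the border ranges) or serially in stream 0.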
void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
{
  GPUTPCGMMerger& Merger = processors()->tpcMerger;
  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
  }
  uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS;
  if (GetProcessingSettings().alternateBorderSort == -1 ? mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) {
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
    for (uint32_t i = 0; i < n; i++) {
      int32_t stream = i % mRec->NStreams();
      runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
    }
    for (uint32_t i = 0; i < n; i++) {
      int32_t stream = i % mRec->NStreams();
      int32_t n1, n2;
      GPUTPCGMBorderTrack *b1, *b2;
      int32_t jSector;
      Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
      gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
      gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
      runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode);
    }
    int32_t ne = std::min<int32_t>(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1
    for (int32_t j = 0; j < ne; j++) {
      RecordMarker(&mEvents->sector[j], j + 1);
    }
    StreamWaitForEvents(0, mEvents->sector, ne);
  } else {
    for (uint32_t i = 0; i < n; i++) {
      runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
    }
    runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode);
    for (uint32_t i = 0; i < n; i++) {
      runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
    }
  }
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode);
}

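// Resolve the merge candidates found by the border merging (several passes of the Resolve kernel).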
void GPUChainTracking::RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType)
{
  runKernel<GPUTPCGMMergerResolve, 0>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerResolve, 1>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerResolve, 2>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerResolve, 3>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerResolve, 4>(GetGridAuto(0, deviceType), useOrigTrackParam, mergeAll);
}

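// Main TPC track merger: unpack and refit the sector tracks, merge them within and between
// sectors (and across the central electrode), sort, refit, and produce the final (O2) output.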
int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
{
  mRec->PushNonPersistentMemory(qStr2Tag("TPCMERGE"));
  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
  GPUReconstruction::krnlDeviceType deviceType = doGPU ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU;
  uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
  GPUTPCGMMerger& Merger = processors()->tpcMerger;
  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
  GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
  const int32_t outputStream = OutputStream();
  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("Running TPC Merger");
  }
  const auto& threadContext = GetThreadContext();

  SynchronizeGPU(); // Need to know the full number of sector tracks
  SetupGPUProcessor(&Merger, true);

  if (Merger.CheckSectors()) {
    return 1;
  }

  memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
  WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
  if (doGPU) {
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }

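  // Refit the per-sector tracks and unpack them into the merger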
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(GetGridAuto(0, deviceType), 1);
  }
  for (uint32_t i = 0; i < NSECTORS; i++) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, i);
    runKernel<GPUTPCGMMergerUnpackResetIds>(GetGridAuto(0, deviceType), i);
    runKernel<GPUTPCGMMergerSectorRefit>(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0?
  }
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0);
  }
  for (uint32_t i = 0; i < NSECTORS; i++) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i);
    runKernel<GPUTPCGMMergerUnpackGlobal>(GetGridAuto(0, deviceType), i);
  }
  runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS);
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1);
  }
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile);

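  // Merge track segments within the individual sectors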
  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
  RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);

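  // Merge track segments between neighboring sectors in three passes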
  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
  RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
  RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
  RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);

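  // Reset the temporary counters, then link extrapolated track segments and collect the merged tracks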
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));

  runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerCollect>(GetGridAuto(0, deviceType));
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
  }
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);

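  // Optionally merge the two halves of tracks crossing the central electrode (CE)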
  if (param().rec.tpc.mergeCE) {
    runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
    RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
    RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
    runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
    DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
  }
  int32_t waitForTransfer = 0;
  if (doGPU) {
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
    waitForTransfer = 1;
  }

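  // Optionally sort the merged tracks before the final fit, then prepare the cluster arrays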
  const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks;
  if (mergerSortTracks) {
    runKernel<GPUTPCGMMergerSortTracksPrepare>(GetGridAuto(0, deviceType));
    CondWaitEvent(waitForTransfer, &mEvents->single);
    runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
  }

  uint32_t maxId = Merger.NMaxClusters();
  if (maxId > Merger.NMaxClusters()) {
    throw std::runtime_error("mNMaxClusters too small");
  }
  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
  runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
  CondWaitEvent(waitForTransfer, &mEvents->single);
  runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));

  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile);

  if (doGPU) {
    CondWaitEvent(waitForTransfer, &mEvents->single);
    if (waitForTransfer) {
      ReleaseEvent(mEvents->single);
    }
  } else if (doGPU) {
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }

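  // Flush output copies queued by the delayed-output mode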
  if (GetProcessingSettings().delayedOutput) {
    for (uint32_t i = 0; i < mOutputQueue.size(); i++) {
      GPUMemCpy(mOutputQueue[i].step, mOutputQueue[i].dst, mOutputQueue[i].src, mOutputQueue[i].size, outputStream, false);
    }
    mOutputQueue.clear();
  }

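  // Final track fit, optional refit retry, and optional separate loop-interpolation pass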
  runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
  if (param().rec.tpc.retryRefit == 1) {
    runKernel<GPUTPCGMMergerTrackFit>(GetGridAuto(0), -1);
  }
  if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) {
    runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
  }

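  // Dump the refit results, finalize the merged tracks, and optionally run the looper-merging afterburner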
  DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);
  runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
  if (param().rec.tpc.mergeLoopersAfterburner) {
    runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
    if (doGPU) {
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
      SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
    }
    runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
    runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
  }
  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);

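  // Copy the merger output from the GPU to the host output buffers (on the CPU path, update the GPU copy instead)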
  if (doGPU) {
    RecordMarker(&mEvents->single, 0);
    auto* waitEvent = &mEvents->single;
    if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
      if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
        size_t size = mRec->Res(Merger.MemoryResOutput()).Size() + GPUCA_MEMALIGN;
        void* buffer = GetQA()->AllocateScratchBuffer(size);
        void* bufferEnd = Merger.SetPointersOutput(buffer);
        if ((size_t)((char*)bufferEnd - (char*)buffer) > size) {
          throw std::runtime_error("QA Scratch buffer exceeded");
        }
      }
      GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent);
      waitEvent = nullptr;
      if (param().dodEdxDownscaled) {
        GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0);
      }
      GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
      if (param().par.earlyTpcTransform) {
        GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
      }
      GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
    }
    if (GetProcessingSettings().outputSharedClusterMap) {
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
      waitEvent = nullptr;
    }
  } else {
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }
  if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
    TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
  }

  mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERGE"));

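  // Create the output in O2 format (o2::tpc::TrackTPC, cluster references, MC labels) if requested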
#ifdef GPUCA_TPC_GEOMETRY_O2
  if (GetProcessingSettings().createO2Output) {
      mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early
    }

    mRec->PushNonPersistentMemory(qStr2Tag("TPCMERG2"));
    AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));

    if (GetProcessingSettings().clearO2OutputFromGPU) {
    }
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(GetGridAuto(0, deviceType));

      TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
      runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
    } else if (doGPU) {
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
    }
    mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
  }
#endif
  if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
    SynchronizeStream(outputStream);
  }
  if (GetProcessingSettings().clearO2OutputFromGPU) {
  }

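  // Publish the merger output via the host I/O pointers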
  mIOPtrs.mergedTracks = Merger.OutputTracks();
  mIOPtrs.nMergedTracks = Merger.NOutputTracks();
  mIOPtrs.mergedTrackHits = Merger.Clusters();
  mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ();
  mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
  mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
  mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
  mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
  mIOPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
  mIOPtrs.outputClusRefsTPCO2 = Merger.OutputClusRefsTPCO2();
  mIOPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
  mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC();

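  // Mirror the output pointers into the device-side ioPtrs and update constant memory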
  if (doGPU) {
    processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks();
    processorsShadow()->ioPtrs.nMergedTracks = Merger.NOutputTracks();
    processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
    processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ();
    processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
    processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
    processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
    processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
    processorsShadow()->ioPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
    processorsShadow()->ioPtrs.outputClusRefsTPCO2 = MergerShadow.OutputClusRefsTPCO2();
    processorsShadow()->ioPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
  }

  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());
  }
  return 0;
}