Project
Loading...
Searching...
No Matches
GPUChainTrackingMerger.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
16#include "GPULogging.h"
17#include "GPUO2DataTypes.h"
18#include "GPUQA.h"
19#include "utils/strtag.h"
20#include <fstream>
21
22using namespace o2::gpu;
23
// Merge TPC track segments across sector borders by driving the staged
// GPUTPCGMMergerMergeBorders kernels.
//   withinSector == 1  : merge borders within a single sector (NSECTORS iterations)
//   withinSector == -1 : half as many iterations (NSECTORS / 2); this is the mode the
//                        CE-merge calls in RunTPCTrackingMerger use
//   otherwise          : cross-sector border merging (NSECTORS iterations)
// mergeMode is forwarded unchanged to the kernels and to MergeBorderTracksSetup;
// deviceType selects where the kernels are launched.
// NOTE(review): this listing is a generated extract with gaps (embedded source lines
// 26, 35, 41-42, 46, 59 and 70 are absent). In particular the declarations of the
// b1/b2 arguments passed to MergeBorderTracksSetup below are not visible here -
// confirm against the original repository file before editing.
24void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
25{
// True when the TPCMerging reconstruction step actually runs on the GPU.
27 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
// Device-side merger processor when on GPU, host-side Merger otherwise.
28 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
// Deterministic mode: pre-sort the border tracks so results are bit-for-bit reproducible.
29 if (GetProcessingSettings().deterministicGPUReconstruction) {
30 uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
31 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
32 }
// Number of border-merge iterations (halved in the withinSector == -1 mode).
33 uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS;
// Multi-stream path: overlap range computation, sorting, and merging across streams.
34 if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) {
36 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
// Kernel stage 0: compute border ranges, one iteration per stream (round-robin).
37 for (uint32_t i = 0; i < n; i++) {
38 int32_t stream = i % mRec->NStreams();
39 runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
40 }
// Kernel stage 3 sorts the two range arrays of each sector pair; stage 2 performs the merge.
43 for (uint32_t i = 0; i < n; i++) {
44 int32_t stream = i % mRec->NStreams();
45 int32_t n1, n2;
47 int32_t jSector;
// NOTE(review): b1/b2 are used here but their declarations fall on lines missing
// from this extract (embedded line 46).
48 Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
49 gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
// range2 starts after the sector's own tracks inside jSector's border-range buffer.
50 gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
51 runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
52 runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
53 runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode);
54 }
55 int32_t ne = std::min<int32_t>(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1
// Record one marker event per auxiliary stream so stream 0 can wait on them later.
56 for (int32_t j = 0; j < ne; j++) {
57 RecordMarker(&mEvents->sector[j], j + 1);
58 }
// Single-stream fallback: compute all ranges (stage 0), sort them all (stage 1), merge (stage 2).
60 } else {
61 for (uint32_t i = 0; i < n; i++) {
62 runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
63 }
64 runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode);
65 for (uint32_t i = 0; i < n; i++) {
66 runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
67 }
68 }
// Optional debug dump of the computed merge ranges (debug mask bit 2048).
69 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode);
71}
72
73void GPUChainTracking::RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType)
74{
75 runKernel<GPUTPCGMMergerResolve, 0>(GetGridAuto(0, deviceType));
76 runKernel<GPUTPCGMMergerResolve, 1>(GetGridAuto(0, deviceType));
77 runKernel<GPUTPCGMMergerResolve, 2>(GetGridAuto(0, deviceType));
78 runKernel<GPUTPCGMMergerResolve, 3>(GetGridAuto(0, deviceType));
79 runKernel<GPUTPCGMMergerResolve, 4>(GetGridAuto(0, deviceType), useOrigTrackParam, mergeAll);
80}
81
// Run the full TPC global-merger chain: unpack sector tracks, merge within and
// across sectors, resolve duplicates, optionally merge across the central
// electrode (CE), refit, finalize, and publish the output pointers in mIOPtrs.
//   synchronizeOutput: when true (and running on GPU), block until the output
//                      stream has finished before returning.
// Returns 1 if Merger.CheckSectors() fails, 0 on success.
// NOTE(review): this listing is a generated extract; numerous embedded source
// lines are missing (84-88, 99-100, 202, 239, 244, 265, 273, 278, 280, 283,
// 289-290, 295-296, 300-301, 305, 308, 317, ...). In particular the visible text
// never declares 'deviceType' or 'buffer' although both are used below - confirm
// every flagged spot against the original repository file before editing.
82int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
83{
// True when the TPCMerging reconstruction step runs on the GPU.
85 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
87 uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
// NOTE(review): MergerShadow and MergerShadowAll have identical initializers as
// shown; the missing lines 84-88 likely declared a distinguishing flag - confirm.
89 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
90 GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
91 const int32_t outputStream = OutputStream();
92 if (GetProcessingSettings().debugLevel >= 2) {
93 GPUInfo("Running TPC Merger");
94 }
// Hold the GPU thread context for the duration of this function.
95 const auto& threadContext = GetThreadContext();
96
97 SynchronizeGPU(); // Need to know the full number of sector tracks
98 SetupGPUProcessor(&Merger, true);
101
// Abort early if the sector input is inconsistent.
102 if (Merger.CheckSectors()) {
103 return 1;
104 }
105
// Reset the merger's bookkeeping memory and push the processor description
// (and, on GPU, its resources) to the device.
106 memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
107 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
108 if (doGPU) {
109 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
110 }
111
// NOTE(review): 'deviceType' is used from here on but its declaration falls on a
// line missing from this extract - confirm against the repository source.
112 if (GetProcessingSettings().deterministicGPUReconstruction) {
113 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(GetGridAuto(0, deviceType), 1);
114 }
// Unpack the per-sector tracks into the merger, recording track counts per sector.
115 for (uint32_t i = 0; i < NSECTORS; i++) {
116 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, i);
117 runKernel<GPUTPCGMMergerUnpackResetIds>(GetGridAuto(0, deviceType), i);
118 runKernel<GPUTPCGMMergerSectorRefit>(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0?
119 }
120 if (GetProcessingSettings().deterministicGPUReconstruction) {
121 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS);
122 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0);
123 }
// Second unpack pass (global/extrapolated tracks), offset by NSECTORS in the counters.
124 for (uint32_t i = 0; i < NSECTORS; i++) {
125 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i);
126 runKernel<GPUTPCGMMergerUnpackGlobal>(GetGridAuto(0, deviceType), i);
127 }
128 runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS);
129 if (GetProcessingSettings().deterministicGPUReconstruction) {
130 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1);
131 }
132 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile);
133
// Pass 1: merge track segments within each sector.
134 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
135 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
136 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
137 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
138 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
139 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);
140
// Pass 2: merge across neighboring sectors in three prepare/merge/resolve rounds,
// resetting the temporary counters between rounds.
141 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
142 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
143 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
144 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
145 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
146 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
147 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
148 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
149 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
150 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
151 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
152 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
153 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
154 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);
155
156 runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
157
// Link extrapolated tracks and collect the merged track/cluster output.
158 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
159 runKernel<GPUTPCGMMergerCollect>(GetGridAuto(0, deviceType));
160 if (GetProcessingSettings().deterministicGPUReconstruction) {
161 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
162 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
163 }
164 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);
165
// Optionally merge tracks crossing the central electrode (CE).
166 if (param().rec.tpc.mergeCE) {
167 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
168 RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
169 RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
170 runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
171 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
172 }
// On GPU, start copying the merger bookkeeping back to the host; later steps
// conditionally wait on this transfer via CondWaitEvent.
173 int32_t waitForTransfer = 0;
174 if (doGPU) {
175 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
176 waitForTransfer = 1;
177 }
178
// Optional track sorting before the fit.
179 if (GetProcessingSettings().mergerSortTracks) {
180 runKernel<GPUTPCGMMergerSortTracksPrepare>(GetGridAuto(0, deviceType));
181 CondWaitEvent(waitForTransfer, &mEvents->single);
182 runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
183 }
184
// NOTE(review): as shown, maxId is initialized from NMaxClusters(), so the check
// below can never fire; the real initializer for maxId presumably sits on a line
// missing from this extract - confirm before relying on this guard.
185 uint32_t maxId = Merger.NMaxClusters();
186 if (maxId > Merger.NMaxClusters()) {
187 throw std::runtime_error("mNMaxClusters too small");
188 }
// Zero the per-cluster shared counters and attachment map before preparing clusters.
189 runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
190 runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
191 runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
192 CondWaitEvent(waitForTransfer, &mEvents->single);
193 runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
194 runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
195 runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));
196
197 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile);
198
// NOTE(review): the 'else if (doGPU)' branch is unreachable as written (same
// condition as the if), and the inner 'if (waitForTransfer)' body is empty
// (embedded line 202 missing). The upstream file most likely distinguishes two
// GPU flags here and releases the event in the empty body - confirm.
199 if (doGPU) {
200 CondWaitEvent(waitForTransfer, &mEvents->single);
201 if (waitForTransfer) {
203 }
204 } else if (doGPU) {
205 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
206 }
207
// Flush any output copies that were queued for delayed execution.
208 if (GetProcessingSettings().delayedOutput) {
209 for (uint32_t i = 0; i < mOutputQueue.size(); i++) {
210 GPUMemCpy(mOutputQueue[i].step, mOutputQueue[i].dst, mOutputQueue[i].src, mOutputQueue[i].size, outputStream, false);
211 }
212 mOutputQueue.clear();
213 }
214
// Track fit (with optional refit retry and extra looper-interpolation pass).
215 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), GetProcessingSettings().mergerSortTracks ? 1 : 0);
216 if (param().rec.tpc.retryRefit == 1) {
217 runKernel<GPUTPCGMMergerTrackFit>(GetGridAuto(0), -1);
218 }
219 if (param().rec.tpc.looperInterpolationInExtraPass) {
220 runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
221 }
222
223 DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);
// Finalization stages, then the optional looper-merging afterburner.
224 runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
225 runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
226 runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
227 if (param().rec.tpc.mergeLoopersAfterburner) {
228 runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
229 if (doGPU) {
230 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
231 SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
232 }
233 runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
234 runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
235 }
236 DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);
237
// Copy the merged output from the device to the host (GPU path) or push the
// host-side state to the device (CPU-merger-on-GPU-chain path).
238 if (doGPU) {
240 auto* waitEvent = &mEvents->single;
241 if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
242 if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
// NOTE(review): 'buffer' is used below but its declaration (embedded line 244,
// presumably a QA scratch-buffer allocation) is missing from this extract - confirm.
243 size_t size = mRec->Res(Merger.MemoryResOutput()).Size() + GPUCA_MEMALIGN;
245 void* bufferEnd = Merger.SetPointersOutput(buffer);
246 if ((size_t)((char*)bufferEnd - (char*)buffer) > size) {
247 throw std::runtime_error("QA Scratch buffer exceeded");
248 }
249 }
250 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent);
251 waitEvent = nullptr;
252 if (param().dodEdxDownscaled) {
253 GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0);
254 }
255 GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
256 if (param().par.earlyTpcTransform) {
257 GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
258 }
259 GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
260 }
261 if (GetProcessingSettings().outputSharedClusterMap) {
262 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
263 waitEvent = nullptr;
264 }
266 } else {
267 TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
268 }
269 if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
270 TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
271 }
272
// Release the merger's non-persistent scratch memory.
274 mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERGE"));
275
// Optional O2-format output production (prepare, sort, output, MC labels).
276#ifdef GPUCA_TPC_GEOMETRY_O2
277 if (GetProcessingSettings().createO2Output) {
279 mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early
281 }
282
284 AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
285 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
286 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
287 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
288 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));
291
292 if (GetProcessingSettings().clearO2OutputFromGPU) {
293 mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
294 }
// Re-publish the merger description before the output kernel (pointers may have changed).
297 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
298 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(GetGridAuto(0, deviceType));
299
// MC-label production always runs on the CPU.
302 TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
303 runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
304 } else if (doGPU) {
306 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
307 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
309 }
310 mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
311 }
312#endif
313 if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
314 SynchronizeStream(outputStream);
315 }
// NOTE(review): empty body as extracted (embedded line 317 missing) - the
// clearO2OutputFromGPU action itself is not visible here; confirm upstream.
316 if (GetProcessingSettings().clearO2OutputFromGPU) {
318 }
319
// Publish the host-side output pointers for downstream consumers.
320 mIOPtrs.mergedTracks = Merger.OutputTracks();
321 mIOPtrs.nMergedTracks = Merger.NOutputTracks();
322 mIOPtrs.mergedTrackHits = Merger.Clusters();
323 mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ();
324 mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
325 mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
326 mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
327 mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
328 mIOPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
329 mIOPtrs.outputClusRefsTPCO2 = Merger.OutputClusRefsTPCO2();
330 mIOPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
331 mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC();
332
// On GPU, mirror the pointers (device addresses) into the shadow ioPtrs and
// push them to constant memory so device code sees the same view.
333 if (doGPU) {
334 processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks();
335 processorsShadow()->ioPtrs.nMergedTracks = Merger.NOutputTracks();
336 processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
337 processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ();
338 processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
339 processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
340 processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
341 processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
342 processorsShadow()->ioPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
343 processorsShadow()->ioPtrs.outputClusRefsTPCO2 = MergerShadow.OutputClusRefsTPCO2();
344 processorsShadow()->ioPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
345 WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
346 }
347
348 if (GetProcessingSettings().debugLevel >= 2) {
349 GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());
350 }
351 return 0;
352}
int32_t i
#define GPUCA_MEMALIGN
const GPUTPCGMMerger::trackCluster & b1
#define GPUCA_NSECTORS
uint32_t j
Definition RawData.h:0
int32_t RunTPCTrackingMerger(bool synchronizeOutput=true)
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
std::vector< outputQueueEntry > mOutputQueue
const GPUQA * GetQA() const
GPUTrackingInOutPointers & mIOPtrs
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:103
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:95
void SynchronizeGPU()
Definition GPUChain.h:105
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:68
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
Definition GPUChain.h:104
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:122
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:106
uint32_t WarpSize() const
Definition GPUChain.h:206
uint32_t ThreadCount() const
Definition GPUChain.h:207
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:209
GPUConstantMem * processors()
Definition GPUChain.h:80
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:37
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:112
GPUParam & param()
Definition GPUChain.h:83
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:212
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:72
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:85
GPUReconstructionCPU * mRec
Definition GPUChain.h:75
GPUConstantMem * processorsShadow()
Definition GPUChain.h:81
static constexpr int32_t NSECTORS
Definition GPUChain.h:54
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:120
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:118
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:223
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
uint32_t BlockCount() const
Definition GPUChain.h:205
GPUReconstruction * rec()
Definition GPUChain.h:62
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:87
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:117
void * AllocateScratchBuffer(size_t nBytes)
Definition GPUQA.h:55
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void * AllocateVolatileDeviceMemory(size_t size)
GPUMemoryResource & Res(int16_t num)
void PushNonPersistentMemory(uint64_t tag)
void DumpMergeCE(std::ostream &out) const
void DumpMergedBetweenSectors(std::ostream &out) const
void DumpFitPrepare(std::ostream &out) const
void DumpFinal(std::ostream &out) const
void * SetPointersOutput(void *mem)
void DumpRefit(std::ostream &out) const
void DumpMergedWithinSectors(std::ostream &out) const
void DumpCollected(std::ostream &out) const
void DumpMergeRanges(std::ostream &out, int32_t withinSector, int32_t mergeMode) const
void DumpSectorTracks(std::ostream &out) const
GLdouble n
Definition glcorearb.h:1982
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint GLuint stream
Definition glcorearb.h:1806
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
GPUTrackingInOutPointers ioPtrs
const GPUTPCGMMergedTrackHitXYZ * mergedTrackHitsXYZ
const o2::tpc::ClusterNativeAccess * clustersNative
const uint32_t * mergedTrackHitAttachment
const o2::MCCompLabel * outputTracksTPCO2MC
const o2::tpc::TrackTPC * outputTracksTPCO2
const GPUTPCGMMergedTrackHit * mergedTrackHits
const GPUTPCGMMergedTrack * mergedTracks
GPUOutputControl tpcTracksO2Labels
GPUOutputControl tpcTracksO2ClusRefs
size_t getIndex(const GPUOutputControl &v)
GPUOutputControl sharedClusterMap
const o2::dataformats::ConstMCTruthContainerView< o2::MCCompLabel > * clustersMCTruth