// GPUChainTrackingMerger.cxx — source listing captured from a Doxygen page.
// The following viewer chrome accompanied the capture and is not source code:
//   "Project" / "Loading..." / "Searching..." / "No Matches" /
//   "GPUChainTrackingMerger.cxx" / "Go to the documentation of this file."
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
16#include "GPULogging.h"
17#include "GPUO2DataTypes.h"
18#include "GPUQA.h"
19#include "utils/strtag.h"
20#include <fstream>
21
22using namespace o2::gpu;
23
// NOTE(review): this block was captured from a Doxygen listing; the number at the
// start of each line is the listing's own line number, and gaps in that numbering
// (lines 26, 36, 41-42, 46, 58) mark code the viewer elided. Annotations below are
// comments only; no code bytes were changed.
//
// Sorts and merges track border segments. withinSector selects the pairing:
// 1 = borders within a sector, 0 = between neighboring sectors, -1 = across the
// two detector halves (only NSECTORS / 2 pairs, see `n` below). mergeMode is
// forwarded to the GPUTPCGMMergerMergeBorders kernel variants; its semantics live
// in the merger implementation (not visible here). deviceType selects CPU/GPU
// execution for every kernel launched from this function.
24void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
25{
27  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging; // whether this step actually runs on the device
28  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger; // device-side mirror (or host object on CPU)
29  if (GetProcessingSettings().deterministicGPUReconstruction) {
    // Deterministic mode: pre-sort border tracks so results are bitwise reproducible.
30    uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
31    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
32  }
  // Number of sector pairs to process: half the sectors when merging across the halves.
33  uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS;
34  if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) {
    // Multi-stream path: round-robin the per-sector work over the available streams.
35    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
    // NOTE(review): listing line 36 elided here (presumably waits on / records mEvents->init — confirm in full source).
37    for (uint32_t i = 0; i < n; i++) {
38      int32_t stream = i % mRec->NStreams();
39      runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
40    }
    // NOTE(review): listing lines 41-42 elided here.
43    for (uint32_t i = 0; i < n; i++) {
44      int32_t stream = i % mRec->NStreams();
45      int32_t n1, n2; // element counts of the two border-range arrays, filled by MergeBorderTracksSetup
47      int32_t jSector; // partner sector of sector i for this pairing
      // NOTE(review): listing line 46 (declarations of b1 / b2, per the page's
      // cross-reference "const GPUTPCGMMerger::trackCluster& b1") is elided here.
48      Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
49      gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
50      gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
      // Variant 3 sorts the two border-range arrays (flag 0 / 1 distinguishes them).
51      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
52      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
53      deviceEvent* e = nullptr;
54      int32_t ne = 0;
55      if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel
56        ne = std::min<int32_t>(n, mRec->NStreams());
57        for (int32_t j = 1; j < ne; j++) {
          // NOTE(review): listing line 58 elided — presumably records a per-stream
          // event into mEvents->sector[j] for the wait list below; confirm.
59        }
60        e = &mEvents->sector[1];
61        ne--;
62        stream = 0;
63      }
64      runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSector, mergeMode);
65    }
66  } else {
    // Simple single-stream path: variant 0 (prepare), 1 (sort all 2*n ranges), 2 (merge).
67    for (uint32_t i = 0; i < n; i++) {
68      runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
69    }
70    runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode);
71    for (uint32_t i = 0; i < n; i++) {
72      runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
73    }
74  }
  // Debug mask 2048 selects the merger dump stage (consistent with the other dumps in this file).
75  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode);
77}
78
// Runs the five GPUTPCGMMergerResolve kernel stages (0..4) in fixed order to
// resolve the merge candidates produced by the border-merge step. Only the final
// stage receives useOrigTrackParam / mergeAll; the semantics of those flags are
// defined by the kernel (not visible in this listing). Stage order is part of the
// algorithm and must not be changed.
79void GPUChainTracking::RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType)
80{
81  runKernel<GPUTPCGMMergerResolve, 0>(GetGridAuto(0, deviceType));
82  runKernel<GPUTPCGMMergerResolve, 1>(GetGridAuto(0, deviceType));
83  runKernel<GPUTPCGMMergerResolve, 2>(GetGridAuto(0, deviceType));
84  runKernel<GPUTPCGMMergerResolve, 3>(GetGridAuto(0, deviceType));
85  runKernel<GPUTPCGMMergerResolve, 4>(GetGridAuto(0, deviceType), useOrigTrackParam, mergeAll);
86}
87
// Runs the full TPC track merger chain: unpacks per-sector tracks, merges within
// sectors, between neighboring sectors, and optionally across the central
// electrode (CE); resolves shared clusters, sorts/prepares clusters, refits the
// merged tracks, runs the finalize / looper-afterburner stages, copies the
// outputs to the host, and publishes them into mIOPtrs (and the device-side
// ioPtrs for GPU runs). Returns 0 on success, 1 if Merger.CheckSectors() rejects
// the sector input. synchronizeOutput forces a final wait on the output stream.
//
// NOTE(review): captured from a Doxygen listing. Gaps in the leading per-line
// numbers (90/92/94, 105-106, 190, 208, 245, 250, 271, 279, 284-286, 289,
// 294-296, 301-302, 306-307, 311, 314, 323) are lines the viewer elided; this is
// why e.g. `deviceType` and `buffer` are used below without a visible declaration
// and why several if-bodies appear empty. Annotations are comments only.
88int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
89{
91  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
93  uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
95  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
  // NOTE(review): MergerShadowAll is initialized identically to MergerShadow here;
  // presumably the two differ via an elided conditional in the full source — confirm.
96  GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
97  const int32_t outputStream = OutputStream();
98  if (GetProcessingSettings().debugLevel >= 2) {
99    GPUInfo("Running TPC Merger");
100  }
101  const auto& threadContext = GetThreadContext();
102
103  SynchronizeGPU(); // Need to know the full number of sector tracks
104  SetupGPUProcessor(&Merger, true);
107
  // Bail out early if the sector tracker output is inconsistent.
108  if (Merger.CheckSectors()) {
109    return 1;
110  }
111
  // Reset merger state and mirror the merger description into device constant memory.
112  memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
113  WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
114  if (doGPU) {
115    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
116  }
117
  // --- Phase 1: unpack sector tracks (deterministic-sort passes interleaved when enabled) ---
118  if (GetProcessingSettings().deterministicGPUReconstruction) {
119    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(GetGridAuto(0, deviceType), 1);
120  }
121  for (uint32_t i = 0; i < NSECTORS; i++) {
122    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, i);
123    runKernel<GPUTPCGMMergerUnpackResetIds>(GetGridAuto(0, deviceType), i);
124    runKernel<GPUTPCGMMergerSectorRefit>(GetGridAuto(0, deviceType), i);
125  }
126  if (GetProcessingSettings().deterministicGPUReconstruction) {
127    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS);
128    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0);
129  }
130  for (uint32_t i = 0; i < NSECTORS; i++) {
131    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i);
132    runKernel<GPUTPCGMMergerUnpackGlobal>(GetGridAuto(0, deviceType), i);
133  }
134  runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS);
135  if (GetProcessingSettings().deterministicGPUReconstruction) {
136    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1);
137  }
138  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile);
139
  // --- Phase 2: merge within sectors ---
140  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
141  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
142  runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
143  RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
144  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
145  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);
146
  // --- Phase 3: merge between neighboring sectors (three prepare/merge/resolve passes
  // with different GPUTPCGMMergerMergeSectorsPrepare arguments) ---
147  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
148  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
149  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
150  RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
151  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
152  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
153  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
154  RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
155  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
156  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
157  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
158  RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
159  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
160  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);
161
162  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
163
  // --- Phase 4: link extrapolated tracks and collect the merged track list ---
164  runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
165  runKernel<GPUTPCGMMergerCollect>(GetGridAuto(0, deviceType));
166  if (GetProcessingSettings().deterministicGPUReconstruction) {
167    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
168    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::extrapolatedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
169  }
170  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);
171
  // --- Phase 5 (optional): merge tracks across the central electrode ---
172  if (param().rec.tpc.mergeCE) {
173    runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
174    RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
175    RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
176    runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
177    DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
178  }
  // Bring the merger bookkeeping back to the host; later stages gate on this transfer.
179  int32_t waitForTransfer = 0;
180  if (doGPU) {
181    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
182    waitForTransfer = 1;
183  }
184
  // --- Phase 6: optional track sort, then cluster preparation ---
185  if (GetProcessingSettings().mergerSortTracks) {
186    runKernel<GPUTPCGMMergerSortTracksPrepare>(GetGridAuto(0, deviceType));
187    CondWaitEvent(waitForTransfer, &mEvents->single);
188    runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
189  }
190
191  uint32_t maxId = Merger.NMaxClusters();
  // NOTE(review): as listed, this check is vacuous (maxId was just set to
  // NMaxClusters()); in the full source maxId is presumably assigned from a
  // different quantity on a line this listing elided — confirm before relying on it.
192  if (maxId > Merger.NMaxClusters()) {
193    throw std::runtime_error("mNMaxClusters too small");
194  }
195  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
196  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
197  runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
198  CondWaitEvent(waitForTransfer, &mEvents->single);
199  runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
200  runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
201  runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));
202
203  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile);
204
  // NOTE(review): the `else if (doGPU)` below is unreachable as listed (same
  // condition as the if); the first condition likely differs in the full source
  // (listing line 208, probably a ReleaseEvent call, is elided from the body).
205  if (doGPU) {
206    CondWaitEvent(waitForTransfer, &mEvents->single);
207    if (waitForTransfer) {
209    }
210  } else if (doGPU) {
211    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
212  }
213
  // Flush any output copies that were queued while output was delayed.
214  if (GetProcessingSettings().delayedOutput) {
215    for (uint32_t i = 0; i < mOutputQueue.size(); i++) {
216      GPUMemCpy(mOutputQueue[i].step, mOutputQueue[i].dst, mOutputQueue[i].src, mOutputQueue[i].size, outputStream, false);
217    }
218    mOutputQueue.clear();
219  }
220
  // --- Phase 7: track fit / refit / finalize ---
221  runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), GetProcessingSettings().mergerSortTracks ? 1 : 0);
222  if (param().rec.tpc.retryRefit == 1) {
223    runKernel<GPUTPCGMMergerTrackFit>(GetGridAuto(0), -1);
224  }
225  if (param().rec.tpc.looperInterpolationInExtraPass) {
226    runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
227  }
228
229  DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);
230  runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
231  runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
232  runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
  // Optional looper-merging afterburner (3 stages; stage 1 needs host-side counters).
233  if (param().rec.tpc.mergeLoopersAfterburner) {
234    runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
235    if (doGPU) {
236      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
237      SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
238    }
239    runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
240    runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
241  }
242  DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);
243
  // --- Phase 8: copy merger outputs device -> host (GPU) or host -> device (CPU run with GPU elsewhere) ---
244  if (doGPU) {
246    auto* waitEvent = &mEvents->single;
247    if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
248      if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
        // QA-only case: outputs go to a QA scratch buffer instead of registered output memory.
249        size_t size = mRec->Res(Merger.MemoryResOutput()).Size() + GPUCA_MEMALIGN;
        // NOTE(review): listing line 250 (allocation of `buffer`, presumably via
        // GetQA()->AllocateScratchBuffer(size)) is elided here.
251        void* bufferEnd = Merger.SetPointersOutput(buffer);
252        if ((size_t)((char*)bufferEnd - (char*)buffer) > size) {
253          throw std::runtime_error("QA Scratch buffer exceeded");
254        }
255      }
256      GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent);
257      waitEvent = nullptr; // only the first copy on the output stream needs to wait
258      if (param().dodEdxDownscaled) {
259        GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0);
260      }
261      GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
262      if (param().par.earlyTpcTransform) {
263        GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
264      }
265      GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
266    }
267    if (GetProcessingSettings().outputSharedClusterMap) {
268      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
269      waitEvent = nullptr;
270    }
272  } else {
273    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
274  }
275  if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
276    TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
277  }
278
  // Release the merger's non-persistent scratch memory.
280  mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERGE"));
281
  // --- Phase 9 (O2 geometry only): produce O2-format output tracks ---
282#ifdef GPUCA_TPC_GEOMETRY_O2
283  if (GetProcessingSettings().createO2Output) {
285    mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early
287  }
288
290  AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
291  WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
292  runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
293  TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
294  runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));
297
298  if (GetProcessingSettings().clearO2OutputFromGPU) {
299    mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
300  }
303  WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
304  runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(GetGridAuto(0, deviceType));
305
  // MC labels are produced on the CPU after the full merger state is on the host.
308  TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
309  runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
310  } else if (doGPU) {
312    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
313    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
315  }
316  mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
317  }
318#endif
319  if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
320    SynchronizeStream(outputStream);
321  }
  // NOTE(review): body of this branch (listing line 323, presumably the actual
  // clearing of the O2 output from the device) is elided from this listing.
322  if (GetProcessingSettings().clearO2OutputFromGPU) {
324  }
325
  // --- Phase 10: publish output pointers for consumers ---
326  mIOPtrs.mergedTracks = Merger.OutputTracks();
327  mIOPtrs.nMergedTracks = Merger.NOutputTracks();
328  mIOPtrs.mergedTrackHits = Merger.Clusters();
329  mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ();
330  mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
331  mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
332  mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
333  mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
334  mIOPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
335  mIOPtrs.outputClusRefsTPCO2 = Merger.OutputClusRefsTPCO2();
336  mIOPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
337  mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC();
338
  // Mirror the pointers (device addresses) into the device-side ioPtrs as well.
339  if (doGPU) {
340    processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks();
341    processorsShadow()->ioPtrs.nMergedTracks = Merger.NOutputTracks();
342    processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
343    processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ();
344    processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
345    processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
346    processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
347    processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
348    processorsShadow()->ioPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
349    processorsShadow()->ioPtrs.outputClusRefsTPCO2 = MergerShadow.OutputClusRefsTPCO2();
350    processorsShadow()->ioPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
351    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
352  }
353
354  if (GetProcessingSettings().debugLevel >= 2) {
355    GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());
356  }
357  return 0;
358}
int32_t i
#define GPUCA_MEMALIGN
const GPUTPCGMMerger::trackCluster & b1
#define GPUCA_NSECTORS
uint32_t j
Definition RawData.h:0
int32_t RunTPCTrackingMerger(bool synchronizeOutput=true)
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
std::vector< outputQueueEntry > mOutputQueue
const GPUQA * GetQA() const
GPUTrackingInOutPointers & mIOPtrs
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:103
gpu_reconstruction_kernels::deviceEvent deviceEvent
Definition GPUChain.h:36
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:32
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:95
void SynchronizeGPU()
Definition GPUChain.h:105
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:42
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:68
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
Definition GPUChain.h:104
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:122
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:106
uint32_t WarpSize() const
Definition GPUChain.h:206
uint32_t ThreadCount() const
Definition GPUChain.h:207
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:209
GPUConstantMem * processors()
Definition GPUChain.h:80
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:37
GPUParam & param()
Definition GPUChain.h:83
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:212
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:72
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:85
GPUReconstructionCPU * mRec
Definition GPUChain.h:75
GPUConstantMem * processorsShadow()
Definition GPUChain.h:81
static constexpr int32_t NSECTORS
Definition GPUChain.h:54
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:120
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:21
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:118
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:223
uint32_t BlockCount() const
Definition GPUChain.h:205
GPUReconstruction * rec()
Definition GPUChain.h:62
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:87
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:117
void * AllocateScratchBuffer(size_t nBytes)
Definition GPUQA.h:55
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void * AllocateVolatileDeviceMemory(size_t size)
GPUMemoryResource & Res(int16_t num)
void PushNonPersistentMemory(uint64_t tag)
void DumpMergeCE(std::ostream &out) const
void DumpMergedBetweenSectors(std::ostream &out) const
void DumpFitPrepare(std::ostream &out) const
void DumpFinal(std::ostream &out) const
void * SetPointersOutput(void *mem)
void DumpRefit(std::ostream &out) const
void DumpMergedWithinSectors(std::ostream &out) const
void DumpCollected(std::ostream &out) const
void DumpMergeRanges(std::ostream &out, int32_t withinSector, int32_t mergeMode) const
void DumpSectorTracks(std::ostream &out) const
GLdouble n
Definition glcorearb.h:1982
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint GLuint stream
Definition glcorearb.h:1806
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
GPUTrackingInOutPointers ioPtrs
const GPUTPCGMMergedTrackHitXYZ * mergedTrackHitsXYZ
const o2::tpc::ClusterNativeAccess * clustersNative
const uint32_t * mergedTrackHitAttachment
const o2::MCCompLabel * outputTracksTPCO2MC
const o2::tpc::TrackTPC * outputTracksTPCO2
const GPUTPCGMMergedTrackHit * mergedTrackHits
const GPUTPCGMMergedTrack * mergedTracks
GPUOutputControl tpcTracksO2Labels
GPUOutputControl tpcTracksO2ClusRefs
size_t getIndex(const GPUOutputControl &v)
GPUOutputControl sharedClusterMap
const o2::dataformats::ConstMCTruthContainerView< o2::MCCompLabel > * clustersMCTruth