Project
Loading...
Searching...
No Matches
GPUChainTrackingMerger.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
17#include "GPULogging.h"
18#include "GPUDefParametersRuntime.h"
19#include "GPUO2DataTypes.h"
20#include "GPUQA.h"
21#include "GPUTPCGMMerger.h"
22#include "GPUConstantMem.h"
23#include "GPUTPCGMMergerGPU.h"
24#include "GPUTPCGMO2Output.h"
26#include "utils/strtag.h"
27#include <fstream>
28
29using namespace o2::gpu;
30
// Compare track segments meeting at a sector border and record merge candidates.
//
// mergeMode selects which borders are paired (mergeWithinSector,
// mergeBetweenSector, mergeAcrossCE, optionally at mid-row / at cluster);
// deviceType selects CPU or GPU execution of the kernels.
void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
{
  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
  // NOTE(review): "Merger" is used throughout but its declaration is not visible
  // in this excerpt (presumably a reference to processors()->tpcMerger) — confirm.
  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    // Deterministic mode: sort the border tracks first so the merge outcome does
    // not depend on kernel scheduling order.
    uint32_t nBorderTracks = (mergeMode & GPUTPCGMMerger::mergeModes::mergeWithinSector) ? NSECTORS : (2 * NSECTORS);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
  }
  // Across the central electrode only half of the sector pairs are processed.
  uint32_t n = (mergeMode & GPUTPCGMMerger::mergeModes::mergeAcrossCE) ? NSECTORS / 2 : NSECTORS;
  if (GetProcessingSettings().alternateBorderSort == -1 ? mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) {
    // Alternate border sort: variant 0 (range preparation) runs per border on a
    // round-robin stream; per-border range sorting (variant 3) and the actual
    // merge (variant 2) follow below.
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
    for (uint32_t i = 0; i < n; i++) {
      int32_t stream = i % mRec->NStreams();
      // Streams other than 0 wait on mEvents->single the first time they are used.
      // NOTE(review): the recording of mEvents->single is not visible in this
      // excerpt (a line appears to be elided above) — confirm upstream.
      runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, mergeMode);
    }
    for (uint32_t i = 0; i < n; i++) {
      int32_t stream = i % mRec->NStreams();
      int32_t n1, n2;
      GPUTPCGMBorderTrack *b1, *b2;
      int32_t jSector;
      // Look up the two border-track lists and the opposite sector for border i.
      Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, mergeMode);
      gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
      // Ranges of the opposite sector are stored behind that sector's own tracks.
      gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
      // Variant 3 runs once per range list (last argument 0/1 distinguishes the
      // two lists); variant 2 then performs the merge for border i.
      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
      runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
      runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, mergeMode);
    }
    int32_t ne = std::min<int32_t>(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1
    for (int32_t j = 0; j < ne; j++) {
      RecordMarker(&mEvents->sector[j], j + 1);
    }
    // NOTE(review): the code making stream 0 wait on the markers recorded above
    // is not visible in this excerpt (lines appear to be elided here).
  } else if (doGPU && !GetProcessingSettings().rtc.enable) {
    // NOTE(review): the message says RTC requires alternateBorderSort, yet this
    // branch fires when RTC is *disabled* — the '!' looks inverted relative to
    // the message; verify against the upstream source before relying on it.
    GPUFatal("GPU RTC requires alternateBorderSort!");
  } else {
    // Serial fallback entirely on stream 0: prepare all borders, sort all range
    // lists in one kernel (two lists per border), then merge each border.
    for (uint32_t i = 0; i < n; i++) {
      runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, mergeMode);
    }
    runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, mergeMode);
    for (uint32_t i = 0; i < n; i++) {
      runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, mergeMode);
    }
  }
}
81
82void GPUChainTracking::RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType)
83{
84 runKernel<GPUTPCGMMergerResolve, 0>(GetGridAuto(0, deviceType));
85 runKernel<GPUTPCGMMergerResolve, 1>(GetGridAuto(0, deviceType));
86 runKernel<GPUTPCGMMergerResolve, 2>(GetGridAuto(0, deviceType));
87 runKernel<GPUTPCGMMergerResolve, 3>(GetGridAuto(0, deviceType));
88 runKernel<GPUTPCGMMergerResolve, 4>(GetGridAuto(0, deviceType), useOrigTrackParam, mergeAll);
89}
90
// Run the full TPC track merger: unpack and refit the per-sector tracks, merge
// them within sectors, between sectors and (optionally) across the central
// electrode, fit the merged tracks, and produce the optional O2 output format.
// Returns 0 on success, 1 if the sector input consistency check fails.
//
// NOTE(review): this excerpt has elided lines — "deviceType" and (later)
// "buffer" are used without visible declarations; several conditional bodies
// are empty where the original had statements. Notes below mark each gap.
int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
{
  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
  uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
  // NOTE(review): "Merger" is used below without a visible declaration —
  // presumably a reference to processors()->tpcMerger; confirm upstream.
  GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
  // NOTE(review): identical initializer to MergerShadow above — in the full
  // source the two aliases likely differ (elided line?); confirm.
  GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
  const int32_t outputStream = OutputStream();
  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("Running TPC Merger");
  }
  const auto& threadContext = GetThreadContext();

  SynchronizeGPU(); // Need to know the full number of sector tracks
  SetupGPUProcessor(&Merger, true);

  // Abort if the sector tracker output is inconsistent.
  if (Merger.CheckSectors()) {
    return 1;
  }

  // Reset the merger bookkeeping and publish the processor to the device.
  memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
  WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
  if (doGPU) {
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }

  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::clearIds>(GetGridAuto(0, deviceType), 1);
  }
  // Unpack and refit each sector's tracks into the merger's track buffer.
  for (uint32_t i = 0; i < NSECTORS; i++) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, i);
    runKernel<GPUTPCGMMergerUnpackResetIds>(GetGridAuto(0, deviceType), i);
    runKernel<GPUTPCGMMergerSectorRefit>(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0?
  }
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUTPCGeometry::NSECTORS, -WarpSize(), 0, deviceType}}, 0);
  }
  // Unpack the tracks that were extrapolated into neighboring sectors.
  for (uint32_t i = 0; i < NSECTORS; i++) {
    runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i);
    runKernel<GPUTPCGMMergerUnpackGlobal>(GetGridAuto(0, deviceType), i);
  }
  runKernel<GPUTPCGMMergerUnpackSaveNumber>({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS);
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUTPCGeometry::NSECTORS, -WarpSize(), 0, deviceType}}, 1);
  }

  // Merge within Sectors
  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
  RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeWithinSector, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);

  // Merge between sectors - transport to the middle of the sector and rotate vertically to the border on the left / right
  runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
  RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeAtMidRow, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  // Merge between sectors - transport to the left / right edge of the sector and rotate horizontally
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
  RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector, deviceType);
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);
  // Merge between sectors - use original track param
  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
  runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
  // NOTE(review): the MergeBorderTracks call for this pass is not visible in
  // this excerpt (a line appears to be elided here).
  RunTPCTrackingMerger_Resolve(0, 1, deviceType);

  runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));

  runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
  if (GetProcessingSettings().mergerSanityCheck) {
    Merger.CheckMergeGraph();
  }
  // Collect the linked segments into merged tracks.
  runKernel<GPUTPCGMMergerCollect>(GetGridAuto(0, deviceType));
  if (GetProcessingSettings().deterministicGPUReconstruction) {
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
    runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
  }

  if (param().rec.tpc.mergeCE) {
    runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
    // Merge across CE - compare at row 63
    RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeAcrossCE, deviceType);
    // Merge across CE - compare at row of cluster
    RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeAcrossCE | GPUTPCGMMerger::mergeModes::mergeAtCluster, deviceType);
    runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
  }
  // Start the merger-memory transfer to the host; host-side consumers below
  // wait on mEvents->single via CondWaitEvent when waitForTransfer is set.
  int32_t waitForTransfer = 0;
  if (doGPU) {
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
    waitForTransfer = 1;
  }

  const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks;
  if (mergerSortTracks) {
    runKernel<GPUTPCGMMergerSortTracksPrepare>(GetGridAuto(0, deviceType));
    CondWaitEvent(waitForTransfer, &mEvents->single);
    runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
  }
  if (GetProcessingSettings().mergerSanityCheck) {
    Merger.CheckCollectedTracks();
  }

  uint32_t maxId = Merger.NMaxClusters();
  // NOTE(review): comparing maxId against the very value it was initialized
  // from can never be true — in the full source maxId is presumably taken from
  // a different counter (a line appears to be elided here); confirm upstream.
  if (maxId > Merger.NMaxClusters()) {
    throw std::runtime_error("mNMaxClusters too small");
  }
  // Zero the per-cluster bookkeeping before the fit.
  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
  runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
  runKernel<GPUTPCGMMergerPrepareForFit, 0>(GetGridAuto(0, deviceType));
  CondWaitEvent(waitForTransfer, &mEvents->single);
  runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerPrepareForFit, 1>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerPrepareForFit, 2>(GetGridAuto(0, deviceType));

  if (doGPU) {
    CondWaitEvent(waitForTransfer, &mEvents->single);
    if (waitForTransfer) {
      // NOTE(review): body not visible in this excerpt (probably releases
      // mEvents->single on an elided line).
    }
  } else if (doGPU) { // NOTE(review): unreachable as written — same condition as the branch above; the condition appears truncated in this excerpt
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }

  if (GetProcessingSettings().delayedOutput) {
    // Flush output copies queued by earlier steps now that the output stream is free.
    for (uint32_t i = 0; i < mOutputQueue.size(); i++) {
      GPUMemCpy(mOutputQueue[i].step, mOutputQueue[i].dst, mOutputQueue[i].src, mOutputQueue[i].size, outputStream, false);
    }
    mOutputQueue.clear();
  }

  // Fit the merged tracks; a second pass is enabled by rec.tpc.retryRefit.
  runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
  if (param().rec.tpc.retryRefit == 1) {
    runKernel<GPUTPCGMMergerTrackFit>(GetGridAuto(0), -1);
  }
  runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));

  runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
  runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
  if (param().rec.tpc.mergeLoopersAfterburner) {
    runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
    if (doGPU) {
      // Need nLooperMatchCandidates on the host for the grid size of stage 2.
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
      SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
    }
    runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
    runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
  }

  if (doGPU) {
    auto* waitEvent = &mEvents->single;
    if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
      if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
        // Fractional QA only: stage the output into a QA scratch buffer.
        size_t size = mRec->Res(Merger.MemoryResOutput()).Size() + constants::GPU_MEMALIGN;
        // NOTE(review): "buffer" has no visible declaration — presumably
        // obtained from GetQA()->AllocateScratchBuffer(size) on an elided line.
        void* bufferEnd = Merger.SetPointersOutput(buffer);
        if ((size_t)((char*)bufferEnd - (char*)buffer) > size) {
          throw std::runtime_error("QA Scratch buffer exceeded");
        }
      }
      GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
      waitEvent = nullptr; // only the first copy must wait for the merger kernels
      if (param().dodEdxEnabled) {
        GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
      }
      GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
      GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
    }
    if (GetProcessingSettings().outputSharedClusterMap) {
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
      waitEvent = nullptr;
    }
  } else {
    TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
  }
  if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
    TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
  }

  // Release the scratch memory of the merge step.
  mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERGE"));

#ifndef GPUCA_RUN2
  if (GetProcessingSettings().createO2Output) {
      mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early
    } // NOTE(review): closes a conditional whose opening line is not visible in this excerpt

    AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
    if (!GetProcessingSettings().tpcWriteClustersAfterRejection) {
      runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
    }
    TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));

    if (GetProcessingSettings().clearO2OutputFromGPU) {
      // NOTE(review): body not visible in this excerpt (line elided).
    }
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::output>(GetGridAuto(0, deviceType));

    // MC label output always runs on the CPU.
    TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
    runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
    } else if (doGPU) { // NOTE(review): the matching opening conditional is not visible in this excerpt
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
      TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
    }
    mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
  }
#endif
  if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
    SynchronizeStream(outputStream);
  }
  if (GetProcessingSettings().clearO2OutputFromGPU) {
    // NOTE(review): body not visible in this excerpt (line elided).
  }

  // Publish the merger results to the host-side IO pointers.
  mIOPtrs.mergedTracks = Merger.MergedTracks();
  mIOPtrs.nMergedTracks = Merger.NMergedTracks();
  mIOPtrs.mergedTrackHits = Merger.Clusters();
  mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters();
  mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
  mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
  mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
  mIOPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
  mIOPtrs.outputClusRefsTPCO2 = Merger.OutputClusRefsTPCO2();
  mIOPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
  mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC();

  if (doGPU) {
    // Mirror the IO pointers with device addresses and update constant memory.
    processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks();
    processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks();
    processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
    processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters();
    processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
    processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
    processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
    processorsShadow()->ioPtrs.nOutputTracksTPCO2 = Merger.NOutputTracksTPCO2();
    processorsShadow()->ioPtrs.outputClusRefsTPCO2 = MergerShadow.OutputClusRefsTPCO2();
    processorsShadow()->ioPtrs.nOutputClusRefsTPCO2 = Merger.NOutputClusRefsTPCO2();
    WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
  }

  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters());
  }
  return 0;
}
int32_t i
uint32_t j
Definition RawData.h:0
int32_t RunTPCTrackingMerger(bool synchronizeOutput=true)
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
std::vector< outputQueueEntry > mOutputQueue
const GPUQA * GetQA() const
GPUTrackingInOutPointers & mIOPtrs
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:108
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:129
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:239
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:100
void SynchronizeGPU()
Definition GPUChain.h:110
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:72
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:128
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:111
uint32_t WarpSize() const
Definition GPUChain.h:225
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, gpudatatypes::RecoStep st=gpudatatypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
uint32_t ThreadCount() const
Definition GPUChain.h:226
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, gpudatatypes::RecoStep st=gpudatatypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:228
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:109
GPUConstantMem * processors()
Definition GPUChain.h:84
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:41
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:117
GPUParam & param()
Definition GPUChain.h:87
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:231
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:76
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:89
GPUReconstructionCPU * mRec
Definition GPUChain.h:79
GPUConstantMem * processorsShadow()
Definition GPUChain.h:85
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, gpudatatypes::RecoStep st=gpudatatypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
static constexpr int32_t NSECTORS
Definition GPUChain.h:58
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:125
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:123
uint32_t BlockCount() const
Definition GPUChain.h:224
GPUReconstruction * rec()
Definition GPUChain.h:66
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:92
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
void * AllocateScratchBuffer(size_t nBytes)
Definition GPUQA.h:56
const GPUDefParameters & getGPUParameters(bool doGPU) const override
GPUMemoryResource & Res(int16_t num)
void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor *proc=nullptr)
void PushNonPersistentMemory(uint64_t tag)
void DumpMergeCE(std::ostream &out) const
void DumpMergedBetweenSectors(std::ostream &out) const
void DumpFitPrepare(std::ostream &out) const
void DumpFinal(std::ostream &out) const
void * SetPointersOutput(void *mem)
void DumpRefit(std::ostream &out) const
void DumpMergedWithinSectors(std::ostream &out) const
void DumpCollected(std::ostream &out) const
void DumpMergeRanges(std::ostream &out, uint8_t mergeMode) const
void DumpLoopers(std::ostream &out) const
void DumpSectorTracks(std::ostream &out) const
static constexpr uint32_t NSECTORS
GLdouble n
Definition glcorearb.h:1982
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint GLuint stream
Definition glcorearb.h:1806
constexpr T qStr2Tag(const char(&str)[N])
Definition strtag.h:24
GPUTPCTracker tpcTrackers[GPUTPCGeometry::NSECTORS]
GPUTrackingInOutPointers ioPtrs
const o2::tpc::ClusterNativeAccess * clustersNative
const o2::MCCompLabel * outputTracksTPCO2MC
const o2::tpc::TrackTPC * outputTracksTPCO2
const GPUTPCGMMergedTrackHit * mergedTrackHits
const GPUTPCGMMergedTrack * mergedTracks
GPUOutputControl tpcTracksO2Labels
GPUOutputControl tpcTracksO2ClusRefs
size_t getIndex(const GPUOutputControl &v)
GPUOutputControl sharedClusterMap
const o2::dataformats::ConstMCTruthContainerView< o2::MCCompLabel > * clustersMCTruth