GPUChainTrackingSectorTracker.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#include "GPUChainTracking.h"
#include "GPULogging.h"
#include "GPUO2DataTypes.h"
#include "GPUTPCClusterData.h"
#include "utils/strtag.h"
#include <fstream>

using namespace o2::gpu;

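// Per-sector helper: launches the extrapolation-tracking kernel on the sector's
// round-robin stream (iSector % NStreams) and transfers the tracker's common memory
// block back to the host, synchronizing the stream only if requested.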
int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput)
{
  runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}});
  TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams());
  if (synchronizeOutput) {
    SynchronizeStream(iSector % mRec->NStreams());
  }
  return (0);
}

int32_t GPUChainTracking::RunTPCTrackingSectors()
{
  if (mRec->GPUStuck()) {
    GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!");
    return (1);
  }

  const auto& threadContext = GetThreadContext();

  int32_t retVal = RunTPCTrackingSectors_internal();
  if (retVal) {
    SynchronizeGPU();
  }
  return (retVal != 0);
}

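// Internal worker for the sector tracking step: attaches the cluster data, performs
// the staged memory setup, optionally builds the cluster occupancy map, and then runs
// the per-sector kernel pipeline (tracking-data creation, neighbours finding/cleaning,
// start-hit finding, tracklet construction and selection) before shipping the sector
// tracks back to the host.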
int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
{
  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("Running TPC Sector Tracker");
  }
  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSectorTracking;
  if (!param().par.earlyTpcTransform) {
    for (uint32_t i = 0; i < NSECTORS; i++) {
      processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]);
      if (doGPU) {
        processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor
      }
    }
    mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal;
  } else {
    int32_t offset = 0;
    for (uint32_t i = 0; i < NSECTORS; i++) {
      processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset);
      if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) {
        processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset());
      }
      offset += mIOPtrs.nClusterData[i];
    }
    mRec->MemoryScalers()->nTPCHits = offset;
  }
  GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets);

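// Staged memory setup: SetMaxData() fixes the per-sector input sizes first, then
// ComputeReuseMax() resolves the maximum over all sectors for shared buffers, so a
// single allocation can be reused by every sector.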
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    processors()->tpcTrackers[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes
  }
  mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    SetupGPUProcessor(&processors()->tpcTrackers[iSector], false); // Prepare custom allocation for 1st stack level
    mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSector].MemoryResSectorScratch());
  }
  mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK"));
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    SetupGPUProcessor(&processors()->tpcTrackers[iSector], true); // Now we allocate
    mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSector]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. Should actually be cleaned up at the source.
  }

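// In the GPU case, only the last stream starts out initialized: the constant-memory
// upload below is recorded there as mEvents->init, and the first kernel submitted to
// every other stream must wait for that event before it may run.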
  bool streamInit[GPUCA_MAX_STREAMS] = {false};
  if (doGPU) {
    for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
      processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase();
      // Initialize Startup Constants
      processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties<GPUTPCTrackletConstructor, GPUTPCTrackletConstructor::allSectors>().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties<GPUTPCTrackletConstructor, GPUTPCTrackletConstructor::allSectors>().nThreads;
      processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase());
    }

    if (PrepareTextures()) {
      return (2);
    }

    // Copy Tracker Object to GPU Memory
    if (GetProcessingSettings().debugLevel >= 3) {
      GPUInfo("Copying Tracker objects to GPU");
    }
    if (PrepareProfile()) {
      return 2;
    }

    WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);

    for (int32_t i = 0; i < mRec->NStreams() - 1; i++) {
      streamInit[i] = false;
    }
    streamInit[mRec->NStreams() - 1] = true;
  }
  if (GPUDebug("Initialization (1)", 0)) {
    return (2);
  }

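// The occupancy map buffer mixes metadata and payload: word 0 receives the total
// occupancy (TPC hits normalized to the number of HBFs per TF), word 1 packs the
// occupancyMapTimeBins and occupancyMapTimeBinsAverage settings into one 32-bit word,
// and the folded map bins proper start at offset 2.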
  int32_t streamOccMap = mRec->NStreams() - 1;
  if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
    AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]);
  }
  if (param().rec.tpc.occupancyMapTimeBins) {
    if (doGPU) {
      ReleaseEvent(mEvents->init);
    }
    uint32_t* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap;
    auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU);
    runKernel<GPUMemClean16>(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
    runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp);
    runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2);
    mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage;
    if (doGPU) {
      GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init);
    } else {
      TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init);
    }
  }
  if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
    uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
    occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128));
    mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap);
  }

  int32_t streamMap[NSECTORS];

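// One parallel pass over all sectors: each sector runs the full per-sector chain on
// its round-robin stream; if the corresponding pipeline options are set, tracklet
// construction and selection are already submitted inside this loop.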
  bool error = false;
  mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
    GPUTPCTracker& trk = processors()->tpcTrackers[iSector];
    GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk;
    int32_t useStream = (iSector % mRec->NStreams());

    if (GetProcessingSettings().debugLevel >= 3) {
      GPUInfo("Creating Sector Data (Sector %d)", iSector);
    }
    if (doGPU) {
      TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream);
      runKernel<GPUTPCCreateTrackingData>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}});
      streamInit[useStream] = true;
    } else {
      if (ReadEvent(iSector, 0)) {
        GPUError("Error reading event");
        error = 1;
        return;
      }
    }
    if (GetProcessingSettings().deterministicGPUReconstruction) {
      runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}});
    }
    if (!doGPU && trk.CheckEmptySector() && GetProcessingSettings().debugLevel == 0) {
      return;
    }

    if (GetProcessingSettings().debugLevel >= 6) {
      *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl;
      if (GetProcessingSettings().debugMask & 1) {
        if (doGPU) {
          TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true);
        }
        trk.DumpTrackingData(*mDebugFile);
      }
    }

    // Initialize temporary memory where needed
    if (GetProcessingSettings().debugLevel >= 3) {
      GPUInfo("Copying Sector Data to GPU and initializing temporary memory");
    }
    runKernel<GPUMemClean16>(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights()));

    if (!doGPU) {
      TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory
    }
    if (GPUDebug("Initialization (3)", useStream)) {
      throw std::runtime_error("memcpy failure");
    }

    runKernel<GPUTPCNeighboursFinder>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}});
    streamInit[useStream] = true;

    if (GetProcessingSettings().keepDisplayMemory) {
      TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true);
      memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size());
      if (GetProcessingSettings().debugMask & 2) {
        trk.DumpLinks(*mDebugFile, 0);
      }
    }

    runKernel<GPUTPCNeighboursCleaner>({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}});
    DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1);

    runKernel<GPUTPCStartHitsFinder>({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}});
#ifdef GPUCA_SORT_STARTHITS_GPU
    if (doGPU) {
      runKernel<GPUTPCStartHitsSorter>({GetGridAuto(useStream), {iSector}});
    }
#endif
    if (GetProcessingSettings().deterministicGPUReconstruction) {
      runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({GetGrid(1, 1, useStream), {iSector}});
    }
    DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile);

    if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
      trk.UpdateMaxData();
      AllocateRegisteredMemory(trk.MemoryResTracklets());
      AllocateRegisteredMemory(trk.MemoryResOutput());
    }

    if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) {
      runKernel<GPUTPCTrackletConstructor>({GetGridAuto(useStream), {iSector}});
      DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile);
      if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) {
        trk.DumpHitWeights(*mDebugFile);
      }
    }

    if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) {
      runKernel<GPUTPCTrackletSelector>({GetGridAuto(useStream), {iSector}});
      runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -ThreadCount(), useStream}, {iSector}}, 1);
      if (GetProcessingSettings().deterministicGPUReconstruction) {
        runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({GetGrid(1, 1, useStream), {iSector}});
      }
      TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]);
      streamMap[iSector] = useStream;
      if (GetProcessingSettings().debugLevel >= 3) {
        GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
      }
      DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile);
    }
  });
  if (error) {
    return (3);
  }

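// Non-pipelined fallback: the tracklet constructor runs once across all sectors, and
// the tracklet selector is batched over groups of up to trackletSelectorSectors
// sectors, cycling through the available streams.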
  if (doGPU || GetProcessingSettings().debugLevel >= 1) {
    if (doGPU) {
      ReleaseEvent(mEvents->init);
    }

    if (!GetProcessingSettings().trackletSelectorInPipeline) {
      if (GetProcessingSettings().trackletConstructorInPipeline) {
        SynchronizeGPU();
      } else {
        for (int32_t i = 0; i < mRec->NStreams(); i++) {
          RecordMarker(&mEvents->stream[i], i);
        }
        runKernel<GPUTPCTrackletConstructor, 1>({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}});
        for (int32_t i = 0; i < mRec->NStreams(); i++) {
          ReleaseEvent(mEvents->stream[i]);
        }
        SynchronizeEventAndRelease(mEvents->single);
      }

      if (GetProcessingSettings().debugLevel >= 4) {
        for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
          DoDebugAndDump(RecoStep::TPCSectorTracking, 128, processors()->tpcTrackers[iSector], &GPUTPCTracker::DumpTrackletHits, *mDebugFile);
        }
      }

      int32_t runSectors = 0;
      int32_t useStream = 0;
      for (uint32_t iSector = 0; iSector < NSECTORS; iSector += runSectors) {
        if (runSectors < GetProcessingSettings().trackletSelectorSectors) {
          runSectors++;
        }
        runSectors = CAMath::Min<int32_t>(runSectors, NSECTORS - iSector);
        if (getKernelProperties<GPUTPCTrackletSelector>().minBlocks * BlockCount() < (uint32_t)runSectors) {
          runSectors = getKernelProperties<GPUTPCTrackletSelector>().minBlocks * BlockCount();
        }

        if (GetProcessingSettings().debugLevel >= 3) {
          GPUInfo("Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors);
        }
        runKernel<GPUTPCTrackletSelector>({GetGridAuto(useStream), {iSector, runSectors}});
        runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -ThreadCount(), useStream}, {iSector}}, runSectors);
        for (uint32_t k = iSector; k < iSector + runSectors; k++) {
          if (GetProcessingSettings().deterministicGPUReconstruction) {
            runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({GetGrid(1, 1, useStream), {k}});
          }
          TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->sector[k]);
          streamMap[k] = useStream;
        }
        useStream++;
        if (useStream >= mRec->NStreams()) {
          useStream = 0;
        }
      }
    }
  }

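// Output phase: sector results are fetched as soon as their events report completion.
// With extrapolation tracking enabled, a sector is only extrapolated and written out
// once the sector itself and both of its neighbours are available, since track legs
// are followed into the adjacent sectors.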
  std::array<bool, NSECTORS> transferRunning;
  transferRunning.fill(true);
  if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) {
    if (param().rec.tpc.extrapolationTracking) {
      mWriteOutputDone.fill(0);
    }

    uint32_t tmpSector = 0;
    for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
      if (GetProcessingSettings().debugLevel >= 3) {
        GPUInfo("Transferring Tracks from GPU to Host");
      }

      if (tmpSector == iSector) {
        SynchronizeEvents(&mEvents->sector[iSector]);
      }
      while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) {
        ReleaseEvent(mEvents->sector[tmpSector]);
        if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) {
          TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]);
        } else {
          transferRunning[tmpSector] = false;
        }
        tmpSector++;
      }

      if (GetProcessingSettings().keepAllMemory) {
        TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true);
        if (!GetProcessingSettings().trackletConstructorInPipeline) {
          if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) {
            processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile);
          }
        }
        if (!GetProcessingSettings().trackletSelectorInPipeline) {
          if (GetProcessingSettings().debugMask & 512) {
            processors()->tpcTrackers[iSector].DumpTrackHits(*mDebugFile);
          }
        }
      }

      if (transferRunning[iSector]) {
        SynchronizeEvents(&mEvents->sector[iSector]);
      }
      if (GetProcessingSettings().debugLevel >= 3) {
        GPUInfo("Tracks Transferred: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits());
      }

      if (GetProcessingSettings().debugLevel >= 3) {
        GPUInfo("Data ready for sector %d", iSector);
      }
      mSectorSelectorReady = iSector;

      if (param().rec.tpc.extrapolationTracking) {
        for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) {
          uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a);
          uint32_t sectorLeft, sectorRight;
          GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight);

          if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mWriteOutputDone[tmpSector2] == 0) {
            ExtrapolationTracking(tmpSector2, 0);
            WriteOutput(tmpSector2, 0);
            mWriteOutputDone[tmpSector2] = 1;
          }
        }
      } else {
        WriteOutput(iSector, 0);
      }
    }

    if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) {
      std::vector<bool> blocking(NSECTORS * mRec->NStreams());
      for (int32_t i = 0; i < NSECTORS; i++) {
        for (int32_t j = 0; j < mRec->NStreams(); j++) {
          blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j;
        }
      }
      for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
        uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
        if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) {
          uint32_t sectorLeft, sectorRight;
          GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
          if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) {
            StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]);
            blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true;
          }
          if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) {
            StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]);
            blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true;
          }
        }
        ExtrapolationTracking(tmpSector, 0, false);
      }
    }
    for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
      if (doGPU && transferRunning[iSector]) {
        ReleaseEvent(mEvents->sector[iSector]);
      }
    }
  } else {
    mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
      if (param().rec.tpc.extrapolationTracking) {
        ExtrapolationTracking(iSector, 0);
      }
      if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) {
        WriteOutput(iSector, 0);
      }
    });
  }

  if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) {
    for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
      GPUInfo("Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
              processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSector].CommonMemory()->nTracks, processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
    }
  }

  if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) {
    for (uint32_t i = 0; i < NSECTORS; i++) {
      processors()->tpcTrackers[i].DumpOutput(*mDebugFile);
    }
  }

  if (DoProfile()) {
    return (1);
  }
  for (uint32_t i = 0; i < NSECTORS; i++) {
    mIOPtrs.nSectorTracks[i] = *processors()->tpcTrackers[i].NTracks();
    mIOPtrs.sectorTracks[i] = processors()->tpcTrackers[i].Tracks();
    mIOPtrs.nSectorClusters[i] = *processors()->tpcTrackers[i].NTrackHits();
    mIOPtrs.sectorClusters[i] = processors()->tpcTrackers[i].TrackHits();
    if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
      TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[i], -1, true);
    }
  }
  if (GetProcessingSettings().debugLevel >= 2) {
    GPUInfo("TPC Sector Tracker finished");
  }
  mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK"));
  return 0;
}

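// CPU path for building the per-sector tracking data: runs the same
// GPUTPCCreateTrackingData kernel, forced onto the CPU device type.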
int32_t GPUChainTracking::ReadEvent(uint32_t iSector, int32_t threadId)
{
  if (GetProcessingSettings().debugLevel >= 5) {
    GPUInfo("Running ReadEvent for sector %d on thread %d\n", iSector, threadId);
  }
  runKernel<GPUTPCCreateTrackingData>({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSector}});
  if (GetProcessingSettings().debugLevel >= 5) {
    GPUInfo("Finished ReadEvent for sector %d on thread %d\n", iSector, threadId);
  }
  return (0);
}

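// Host-side output step: forwards to GPUTPCTracker::WriteOutput() for the given sector.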
void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId)
{
  if (GetProcessingSettings().debugLevel >= 5) {
    GPUInfo("Running WriteOutput for sector %d on thread %d\n", iSector, threadId);
  }
  processors()->tpcTrackers[iSector].WriteOutput();
  if (GetProcessingSettings().debugLevel >= 5) {
    GPUInfo("Finished WriteOutput for sector %d on thread %d\n", iSector, threadId);
  }
}