Project
Loading...
Searching...
No Matches
GPUChainTrackingSectorTracker.cxx
Go to the documentation of this file.
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
16#include "GPULogging.h"
17#include "GPUO2DataTypes.h"
19#include "GPUTPCClusterData.h"
22#include "GPUDefParametersRuntime.h"
33#include "utils/strtag.h"
34#include <fstream>
35
36using namespace o2::gpu;
37
38uint32_t GPUChainTracking::StreamForSector(uint32_t sector) const
39{
40 return sector % mRec->NStreams();
41}
42
43int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, bool blocking)
44{
45 const uint32_t stream = StreamForSector(iSector);
46 runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, stream), {iSector}});
47 TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), stream);
48 if (blocking) {
50 }
51 return (0);
52}
53
// Wrapper around RunTPCTrackingSectors_internal(): returns 1 when mRec->GPUStuck()
// reports a stuck GPU, otherwise 1 if the internal call returned non-zero and 0 if not.
// NOTE(review): the signature line (listing line 54) is absent from this listing;
// presumably this is GPUChainTracking::RunTPCTrackingSectors() - confirm against the repository.
55{
// Refuse to process the event at all if the device is flagged as stuck.
56 if (mRec->GPUStuck()) {
57 GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!");
58 return (1);
59 }
60
// Binding the returned object to a const reference keeps it alive until this function returns.
61 const auto& threadContext = GetThreadContext();
62
63 int32_t retVal = RunTPCTrackingSectors_internal();
// NOTE(review): the error branch below is empty in this listing (line 65 is absent).
// SynchronizeGPU() is a plausible candidate for the missing call - confirm.
64 if (retVal) {
66 }
// Collapse any non-zero internal error code to 1.
67 return (retVal != 0);
68}
69
// Runs the TPC sector-tracking stage for all NSECTORS sectors.
// Steps visible in this listing: attach cluster data to every sector tracker, size and
// allocate tracker memory, optionally build the cluster occupancy map, run the per-sector
// kernel sequence through mRec->runParallelOuterLoop, run extrapolation tracking when
// param().rec.tpc.extrapolationTracking is set, and publish per-sector counts and hit
// pointers into mIOPtrs.
// Returns 0 on success, 2 if PrepareProfile() returns non-zero, 3 if `error` is set,
// and 1 if DoProfile() returns non-zero.
// NOTE(review): the embedded listing numbers skip 78, 83, 91, 93, 105, 109, 131, 135, 142,
// 183, 213, 214, 220, 235, 264, 275 and 290, so statements of the original function are
// probably missing here - compare against the repository before changing any logic.
70int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
71{
72 if (GetProcessingSettings().debugLevel >= 2) {
73 GPUInfo("Running TPC Sector Tracker");
74 }
// True when TPCSectorTracking is among the reco steps returned by GetRecoStepsGPU().
75 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSectorTracking;
// Without the early TPC transform, the shadow trackers receive a null cluster pointer together
// with the per-sector count and first-row offset taken from mIOPtrs.clustersNative.
76 if (!param().par.earlyTpcTransform) {
77 for (uint32_t i = 0; i < NSECTORS; i++) {
79 if (doGPU) {
80 processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor
81 }
82 }
84 } else {
// With the early transform, host trackers take mIOPtrs.clusterData[i] directly.
// NOTE(review): `offset` is never advanced in the visible lines; listing line 91 is absent
// and presumably updates it - confirm.
85 int32_t offset = 0;
86 for (uint32_t i = 0; i < NSECTORS; i++) {
87 processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset);
// When conversion also runs as a reco step, the shadow tracker points into the converter's
// cluster buffer at this sector's cluster-id offset.
88 if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) {
89 processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset());
90 }
92 }
94 }
95 GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets);
96
97 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
98 processors()->tpcTrackers[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes
99 }
100 mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers
101 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
102 SetupGPUProcessor(&processors()->tpcTrackers[iSector], false); // Prepare custom allocation for 1st stack level
103 mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSector].MemoryResSectorScratch());
104 }
106 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
107 SetupGPUProcessor(&processors()->tpcTrackers[iSector], true); // Now we allocate
108 mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSector]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. Should actually be cleaned up at the source.
110 }
111
// One flag per stream; the kernel launches below pass &mEvents->init only while the
// flag of their stream is still false.
112 bool streamInit[GPUCA_MAX_STREAMS] = {false};
// The last stream is used for the constant-memory upload and for the occupancy-map work.
113 int32_t streamInitAndOccMap = mRec->NStreams() - 1;
114
115 if (doGPU) {
116 // Copy Tracker Object to GPU Memory
117 if (GetProcessingSettings().debugLevel >= 3) {
118 GPUInfo("Copying Tracker objects to GPU");
119 }
120 if (PrepareProfile()) {
121 return 2;
122 }
123
// Upload all NSECTORS shadow tracker objects at the byte offset of tpcTrackers inside the
// processors() struct; mEvents->init is passed as the event argument of this upload.
124 WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init);
125
// Only the uploading stream counts as initialised; every other stream still gets mEvents->init.
126 std::fill(streamInit, streamInit + mRec->NStreams(), false);
127 streamInit[streamInitAndOccMap] = true;
128 }
129
// NOTE(review): this branch body is empty in the listing (line 131 is absent).
130 if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
132 }
133 if (param().rec.tpc.occupancyMapTimeBins) {
// NOTE(review): empty in the listing as well (line 135 is absent).
134 if (doGPU) {
136 }
// Destination map: the shadow buffer when running on GPU, the host buffer otherwise.
137 uint32_t* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap;
// Temporary bin storage from volatile memory; it is passed to GPUMemClean16, then filled,
// then folded into the destination map starting at element 2.
138 auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU);
139 runKernel<GPUMemClean16>(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
140 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap), ptrTmp);
141 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2);
// Element 1 of the host map stores occupancyMapTimeBins * 0x10000 + occupancyMapTimeBinsAverage.
143 mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage;
144 if (doGPU) {
// Copy the folded bins from the shadow buffer into the host buffer (toGPU argument is false).
145 GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamInitAndOccMap, false, &mEvents->init);
146 } else {
147 TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init);
148 }
149 }
150 if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
// Element 0 of the host map holds the total occupancy: nTPCHits divided by nHBFPerTF when
// settingsTF is present and has hasNHBFPerTF set, otherwise divided by 128.
151 uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
152 occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128));
// The map pointers are only passed when occupancyMapTimeBins is set (and the GPU one only with doGPU).
153 mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap);
154 }
155
// Stream used by each sector. NOTE(review): written inside the lambda but never read in the visible lines.
156 int32_t streamMap[NSECTORS];
157
// NOTE(review): `error` is never set to true in the visible lines, so the `return (3)` below
// looks unreachable from here - confirm against the full source.
158 bool error = false;
// Per-sector work item, invoked once for every sector index below NSECTORS.
159 mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
160 GPUTPCTracker& trk = processors()->tpcTrackers[iSector];
// On GPU the kernels' buffers are taken from the shadow tracker; otherwise this aliases trk.
161 GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk;
162 int32_t useStream = StreamForSector(iSector);
163
164 if (GetProcessingSettings().debugLevel >= 3) {
165 GPUInfo("Creating Sector Data (Sector %d)", iSector);
166 }
167 TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream);
// &mEvents->init is passed only until this stream's flag in streamInit has been set.
168 runKernel<GPUTPCCreateTrackingData>({doGPU ? GetGridBlk(GPUCA_ROW_COUNT, useStream) : GetGridAuto(0), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); // TODO: Check why GetGridAuto(0) is much fast on CPU
169 streamInit[useStream] = true;
170 if (GetProcessingSettings().deterministicGPUReconstruction) {
171 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}});
172 }
// CPU-only shortcut: skip the remaining steps for an empty sector when debugLevel is 0.
173 if (!doGPU && trk.CheckEmptySector() && GetProcessingSettings().debugLevel == 0) {
174 return;
175 }
176
177 if (GetProcessingSettings().debugLevel >= 6) {
178 *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl;
179 if (GetProcessingSettings().debugMask & 1) {
180 if (doGPU) {
181 TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true);
182 }
// NOTE(review): listing line 183 is absent here.
184 }
185 }
186
// Run GPUMemClean16 over the hit-weights buffer (NumberOfHitsPlusAlign() elements) before the neighbours finder.
187 runKernel<GPUMemClean16>(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights()));
188 runKernel<GPUTPCNeighboursFinder>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}});
189 streamInit[useStream] = true;
190
// For the display, copy the links resource into LinkTmpMemory() after fetching the tracker
// resources back to the host.
191 if (GetProcessingSettings().keepDisplayMemory) {
192 TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true);
193 memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size());
194 if (GetProcessingSettings().debugMask & 2) {
195 trk.DumpLinks(*mDebugFile, 0);
196 }
197 }
198
199 runKernel<GPUTPCNeighboursCleaner>({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}});
200 DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1);
201
202 runKernel<GPUTPCStartHitsFinder>({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}});
203 if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) {
204 runKernel<GPUTPCStartHitsSorter>({GetGridAuto(useStream), {iSector}});
205 }
206 if (GetProcessingSettings().deterministicGPUReconstruction) {
207 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({GetGrid(1, 1, useStream), {iSector}});
208 }
209 DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile);
210
// NOTE(review): listing lines 213-214 are absent; only the UpdateMaxData() call of this branch is visible.
211 if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
212 trk.UpdateMaxData();
215 }
216
217 runKernel<GPUTPCTrackletConstructor>({GetGridAuto(useStream), {iSector}});
218 DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile);
// NOTE(review): this branch body is empty in the listing (line 220 is absent).
219 if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) {
221 }
222
223 runKernel<GPUTPCTrackletSelector>({GetGridAuto(useStream), {iSector}});
224 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -ThreadCount(), useStream}, {iSector}}, 1);
225 if (GetProcessingSettings().deterministicGPUReconstruction) {
226 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({GetGrid(1, 1, useStream), {iSector}});
227 }
// Fetch the common memory to the host; mEvents->sector[iSector] is passed as the event of this
// transfer and is what the extrapolation step below waits on.
228 TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]);
229 streamMap[iSector] = useStream;
230 if (GetProcessingSettings().debugLevel >= 3) {
231 GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
232 }
233 DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile);
234 });
236 if (error) {
237 return (3);
238 }
239
// GPU (or debug) path: sectors are handled one after another with explicit event waits.
// Otherwise (CPU, debugLevel 0) extrapolation tracking runs through runParallelOuterLoop with blocking calls.
240 if (doGPU || GetProcessingSettings().debugLevel >= 1) {
241 if (param().rec.tpc.extrapolationTracking) {
// blocking[sector * NStreams + stream]: initially true only for the sector's own stream, and set
// once a wait on a neighbour running on `stream` has been queued, so it is not queued twice.
242 std::vector<bool> blocking(NSECTORS * mRec->NStreams());
243 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
244 for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) {
245 blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream;
246 }
247 }
248 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
// Sectors are visited in the order given by ExtrapolationTrackingSectorOrder().
249 uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
250 uint32_t sectorLeft, sectorRight;
251 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
// Let this sector's stream wait for the left neighbour's event unless that neighbour's stream is already covered.
252 if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) {
253 StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorLeft]);
254 blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true;
255 }
// Same for the right neighbour.
256 if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) {
257 StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorRight]);
258 blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = true;
259 }
260 ExtrapolationTracking(tmpSector, false);
261 }
262 }
263 if (doGPU) {
// NOTE(review): listing line 264 is absent before the events are released.
265 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
266 ReleaseEvent(mEvents->sector[iSector]);
267 }
268 }
269 } else {
270 mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
271 if (param().rec.tpc.extrapolationTracking) {
272 ExtrapolationTracking(iSector, true);
273 }
274 });
276 }
277
278 if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) {
279 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
280 GPUInfo("Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
281 processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSector].CommonMemory()->nTracks, processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
282 }
283 }
284
285 if (DoProfile()) {
286 return (1);
287 }
// Publish per-sector track counts, hit counts and hit pointers into mIOPtrs.
// NOTE(review): listing line 290 is absent between the first two assignments.
288 for (uint32_t i = 0; i < NSECTORS; i++) {
289 mIOPtrs.nSectorTracks[i] = *processors()->tpcTrackers[i].NTracks();
291 mIOPtrs.nSectorClusters[i] = *processors()->tpcTrackers[i].NTrackHits();
292 mIOPtrs.sectorClusters[i] = processors()->tpcTrackers[i].TrackHits();
293 if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) {
294 TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[i], -1, true);
295 }
296 }
297 if (GetProcessingSettings().debugLevel >= 2) {
298 GPUInfo("TPC Sector Tracker finished");
299 }
// Pops the non-persistent memory tagged "TPCSLTRK".
// NOTE(review): the matching PushNonPersistentMemory call is not visible in this listing - confirm it exists.
300 mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK"));
301 return 0;
302}
int32_t i
#define GPUCA_MAX_STREAMS
int32_t retVal
#define GPUCA_NSECTORS
#define GPUCA_ROW_COUNT
TBranch * ptr
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
GPUTrackingInOutPointers & mIOPtrs
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
int32_t ExtrapolationTracking(uint32_t iSector, bool blocking)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:127
void SynchronizeGPU()
Definition GPUChain.h:109
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:71
GPUReconstruction::RecoStepField GetRecoSteps() const
Definition GPUChain.h:70
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:126
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:110
uint32_t ThreadCount() const
Definition GPUChain.h:214
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:216
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:108
GPUConstantMem * processors()
Definition GPUChain.h:83
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:116
GPUParam & param()
Definition GPUChain.h:86
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:219
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:75
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:88
GPUReconstructionCPU * mRec
Definition GPUChain.h:78
GPUConstantMem * processorsShadow()
Definition GPUChain.h:84
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:47
static constexpr int32_t NSECTORS
Definition GPUChain.h:57
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:124
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:229
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
GPUReconstruction * rec()
Definition GPUChain.h:65
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:121
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void * AllocateVolatileMemory(size_t size, bool device)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void ComputeReuseMax(GPUProcessor *proc)
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
GPUMemoryResource & Res(int16_t num)
const GPUParam & GetParam() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int16_t MemoryResTracklets() const
int16_t MemoryResLinks() const
void SetMaxData(const GPUTrackingInOutPointers &io)
int16_t MemoryResOutput() const
void DumpTrackHits(std::ostream &out)
void DumpLinks(std::ostream &out, int32_t phase)
void DumpStartHits(std::ostream &out)
void DumpHitWeights(std::ostream &out)
int16_t MemoryResCommon() const
void DumpTrackingData(std::ostream &out)
void DumpTrackletHits(std::ostream &out)
GLintptr offset
Definition glcorearb.h:660
GLenum GLfloat param
Definition glcorearb.h:271
GLuint GLuint stream
Definition glcorearb.h:1806
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUSettingsTF * settingsTF
const GPUTPCClusterData * clusterData[NSECTORS]
size_t getIndex(const GPUOutputControl &v)
unsigned int nClustersSector[constants::MAXSECTOR]
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]