GPUReconstruction.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include <cstring>
16#include <cstdio>
17#include <iostream>
18#include <mutex>
19#include <string>
20#include <map>
21#include <queue>
22#include <mutex>
23#include <condition_variable>
24#include <array>
25
26#include "GPUReconstruction.h"
29#include "GPUReconstructionIO.h"
30#include "GPUROOTDumpCore.h"
31#include "GPUConfigDump.h"
32#include "GPUChainTracking.h"
33#include "GPUCommonHelpers.h"
34
35#include "GPUMemoryResource.h"
36#include "GPUChain.h"
38
39#include "GPULogging.h"
40#include "utils/strtag.h"
41
42#ifdef GPUCA_O2_LIB
44#endif
45
47
48namespace o2::gpu
49{
50namespace // anonymous
51{
52struct GPUReconstructionPipelineQueue {
53 uint32_t op = 0; // For now, 0 = process, 1 = terminate
54 GPUChain* chain = nullptr;
55 std::mutex m;
56 std::condition_variable c;
57 bool done = false;
58 int32_t retVal = 0;
59};
60} // namespace
61
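// Double-pipeline bookkeeping (a sketch of the intent, inferred from the worker/enqueue code below):
// EnqueuePipeline() pushes GPUReconstructionPipelineQueue items onto this context's queue and blocks on the
// per-item condition variable until the worker thread marks the item done.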
 62struct GPUReconstructionPipelineContext {
 63 std::queue<GPUReconstructionPipelineQueue*> queue;
64 std::mutex mutex;
65 std::condition_variable cond;
66 bool terminate = false;
67};
68} // namespace o2::gpu
69
70using namespace o2::gpu;
71
72constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[];
73constexpr const char* const GPUReconstruction::IOTYPENAMES[];
75
76static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; }
77
78GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg)
79{
80 if (cfg.master) {
82 throw std::invalid_argument("device type of master and slave GPUReconstruction does not match");
83 }
84 if (cfg.master->mMaster) {
85 throw std::invalid_argument("Cannot be slave to a slave");
86 }
87 mMaster = cfg.master;
88 cfg.master->mSlaves.emplace_back(this);
89 }
92 for (uint32_t i = 0; i < NSECTORS; i++) {
93 processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place
95#ifdef GPUCA_HAS_ONNX
96 processors()->tpcNNClusterer[i].mISector = i;
97#endif
98 }
99#ifndef GPUCA_NO_ROOT
100 mROOTDump = GPUROOTDumpCore::getAndCreate();
101#endif
102}
103
104GPUReconstruction::~GPUReconstruction()
105{
106 if (mInitialized) {
107 GPUError("GPU Reconstruction not properly deinitialized!");
108 }
109}
110
111void GPUReconstruction::GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame)
112{
113 if (trackerTraits) {
114 trackerTraits->reset(new o2::its::TrackerTraits);
115 }
116 if (vertexerTraits) {
117 vertexerTraits->reset(new o2::its::VertexerTraits);
118 }
119 if (timeFrame) {
120 timeFrame->reset(new o2::its::TimeFrame);
121 }
122}
123
125{
126 return std::max<int32_t>(0, tbb::this_task_arena::current_thread_index());
127}
128
129int32_t GPUReconstruction::Init()
130{
131 if (mMaster) {
132 throw std::runtime_error("Must not call init on slave!");
133 }
134 int32_t retVal = InitPhaseBeforeDevice();
135 if (retVal) {
136 return retVal;
137 }
138 for (uint32_t i = 0; i < mSlaves.size(); i++) {
139 retVal = mSlaves[i]->InitPhaseBeforeDevice();
140 if (retVal) {
141 GPUError("Error initializing slave (before device init)");
142 return retVal;
143 }
144 mNStreams = std::max(mNStreams, mSlaves[i]->mNStreams);
147 }
148 if (InitDevice()) {
149 return 1;
150 }
151 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
154 } else {
156 }
158 return 1;
159 }
160 for (uint32_t i = 0; i < mSlaves.size(); i++) {
161 mSlaves[i]->mDeviceMemoryBase = mDeviceMemoryPermanent;
162 mSlaves[i]->mHostMemoryBase = mHostMemoryPermanent;
163 mSlaves[i]->mDeviceMemorySize = mDeviceMemorySize - ptrDiff(mSlaves[i]->mDeviceMemoryBase, mDeviceMemoryBase);
164 mSlaves[i]->mHostMemorySize = mHostMemorySize - ptrDiff(mSlaves[i]->mHostMemoryBase, mHostMemoryBase);
165 mSlaves[i]->mHostMemoryPoolEnd = mHostMemoryPoolEnd;
166 mSlaves[i]->mDeviceMemoryPoolEnd = mDeviceMemoryPoolEnd;
167 if (mSlaves[i]->InitDevice()) {
169 GPUError("Error initializing slave (device init)");
169 return 1;
170 }
172 GPUError("Error initializing slave (permanent memory)");
173 return 1;
174 }
175 mDeviceMemoryPermanent = mSlaves[i]->mDeviceMemoryPermanent;
176 mHostMemoryPermanent = mSlaves[i]->mHostMemoryPermanent;
177 }
179 if (retVal) {
180 return retVal;
181 }
183 for (uint32_t i = 0; i < mSlaves.size(); i++) {
184 mSlaves[i]->mDeviceMemoryPermanent = mDeviceMemoryPermanent;
185 mSlaves[i]->mHostMemoryPermanent = mHostMemoryPermanent;
186 retVal = mSlaves[i]->InitPhaseAfterDevice();
187 if (retVal) {
188 GPUError("Error initializing slave (after device init)");
189 return retVal;
190 }
191 mSlaves[i]->ClearAllocatedMemory();
192 }
193 return 0;
194}
195
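// Default host thread count: TBB_NUM_THREADS takes precedence, then OMP_NUM_THREADS, otherwise the TBB
// default concurrency for this machine.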
196namespace o2::gpu::internal
197{
198static uint32_t getDefaultNThreads()
199{
200 const char* tbbEnv = getenv("TBB_NUM_THREADS");
201 uint32_t tbbNum = tbbEnv ? atoi(tbbEnv) : 0;
202 if (tbbNum) {
203 return tbbNum;
204 }
205 const char* ompEnv = getenv("OMP_NUM_THREADS");
206 uint32_t ompNum = ompEnv ? atoi(ompEnv) : 0;
207 if (ompNum) {
208 return ompNum;
209 }
210 return tbb::info::default_concurrency();
211}
212} // namespace o2::gpu::internal
213
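// Sanity-checks and cross-adjusts the processing settings (memory allocation strategy, debug options,
// deterministic mode, host thread counts, clusterer lanes, double-pipeline constraints) and sums up the
// chains' memory requirements before the device itself is initialized.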
214int32_t GPUReconstruction::InitPhaseBeforeDevice()
215{
216 if (mProcessingSettings.printSettings) {
217 if (mSlaves.size() || mMaster) {
218 printf("\nConfig Dump %s\n", mMaster ? "Slave" : "Master");
219 }
220 const GPUChainTracking* chTrk = nullptr;
221 for (uint32_t i = 0; i < mChains.size(); i++) {
222 if ((chTrk = dynamic_cast<GPUChainTracking*>(mChains[i].get()))) {
223 break;
224 }
225 }
226 GPUConfigDump::dumpConfig(&param().rec, &mProcessingSettings, chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? chTrk->GetEventDisplayConfig() : nullptr, &mDeviceBackendSettings, &mRecoSteps);
227 }
230 if (!IsGPU()) {
231 mRecoSteps.stepsGPUMask.set((uint8_t)0);
232 }
233
234 if (mProcessingSettings.forceMemoryPoolSize >= 1024 || mProcessingSettings.forceHostMemoryPoolSize >= 1024) {
236 }
237 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) {
239 }
240 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
241 mProcessingSettings.forceMemoryPoolSize = mProcessingSettings.forceHostMemoryPoolSize = 0;
242 }
243 if (mProcessingSettings.debugLevel >= 4) {
244 mProcessingSettings.keepAllMemory = true;
245 }
246 if (mProcessingSettings.debugLevel >= 5 && mProcessingSettings.allocDebugLevel < 2) {
247 mProcessingSettings.allocDebugLevel = 2;
248 }
249 if (mProcessingSettings.eventDisplay || mProcessingSettings.keepAllMemory) {
250 mProcessingSettings.keepDisplayMemory = true;
251 }
252 if (mProcessingSettings.debugLevel < 6) {
253 mProcessingSettings.debugMask = 0;
254 }
255 if (mProcessingSettings.debugLevel < 1) {
256 mProcessingSettings.deviceTimers = false;
257 }
258 if (mProcessingSettings.debugLevel > 0) {
259 mProcessingSettings.recoTaskTiming = true;
260 }
261 if (mProcessingSettings.deterministicGPUReconstruction == -1) {
262 mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6;
263 }
264 if (mProcessingSettings.deterministicGPUReconstruction) {
265#ifndef GPUCA_DETERMINISTIC_MODE
266 GPUError("Warning, deterministicGPUReconstruction requires GPUCA_DETERMINISTIC_MODE to be fully deterministic; without it, only most concurrency-related indeterminism is removed, while floating point effects remain!");
267#endif
268 mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU;
269 param().rec.tpc.nWaysOuter = true;
270 if (param().rec.tpc.looperInterpolationInExtraPass == -1) {
271 param().rec.tpc.looperInterpolationInExtraPass = 0;
272 }
273 if (mProcessingSettings.createO2Output > 1) {
274 mProcessingSettings.createO2Output = 1;
275 }
276 mProcessingSettings.rtc.deterministic = 1;
277 }
278 if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) {
279 mProcessingSettings.nTPCClustererLanes = 1;
280 }
281 if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) {
282 mProcessingSettings.createO2Output = 1;
283 }
284 if (!mProcessingSettings.createO2Output || !IsGPU()) {
285 mProcessingSettings.clearO2OutputFromGPU = false;
286 }
288 mProcessingSettings.mergerSortTracks = false;
289 }
290
291 if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) {
292 mProcessingSettings.delayedOutput = false;
293 }
294
297 if (!mProcessingSettings.rtc.enable) {
298 mProcessingSettings.rtc.optConstexpr = false;
299 }
300
301 mMemoryScalers->factor = mProcessingSettings.memoryScalingFactor;
302 mMemoryScalers->conservative = mProcessingSettings.conservativeMemoryEstimate;
303 mMemoryScalers->returnMaxVal = mProcessingSettings.forceMaxMemScalers != 0;
304 if (mProcessingSettings.forceMaxMemScalers > 1) {
305 mMemoryScalers->rescaleMaxMem(mProcessingSettings.forceMaxMemScalers);
306 }
307
308 if (mProcessingSettings.nHostThreads != -1 && mProcessingSettings.ompThreads != -1) {
309 GPUFatal("Must not use both nHostThreads and ompThreads at the same time!");
310 } else if (mProcessingSettings.ompThreads != -1) {
311 mProcessingSettings.nHostThreads = mProcessingSettings.ompThreads;
312 GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!");
313 }
314
315 if (mProcessingSettings.nHostThreads <= 0) {
316 mProcessingSettings.nHostThreads = internal::getDefaultNThreads();
317 } else {
318 mProcessingSettings.autoAdjustHostThreads = false;
319 }
321 if (mMaster == nullptr) {
322 mThreading = std::make_shared<GPUReconstructionThreading>();
323 mThreading->control = std::make_unique<tbb::global_control>(tbb::global_control::max_allowed_parallelism, mMaxHostThreads);
324 mThreading->allThreads = std::make_unique<tbb::task_arena>(mMaxHostThreads);
325 mThreading->activeThreads = std::make_unique<tbb::task_arena>(mMaxHostThreads);
326 } else {
328 }
330 if (IsGPU()) {
331 mNStreams = std::max<int32_t>(mProcessingSettings.nStreams, 3);
332 }
333
334 if (mProcessingSettings.nTPCClustererLanes == -1) {
335 mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max<int32_t>(1, std::min<int32_t>(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min<int32_t>(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads));
336 }
337 if (mProcessingSettings.overrideClusterizerFragmentLen == -1) {
338 mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST;
339 }
340 if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) {
341 GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes);
342 mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS;
343 }
344
345 if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) {
346 GPUError("Double pipeline mode requires exactly one chain, which must support it");
347 return 1;
348 }
349
350 if (mMaster == nullptr && mProcessingSettings.doublePipeline) {
352 }
353
355 for (uint32_t i = 0; i < mChains.size(); i++) {
356 if (mChains[i]->EarlyConfigure()) {
357 return 1;
358 }
359 mChains[i]->RegisterPermanentMemoryAndProcessors();
360 size_t memPrimary, memPageLocked;
361 mChains[i]->MemorySize(memPrimary, memPageLocked);
362 if (!IsGPU() || mOutputControl.useInternal()) {
363 memPageLocked = memPrimary;
364 }
365 mDeviceMemorySize += memPrimary;
366 mHostMemorySize += memPageLocked;
367 }
368 if (mProcessingSettings.forceMemoryPoolSize && mProcessingSettings.forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) {
369 mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
370 } else if (mProcessingSettings.forceMemoryPoolSize > 2) {
371 mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
372 if (!IsGPU() || mOutputControl.useInternal()) {
374 }
375 }
376 if (mProcessingSettings.forceHostMemoryPoolSize) {
377 mHostMemorySize = mProcessingSettings.forceHostMemoryPoolSize;
378 }
379
380 for (uint32_t i = 0; i < mProcessors.size(); i++) {
381 (mProcessors[i].proc->*(mProcessors[i].RegisterMemoryAllocation))();
382 }
383
384 return 0;
385}
386
388{
389 if (IsGPU()) {
390 for (uint32_t i = 0; i < mChains.size(); i++) {
391 mChains[i]->RegisterGPUProcessors();
392 }
393 }
395 return 0;
396}
397
398int32_t GPUReconstruction::InitPhaseAfterDevice()
399{
400 if (mProcessingSettings.forceMaxMemScalers <= 1 && mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
402 }
403 for (uint32_t i = 0; i < mChains.size(); i++) {
404 if (mChains[i]->Init()) {
405 return 1;
406 }
407 }
408 for (uint32_t i = 0; i < mProcessors.size(); i++) {
409 (mProcessors[i].proc->*(mProcessors[i].InitializeProcessor))();
410 }
411
412 WriteConstantParams(); // Initialize with initial values, can optionally be updated later
413
414 mInitialized = true;
415 return 0;
416}
417
418void GPUReconstruction::WriteConstantParams()
419{
420 if (IsGPU()) {
421 const auto threadContext = GetThreadContext();
422 WriteToConstantMemory(ptrDiff(&processors()->param, processors()), &param(), sizeof(param()), -1);
423 }
424}
425
427{
428 for (uint32_t i = 0; i < mChains.size(); i++) {
429 mChains[i]->Finalize();
430 }
431 return 0;
432}
433
434int32_t GPUReconstruction::Exit()
435{
436 if (!mInitialized) {
437 return 1;
438 }
439 for (uint32_t i = 0; i < mSlaves.size(); i++) {
440 if (mSlaves[i]->Exit()) {
441 GPUError("Error exiting slave");
442 }
443 }
444
445 mChains.clear(); // Make sure we destroy a possible ITS GPU tracker before we call the destructors
446 mHostConstantMem.reset(); // Reset these explicitly before the destruction of other members unloads the library
447 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
448 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
449 if (mMemoryResources[i].mReuse >= 0) {
450 continue;
451 }
452 operator delete(mMemoryResources[i].mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
453 mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr;
454 }
455 }
456 mMemoryResources.clear();
457 if (mInitialized) {
458 ExitDevice();
459 }
460 mInitialized = false;
461 return 0;
462}
463
466
467void GPUReconstruction::ComputeReuseMax(GPUProcessor* proc)
468{
469 for (auto it = mMemoryReuse1to1.begin(); it != mMemoryReuse1to1.end(); it++) {
470 auto& re = it->second;
471 if (proc == nullptr || re.proc == proc) {
472 GPUMemoryResource& resMain = mMemoryResources[re.res[0]];
473 resMain.mOverrideSize = 0;
474 for (uint32_t i = 0; i < re.res.size(); i++) {
475 GPUMemoryResource& res = mMemoryResources[re.res[i]];
476 resMain.mOverrideSize = std::max<size_t>(resMain.mOverrideSize, ptrDiff(res.SetPointers((void*)1), (char*)1));
477 }
478 }
479 }
480}
481
482size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom)
483{
484 if (mProcessingSettings.debugLevel >= 5) {
485 GPUInfo("Allocating memory %p", (void*)proc);
486 }
487 size_t total = 0;
488 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
489 if (proc == nullptr ? !mMemoryResources[i].mProcessor->mAllocateAndInitializeLate : mMemoryResources[i].mProcessor == proc) {
491 total += AllocateRegisteredMemory(i);
492 } else if (resetCustom && (mMemoryResources[i].mPtr || mMemoryResources[i].mPtrDevice)) {
494 }
495 }
496 }
497 if (mProcessingSettings.debugLevel >= 5) {
498 GPUInfo("Allocating memory done");
499 }
500 return total;
501}
502
504{
505 if (mProcessingSettings.debugLevel >= 5) {
506 GPUInfo("Allocating Permanent Memory");
507 }
508 int32_t total = 0;
509 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
510 if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) {
511 total += AllocateRegisteredMemory(i);
512 }
513 }
516 if (mProcessingSettings.debugLevel >= 5) {
517 GPUInfo("Permanent Memory Done");
518 }
519 return total;
520}
521
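// Sub-allocates one resource from a memory pool: resources that reuse another resource take that resource's
// pointer, stack-type resources are carved from the end of the pool, everything else grows the pool from the
// front; afterwards the pool is checked for overflow against the opposite end (or the total pool size).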
522size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*setPtr)(void*), void*& memorypoolend, const char* device)
523{
524 if (res->mReuse >= 0) {
525 ptr = (&ptr == &res->mPtrDevice) ? mMemoryResources[res->mReuse].mPtrDevice : mMemoryResources[res->mReuse].mPtr;
526 if (ptr == nullptr) {
527 GPUError("Invalid reuse ptr (%s)", res->mName);
528 throw std::bad_alloc();
529 }
530 size_t retVal = ptrDiff((res->*setPtr)(ptr), ptr);
531 if (retVal > mMemoryResources[res->mReuse].mSize) {
532 GPUError("Insufficient reuse memory %lu < %lu (%s) (%s)", mMemoryResources[res->mReuse].mSize, retVal, res->mName, device);
533 throw std::bad_alloc();
534 }
535 if (mProcessingSettings.allocDebugLevel >= 2) {
536 std::cout << "Reused (" << device << ") " << res->mName << ": " << retVal << "\n";
537 }
538 return retVal;
539 }
540 if (memorypool == nullptr) {
541 GPUError("Cannot allocate memory from uninitialized pool");
542 throw std::bad_alloc();
543 }
544 size_t retVal;
545 if ((res->mType & GPUMemoryResource::MEMORY_STACK) && memorypoolend) {
546 retVal = ptrDiff((res->*setPtr)((char*)1), (char*)(1));
547 memorypoolend = (void*)((char*)memorypoolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(memorypoolend));
548 if (retVal < res->mOverrideSize) {
549 retVal = res->mOverrideSize;
550 }
551 retVal += GPUProcessor::getAlignment<GPUCA_MEMALIGN>(retVal);
552 memorypoolend = (char*)memorypoolend - retVal;
553 ptr = memorypoolend;
554 retVal = std::max<size_t>(ptrDiff((res->*setPtr)(ptr), ptr), res->mOverrideSize);
555 } else {
556 ptr = memorypool;
557 memorypool = (char*)((res->*setPtr)(ptr));
558 retVal = ptrDiff(memorypool, ptr);
559 if (retVal < res->mOverrideSize) {
560 retVal = res->mOverrideSize;
561 memorypool = (char*)ptr + res->mOverrideSize;
562 }
563 memorypool = (void*)((char*)memorypool + GPUProcessor::getAlignment<GPUCA_MEMALIGN>(memorypool));
564 }
565 if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)ptrDiff(memorypool, memorybase) > memorysize)) {
566 std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " > " << memorysize << ")\n";
567 throw std::bad_alloc();
568 }
569 if (mProcessingSettings.allocDebugLevel >= 2) {
570 std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n";
571 }
572 return retVal;
573}
574
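// With the INDIVIDUAL allocation strategy each resource gets its own aligned operator new allocation (or
// reuses another resource's buffer); otherwise the resource is served from the global host / device pools,
// or from an external GPUOutputControl buffer / allocator callback when one is provided.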
575void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool)
576{
577 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) {
578 if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
579 if (res->mPtrDevice && res->mReuse < 0) {
580 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
581 }
582 res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
583 if (res->mReuse >= 0) {
584 if (res->mSize > mMemoryResources[res->mReuse].mSize) {
585 GPUError("Invalid reuse, insufficient size: %ld < %ld", (int64_t)mMemoryResources[res->mReuse].mSize, (int64_t)res->mSize);
586 throw std::bad_alloc();
587 }
588 res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice;
589 } else {
590 res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT GPUCA_OPERATOR_NEW_ALIGNMENT);
591 }
592 res->mPtr = GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(res->mPtrDevice);
593 res->SetPointers(res->mPtr);
594 if (mProcessingSettings.allocDebugLevel >= 2) {
595 std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n";
596 }
599 }
600 if ((size_t)res->mPtr % GPUCA_BUFFER_ALIGNMENT) {
601 GPUError("Got buffer with insufficient alignment");
602 throw std::bad_alloc();
603 }
604 }
605 } else {
606 if (res->mPtr != nullptr) {
607 GPUError("Double allocation! (%s)", res->mName);
608 throw std::bad_alloc();
609 }
610 if (IsGPU() && res->mOverrideSize < GPUCA_BUFFER_ALIGNMENT) {
611 res->mOverrideSize = GPUCA_BUFFER_ALIGNMENT;
612 }
613 if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || mProcessingSettings.keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory
614 if (control && control->useExternal()) {
615 if (control->allocator) {
616 res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
617 res->mPtr = control->allocator(CAMath::nextMultipleOf<GPUCA_BUFFER_ALIGNMENT>(res->mSize));
618 res->mSize = std::max<size_t>(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize);
619 if (mProcessingSettings.allocDebugLevel >= 2) {
620 std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n";
621 }
622 } else {
623 void* dummy = nullptr;
624 res->mSize = AllocateRegisteredMemoryHelper(res, res->mPtr, control->ptrCurrent, control->ptrBase, control->size, &GPUMemoryResource::SetPointers, dummy, "host");
625 }
626 } else {
628 }
629 if ((size_t)res->mPtr % GPUCA_BUFFER_ALIGNMENT) {
630 GPUError("Got buffer with insufficient alignment");
631 throw std::bad_alloc();
632 }
633 }
634 if (IsGPU() && (res->mType & GPUMemoryResource::MEMORY_GPU)) {
635 if (res->mProcessor->mLinkedProcessor == nullptr) {
636 GPUError("Device Processor not set (%s)", res->mName);
637 throw std::bad_alloc();
638 }
640
642 res->mSize = size;
643 } else if (size != res->mSize) {
644 GPUError("Inconsistent device memory allocation (%s: device %lu vs %lu)", res->mName, size, res->mSize);
645 throw std::bad_alloc();
646 }
647 if ((size_t)res->mPtrDevice % GPUCA_BUFFER_ALIGNMENT) {
648 GPUError("Got buffer with insufficient alignment");
649 throw std::bad_alloc();
650 }
651 }
653 }
654}
655
660
662{
664 if ((res->mType & GPUMemoryResource::MEMORY_PERMANENT) && res->mPtr != nullptr) {
666 } else {
668 }
669 return res->mReuse >= 0 ? 0 : res->mSize;
670}
671
672void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
673{
675 throw std::runtime_error("Requested invalid memory type for unmanaged allocation");
676 }
677 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
678 mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
679 return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks.back().get());
680 } else {
683 char* retVal;
685 if (pool > poolend) {
686 GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend));
687 throw std::bad_alloc();
688 }
690 if (mProcessingSettings.allocDebugLevel >= 2) {
691 std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n";
692 }
693 return retVal;
694 }
695}
696
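// Volatile device memory is taken from the current position of the device memory pool; mVolatileMemoryStart
// marks where the volatile region begins so that the whole region can be released at once by the
// volatile-memory release further below, which resets it.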
697void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size)
698{
699 if (mVolatileMemoryStart == nullptr) {
701 }
702 if (size == 0) {
703 return nullptr; // Future GPU memory allocation is volatile
704 }
705 char* retVal;
708 GPUError("Insufficient volatile device memory: missing %ld", ptrDiff(mDeviceMemoryPool, mDeviceMemoryPoolEnd));
709 throw std::bad_alloc();
710 }
712 if (mProcessingSettings.allocDebugLevel >= 2) {
713 std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
714 }
715
716 return retVal;
717}
718
719void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
720{
721 if (device) {
722 return AllocateVolatileDeviceMemory(size);
723 }
724 mVolatileChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
725 return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mVolatileChunks.back().get());
726}
727
728void GPUReconstruction::ResetRegisteredMemoryPointers(GPUProcessor* proc)
729{
730 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
731 if (proc == nullptr || mMemoryResources[i].mProcessor == proc) {
733 }
734 }
735}
736
738{
741 void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtr : res->mPtr;
742 size_t size = ptrDiff(res->SetPointers(basePtr), basePtr);
743 if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
744 std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - host - " << res->mName << "\n";
745 throw std::bad_alloc();
746 }
747 }
748 if (IsGPU() && (res->mType & GPUMemoryResource::MEMORY_GPU)) {
749 void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtrDevice : res->mPtrDevice;
750 size_t size = ptrDiff(res->SetDevicePointers(basePtr), basePtr);
751 if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
752 std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - GPU - " << res->mName << "\n";
753 throw std::bad_alloc();
754 }
755 }
756}
757
758void GPUReconstruction::FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom, bool freePermanent)
759{
760 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
761 if ((proc == nullptr || mMemoryResources[i].mProcessor == proc) && (freeCustom || !(mMemoryResources[i].mType & GPUMemoryResource::MEMORY_CUSTOM)) && (freePermanent || !(mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT))) {
763 }
764 }
765}
766
771
773{
774 if (mProcessingSettings.allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) {
775 std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
776 }
777 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) {
778 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
779 }
780 res->mPtr = nullptr;
781 res->mPtrDevice = nullptr;
782}
783
785{
788 mVolatileMemoryStart = nullptr;
789 }
790 if (mProcessingSettings.allocDebugLevel >= 2) {
791 std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
792 }
793}
794
800
805
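// Non-persistent (per reco step) memory is managed as a stack: PushNonPersistentMemory() records the current
// pool state together with a tag, and the matching pop below restores the pool ends and frees any individual
// allocations made since the push.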
806void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
807{
808 if (mProcessingSettings.keepDisplayMemory || mProcessingSettings.disableMemoryReuse) {
809 return;
810 }
811 if (mNonPersistentMemoryStack.size() == 0) {
812 GPUFatal("Trying to pop memory state from empty stack");
813 }
814 if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) {
815 GPUFatal("Tag mismatch when popping non-persistent memory from stack: pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str());
816 }
817 if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) {
818 printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
820 printf("%76s", "");
822 }
823 mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
825 for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) {
826 GPUMemoryResource* res = mNonPersistentIndividualAllocations[i];
827 if (res->mReuse < 0) {
828 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
829 }
830 res->mPtr = nullptr;
831 res->mPtrDevice = nullptr;
832 }
834 mNonPersistentMemoryStack.pop_back();
835}
836
838{
840 throw std::runtime_error("temporary memory stack already blocked");
841 }
844}
845
847{
848 if (mNonPersistentMemoryStack.size()) {
849 throw std::runtime_error("cannot unblock while there is stacked memory");
850 }
853 mHostMemoryPoolBlocked = nullptr;
854 mDeviceMemoryPoolBlocked = nullptr;
855}
856
858{
859 mMemoryResources[res].mPtr = ptr;
860}
861
863{
864 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
867 }
868 }
869 mUnmanagedChunks.clear();
872 mVolatileMemoryStart = nullptr;
873 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
874 mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent);
875 mDeviceMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mDeviceMemoryPermanent);
878 } else {
880 }
881}
882
888
890{
891 printf("Maximum Memory Allocation: Host %'zu / Device %'zu\n", mHostMemoryUsedMax, mDeviceMemoryUsedMax);
892}
893
895{
896 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
897 printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n",
900 mMemoryResources.size());
901 }
902}
903
905{
906 std::map<std::string, std::array<size_t, 3>> sizes;
907 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
908 auto& res = mMemoryResources[i];
909 if (res.mReuse >= 0) {
910 continue;
911 }
912 auto& x = sizes[res.mName];
913 if (res.mPtr) {
914 x[0] += res.mSize;
915 }
916 if (res.mPtrDevice) {
917 x[1] += res.mSize;
918 }
920 x[2] = 1;
921 }
922 }
923 printf("%59s CPU / %9s GPU\n", "", "");
924 for (auto it = sizes.begin(); it != sizes.end(); it++) {
925 printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
926 }
928 for (uint32_t i = 0; i < mChains.size(); i++) {
929 mChains[i]->PrintMemoryStatistics();
930 }
931}
932
933int32_t GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size)
934{
935 if (mProcessingSettings.noGPUMemoryRegistration) {
936 return 0;
937 }
938 int32_t retVal = registerMemoryForGPU_internal(ptr, size);
939 if (retVal == 0) {
940 mRegisteredMemoryPtrs.emplace(ptr);
941 }
942 return retVal;
943}
944
945int32_t GPUReconstruction::unregisterMemoryForGPU(const void* ptr)
946{
947 if (mProcessingSettings.noGPUMemoryRegistration) {
948 return 0;
949 }
950 const auto& pos = mRegisteredMemoryPtrs.find(ptr);
951 if (pos != mRegisteredMemoryPtrs.end()) {
952 mRegisteredMemoryPtrs.erase(pos);
953 return unregisterMemoryForGPU_internal(ptr);
954 }
955 return 1;
956}
957
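// Maps a one-hot step bit to its index (position of the highest set bit via count-leading-zeros); if the
// value is zero or out of range, it returns -1 or throws, depending on validCheck.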
958namespace o2::gpu::internal
959{
960namespace // anonymous
961{
962template <class T>
963constexpr static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num")
964{
965 static_assert(sizeof(step) == sizeof(uint32_t), "Invalid step enum size");
966 int32_t retVal = 8 * sizeof(uint32_t) - 1 - CAMath::Clz((uint32_t)step);
967 if ((uint32_t)step == 0 || retVal >= N) {
968 if (!validCheck) {
969 return -1;
970 }
971 throw std::runtime_error(err);
972 }
973 return retVal;
974}
975} // anonymous namespace
976} // namespace o2::gpu::internal
977
978int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); }
979int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); }
980
982{
983 if (!mInitialized || !mProcessingSettings.doublePipeline || mMaster != nullptr || !mSlaves.size()) {
984 throw std::invalid_argument("Cannot start double pipeline mode");
985 }
986 if (mProcessingSettings.debugLevel >= 3) {
987 GPUInfo("Pipeline worker started");
988 }
989 bool terminate = false;
990 while (!terminate) {
991 {
992 std::unique_lock<std::mutex> lk(mPipelineContext->mutex);
993 mPipelineContext->cond.wait(lk, [this] { return this->mPipelineContext->queue.size() > 0; });
994 }
995 GPUReconstructionPipelineQueue* q;
996 {
997 std::lock_guard<std::mutex> lk(mPipelineContext->mutex);
998 q = mPipelineContext->queue.front();
999 mPipelineContext->queue.pop();
1000 }
1001 if (q->op == 1) {
1002 terminate = true;
1003 } else {
1004 q->retVal = q->chain->RunChain();
1005 }
1006 {
1007 std::lock_guard<std::mutex> lk(q->m);
1008 q->done = true;
1009 }
1010 q->c.notify_one();
1011 }
1012 if (mProcessingSettings.debugLevel >= 3) {
1013 GPUInfo("Pipeline worker ended");
1014 }
1015}
1016
1021
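// Queues a work item (or a termination request) for the pipeline worker and waits until the worker signals
// completion through the item's condition variable.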
1022int32_t GPUReconstruction::EnqueuePipeline(bool terminate)
1023{
1026 std::unique_ptr<GPUReconstructionPipelineQueue> qu(new GPUReconstructionPipelineQueue);
1027 GPUReconstructionPipelineQueue* q = qu.get();
1028 q->chain = terminate ? nullptr : mChains[0].get();
1029 q->op = terminate ? 1 : 0;
1030 std::unique_lock<std::mutex> lkdone(q->m);
1031 {
1032 std::lock_guard<std::mutex> lkpipe(rec->mPipelineContext->mutex);
1033 if (rec->mPipelineContext->terminate) {
1034 throw std::runtime_error("Must not enqueue work after termination request");
1035 }
1036 rec->mPipelineContext->queue.push(q);
1037 rec->mPipelineContext->terminate = terminate;
1038 rec->mPipelineContext->cond.notify_one();
1039 }
1040 q->c.wait(lkdone, [&q]() { return q->done; });
1041 if (q->retVal) {
1042 return q->retVal;
1043 }
1044 if (terminate) {
1045 return 0;
1046 } else {
1047 return mChains[0]->FinalizePipelinedProcessing();
1048 }
1049}
1050
1052{
1054 std::lock_guard<std::mutex> lk(rec->mPipelineContext->mutex);
1055 return rec->mPipelineContext->queue.size() && rec->mPipelineContext->queue.front()->op == 0 ? rec->mPipelineContext->queue.front()->chain : nullptr;
1056}
1057
1058void GPUReconstruction::PrepareEvent() // TODO: Clean this up, this should not be called from chainTracking but before
1059{
1061 for (uint32_t i = 0; i < mChains.size(); i++) {
1062 mChains[i]->PrepareEvent();
1063 }
1064 for (uint32_t i = 0; i < mProcessors.size(); i++) {
1065 if (mProcessors[i].proc->mAllocateAndInitializeLate) {
1066 continue;
1067 }
1068 (mProcessors[i].proc->*(mProcessors[i].SetMaxData))(mHostConstantMem->ioPtrs);
1069 if (mProcessors[i].proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && mProcessors[i].proc->mLinkedProcessor) {
1070 (mProcessors[i].proc->mLinkedProcessor->*(mProcessors[i].SetMaxData))(mHostConstantMem->ioPtrs);
1071 }
1072 }
1073 ComputeReuseMax(nullptr);
1074 AllocateRegisteredMemory(nullptr);
1075}
1076
1077int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, std::vector<std::array<uint32_t, 4>>* fillErrors)
1078{
1079 int32_t retVal = 0;
1080 for (uint32_t i = 0; i < mChains.size(); i++) {
1081 if (mChains[i]->CheckErrorCodes(cpuOnly, forceShowErrors, fillErrors)) {
1082 retVal++;
1083 }
1084 }
1085 return retVal;
1086}
1087
1088int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError)
1089{
1090 if (error == 0 || !GPUChkErrInternal(error, file, line)) {
1091 return 0;
1092 }
1093 if (failOnError) {
1094 if (mInitialized && mInErrorHandling == false) {
1095 mInErrorHandling = true;
1096 CheckErrorCodes(false, true);
1097 }
1098 throw std::runtime_error("GPU Backend Failure");
1099 }
1100 return 1;
1101}
1102
1103void GPUReconstruction::DumpSettings(const char* dir)
1104{
1105 std::string f;
1106 f = dir;
1107 f += "settings.dump";
1108 DumpStructToFile(&mGRPSettings, f.c_str());
1109 for (uint32_t i = 0; i < mChains.size(); i++) {
1110 mChains[i]->DumpSettings(dir);
1111 }
1112}
1113
1114void GPUReconstruction::UpdateDynamicSettings(const GPUSettingsRecDynamic* d)
1115{
1116 UpdateSettings(nullptr, nullptr, d);
1117}
1118
1119void GPUReconstruction::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p, const GPUSettingsRecDynamic* d)
1120{
1121 if (g) {
1122 mGRPSettings = *g;
1123 }
1124 if (p) {
1125 mProcessingSettings.debugLevel = p->debugLevel;
1126 mProcessingSettings.resetTimers = p->resetTimers;
1127 }
1128 GPURecoStepConfiguration* w = nullptr;
1130 w = &mRecoSteps;
1131 }
1132 param().UpdateSettings(g, p, w, d);
1133 if (mInitialized) {
1134 WriteConstantParams();
1135 }
1136}
1137
1138int32_t GPUReconstruction::ReadSettings(const char* dir)
1139{
1140 std::string f;
1141 f = dir;
1142 f += "settings.dump";
1144 if (ReadStructFromFile(f.c_str(), &mGRPSettings)) {
1145 return 1;
1146 }
1148 for (uint32_t i = 0; i < mChains.size(); i++) {
1149 mChains[i]->ReadSettings(dir);
1150 }
1151 return 0;
1152}
1153
1154void GPUReconstruction::SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow)
1155{
1156#ifdef GPUCA_O2_LIB
1158 config.ReadConfigurableParam(config);
1159 config.configGRP.solenoidBzNominalGPU = solenoidBzNominalGPU;
1160 SetSettings(&config.configGRP, &config.configReconstruction, &config.configProcessing, workflow);
1161#else
1162 GPUSettingsGRP grp;
1163 grp.solenoidBzNominalGPU = solenoidBzNominalGPU;
1164 SetSettings(&grp, nullptr, nullptr, workflow);
1165#endif
1166}
1167
1168void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec, const GPUSettingsProcessing* proc, const GPURecoStepConfiguration* workflow)
1169{
1170 if (mInitialized) {
1171 GPUError("Cannot update settings while initialized");
1172 throw std::runtime_error("Settings updated while initialized");
1173 }
1174 mGRPSettings = *grp;
1175 if (proc) {
1176 mProcessingSettings = *proc;
1177 }
1178 if (workflow) {
1179 mRecoSteps.steps = workflow->steps;
1180 mRecoSteps.stepsGPUMask = workflow->stepsGPUMask;
1181 mRecoSteps.inputs = workflow->inputs;
1182 mRecoSteps.outputs = workflow->outputs;
1183 }
1184 param().SetDefaults(&mGRPSettings, rec, proc, workflow);
1185}
1186
1188{
1189 GPUOutputControl outputControl;
1190 outputControl.set(ptr, size);
1191 SetOutputControl(outputControl);
1192}
1193
1195{
1197}
1198
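// Allocator adapter handing out volatile device memory, intended for temporary device buffers
// (e.g. thrust scratch space).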
1199ThrustVolatileAllocator::ThrustVolatileAllocator(GPUReconstruction* r)
1200{
1201 mAlloc = [r](size_t n) { return (char*)r->AllocateVolatileDeviceMemory(n); };
1202}