GPUReconstruction.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include <cstring>
16#include <cstdio>
17#include <iostream>
18#include <mutex>
19#include <string>
20#include <map>
21#include <queue>
22#include <mutex>
23#include <condition_variable>
24#include <array>
25
26#include "GPUReconstruction.h"
29#include "GPUReconstructionIO.h"
30#include "GPUROOTDumpCore.h"
31#include "GPUConfigDump.h"
32#include "GPUChainTracking.h"
33
34#include "GPUMemoryResource.h"
35#include "GPUChain.h"
37
38#include "GPULogging.h"
39#include "utils/strtag.h"
40
41#ifdef GPUCA_O2_LIB
43#endif
44
46
47namespace o2::gpu
48{
49namespace // anonymous
50{
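// One work item for the double-pipeline worker thread: either a chain to run
// (op == 0) or a termination request (op == 1), with a mutex / condition
// variable pair used to report completion and the return value back to the
// enqueuing thread.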
51struct GPUReconstructionPipelineQueue {
52 uint32_t op = 0; // For now, 0 = process, 1 = terminate
53 GPUChain* chain = nullptr;
54 std::mutex m;
55 std::condition_variable c;
56 bool done = false;
57 int32_t retVal = 0;
58};
59} // namespace
60
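// Shared state of the double pipeline: the FIFO of queued work items,
// protected by a mutex and condition variable, plus the termination flag.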
61struct GPUReconstructionPipelineContext {
62 std::queue<GPUReconstructionPipelineQueue*> queue;
63 std::mutex mutex;
64 std::condition_variable cond;
65 bool terminate = false;
66};
67} // namespace o2::gpu
68
69using namespace o2::gpu;
70
71constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[];
72constexpr const char* const GPUReconstruction::IOTYPENAMES[];
74
75static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; }
76
77GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg)
78{
79 if (cfg.master) {
80 if (cfg.master->mDeviceBackendSettings.deviceType != cfg.deviceType) {
81 throw std::invalid_argument("device type of master and slave GPUReconstruction does not match");
82 }
83 if (cfg.master->mMaster) {
84 throw std::invalid_argument("Cannot be slave to a slave");
85 }
86 mMaster = cfg.master;
87 cfg.master->mSlaves.emplace_back(this);
88 }
89 new (&mProcessingSettings) GPUSettingsProcessing;
93 for (uint32_t i = 0; i < NSECTORS; i++) {
94 processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place
96 }
97#ifndef GPUCA_NO_ROOT
98 mROOTDump = GPUROOTDumpCore::getAndCreate();
99#endif
100}
101
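// The destructor only checks for a proper shutdown; Exit() must have been called before.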
102GPUReconstruction::~GPUReconstruction()
103{
104 if (mInitialized) {
105 GPUError("GPU Reconstruction not properly deinitialized!");
106 }
107}
108
109void GPUReconstruction::GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame)
110{
111 if (trackerTraits) {
112 trackerTraits->reset(new o2::its::TrackerTraits);
113 }
114 if (vertexerTraits) {
115 vertexerTraits->reset(new o2::its::VertexerTraits);
116 }
117 if (timeFrame) {
118 timeFrame->reset(new o2::its::TimeFrame);
119 }
120}
121
123{
124 return std::max<int32_t>(0, tbb::this_task_arena::current_thread_index());
125}
126
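// Top-level initialization of a master instance: run the pre-device phase for
// this instance and all slaves, initialize the device(s), hand out the memory
// pools to the slaves, and finish with the post-device phase.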
127int32_t GPUReconstruction::Init()
128{
129 if (mMaster) {
130 throw std::runtime_error("Must not call init on slave!");
131 }
132 int32_t retVal = InitPhaseBeforeDevice();
133 if (retVal) {
134 return retVal;
135 }
136 for (uint32_t i = 0; i < mSlaves.size(); i++) {
137 retVal = mSlaves[i]->InitPhaseBeforeDevice();
138 if (retVal) {
139 GPUError("Error initialization slave (before deviceinit)");
140 return retVal;
141 }
142 mNStreams = std::max(mNStreams, mSlaves[i]->mNStreams);
145 }
146 if (InitDevice()) {
147 return 1;
148 }
152 return 1;
153 }
154 for (uint32_t i = 0; i < mSlaves.size(); i++) {
155 mSlaves[i]->mDeviceMemoryBase = mDeviceMemoryPermanent;
156 mSlaves[i]->mHostMemoryBase = mHostMemoryPermanent;
157 mSlaves[i]->mDeviceMemorySize = mDeviceMemorySize - ptrDiff(mSlaves[i]->mDeviceMemoryBase, mDeviceMemoryBase);
158 mSlaves[i]->mHostMemorySize = mHostMemorySize - ptrDiff(mSlaves[i]->mHostMemoryBase, mHostMemoryBase);
159 mSlaves[i]->mHostMemoryPoolEnd = mHostMemoryPoolEnd;
160 mSlaves[i]->mDeviceMemoryPoolEnd = mDeviceMemoryPoolEnd;
161 if (mSlaves[i]->InitDevice()) {
162 GPUError("Error initialization slave (deviceinit)");
163 return 1;
164 }
166 GPUError("Error initialization slave (permanent memory)");
167 return 1;
168 }
169 mDeviceMemoryPermanent = mSlaves[i]->mDeviceMemoryPermanent;
170 mHostMemoryPermanent = mSlaves[i]->mHostMemoryPermanent;
171 }
173 if (retVal) {
174 return retVal;
175 }
177 for (uint32_t i = 0; i < mSlaves.size(); i++) {
178 mSlaves[i]->mDeviceMemoryPermanent = mDeviceMemoryPermanent;
179 mSlaves[i]->mHostMemoryPermanent = mHostMemoryPermanent;
180 retVal = mSlaves[i]->InitPhaseAfterDevice();
181 if (retVal) {
182 GPUError("Error initialization slave (after device init)");
183 return retVal;
184 }
185 mSlaves[i]->ClearAllocatedMemory();
186 }
187 return 0;
188}
189
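// Default number of host threads: TBB_NUM_THREADS takes precedence over
// OMP_NUM_THREADS, falling back to TBB's default concurrency.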
190namespace o2::gpu::internal
191{
192static uint32_t getDefaultNThreads()
193{
194 const char* tbbEnv = getenv("TBB_NUM_THREADS");
195 uint32_t tbbNum = tbbEnv ? atoi(tbbEnv) : 0;
196 if (tbbNum) {
197 return tbbNum;
198 }
199 const char* ompEnv = getenv("OMP_NUM_THREADS");
200 uint32_t ompNum = ompEnv ? atoi(ompEnv) : 0;
201 if (ompNum) {
202 return ompNum;
203 }
204 return tbb::info::default_concurrency();
205}
206} // namespace o2::gpu::internal
207
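// Pre-device initialization: dump and sanitize the processing settings,
// resolve interdependent options (determinism, pipeline, clusterer lanes,
// thread counts), set up TBB threading, sum up the memory requirements of all
// chains, and register the memory allocations of all processors.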
208int32_t GPUReconstruction::InitPhaseBeforeDevice()
209{
210 if (mProcessingSettings.printSettings) {
211 if (mSlaves.size() || mMaster) {
212 printf("\nConfig Dump %s\n", mMaster ? "Slave" : "Master");
213 }
214 const GPUChainTracking* chTrk;
215 for (uint32_t i = 0; i < mChains.size(); i++) {
216 if ((chTrk = dynamic_cast<GPUChainTracking*>(mChains[i].get()))) {
217 break;
218 }
219 }
220 GPUConfigDump::dumpConfig(&param().rec, &mProcessingSettings, chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? chTrk->GetEventDisplayConfig() : nullptr, &mDeviceBackendSettings, &mRecoSteps);
221 }
224 if (!IsGPU()) {
225 mRecoSteps.stepsGPUMask.set((uint8_t)0);
226 }
227
228 if (mProcessingSettings.forceMemoryPoolSize >= 1024 || mProcessingSettings.forceHostMemoryPoolSize >= 1024) {
230 }
231 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) {
233 }
234 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
235 mProcessingSettings.forceMemoryPoolSize = mProcessingSettings.forceHostMemoryPoolSize = 0;
236 }
237 if (mProcessingSettings.debugLevel >= 4) {
238 mProcessingSettings.keepAllMemory = true;
239 }
240 if (mProcessingSettings.debugLevel >= 5 && mProcessingSettings.allocDebugLevel < 2) {
241 mProcessingSettings.allocDebugLevel = 2;
242 }
243 if (mProcessingSettings.eventDisplay || mProcessingSettings.keepAllMemory) {
244 mProcessingSettings.keepDisplayMemory = true;
245 }
246 if (mProcessingSettings.debugLevel < 6) {
247 mProcessingSettings.debugMask = 0;
248 }
249 if (mProcessingSettings.debugLevel < 1) {
250 mProcessingSettings.deviceTimers = false;
251 }
252 if (mProcessingSettings.debugLevel > 0) {
253 mProcessingSettings.recoTaskTiming = true;
254 }
255 if (mProcessingSettings.deterministicGPUReconstruction == -1) {
256 mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6;
257 }
258 if (mProcessingSettings.deterministicGPUReconstruction) {
259#ifndef GPUCA_NO_FAST_MATH
260 GPUError("Warning, deterministicGPUReconstruction requires GPUCA_NO_FAST_MATH to be fully deterministic; without it, only most of the indeterminism from concurrency is removed, while floating point effects remain!");
261#endif
262 mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU;
263 param().rec.tpc.nWaysOuter = true;
264 if (param().rec.tpc.looperInterpolationInExtraPass == -1) {
265 param().rec.tpc.looperInterpolationInExtraPass = 0;
266 }
267 if (mProcessingSettings.createO2Output > 1) {
268 mProcessingSettings.createO2Output = 1;
269 }
270 }
271 if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) {
272 mProcessingSettings.nTPCClustererLanes = 1;
273 if (mProcessingSettings.trackletConstructorInPipeline < 0) {
274 mProcessingSettings.trackletConstructorInPipeline = 1;
275 }
276 if (mProcessingSettings.trackletSelectorInPipeline < 0) {
277 mProcessingSettings.trackletSelectorInPipeline = 1;
278 }
279 if (mProcessingSettings.trackletSelectorSectors < 0) {
280 mProcessingSettings.trackletSelectorSectors = 1;
281 }
282 }
283 if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) {
284 mProcessingSettings.createO2Output = 1;
285 }
286 if (!mProcessingSettings.createO2Output || !IsGPU()) {
287 mProcessingSettings.clearO2OutputFromGPU = false;
288 }
289 if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) {
290 mProcessingSettings.mergerSortTracks = false;
291 }
292
293 if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) {
294 mProcessingSettings.delayedOutput = false;
295 }
296
299 if (!mProcessingSettings.trackletConstructorInPipeline) {
300 mProcessingSettings.trackletSelectorInPipeline = false;
301 }
302 if (!mProcessingSettings.rtc.enable) {
303 mProcessingSettings.rtc.optConstexpr = false;
304 }
305
306 mMemoryScalers->factor = mProcessingSettings.memoryScalingFactor;
307 mMemoryScalers->conservative = mProcessingSettings.conservativeMemoryEstimate;
308 mMemoryScalers->returnMaxVal = mProcessingSettings.forceMaxMemScalers != 0;
309 if (mProcessingSettings.forceMaxMemScalers > 1) {
310 mMemoryScalers->rescaleMaxMem(mProcessingSettings.forceMaxMemScalers);
311 }
312
313 if (mProcessingSettings.nHostThreads != -1 && mProcessingSettings.ompThreads != -1) {
314 GPUFatal("Must not use both nHostThreads and ompThreads at the same time!");
315 } else if (mProcessingSettings.ompThreads != -1) {
316 mProcessingSettings.nHostThreads = mProcessingSettings.ompThreads;
317 GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!");
318 }
319
320 if (mProcessingSettings.nHostThreads <= 0) {
321 mProcessingSettings.nHostThreads = internal::getDefaultNThreads();
322 } else {
323 mProcessingSettings.autoAdjustHostThreads = false;
324 }
326 if (mMaster == nullptr) {
327 mThreading = std::make_shared<GPUReconstructionThreading>();
328 mThreading->control = std::make_unique<tbb::global_control>(tbb::global_control::max_allowed_parallelism, mMaxHostThreads);
329 mThreading->allThreads = std::make_unique<tbb::task_arena>(mMaxHostThreads);
330 mThreading->activeThreads = std::make_unique<tbb::task_arena>(mMaxHostThreads);
331 } else {
333 }
335 if (IsGPU()) {
336 mNStreams = std::max<int32_t>(mProcessingSettings.nStreams, 3);
337 }
338
339 if (mProcessingSettings.nTPCClustererLanes == -1) {
340 mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max<int32_t>(1, std::min<int32_t>(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min<int32_t>(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads));
341 }
342 if (mProcessingSettings.overrideClusterizerFragmentLen == -1) {
343 mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST;
344 }
345 if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) {
346 GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes);
347 mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS;
348 }
349
350 if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) {
351 GPUError("Must use double pipeline mode only with exactly one chain that must support it");
352 return 1;
353 }
354
355 if (mMaster == nullptr && mProcessingSettings.doublePipeline) {
357 }
358
360 for (uint32_t i = 0; i < mChains.size(); i++) {
361 if (mChains[i]->EarlyConfigure()) {
362 return 1;
363 }
364 mChains[i]->RegisterPermanentMemoryAndProcessors();
365 size_t memPrimary, memPageLocked;
366 mChains[i]->MemorySize(memPrimary, memPageLocked);
367 if (!IsGPU() || mOutputControl.useInternal()) {
368 memPageLocked = memPrimary;
369 }
370 mDeviceMemorySize += memPrimary;
371 mHostMemorySize += memPageLocked;
372 }
373 if (mProcessingSettings.forceMemoryPoolSize && mProcessingSettings.forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) {
374 mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
375 } else if (mProcessingSettings.forceMemoryPoolSize > 2) {
376 mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
377 if (!IsGPU() || mOutputControl.useInternal()) {
379 }
380 }
381 if (mProcessingSettings.forceHostMemoryPoolSize) {
382 mHostMemorySize = mProcessingSettings.forceHostMemoryPoolSize;
383 }
384
385 for (uint32_t i = 0; i < mProcessors.size(); i++) {
386 (mProcessors[i].proc->*(mProcessors[i].RegisterMemoryAllocation))();
387 }
388
389 return 0;
390}
391
393{
394 if (IsGPU()) {
395 for (uint32_t i = 0; i < mChains.size(); i++) {
396 mChains[i]->RegisterGPUProcessors();
397 }
398 }
400 return 0;
401}
402
404{
405 if (mProcessingSettings.forceMaxMemScalers <= 1 && mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
407 }
408 for (uint32_t i = 0; i < mChains.size(); i++) {
409 if (mChains[i]->Init()) {
410 return 1;
411 }
412 }
413 for (uint32_t i = 0; i < mProcessors.size(); i++) {
414 (mProcessors[i].proc->*(mProcessors[i].InitializeProcessor))();
415 }
416
417 WriteConstantParams(); // Initialize with initial values, can optionally be updated later
418
419 mInitialized = true;
420 return 0;
421}
422
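// Upload the current GPUParam into the constant memory of the device (only does something on GPU).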
424{
425 if (IsGPU()) {
426 const auto threadContext = GetThreadContext();
427 WriteToConstantMemory(ptrDiff(&processors()->param, processors()), &param(), sizeof(param()), -1);
428 }
429}
430
432{
433 for (uint32_t i = 0; i < mChains.size(); i++) {
434 mChains[i]->Finalize();
435 }
436 return 0;
437}
438
440{
441 if (!mInitialized) {
442 return 1;
443 }
444 for (uint32_t i = 0; i < mSlaves.size(); i++) {
445 if (mSlaves[i]->Exit()) {
446 GPUError("Error exiting slave");
447 }
448 }
449
450 mChains.clear(); // Make sure we destroy a possible ITS GPU tracker before we call the destructors
451 mHostConstantMem.reset(); // Reset these explicitly before the destruction of other members unloads the library
452 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
453 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
454 if (mMemoryResources[i].mReuse >= 0) {
455 continue;
456 }
457 operator delete(mMemoryResources[i].mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
458 mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr;
459 }
460 }
461 mMemoryResources.clear();
462 if (mInitialized) {
463 ExitDevice();
464 }
465 mInitialized = false;
466 return 0;
467}
468
471
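// For each 1-to-1 memory reuse group (optionally restricted to one processor),
// size the main resource to the maximum requirement of all group members.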
472void GPUReconstruction::ComputeReuseMax(GPUProcessor* proc)
473{
474 for (auto it = mMemoryReuse1to1.begin(); it != mMemoryReuse1to1.end(); it++) {
475 auto& re = it->second;
476 if (proc == nullptr || re.proc == proc) {
477 GPUMemoryResource& resMain = mMemoryResources[re.res[0]];
478 resMain.mOverrideSize = 0;
479 for (uint32_t i = 0; i < re.res.size(); i++) {
480 GPUMemoryResource& res = mMemoryResources[re.res[i]];
481 resMain.mOverrideSize = std::max<size_t>(resMain.mOverrideSize, ptrDiff(res.SetPointers((void*)1), (char*)1));
482 }
483 }
484 }
485}
486
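// Allocate all registered resources of the given processor, or of all
// processors that are not allocated late when proc is nullptr.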
488{
489 if (mProcessingSettings.debugLevel >= 5) {
490 GPUInfo("Allocating memory %p", (void*)proc);
491 }
492 size_t total = 0;
493 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
494 if (proc == nullptr ? !mMemoryResources[i].mProcessor->mAllocateAndInitializeLate : mMemoryResources[i].mProcessor == proc) {
496 total += AllocateRegisteredMemory(i);
497 } else if (resetCustom && (mMemoryResources[i].mPtr || mMemoryResources[i].mPtrDevice)) {
499 }
500 }
501 }
502 if (mProcessingSettings.debugLevel >= 5) {
503 GPUInfo("Allocating memory done");
504 }
505 return total;
506}
507
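// Allocate every resource flagged MEMORY_PERMANENT that has not been allocated yet.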
509{
510 if (mProcessingSettings.debugLevel >= 5) {
511 GPUInfo("Allocating Permanent Memory");
512 }
513 int32_t total = 0;
514 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
515 if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) {
516 total += AllocateRegisteredMemory(i);
517 }
518 }
521 if (mProcessingSettings.debugLevel >= 5) {
522 GPUInfo("Permanent Memory Done");
523 }
524 return total;
525}
526
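// Carve a single resource out of a linear memory pool: reused resources alias
// their partner's buffer, stack-type resources are taken from the end of the
// pool, all others from the front; throws std::bad_alloc when the pool is exhausted.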
527size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*setPtr)(void*), void*& memorypoolend, const char* device)
528{
529 if (res->mReuse >= 0) {
530 ptr = (&ptr == &res->mPtrDevice) ? mMemoryResources[res->mReuse].mPtrDevice : mMemoryResources[res->mReuse].mPtr;
531 if (ptr == nullptr) {
532 GPUError("Invalid reuse ptr (%s)", res->mName);
533 throw std::bad_alloc();
534 }
535 size_t retVal = ptrDiff((res->*setPtr)(ptr), ptr);
536 if (retVal > mMemoryResources[res->mReuse].mSize) {
537 GPUError("Insufficient reuse memory %lu < %lu (%s) (%s)", mMemoryResources[res->mReuse].mSize, retVal, res->mName, device);
538 throw std::bad_alloc();
539 }
540 if (mProcessingSettings.allocDebugLevel >= 2) {
541 std::cout << "Reused (" << device << ") " << res->mName << ": " << retVal << "\n";
542 }
543 return retVal;
544 }
545 if (memorypool == nullptr) {
546 GPUError("Cannot allocate memory from uninitialized pool");
547 throw std::bad_alloc();
548 }
549 size_t retVal;
550 if ((res->mType & GPUMemoryResource::MEMORY_STACK) && memorypoolend) {
551 retVal = ptrDiff((res->*setPtr)((char*)1), (char*)(1));
552 memorypoolend = (void*)((char*)memorypoolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(memorypoolend));
553 if (retVal < res->mOverrideSize) {
554 retVal = res->mOverrideSize;
555 }
556 retVal += GPUProcessor::getAlignment<GPUCA_MEMALIGN>(retVal);
557 memorypoolend = (char*)memorypoolend - retVal;
558 ptr = memorypoolend;
559 retVal = std::max<size_t>(ptrDiff((res->*setPtr)(ptr), ptr), res->mOverrideSize);
560 } else {
561 ptr = memorypool;
562 memorypool = (char*)((res->*setPtr)(ptr));
563 retVal = ptrDiff(memorypool, ptr);
564 if (retVal < res->mOverrideSize) {
565 retVal = res->mOverrideSize;
566 memorypool = (char*)ptr + res->mOverrideSize;
567 }
568 memorypool = (void*)((char*)memorypool + GPUProcessor::getAlignment<GPUCA_MEMALIGN>(memorypool));
569 }
570 if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)ptrDiff(memorypool, memorybase) > memorysize)) {
571 std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " < " << memorysize << "\n";
572 throw std::bad_alloc();
573 }
574 if (mProcessingSettings.allocDebugLevel >= 2) {
575 std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n";
576 }
577 return retVal;
578}
579
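// Core allocation routine: with the individual allocation strategy each
// resource gets its own (aligned) heap buffer, otherwise host and device
// buffers are taken from the global pools or from an external output control.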
581{
582 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) {
583 if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
584 if (res->mPtrDevice && res->mReuse < 0) {
585 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
586 }
587 res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
588 if (res->mReuse >= 0) {
589 if (res->mSize > mMemoryResources[res->mReuse].mSize) {
590 GPUError("Invalid reuse, insufficient size: %ld < %ld", (int64_t)mMemoryResources[res->mReuse].mSize, (int64_t)res->mSize);
591 throw std::bad_alloc();
592 }
593 res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice;
594 } else {
595 res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT GPUCA_OPERATOR_NEW_ALIGNMENT);
596 }
597 res->mPtr = GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(res->mPtrDevice);
598 res->SetPointers(res->mPtr);
599 if (mProcessingSettings.allocDebugLevel >= 2) {
600 std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n";
601 }
604 }
605 if ((size_t)res->mPtr % GPUCA_BUFFER_ALIGNMENT) {
606 GPUError("Got buffer with insufficient alignment");
607 throw std::bad_alloc();
608 }
609 }
610 } else {
611 if (res->mPtr != nullptr) {
612 GPUError("Double allocation! (%s)", res->mName);
613 throw std::bad_alloc();
614 }
615 if (IsGPU() && res->mOverrideSize < GPUCA_BUFFER_ALIGNMENT) {
616 res->mOverrideSize = GPUCA_BUFFER_ALIGNMENT;
617 }
618 if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || mProcessingSettings.keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory
619 if (control && control->useExternal()) {
620 if (control->allocator) {
621 res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
622 res->mPtr = control->allocator(CAMath::nextMultipleOf<GPUCA_BUFFER_ALIGNMENT>(res->mSize));
623 res->mSize = std::max<size_t>(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize);
624 if (mProcessingSettings.allocDebugLevel >= 2) {
625 std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n";
626 }
627 } else {
628 void* dummy = nullptr;
629 res->mSize = AllocateRegisteredMemoryHelper(res, res->mPtr, control->ptrCurrent, control->ptrBase, control->size, &GPUMemoryResource::SetPointers, dummy, "host");
630 }
631 } else {
633 }
634 if ((size_t)res->mPtr % GPUCA_BUFFER_ALIGNMENT) {
635 GPUError("Got buffer with insufficient alignment");
636 throw std::bad_alloc();
637 }
638 }
639 if (IsGPU() && (res->mType & GPUMemoryResource::MEMORY_GPU)) {
640 if (res->mProcessor->mLinkedProcessor == nullptr) {
641 GPUError("Device Processor not set (%s)", res->mName);
642 throw std::bad_alloc();
643 }
645
647 res->mSize = size;
648 } else if (size != res->mSize) {
649 GPUError("Inconsistent device memory allocation (%s: device %lu vs %lu)", res->mName, size, res->mSize);
650 throw std::bad_alloc();
651 }
652 if ((size_t)res->mPtrDevice % GPUCA_BUFFER_ALIGNMENT) {
653 GPUError("Got buffer with insufficient alignment");
654 throw std::bad_alloc();
655 }
656 }
658 }
659}
660
665
667{
669 if ((res->mType & GPUMemoryResource::MEMORY_PERMANENT) && res->mPtr != nullptr) {
671 } else {
673 }
674 return res->mReuse >= 0 ? 0 : res->mSize;
675}
676
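// Unmanaged allocations: individually owned chunks under the individual
// allocation strategy, otherwise carved directly from the host or device pool.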
677void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
678{
679 if (type != GPUMemoryResource::MEMORY_HOST && type != GPUMemoryResource::MEMORY_GPU) {
680 throw std::runtime_error("Requested invalid memory type for unmanaged allocation");
681 }
682 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
683 mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
684 return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks.back().get());
685 } else {
688 char* retVal;
690 if (pool > poolend) {
691 GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend));
692 throw std::bad_alloc();
693 }
695 if (mProcessingSettings.allocDebugLevel >= 2) {
696 std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n";
697 }
698 return retVal;
699 }
700}
701
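// Volatile device memory is taken from the current device pool position and
// is returned collectively when the volatile region is released.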
703{
704 if (mVolatileMemoryStart == nullptr) {
706 }
707 if (size == 0) {
708 return nullptr; // Future GPU memory allocation is volatile
709 }
710 char* retVal;
713 GPUError("Insufficient volatile device memory: missing %ld", ptrDiff(mDeviceMemoryPool, mDeviceMemoryPoolEnd));
714 throw std::bad_alloc();
715 }
717 if (mProcessingSettings.allocDebugLevel >= 2) {
718 std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
719 }
720
721 return retVal;
722}
723
725{
726 if (device) {
728 }
729 mVolatileChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
730 return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mVolatileChunks.back().get());
731}
732
734{
735 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
736 if (proc == nullptr || mMemoryResources[i].mProcessor == proc) {
738 }
739 }
740}
741
743{
746 void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtr : res->mPtr;
747 size_t size = ptrDiff(res->SetPointers(basePtr), basePtr);
748 if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
749 std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - host - " << res->mName << "\n";
750 throw std::bad_alloc();
751 }
752 }
753 if (IsGPU() && (res->mType & GPUMemoryResource::MEMORY_GPU)) {
754 void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtrDevice : res->mPtrDevice;
755 size_t size = ptrDiff(res->SetDevicePointers(basePtr), basePtr);
756 if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
757 std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - GPU - " << res->mName << "\n";
758 throw std::bad_alloc();
759 }
760 }
761}
762
763void GPUReconstruction::FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom, bool freePermanent)
764{
765 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
766 if ((proc == nullptr || mMemoryResources[i].mProcessor == proc) && (freeCustom || !(mMemoryResources[i].mType & GPUMemoryResource::MEMORY_CUSTOM)) && (freePermanent || !(mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT))) {
768 }
769 }
770}
771
776
778{
779 if (mProcessingSettings.allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) {
780 std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
781 }
782 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) {
783 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
784 }
785 res->mPtr = nullptr;
786 res->mPtrDevice = nullptr;
787}
788
790{
793 mVolatileMemoryStart = nullptr;
794 }
795 if (mProcessingSettings.allocDebugLevel >= 2) {
796 std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
797 }
798}
799
805
810
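// Pop one entry from the non-persistent memory stack: optionally print the
// current memory usage, restore the pool end pointers, and free the individual
// non-persistent allocations made since the matching push.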
811void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
812{
813 if (mProcessingSettings.keepDisplayMemory || mProcessingSettings.disableMemoryReuse) {
814 return;
815 }
816 if (mNonPersistentMemoryStack.size() == 0) {
817 GPUFatal("Trying to pop memory state from empty stack");
818 }
819 if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) {
820 GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str());
821 }
822 if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) {
823 if (IsGPU()) {
824 printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0);
825 }
826 printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0);
827 printf("%16s", "");
829 }
830 mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
832 for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) {
834 if (res->mReuse < 0) {
835 operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
836 }
837 res->mPtr = nullptr;
838 res->mPtrDevice = nullptr;
839 }
841 mNonPersistentMemoryStack.pop_back();
842}
843
845{
847 throw std::runtime_error("temporary memory stack already blocked");
848 }
851}
852
854{
855 if (mNonPersistentMemoryStack.size()) {
856 throw std::runtime_error("cannot unblock while there is stacked memory");
857 }
860 mHostMemoryPoolBlocked = nullptr;
861 mDeviceMemoryPoolBlocked = nullptr;
862}
863
865{
866 mMemoryResources[res].mPtr = ptr;
867}
868
870{
871 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
874 }
875 }
876 mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent);
877 mDeviceMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mDeviceMemoryPermanent);
878 mUnmanagedChunks.clear();
879 mVolatileMemoryStart = nullptr;
884}
885
891
893{
894 printf("Maximum Memory Allocation: Host %'zu / Device %'zu\n", mHostMemoryUsedMax, mDeviceMemoryUsedMax);
895}
896
898{
899 if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
900 printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n",
903 }
904}
905
907{
908 std::map<std::string, std::array<size_t, 3>> sizes;
909 for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
910 auto& res = mMemoryResources[i];
911 if (res.mReuse >= 0) {
912 continue;
913 }
914 auto& x = sizes[res.mName];
915 if (res.mPtr) {
916 x[0] += res.mSize;
917 }
918 if (res.mPtrDevice) {
919 x[1] += res.mSize;
920 }
922 x[2] = 1;
923 }
924 }
925 printf("%59s CPU / %9s GPU\n", "", "");
926 for (auto it = sizes.begin(); it != sizes.end(); it++) {
927 printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
928 }
930 for (uint32_t i = 0; i < mChains.size(); i++) {
931 mChains[i]->PrintMemoryStatistics();
932 }
933}
934
936{
937 if (mProcessingSettings.noGPUMemoryRegistration) {
938 return 0;
939 }
941 if (retVal == 0) {
942 mRegisteredMemoryPtrs.emplace(ptr);
943 }
944 return retVal;
945}
946
948{
949 if (mProcessingSettings.noGPUMemoryRegistration) {
950 return 0;
951 }
952 const auto& pos = mRegisteredMemoryPtrs.find(ptr);
953 if (pos != mRegisteredMemoryPtrs.end()) {
956 }
957 return 1;
958}
959
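// Translate a one-hot step enum value into its bit index; returns -1 for
// invalid values if validCheck is false, otherwise throws.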
960namespace o2::gpu::internal
961{
962namespace // anonymous
963{
964template <class T>
965constexpr static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num")
966{
967 static_assert(sizeof(step) == sizeof(uint32_t), "Invalid step enum size");
968 int32_t retVal = 8 * sizeof(uint32_t) - 1 - CAMath::Clz((uint32_t)step);
969 if ((uint32_t)step == 0 || retVal >= N) {
970 if (!validCheck) {
971 return -1;
972 }
973 throw std::runtime_error(err);
974 }
975 return retVal;
976}
977} // anonymous namespace
978} // namespace o2::gpu::internal
979
980int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); }
981int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); }
982
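// Pipeline worker loop (double-pipeline mode): wait for work items on the
// shared queue, run the queued chain, signal completion to the enqueuing
// thread, and exit when a termination item is received.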
984{
985 if (!mInitialized || !mProcessingSettings.doublePipeline || mMaster != nullptr || !mSlaves.size()) {
986 throw std::invalid_argument("Cannot start double pipeline mode");
987 }
988 if (mProcessingSettings.debugLevel >= 3) {
989 GPUInfo("Pipeline worker started");
990 }
991 bool terminate = false;
992 while (!terminate) {
993 {
994 std::unique_lock<std::mutex> lk(mPipelineContext->mutex);
995 mPipelineContext->cond.wait(lk, [this] { return this->mPipelineContext->queue.size() > 0; });
996 }
997 GPUReconstructionPipelineQueue* q;
998 {
999 std::lock_guard<std::mutex> lk(mPipelineContext->mutex);
1000 q = mPipelineContext->queue.front();
1001 mPipelineContext->queue.pop();
1002 }
1003 if (q->op == 1) {
1004 terminate = 1;
1005 } else {
1006 q->retVal = q->chain->RunChain();
1007 }
1008 {
1009 std::lock_guard<std::mutex> lk(q->m);
1010 q->done = true;
1011 }
1012 q->c.notify_one();
1013 }
1014 if (mProcessingSettings.debugLevel >= 3) {
1015 GPUInfo("Pipeline worker ended");
1016 }
1017}
1018
1023
1025{
1028 std::unique_ptr<GPUReconstructionPipelineQueue> qu(new GPUReconstructionPipelineQueue);
1029 GPUReconstructionPipelineQueue* q = qu.get();
1030 q->chain = terminate ? nullptr : mChains[0].get();
1031 q->op = terminate ? 1 : 0;
1032 std::unique_lock<std::mutex> lkdone(q->m);
1033 {
1034 std::lock_guard<std::mutex> lkpipe(rec->mPipelineContext->mutex);
1035 if (rec->mPipelineContext->terminate) {
1036 throw std::runtime_error("Must not enqueue work after termination request");
1037 }
1038 rec->mPipelineContext->queue.push(q);
1039 rec->mPipelineContext->terminate = terminate;
1040 rec->mPipelineContext->cond.notify_one();
1041 }
1042 q->c.wait(lkdone, [&q]() { return q->done; });
1043 if (q->retVal) {
1044 return q->retVal;
1045 }
1046 if (terminate) {
1047 return 0;
1048 } else {
1049 return mChains[0]->FinalizePipelinedProcessing();
1050 }
1051}
1052
1054{
1056 std::lock_guard<std::mutex> lk(rec->mPipelineContext->mutex);
1057 return rec->mPipelineContext->queue.size() && rec->mPipelineContext->queue.front()->op == 0 ? rec->mPipelineContext->queue.front()->chain : nullptr;
1058}
1059
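// Propagate the per-event input sizes to all processors (and their linked
// device processors), then recompute reuse sizes and allocate the registered memory.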
1060void GPUReconstruction::PrepareEvent() // TODO: Clean this up, this should not be called from chainTracking but before
1061{
1063 for (uint32_t i = 0; i < mChains.size(); i++) {
1064 mChains[i]->PrepareEvent();
1065 }
1066 for (uint32_t i = 0; i < mProcessors.size(); i++) {
1067 if (mProcessors[i].proc->mAllocateAndInitializeLate) {
1068 continue;
1069 }
1070 (mProcessors[i].proc->*(mProcessors[i].SetMaxData))(mHostConstantMem->ioPtrs);
1071 if (mProcessors[i].proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && mProcessors[i].proc->mLinkedProcessor) {
1072 (mProcessors[i].proc->mLinkedProcessor->*(mProcessors[i].SetMaxData))(mHostConstantMem->ioPtrs);
1073 }
1074 }
1075 ComputeReuseMax(nullptr);
1076 AllocateRegisteredMemory(nullptr);
1077}
1078
1079int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, std::vector<std::array<uint32_t, 4>>* fillErrors)
1080{
1081 int32_t retVal = 0;
1082 for (uint32_t i = 0; i < mChains.size(); i++) {
1083 if (mChains[i]->CheckErrorCodes(cpuOnly, forceShowErrors, fillErrors)) {
1084 retVal++;
1085 }
1086 }
1087 return retVal;
1088}
1089
1091{
1092 std::string f;
1093 f = dir;
1094 f += "settings.dump";
1095 DumpStructToFile(&mGRPSettings, f.c_str());
1096 for (uint32_t i = 0; i < mChains.size(); i++) {
1097 mChains[i]->DumpSettings(dir);
1098 }
1099}
1100
1101void GPUReconstruction::UpdateDynamicSettings(const GPUSettingsRecDynamic* d)
1102{
1103 UpdateSettings(nullptr, nullptr, d);
1104}
1105
1106void GPUReconstruction::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p, const GPUSettingsRecDynamic* d)
1107{
1108 if (g) {
1109 mGRPSettings = *g;
1110 }
1111 if (p) {
1112 mProcessingSettings.debugLevel = p->debugLevel;
1113 mProcessingSettings.resetTimers = p->resetTimers;
1114 }
1115 GPURecoStepConfiguration* w = nullptr;
1116 if (mRecoSteps.steps.isSet(GPUDataTypes::RecoStep::TPCdEdx)) {
1117 w = &mRecoSteps;
1118 }
1119 param().UpdateSettings(g, p, w, d);
1120 if (mInitialized) {
1122 }
1123}
1124
1125int32_t GPUReconstruction::ReadSettings(const char* dir)
1126{
1127 std::string f;
1128 f = dir;
1129 f += "settings.dump";
1131 if (ReadStructFromFile(f.c_str(), &mGRPSettings)) {
1132 return 1;
1133 }
1135 for (uint32_t i = 0; i < mChains.size(); i++) {
1136 mChains[i]->ReadSettings(dir);
1137 }
1138 return 0;
1139}
1140
1141void GPUReconstruction::SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow)
1142{
1143#ifdef GPUCA_O2_LIB
1145 config.ReadConfigurableParam(config);
1146 config.configGRP.solenoidBzNominalGPU = solenoidBzNominalGPU;
1147 SetSettings(&config.configGRP, &config.configReconstruction, &config.configProcessing, workflow);
1148#else
1149 GPUSettingsGRP grp;
1150 grp.solenoidBzNominalGPU = solenoidBzNominalGPU;
1151 SetSettings(&grp, nullptr, nullptr, workflow);
1152#endif
1153}
1154
1155void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec, const GPUSettingsProcessing* proc, const GPURecoStepConfiguration* workflow)
1156{
1157 if (mInitialized) {
1158 GPUError("Cannot update settings while initialized");
1159 throw std::runtime_error("Settings updated while initialized");
1160 }
1161 mGRPSettings = *grp;
1162 if (proc) {
1163 mProcessingSettings = *proc;
1164 }
1165 if (workflow) {
1166 mRecoSteps.steps = workflow->steps;
1168 mRecoSteps.inputs = workflow->inputs;
1169 mRecoSteps.outputs = workflow->outputs;
1170 }
1171 param().SetDefaults(&mGRPSettings, rec, proc, workflow);
1172}
1173
1175{
1176 GPUOutputControl outputControl;
1177 outputControl.set(ptr, size);
1178 SetOutputControl(outputControl);
1179}
1180
1182{
1184}