Project
Loading...
Searching...
No Matches
GPUReconstruction.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
16#define GPURECONSTRUCTION_H
17
18#include <cstddef>
19#include <cstdio>
20#include <cstring>
21#include <string>
22#include <memory>
23#include <iosfwd>
24#include <vector>
25#include <functional>
26#include <unordered_map>
27#include <unordered_set>
28#include <atomic>
29
30#include "GPUDataTypesIO.h"
31#include "GPUMemoryResource.h"
32#include "GPUOutputControl.h"
33#include "GPUParam.h"
34#include "GPUConstantMem.h"
36#include "GPUDef.h"
37
38namespace o2::its
39{
40template <int>
41class TrackerTraits;
42template <int>
43class VertexerTraits;
44template <int>
45class TimeFrame;
46} // namespace o2::its
47
48namespace o2::gpu
49{
50class GPUChain;
51struct GPUMemorySizeScalers;
52struct GPUReconstructionPipelineContext;
53struct GPUReconstructionThreading;
54class GPUROOTDumpCore;
55class ThrustVolatileAllocator;
56struct GPUDefParameters;
57class GPUMemoryResource;
58struct GPUSettingsDeviceBackend;
59struct GPUSettingsGRP;
60struct GPUSettingsProcessing;
61struct GPUSettingsRec;
62struct GPUSettingsRecDynamic;
63struct GPUMemoryReuse;
64
65namespace gpu_reconstruction_kernels
66{
67struct deviceEvent;
68class threadContext;
69} // namespace gpu_reconstruction_kernels
70
72{
73 protected:
74 class LibraryLoader; // These must be the first members to ensure correct destructor order!
75 std::shared_ptr<LibraryLoader> mMyLib = nullptr;
76 std::vector<GPUMemoryResource> mMemoryResources;
77 std::vector<std::unique_ptr<GPUChain>> mChains;
78
79 public:
80 virtual ~GPUReconstruction();
83
84 // General definitions
85 constexpr static uint32_t NSECTORS = GPUTPCGeometry::NSECTORS;
86
93
94 static constexpr const char* const GEOMETRY_TYPE_NAMES[] = {"INVALID", "ALIROOT", "O2"};
95#ifdef GPUCA_RUN2
96 static constexpr GeometryType geometryType = GeometryType::ALIROOT;
97#else
98 static constexpr GeometryType geometryType = GeometryType::O2;
99#endif
100
101 static DeviceType GetDeviceType(const char* type);
102 enum InOutPointerType : uint32_t { CLUSTER_DATA = 0,
116 TPC_ZS = 14,
122 static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
123 "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labeels",
124 "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"};
126
127 // Functionality to create an instance of GPUReconstruction for the desired device
129 static GPUReconstruction* CreateInstance(DeviceType type = DeviceType::CPU, bool forceType = true, GPUReconstruction* master = nullptr);
130 static GPUReconstruction* CreateInstance(int32_t type, bool forceType, GPUReconstruction* master = nullptr) { return CreateInstance((DeviceType)type, forceType, master); }
131 static GPUReconstruction* CreateInstance(const char* type, bool forceType, GPUReconstruction* master = nullptr);
132 static bool CheckInstanceAvailable(DeviceType type, bool verbose);
133
134 enum class krnlDeviceType : int32_t { CPU = 0,
135 Device = 1,
136 Auto = -1 };
137
138 // Global steering functions
139 template <class T, typename... Args>
140 T* AddChain(Args... args);
141
142 int32_t Init();
143 int32_t Finalize();
144 int32_t Exit();
145
146 void DumpSettings(const char* dir = "");
147 int32_t ReadSettings(const char* dir = "");
148
149 void PrepareEvent();
150 virtual int32_t RunChains() = 0;
153 int32_t registerMemoryForGPU(const void* ptr, size_t size);
154 int32_t unregisterMemoryForGPU(const void* ptr);
155 virtual void* getGPUPointer(void* ptr) { return ptr; }
156 virtual void startGPUProfiling() {}
157 virtual void endGPUProfiling() {}
158 int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
159 int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
160 void RunPipelineWorker();
162
163 // Helpers for memory allocation
165 template <class T>
166 int16_t RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name = "", const GPUMemoryReuse& re = GPUMemoryReuse());
168 size_t AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom = false);
169
170 size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
172 void* AllocateDirectMemory(size_t size, int32_t type);
174 void* AllocateVolatileMemory(size_t size, bool device);
176 void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);
177 void FreeRegisteredMemory(int16_t res);
178 void ClearAllocatedMemory(bool clearOutputs = true);
182 void PushNonPersistentMemory(uint64_t tag);
183 void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor* proc = nullptr);
188 void ComputeReuseMax(GPUProcessor* proc);
190 void PrintMemoryOverview();
191 void PrintMemoryMax();
192 void SetMemoryExternalInput(int16_t res, void* ptr);
194
195 // Helpers to fetch processors from other shared libraries
196 virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits<7>>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits<7>>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame<7>>* timeFrame);
197 bool slavesExist() { return mSlaves.size() || mMaster; }
198 int slaveId() { return mSlaveId; }
199
200 // Getters / setters for parameters
202 bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; }
203 const GPUParam& GetParam() const;
206 const GPUSettingsGRP& GetGRPSettings() const { return *mGRPSettings; }
208 const GPUSettingsProcessing& GetProcessingSettings() const { return *mProcessingSettings; }
209 const GPUCalibObjectsConst& GetCalib() const;
210 bool IsInitialized() const { return mInitialized; }
211 void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr);
212 void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr);
213 void SetResetTimers(bool reset); // May update also after Init()
214 void SetDebugLevelTmp(int32_t level); // Temporarily, before calling SetSettings()
215 void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr);
216 void UpdateDynamicSettings(const GPUSettingsRecDynamic* d);
218 void SetOutputControl(void* ptr, size_t size);
219 void SetInputControl(void* ptr, size_t size);
221 uint32_t NStreams() const { return mNStreams; }
222 const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
223 virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0;
224
229 int32_t getRecoStepNum(RecoStep step, bool validCheck = true);
230 int32_t getGeneralStepNum(GeneralStep step, bool validCheck = true);
231
232 void setErrorCodeOutput(std::vector<std::array<uint32_t, 4>>* v) { mOutputErrorCodes = v; }
233 std::vector<std::array<uint32_t, 4>>* getErrorCodeOutput() { return mOutputErrorCodes; }
234
235 // Registration of GPU Processors
236 template <class T>
237 void RegisterGPUProcessor(T* proc, bool deviceSlave);
238 template <class T>
239 void SetupGPUProcessor(T* proc, bool allocate);
240 void RegisterGPUDeviceProcessor(GPUProcessor* proc, GPUProcessor* slaveProcessor);
242
243 // Support / Debugging
244 virtual void PrintKernelOccupancies() {}
246 double GetStatWallTime() { return mStatWallTime; }
247 void setDebugDumpCallback(std::function<void()>&& callback = std::function<void()>(nullptr));
248 bool triggerDebugDump();
249 std::string getDebugFolder(const std::string& prefix = ""); // empty string = no debug
250
251 // Threading
252 std::shared_ptr<GPUReconstructionThreading> mThreading;
253 static int32_t getHostThreadIndex();
254 int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; }
255
257 template <typename T>
259 {
260 return alignedAllocator<char, constants::GPU_BUFFER_ALIGNMENT>::allocate(n); // Note that char is correct, since the buffer is a char buffer
261 }
262
263 protected:
266 GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
267 int32_t InitPhaseBeforeDevice();
268 virtual int32_t InitDevice() = 0;
269 int32_t InitPhasePermanentMemory();
270 int32_t InitPhaseAfterDevice();
271 void WriteConstantParams(int32_t stream = -1);
272 virtual int32_t ExitDevice() = 0;
273 virtual size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, gpu_reconstruction_kernels::deviceEvent* ev = nullptr) = 0;
274 void UpdateMaxMemoryUsed();
275 int32_t EnqueuePipeline(bool terminate = false);
277 virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; }
278
279 virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
280 virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;
281
282 // Management for GPU thread contexts
283 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() = 0;
284
285 // Private helpers for library loading
286 static std::shared_ptr<LibraryLoader>* GetLibraryInstance(DeviceType type, bool verbose);
287 static std::string getBackendVersions();
288
289 // Private helper functions for memory management
290 size_t AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*SetPointers)(void*) const, void*& memorypoolend, const char* device);
292
293 // Private helper functions for reading / writing / allocating IO buffer from/to file
294 template <class T, class S>
295 uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type);
296 template <class T, class S>
297 size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr);
298 template <class T>
299 T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u);
300 int16_t RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re);
301
302 // Private helper functions to dump / load flat objects
303 template <class T>
304 void DumpFlatObjectToFile(const T* obj, const char* file);
305 template <class T>
306 std::unique_ptr<T> ReadFlatObjectFromFile(const char* file);
307 template <class T>
308 void DumpStructToFile(const T* obj, const char* file);
309 template <class T>
310 void DumpDynamicStructToFile(const T* obj, size_t dynamicSize, const char* file);
311 template <class T>
312 std::unique_ptr<T> ReadStructFromFile(const char* file, T* obj = nullptr, bool* errorOnMissing = nullptr, bool allowSmaller = false);
313 template <class T, auto F>
315
316 // Others
317 virtual RecoStepField AvailableGPURecoSteps() { return RecoStep::AllRecoSteps; }
318 virtual bool CanQueryMaxMemory() { return false; }
319
320 // Pointers to tracker classes
322 const GPUConstantMem* processors() const { return mHostConstantMem.get(); }
323 GPUParam& param();
324 std::unique_ptr<GPUConstantMem> mHostConstantMem;
326
327 // Settings
328 std::unique_ptr<GPUSettingsGRP> mGRPSettings; // Global Run Parameters
329 std::unique_ptr<GPUSettingsDeviceBackend> mDeviceBackendSettings; // Processing Parameters (at constructor level)
330 std::unique_ptr<GPUSettingsProcessing> mProcessingSettings; // Processing Parameters (at init level)
331 GPUOutputControl mOutputControl; // Controls the output of the individual components
332 GPUOutputControl mInputControl; // Predefined input memory location for reading standalone dumps
333 std::unique_ptr<GPUMemorySizeScalers> mMemoryScalers; // Scalers how much memory will be needed
334
336
337 std::string mDeviceName = "CPU";
338
339 // Ptrs to host and device memory;
340 void* mHostMemoryBase = nullptr; // Ptr to begin of large host memory buffer
341 void* mHostMemoryPermanent = nullptr; // Ptr to large host memory buffer offset by permanently allocated memory
342 void* mHostMemoryPool = nullptr; // Ptr to next free location in host memory buffer
343 void* mHostMemoryPoolEnd = nullptr; // Ptr to end of pool
344 void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool
345 size_t mHostMemorySize = 0; // Size of host memory buffer
346 size_t mHostMemoryUsedMax = 0; // Maximum host memory size used over time
347 void* mDeviceMemoryBase = nullptr; // Same for device ...
348 void* mDeviceMemoryPermanent = nullptr; // ...
349 void* mDeviceMemoryPool = nullptr; // ...
350 void* mDeviceMemoryPoolEnd = nullptr; // ...
351 void* mDeviceMemoryPoolBlocked = nullptr; // ...
352 size_t mDeviceMemorySize = 0; // ...
353 size_t mDeviceMemoryUsedMax = 0; // ...
354 void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
355 bool mDeviceMemoryAsVolatile = false; // Make device memory allocations volatile
356
357 std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU
358
359 GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc.
360 std::vector<GPUReconstruction*> mSlaves; // Ptr to slave GPUReconstructions
361 int mSlaveId = -1; // Id of this slave (-1 for master)
362
363 // Others
364 bool mInitialized = false;
365 bool mInErrorHandling = false;
366 uint32_t mStatNEvents = 0;
367 uint32_t mNEventsProcessed = 0;
368 double mStatKernelTime = 0.;
369 double mStatWallTime = 0.;
370 double mStatCPUTime = 0.;
371 std::shared_ptr<GPUROOTDumpCore> mROOTDump;
372 std::vector<std::array<uint32_t, 4>>* mOutputErrorCodes = nullptr;
373
374 int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU
375 int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events
376 int32_t mNStreams = 1; // Number of parallel GPU streams
377 int32_t mMaxHostThreads = 0; // Maximum number of OMP threads
378
379 // Management for GPUProcessors
387 std::vector<ProcessorData> mProcessors;
389 MemoryReuseMeta() = default;
390 MemoryReuseMeta(GPUProcessor* p, uint16_t r) : proc(p), res{r} {}
391 GPUProcessor* proc = nullptr;
392 std::vector<uint16_t> res;
393 };
394 std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
395 std::vector<std::tuple<void*, void*, size_t, size_t, uint64_t>> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCount, tag
396 std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;
397 std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mNonPersistentIndividualDirectAllocations;
398 std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mDirectMemoryChunks;
399 std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mVolatileChunks;
400 std::atomic_flag mMemoryMutex = ATOMIC_FLAG_INIT;
401
402 std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;
403
404 // Helpers for loading device library via dlopen
406 {
407 public:
409 LibraryLoader(const LibraryLoader&) = delete;
410 const LibraryLoader& operator=(const LibraryLoader&) = delete;
411
412 private:
413 friend class GPUReconstruction;
414 LibraryLoader(const char* lib, const char* func);
415 int32_t LoadLibrary();
416 int32_t CloseLibrary();
418
419 const char* mLibName;
420 const char* mFuncName;
421 void* mGPULib;
422 void* mGPUEntry;
423 };
424 static std::shared_ptr<LibraryLoader> sLibCUDA, sLibHIP, sLibOCL;
425
426 // Debugging
427 struct debugInternal;
428 static std::unique_ptr<debugInternal> mDebugData;
429 bool mDebugEnabled = false;
430 void debugInit();
431 void debugExit();
432
434};
435
// Creates a new chain of type T and appends it to mChains.
// T is constructed with this GPUReconstruction instance as first argument, followed by args
// (which are taken by value and passed on as lvalues).
// Returns a non-owning pointer to the new chain; ownership stays with the unique_ptr stored in mChains.
436template <class T, typename... Args>
437inline T* GPUReconstruction::AddChain(Args... args)
438{
439 mChains.emplace_back(new T(this, args...));
440 return (T*)mChains.back().get(); // mChains holds unique_ptr<GPUChain>, so cast back to the concrete type T
441}
442
// Typed front end for RegisterMemoryAllocationHelper.
// Converts setPtr, a member function of the concrete processor type T, to the GPUProcessor
// member-function pointer type expected by the helper and forwards proc, type, name and re unchanged.
// Returns the int16_t value produced by the helper
// (presumably the index of the registered memory resource - TODO confirm).
443template <class T>
444inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re)
445{
446 return RegisterMemoryAllocationHelper(proc, static_cast<void* (GPUProcessor::*)(void*)>(setPtr), type, name, re);
447}
448
// Registers proc with this GPUReconstruction instance.
// Appends an entry to mProcessors built from proc and T's RegisterMemoryAllocation, InitializeProcessor
// and SetMaxData member functions (each cast to the corresponding GPUProcessor member-function pointer
// type), then calls proc->InitGPUProcessor(this, processorType).
// NOTE(review): listing line 453 is missing from this view; it presumably defines processorType
// based on deviceSlave - confirm against the original header.
449template <class T>
450inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
451{
452 mProcessors.emplace_back(proc, static_cast<void (GPUProcessor::*)()>(&T::RegisterMemoryAllocation), static_cast<void (GPUProcessor::*)()>(&T::InitializeProcessor), static_cast<void (GPUProcessor::*)(const GPUTrackingInOutPointers& io)>(&T::SetMaxData));
454 proc->InitGPUProcessor(this, processorType);
455}
456
// Sets up a processor of concrete type T.
// The static_assert requires sizeof(T) to exceed sizeof(GPUProcessor), enforcing (per its message)
// that a derived class is passed rather than the GPUProcessor base itself.
// When allocate is true, SetMaxData is called with the current IO pointers first and the processor's
// registered memory is allocated at the end.
// NOTE(review): listing line 471 (the body of the final else branch) is missing from this view -
// check the original header for what happens when allocate is false.
457template <class T>
458inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
459{
460 static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
461 if (allocate) {
462 proc->SetMaxData(GetIOPtrs());
463 }
// For a processor that is not itself of device type and has a linked processor: copy the whole
// object byte-wise into the linked processor, then initialize the linked processor as the
// device-type one, passing proc as the last argument.
464 if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) {
465 std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc));
466 proc->mLinkedProcessor->InitGPUProcessor((GPUReconstruction*)this, GPUProcessor::PROCESSOR_TYPE_DEVICE, proc);
467 }
468 if (allocate) {
469 AllocateRegisteredMemory(proc, true); // second argument is resetCustom
470 } else {
472 }
473}
474
475} // namespace o2::gpu
476
477#endif
int32_t i
uint32_t res
Definition RawData.h:0
TBranch * ptr
double num
const LibraryLoader & operator=(const LibraryLoader &)=delete
LibraryLoader(const LibraryLoader &)=delete
GPURecoStepConfiguration mRecoSteps
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
void SetupGPUProcessor(T *proc, bool allocate)
static DeviceType GetDeviceType(const char *type)
std::unordered_set< const void * > mRegisteredMemoryPtrs
int16_t RegisterMemoryAllocationHelper(GPUProcessor *proc, void *(GPUProcessor::*setPtr)(void *), int32_t type, const char *name, const GPUMemoryReuse &re)
std::vector< std::unique_ptr< GPUChain > > mChains
void * AllocateVolatileMemory(size_t size, bool device)
ThrustVolatileAllocator getThrustVolatileDeviceAllocator()
std::unique_ptr< GPUMemorySizeScalers > mMemoryScalers
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr)
std::unique_ptr< T > ReadStructFromFile(const char *file, T *obj=nullptr, bool *errorOnMissing=nullptr, bool allowSmaller=false)
void SetInputControl(void *ptr, size_t size)
GPUConstantMem * mDeviceConstantMem
void ConstructGPUProcessor(GPUProcessor *proc)
virtual void * getGPUPointer(void *ptr)
std::shared_ptr< GPUROOTDumpCore > mROOTDump
static uint32_t getNIOTypeMultiplicity(InOutPointerType type)
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
void ComputeReuseMax(GPUProcessor *proc)
void SetMemoryExternalInput(int16_t res, void *ptr)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static T * alignedDefaultBufferAllocator(size_t n)
std::string getDebugFolder(const std::string &prefix="")
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::vector< GPUReconstruction * > mSlaves
static std::shared_ptr< LibraryLoader > sLibHIP
std::vector< std::tuple< void *, void *, size_t, size_t, uint64_t > > mNonPersistentMemoryStack
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void UpdateDynamicSettings(const GPUSettingsRecDynamic *d)
std::unique_ptr< GPUSettingsDeviceBackend > mDeviceBackendSettings
std::vector< GPUMemoryResource > mMemoryResources
void RegisterGPUProcessor(T *proc, bool deviceSlave)
void setDebugDumpCallback(std::function< void()> &&callback=std::function< void()>(nullptr))
static std::shared_ptr< LibraryLoader > * GetLibraryInstance(DeviceType type, bool verbose)
std::unique_ptr< GPUReconstructionPipelineContext > mPipelineContext
std::unique_ptr< GPUConstantMem > mHostConstantMem
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
void DumpStructToFile(const T *obj, const char *file)
void AllocateRegisteredMemoryInternal(GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool)
InOutTypeField GetRecoStepsInputs() const
void DumpDynamicStructToFile(const T *obj, size_t dynamicSize, const char *file)
std::vector< std::unique_ptr< char[], alignedDefaultBufferDeleter > > mVolatileChunks
static bool CheckInstanceAvailable(DeviceType type, bool verbose)
virtual int32_t registerMemoryForGPU_internal(const void *ptr, size_t size)=0
virtual size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, gpu_reconstruction_kernels::deviceEvent *ev=nullptr)=0
std::unordered_map< GPUMemoryReuse::ID, MemoryReuseMeta > mMemoryReuse1to1
std::shared_ptr< LibraryLoader > mMyLib
std::vector< ProcessorData > mProcessors
void * AllocateVolatileDeviceMemory(size_t size)
virtual int32_t InitDevice()=0
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr)
const GPUCalibObjectsConst & GetCalib() const
const GPUTrackingInOutPointers GetIOPtrs() const
const GPUConstantMem * processors() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()=0
GPUReconstruction(const GPUReconstruction &)=delete
static constexpr GeometryType geometryType
static std::shared_ptr< LibraryLoader > sLibOCL
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
void WriteConstantParams(int32_t stream=-1)
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
void setErrorCodeOutput(std::vector< std::array< uint32_t, 4 > > *v)
void FreeRegisteredMemory(GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false)
static GPUReconstruction * CreateInstance(const GPUSettingsDeviceBackend &cfg)
GPUMemoryResource & Res(int16_t num)
virtual RecoStepField AvailableGPURecoSteps()
static constexpr const char *const IOTYPENAMES[]
GPUReconstruction & operator=(const GPUReconstruction &)=delete
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor *proc=nullptr)
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
RecoStepField GetRecoSteps() const
gpudatatypes::GeneralStep GeneralStep
virtual int32_t RunChains()=0
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
const GPUParam & GetParam() const
void ClearAllocatedMemory(bool clearOutputs=true)
static constexpr const char *const GEOMETRY_TYPE_NAMES[]
const GPUConstantMem & GetConstantMem() const
virtual int32_t ExitDevice()=0
std::unique_ptr< GPUSettingsGRP > mGRPSettings
std::unique_ptr< GPUSettingsProcessing > mProcessingSettings
const void * DeviceMemoryBase() const
void PushNonPersistentMemory(uint64_t tag)
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
static std::unique_ptr< debugInternal > mDebugData
virtual int32_t unregisterMemoryForGPU_internal(const void *ptr)=0
GPUMemorySizeScalers * MemoryScalers()
void BlockStackedMemory(GPUReconstruction *rec)
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpSettings(const char *dir="")
std::vector< std::unique_ptr< char[], alignedDefaultBufferDeleter > > mNonPersistentIndividualDirectAllocations
void * AllocateDirectMemory(size_t size, int32_t type)
void DumpFlatObjectToFile(const T *obj, const char *file)
virtual void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits< 7 > > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits< 7 > > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame< 7 > > *timeFrame)
int32_t unregisterMemoryForGPU(const void *ptr)
virtual const GPUDefParameters & getGPUParameters(bool doGPU) const =0
static GPUReconstruction * CreateInstance(int32_t type, bool forceType, GPUReconstruction *master=nullptr)
int32_t registerMemoryForGPU(const void *ptr, size_t size)
static std::shared_ptr< LibraryLoader > sLibCUDA
const GPUSettingsGRP & GetGRPSettings() const
void SetDebugLevelTmp(int32_t level)
size_t AllocateRegisteredMemoryHelper(GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *) const, void *&memorypoolend, const char *device)
std::vector< std::unique_ptr< char[], alignedDefaultBufferDeleter > > mDirectMemoryChunks
int32_t EnqueuePipeline(bool terminate=false)
std::shared_ptr< GPUReconstructionThreading > mThreading
std::vector< GPUMemoryResource * > mNonPersistentIndividualAllocations
gpudatatypes::RecoStep RecoStep
aligned_unique_buffer_ptr< T > ReadDynamicStructFromFile(const char *file)
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const
int32_t GPUChkErrA(const int64_t error, const char *file, int32_t line, bool failOnError)
GPUOutputControl & OutputControl()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
gpudatatypes::DeviceType DeviceType
gpudatatypes::GeometryType GeometryType
int32_t ReadSettings(const char *dir="")
void SetOutputControl(const GPUOutputControl &v)
std::vector< std::array< uint32_t, 4 > > * mOutputErrorCodes
static constexpr uint32_t NSECTORS
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
const GLdouble * v
Definition glcorearb.h:832
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLboolean GLboolean g
Definition glcorearb.h:1233
GLboolean r
Definition glcorearb.h:1233
GLuint GLuint stream
Definition glcorearb.h:1806
bitfield< InOutType, uint32_t > InOutTypeField
bitfield< RecoStep, uint32_t > RecoStepField
GPUReconstruction * rec
gpudatatypes::RecoStepField steps
gpudatatypes::InOutTypeField inputs
gpudatatypes::RecoStepField stepsGPUMask
gpudatatypes::InOutTypeField outputs
ProcessorData(GPUProcessor *p, void(GPUProcessor::*r)(), void(GPUProcessor::*i)(), void(GPUProcessor::*d)(const GPUTrackingInOutPointers &))
void(GPUProcessor::* SetMaxData)(const GPUTrackingInOutPointers &)
static T * allocate(std::size_t n)