Project
Loading...
Searching...
No Matches
GPUReconstruction.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
16#define GPURECONSTRUCTION_H
17
18#include <cstddef>
19#include <cstdio>
20#include <cstring>
21#include <string>
22#include <memory>
23#include <iosfwd>
24#include <vector>
25#include <functional>
26#include <unordered_map>
27#include <unordered_set>
28
29#include "GPUDataTypes.h"
30#include "GPUMemoryResource.h"
31#include "GPUOutputControl.h"
32
33/*#include "GPUParam.h"
34#include "GPUSettings.h"
35#include "GPULogging.h"*/
36
37namespace o2::its
38{
39template <int>
40class TrackerTraits;
41class VertexerTraits;
42template <int>
43class TimeFrame;
44} // namespace o2::its
45
46namespace o2::gpu
47{
48class GPUChain;
49struct GPUMemorySizeScalers;
50struct GPUReconstructionPipelineContext;
51struct GPUReconstructionThreading;
52class GPUROOTDumpCore;
53class ThrustVolatileAllocator;
54struct GPUDefParameters;
55class GPUMemoryResource;
56struct GPUSettingsDeviceBackend;
57struct GPUSettingsGRP;
58struct GPUSettingsProcessing;
59struct GPUSettingsRec;
60struct GPUSettingsRecDynamic;
61struct GPUMemoryReuse;
62
63namespace gpu_reconstruction_kernels
64{
65struct deviceEvent;
66class threadContext;
67} // namespace gpu_reconstruction_kernels
68
70{
71 protected:
72 class LibraryLoader; // These must be the first members to ensure correct destructor order!
73 std::shared_ptr<LibraryLoader> mMyLib = nullptr;
74 std::vector<GPUMemoryResource> mMemoryResources;
75 std::vector<std::unique_ptr<GPUChain>> mChains;
76
77 public:
78 virtual ~GPUReconstruction();
81
82 // General definitions
83 constexpr static uint32_t NSECTORS = GPUCA_NSECTORS;
84
91
92 static constexpr const char* const GEOMETRY_TYPE_NAMES[] = {"INVALID", "ALIROOT", "O2"};
93#ifdef GPUCA_TPC_GEOMETRY_O2
94 static constexpr GeometryType geometryType = GeometryType::O2;
95#else
96 static constexpr GeometryType geometryType = GeometryType::ALIROOT;
97#endif
98
99 static DeviceType GetDeviceType(const char* type);
100 enum InOutPointerType : uint32_t { CLUSTER_DATA = 0,
114 TPC_ZS = 14,
120 static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
121 "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labeels",
122 "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"};
124
125 // Functionality to create an instance of GPUReconstruction for the desired device
127 static GPUReconstruction* CreateInstance(DeviceType type = DeviceType::CPU, bool forceType = true, GPUReconstruction* master = nullptr);
128 static GPUReconstruction* CreateInstance(int32_t type, bool forceType, GPUReconstruction* master = nullptr) { return CreateInstance((DeviceType)type, forceType, master); }
129 static GPUReconstruction* CreateInstance(const char* type, bool forceType, GPUReconstruction* master = nullptr);
130 static bool CheckInstanceAvailable(DeviceType type, bool verbose);
131
132 enum class krnlDeviceType : int32_t { CPU = 0,
133 Device = 1,
134 Auto = -1 };
135
136 // Global steering functions
137 template <class T, typename... Args>
138 T* AddChain(Args... args);
139
140 int32_t Init();
141 int32_t Finalize();
142 int32_t Exit();
143
144 void DumpSettings(const char* dir = "");
145 int32_t ReadSettings(const char* dir = "");
146
147 void PrepareEvent();
148 virtual int32_t RunChains() = 0;
151 int32_t registerMemoryForGPU(const void* ptr, size_t size);
152 int32_t unregisterMemoryForGPU(const void* ptr);
153 virtual void* getGPUPointer(void* ptr) { return ptr; }
154 virtual void startGPUProfiling() {}
155 virtual void endGPUProfiling() {}
156 int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
157 int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
158 void RunPipelineWorker();
160
161 // Helpers for memory allocation
163 template <class T>
164 int16_t RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name = "", const GPUMemoryReuse& re = GPUMemoryReuse());
166 size_t AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom = false);
167
168 size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
170 void* AllocateDirectMemory(size_t size, int32_t type);
172 void* AllocateVolatileMemory(size_t size, bool device);
174 void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);
175 void FreeRegisteredMemory(int16_t res);
176 void ClearAllocatedMemory(bool clearOutputs = true);
180 void PushNonPersistentMemory(uint64_t tag);
181 void PopNonPersistentMemory(RecoStep step, uint64_t tag);
186 void ComputeReuseMax(GPUProcessor* proc);
188 void PrintMemoryOverview();
189 void PrintMemoryMax();
190 void SetMemoryExternalInput(int16_t res, void* ptr);
192
193 // Helpers to fetch processors from other shared libraries
194 virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits<7>>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame<7>>* timeFrame);
195 bool slavesExist() { return mSlaves.size() || mMaster; }
196 int slaveId() { return mSlaveId; }
197
198 // Getters / setters for parameters
200 bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; }
201 const GPUParam& GetParam() const;
204 const GPUSettingsGRP& GetGRPSettings() const { return *mGRPSettings; }
206 const GPUSettingsProcessing& GetProcessingSettings() const { return *mProcessingSettings; }
207 const GPUCalibObjectsConst& GetCalib() const;
208 bool IsInitialized() const { return mInitialized; }
209 void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr);
210 void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr);
211 void SetResetTimers(bool reset); // May update also after Init()
212 void SetDebugLevelTmp(int32_t level); // Temporarily, before calling SetSettings()
213 void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr);
214 void UpdateDynamicSettings(const GPUSettingsRecDynamic* d);
216 void SetOutputControl(void* ptr, size_t size);
217 void SetInputControl(void* ptr, size_t size);
219 uint32_t NStreams() const { return mNStreams; }
220 const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
221 virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0;
222
227 int32_t getRecoStepNum(RecoStep step, bool validCheck = true);
228 int32_t getGeneralStepNum(GeneralStep step, bool validCheck = true);
229
230 void setErrorCodeOutput(std::vector<std::array<uint32_t, 4>>* v) { mOutputErrorCodes = v; }
231 std::vector<std::array<uint32_t, 4>>* getErrorCodeOutput() { return mOutputErrorCodes; }
232
233 // Registration of GPU Processors
234 template <class T>
235 void RegisterGPUProcessor(T* proc, bool deviceSlave);
236 template <class T>
237 void SetupGPUProcessor(T* proc, bool allocate);
238 void RegisterGPUDeviceProcessor(GPUProcessor* proc, GPUProcessor* slaveProcessor);
240
241 // Support / Debugging
242 virtual void PrintKernelOccupancies() {}
244 double GetStatWallTime() { return mStatWallTime; }
245 void setDebugDumpCallback(std::function<void()>&& callback = std::function<void()>(nullptr));
246 bool triggerDebugDump();
247 std::string getDebugFolder(const std::string& prefix = ""); // empty string = no debug
248
249 // Threading
250 std::shared_ptr<GPUReconstructionThreading> mThreading;
251 static int32_t getHostThreadIndex();
252 int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; }
253
254 protected:
257 GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
258 int32_t InitPhaseBeforeDevice();
259 virtual int32_t InitDevice() = 0;
260 int32_t InitPhasePermanentMemory();
261 int32_t InitPhaseAfterDevice();
262 void WriteConstantParams();
263 virtual int32_t ExitDevice() = 0;
264 virtual size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, gpu_reconstruction_kernels::deviceEvent* ev = nullptr) = 0;
265 void UpdateMaxMemoryUsed();
266 int32_t EnqueuePipeline(bool terminate = false);
268 virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; }
269
270 virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
271 virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;
272
273 // Management for GPU thread contexts
274 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() = 0;
275
276 // Private helpers for library loading
277 static std::shared_ptr<LibraryLoader>* GetLibraryInstance(DeviceType type, bool verbose);
278 static std::string getBackendVersions();
279
280 // Private helper functions for memory management
281 size_t AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*SetPointers)(void*), void*& memorypoolend, const char* device);
283
284 // Private helper functions for reading / writing / allocating IO buffer from/to file
285 template <class T, class S>
286 uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type);
287 template <class T, class S>
288 size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr);
289 template <class T>
290 T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u);
291 int16_t RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re);
292
293 // Private helper functions to dump / load flat objects
294 template <class T>
295 void DumpFlatObjectToFile(const T* obj, const char* file);
296 template <class T>
297 std::unique_ptr<T> ReadFlatObjectFromFile(const char* file);
298 template <class T>
299 void DumpStructToFile(const T* obj, const char* file);
300 template <class T>
301 std::unique_ptr<T> ReadStructFromFile(const char* file);
302 template <class T>
303 int32_t ReadStructFromFile(const char* file, T* obj);
304
305 // Others
306 virtual RecoStepField AvailableGPURecoSteps() { return RecoStep::AllRecoSteps; }
307 virtual bool CanQueryMaxMemory() { return false; }
308
309 // Pointers to tracker classes
311 const GPUConstantMem* processors() const { return mHostConstantMem.get(); }
312 GPUParam& param();
313 std::unique_ptr<GPUConstantMem> mHostConstantMem;
315
316 // Settings
317 std::unique_ptr<GPUSettingsGRP> mGRPSettings; // Global Run Parameters
318 std::unique_ptr<GPUSettingsDeviceBackend> mDeviceBackendSettings; // Processing Parameters (at constructor level)
319 std::unique_ptr<GPUSettingsProcessing> mProcessingSettings; // Processing Parameters (at init level)
320 GPUOutputControl mOutputControl; // Controls the output of the individual components
321 GPUOutputControl mInputControl; // Predefined input memory location for reading standalone dumps
322 std::unique_ptr<GPUMemorySizeScalers> mMemoryScalers; // Scalers how much memory will be needed
323
325
326 std::string mDeviceName = "CPU";
327
328 // Ptrs to host and device memory;
329 void* mHostMemoryBase = nullptr; // Ptr to begin of large host memory buffer
330 void* mHostMemoryPermanent = nullptr; // Ptr to large host memory buffer offset by permanently allocated memory
331 void* mHostMemoryPool = nullptr; // Ptr to next free location in host memory buffer
332 void* mHostMemoryPoolEnd = nullptr; // Ptr to end of pool
333 void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool
334 size_t mHostMemorySize = 0; // Size of host memory buffer
335 size_t mHostMemoryUsedMax = 0; // Maximum host memory size used over time
336 void* mDeviceMemoryBase = nullptr; // Same for device ...
337 void* mDeviceMemoryPermanent = nullptr; // ...
338 void* mDeviceMemoryPool = nullptr; // ...
339 void* mDeviceMemoryPoolEnd = nullptr; // ...
340 void* mDeviceMemoryPoolBlocked = nullptr; // ...
341 size_t mDeviceMemorySize = 0; // ...
342 size_t mDeviceMemoryUsedMax = 0; // ...
343 void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
344 bool mDeviceMemoryAsVolatile = false; // Make device memory allocations volatile
345
346 std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU
347
348 GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc.
349 std::vector<GPUReconstruction*> mSlaves; // Ptr to slave GPUReconstructions
350 int mSlaveId = -1; // Id of this slave (-1 for master)
351
352 // Others
353 bool mInitialized = false;
354 bool mInErrorHandling = false;
355 uint32_t mStatNEvents = 0;
356 uint32_t mNEventsProcessed = 0;
357 double mStatKernelTime = 0.;
358 double mStatWallTime = 0.;
359 double mStatCPUTime = 0.;
360 std::shared_ptr<GPUROOTDumpCore> mROOTDump;
361 std::vector<std::array<uint32_t, 4>>* mOutputErrorCodes = nullptr;
362
363 int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU
364 int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events
365 int32_t mNStreams = 1; // Number of parallel GPU streams
366 int32_t mMaxHostThreads = 0; // Maximum number of OMP threads
367
368 // Management for GPUProcessors
376 std::vector<ProcessorData> mProcessors;
378 MemoryReuseMeta() = default;
379 MemoryReuseMeta(GPUProcessor* p, uint16_t r) : proc(p), res{r} {}
380 GPUProcessor* proc = nullptr;
381 std::vector<uint16_t> res;
382 };
384 void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); };
385 };
386 std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
387 std::vector<std::tuple<void*, void*, size_t, size_t, uint64_t>> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCount, tag
388 std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;
389 std::vector<std::unique_ptr<char[], alignedDeleter>> mNonPersistentIndividualDirectAllocations;
390 std::vector<std::unique_ptr<char[], alignedDeleter>> mDirectMemoryChunks;
391 std::vector<std::unique_ptr<char[], alignedDeleter>> mVolatileChunks;
392
393 std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;
394
395 // Helpers for loading device library via dlopen
397 {
398 public:
400 LibraryLoader(const LibraryLoader&) = delete;
401 const LibraryLoader& operator=(const LibraryLoader&) = delete;
402
403 private:
404 friend class GPUReconstruction;
405 LibraryLoader(const char* lib, const char* func);
406 int32_t LoadLibrary();
407 int32_t CloseLibrary();
409
410 const char* mLibName;
411 const char* mFuncName;
412 void* mGPULib;
413 void* mGPUEntry;
414 };
415 static std::shared_ptr<LibraryLoader> sLibCUDA, sLibHIP, sLibOCL;
416
417 // Debugging
418 struct debugInternal;
419 static std::unique_ptr<debugInternal> mDebugData;
420 bool mDebugEnabled = false;
421 void debugInit();
422 void debugExit();
423
425};
426
427template <class T, typename... Args>
428inline T* GPUReconstruction::AddChain(Args... args)
429{
430 mChains.emplace_back(new T(this, args...));
431 return (T*)mChains.back().get();
432}
433
434template <class T>
435inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re)
436{
437 return RegisterMemoryAllocationHelper(proc, static_cast<void* (GPUProcessor::*)(void*)>(setPtr), type, name, re);
438}
439
// Registers the processor `proc` with this GPUReconstruction instance.
// Appends an entry to mProcessors built from the processor pointer and T's
// RegisterMemoryAllocation, InitializeProcessor and SetMaxData member functions
// (each cast to the corresponding GPUProcessor member-function pointer type),
// then calls InitGPUProcessor() on the processor.
// NOTE(review): `processorType` is not declared in the lines visible here and
// `deviceSlave` is not used by them; the listing numbering jumps from 443 to 445,
// so a line appears to be missing -- confirm against the full header.
440template <class T>
441inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
442{
443 mProcessors.emplace_back(proc, static_cast<void (GPUProcessor::*)()>(&T::RegisterMemoryAllocation), static_cast<void (GPUProcessor::*)()>(&T::InitializeProcessor), static_cast<void (GPUProcessor::*)(const GPUTrackingInOutPointers& io)>(&T::SetMaxData));
445 proc->InitGPUProcessor(this, processorType);
446}
447
// Sets up the processor `proc`, optionally allocating its registered memory.
// T must be larger than GPUProcessor (enforced by the static_assert below).
// If `allocate` is true, SetMaxData() is called with GetIOPtrs() before the
// linked-processor step and AllocateRegisteredMemory(proc, true) after it.
448template <class T>
449inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
450{
451 static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
452 if (allocate) {
453 proc->SetMaxData(GetIOPtrs());
454 }
// For a processor that is not of type PROCESSOR_TYPE_DEVICE and has a linked
// processor: byte-copy the whole object (sizeof(T) bytes) onto the linked
// processor, then initialize the linked one as PROCESSOR_TYPE_DEVICE.
455 if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) {
456 std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc));
457 proc->mLinkedProcessor->InitGPUProcessor((GPUReconstruction*)this, GPUProcessor::PROCESSOR_TYPE_DEVICE, proc);
458 }
459 if (allocate) {
460 AllocateRegisteredMemory(proc, true);
461 } else {
// NOTE(review): the else branch is empty in the visible listing; the numbering
// jumps from 461 to 463, so a statement appears to be missing -- confirm against
// the full header.
463 }
464}
465
466} // namespace o2::gpu
467
468#endif
int32_t i
#define GPUCA_BUFFER_ALIGNMENT
#define GPUCA_NSECTORS
uint32_t res
Definition RawData.h:0
TBranch * ptr
double num
bitfield< RecoStep, uint32_t > RecoStepField
bitfield< InOutType, uint32_t > InOutTypeField
const LibraryLoader & operator=(const LibraryLoader &)=delete
LibraryLoader(const LibraryLoader &)=delete
GPURecoStepConfiguration mRecoSteps
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
virtual void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits< 7 > > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame< 7 > > *timeFrame)
void SetupGPUProcessor(T *proc, bool allocate)
static DeviceType GetDeviceType(const char *type)
std::unordered_set< const void * > mRegisteredMemoryPtrs
int16_t RegisterMemoryAllocationHelper(GPUProcessor *proc, void *(GPUProcessor::*setPtr)(void *), int32_t type, const char *name, const GPUMemoryReuse &re)
std::vector< std::unique_ptr< GPUChain > > mChains
GPUDataTypes::RecoStep RecoStep
void * AllocateVolatileMemory(size_t size, bool device)
ThrustVolatileAllocator getThrustVolatileDeviceAllocator()
std::unique_ptr< GPUMemorySizeScalers > mMemoryScalers
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr)
void SetInputControl(void *ptr, size_t size)
GPUConstantMem * mDeviceConstantMem
void ConstructGPUProcessor(GPUProcessor *proc)
virtual void * getGPUPointer(void *ptr)
std::shared_ptr< GPUROOTDumpCore > mROOTDump
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
static uint32_t getNIOTypeMultiplicity(InOutPointerType type)
size_t AllocateRegisteredMemoryHelper(GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *), void *&memorypoolend, const char *device)
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
void ComputeReuseMax(GPUProcessor *proc)
void SetMemoryExternalInput(int16_t res, void *ptr)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
std::string getDebugFolder(const std::string &prefix="")
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::vector< GPUReconstruction * > mSlaves
static std::shared_ptr< LibraryLoader > sLibHIP
std::vector< std::tuple< void *, void *, size_t, size_t, uint64_t > > mNonPersistentMemoryStack
std::unique_ptr< T > ReadStructFromFile(const char *file)
GPUDataTypes::DeviceType DeviceType
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void UpdateDynamicSettings(const GPUSettingsRecDynamic *d)
std::unique_ptr< GPUSettingsDeviceBackend > mDeviceBackendSettings
std::vector< GPUMemoryResource > mMemoryResources
void RegisterGPUProcessor(T *proc, bool deviceSlave)
void setDebugDumpCallback(std::function< void()> &&callback=std::function< void()>(nullptr))
static std::shared_ptr< LibraryLoader > * GetLibraryInstance(DeviceType type, bool verbose)
std::unique_ptr< GPUReconstructionPipelineContext > mPipelineContext
std::unique_ptr< GPUConstantMem > mHostConstantMem
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
void DumpStructToFile(const T *obj, const char *file)
void AllocateRegisteredMemoryInternal(GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool)
InOutTypeField GetRecoStepsInputs() const
static bool CheckInstanceAvailable(DeviceType type, bool verbose)
virtual int32_t registerMemoryForGPU_internal(const void *ptr, size_t size)=0
virtual size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, gpu_reconstruction_kernels::deviceEvent *ev=nullptr)=0
std::unordered_map< GPUMemoryReuse::ID, MemoryReuseMeta > mMemoryReuse1to1
std::shared_ptr< LibraryLoader > mMyLib
std::vector< ProcessorData > mProcessors
void * AllocateVolatileDeviceMemory(size_t size)
virtual int32_t InitDevice()=0
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr)
const GPUCalibObjectsConst & GetCalib() const
const GPUTrackingInOutPointers GetIOPtrs() const
const GPUConstantMem * processors() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()=0
GPUReconstruction(const GPUReconstruction &)=delete
static constexpr GeometryType geometryType
static std::shared_ptr< LibraryLoader > sLibOCL
std::vector< std::unique_ptr< char[], alignedDeleter > > mNonPersistentIndividualDirectAllocations
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUDataTypes::GeometryType GeometryType
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
void setErrorCodeOutput(std::vector< std::array< uint32_t, 4 > > *v)
void FreeRegisteredMemory(GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false)
std::vector< std::unique_ptr< char[], alignedDeleter > > mVolatileChunks
static GPUReconstruction * CreateInstance(const GPUSettingsDeviceBackend &cfg)
GPUMemoryResource & Res(int16_t num)
virtual RecoStepField AvailableGPURecoSteps()
static constexpr const char *const IOTYPENAMES[]
GPUReconstruction & operator=(const GPUReconstruction &)=delete
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
std::vector< std::unique_ptr< char[], alignedDeleter > > mDirectMemoryChunks
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
RecoStepField GetRecoSteps() const
virtual int32_t RunChains()=0
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
const GPUParam & GetParam() const
void ClearAllocatedMemory(bool clearOutputs=true)
static constexpr const char *const GEOMETRY_TYPE_NAMES[]
const GPUConstantMem & GetConstantMem() const
virtual int32_t ExitDevice()=0
std::unique_ptr< GPUSettingsGRP > mGRPSettings
std::unique_ptr< GPUSettingsProcessing > mProcessingSettings
const void * DeviceMemoryBase() const
void PushNonPersistentMemory(uint64_t tag)
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
static std::unique_ptr< debugInternal > mDebugData
virtual int32_t unregisterMemoryForGPU_internal(const void *ptr)=0
GPUMemorySizeScalers * MemoryScalers()
GPUDataTypes::GeneralStep GeneralStep
void BlockStackedMemory(GPUReconstruction *rec)
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpSettings(const char *dir="")
void * AllocateDirectMemory(size_t size, int32_t type)
void DumpFlatObjectToFile(const T *obj, const char *file)
int32_t unregisterMemoryForGPU(const void *ptr)
virtual const GPUDefParameters & getGPUParameters(bool doGPU) const =0
static GPUReconstruction * CreateInstance(int32_t type, bool forceType, GPUReconstruction *master=nullptr)
int32_t registerMemoryForGPU(const void *ptr, size_t size)
static std::shared_ptr< LibraryLoader > sLibCUDA
const GPUSettingsGRP & GetGRPSettings() const
void SetDebugLevelTmp(int32_t level)
int32_t EnqueuePipeline(bool terminate=false)
std::shared_ptr< GPUReconstructionThreading > mThreading
std::vector< GPUMemoryResource * > mNonPersistentIndividualAllocations
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const
int32_t GPUChkErrA(const int64_t error, const char *file, int32_t line, bool failOnError)
GPUOutputControl & OutputControl()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int32_t ReadSettings(const char *dir="")
void SetOutputControl(const GPUOutputControl &v)
std::vector< std::array< uint32_t, 4 > > * mOutputErrorCodes
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
const GLdouble * v
Definition glcorearb.h:832
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLboolean GLboolean g
Definition glcorearb.h:1233
GLboolean r
Definition glcorearb.h:1233
GLuint GLuint stream
Definition glcorearb.h:1806
GPUReconstruction * rec
GPUDataTypes::RecoStepField stepsGPUMask
GPUDataTypes::InOutTypeField outputs
GPUDataTypes::RecoStepField steps
GPUDataTypes::InOutTypeField inputs
ProcessorData(GPUProcessor *p, void(GPUProcessor::*r)(), void(GPUProcessor::*i)(), void(GPUProcessor::*d)(const GPUTrackingInOutPointers &))
void(GPUProcessor::* SetMaxData)(const GPUTrackingInOutPointers &)