Project
Loading...
Searching...
No Matches
GPUReconstruction.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
16#define GPURECONSTRUCTION_H
17
18#include <cstddef>
19#include <cstdio>
20#include <cstring>
21#include <string>
22#include <memory>
23#include <iosfwd>
24#include <vector>
25#include <unordered_map>
26#include <unordered_set>
27
28#include "GPUTRDDef.h"
29#include "GPUParam.h"
30#include "GPUSettings.h"
31#include "GPUOutputControl.h"
32#include "GPUMemoryResource.h"
33#include "GPUConstantMem.h"
34#include "GPUTPCSectorOutput.h"
35#include "GPULogging.h"
36
37namespace o2::its
38{
39class TrackerTraits;
40class VertexerTraits;
41class TimeFrame;
42} // namespace o2::its
43
44namespace o2::gpu
45{
46class GPUChain;
47struct GPUMemorySizeScalers;
48struct GPUReconstructionPipelineContext;
49struct GPUReconstructionThreading;
50class GPUROOTDumpCore;
51
52namespace gpu_reconstruction_kernels
53{
54struct deviceEvent;
55class threadContext;
56}
57
59{
60 protected:
61 class LibraryLoader; // These must be the first members to ensure correct destructor order!
62 std::shared_ptr<LibraryLoader> mMyLib = nullptr;
63 std::vector<GPUMemoryResource> mMemoryResources;
64 std::vector<std::unique_ptr<char[]>> mUnmanagedChunks;
65 std::vector<std::unique_ptr<char[]>> mVolatileChunks;
66 std::vector<std::unique_ptr<GPUChain>> mChains;
67
68 public:
69 virtual ~GPUReconstruction();
72
73 // General definitions
74 constexpr static uint32_t NSECTORS = GPUCA_NSECTORS;
75
76 using GeometryType = GPUDataTypes::GeometryType;
77 using DeviceType = GPUDataTypes::DeviceType;
78 using RecoStep = GPUDataTypes::RecoStep;
79 using GeneralStep = GPUDataTypes::GeneralStep;
82
83 static constexpr const char* const GEOMETRY_TYPE_NAMES[] = {"INVALID", "ALIROOT", "O2"};
84#ifdef GPUCA_TPC_GEOMETRY_O2
85 static constexpr GeometryType geometryType = GeometryType::O2;
86#else
87 static constexpr GeometryType geometryType = GeometryType::ALIROOT;
88#endif
89
90 static DeviceType GetDeviceType(const char* type);
91 enum InOutPointerType : uint32_t { CLUSTER_DATA = 0,
105 TPC_ZS = 14,
111 static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
112 "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labeels",
113 "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"};
115
116 // Functionality to create an instance of GPUReconstruction for the desired device
118 static GPUReconstruction* CreateInstance(DeviceType type = DeviceType::CPU, bool forceType = true, GPUReconstruction* master = nullptr);
119 static GPUReconstruction* CreateInstance(int32_t type, bool forceType, GPUReconstruction* master = nullptr) { return CreateInstance((DeviceType)type, forceType, master); }
120 static GPUReconstruction* CreateInstance(const char* type, bool forceType, GPUReconstruction* master = nullptr);
121 static bool CheckInstanceAvailable(DeviceType type, bool verbose);
122
123 enum class krnlDeviceType : int32_t { CPU = 0,
124 Device = 1,
125 Auto = -1 };
126
127 // Global steering functions
128 template <class T, typename... Args>
129 T* AddChain(Args... args);
130
131 int32_t Init();
132 int32_t Finalize();
133 int32_t Exit();
134
135 void DumpSettings(const char* dir = "");
136 int32_t ReadSettings(const char* dir = "");
137
138 void PrepareEvent();
139 virtual int32_t RunChains() = 0;
142 int32_t registerMemoryForGPU(const void* ptr, size_t size);
143 int32_t unregisterMemoryForGPU(const void* ptr);
144 virtual void* getGPUPointer(void* ptr) { return ptr; }
145 virtual void startGPUProfiling() {}
146 virtual void endGPUProfiling() {}
147 int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
148 void RunPipelineWorker();
150
151 // Helpers for memory allocation
153 template <class T>
154 int16_t RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name = "", const GPUMemoryReuse& re = GPUMemoryReuse());
156 size_t AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom = false);
157
158 size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
160 void* AllocateUnmanagedMemory(size_t size, int32_t type);
162 void* AllocateVolatileMemory(size_t size, bool device);
163 void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);
164 void FreeRegisteredMemory(int16_t res);
165 void ClearAllocatedMemory(bool clearOutputs = true);
168 void PushNonPersistentMemory(uint64_t tag);
169 void PopNonPersistentMemory(RecoStep step, uint64_t tag);
174 void ComputeReuseMax(GPUProcessor* proc);
176 void PrintMemoryOverview();
177 void PrintMemoryMax();
178 void SetMemoryExternalInput(int16_t res, void* ptr);
180
181 // Helpers to fetch processors from other shared libraries
182 virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame);
183 bool slavesExist() { return mSlaves.size() || mMaster; }
184
185 // Getters / setters for parameters
187 bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; }
188 const GPUParam& GetParam() const { return mHostConstantMem->param; }
190 const GPUSettingsGRP& GetGRPSettings() const { return mGRPSettings; }
192 const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; }
193 bool IsInitialized() const { return mInitialized; }
194 void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr);
195 void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr);
196 void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init()
197 void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings()
198 void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr);
199 void UpdateDynamicSettings(const GPUSettingsRecDynamic* d);
201 void SetOutputControl(void* ptr, size_t size);
202 void SetInputControl(void* ptr, size_t size);
204 int32_t NStreams() const { return mNStreams; }
205 const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
206
211 int32_t getRecoStepNum(RecoStep step, bool validCheck = true);
212 int32_t getGeneralStepNum(GeneralStep step, bool validCheck = true);
213
214 void setErrorCodeOutput(std::vector<std::array<uint32_t, 4>>* v) { mOutputErrorCodes = v; }
215 std::vector<std::array<uint32_t, 4>>* getErrorCodeOutput() { return mOutputErrorCodes; }
216
217 // Registration of GPU Processors
218 template <class T>
219 void RegisterGPUProcessor(T* proc, bool deviceSlave);
220 template <class T>
221 void SetupGPUProcessor(T* proc, bool allocate);
222 void RegisterGPUDeviceProcessor(GPUProcessor* proc, GPUProcessor* slaveProcessor);
224
225 // Support / Debugging
226 virtual void PrintKernelOccupancies() {}
228 double GetStatWallTime() { return mStatWallTime; }
229
230 // Threading
231 std::shared_ptr<GPUReconstructionThreading> mThreading;
232 static int32_t getHostThreadIndex();
233 int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; }
234
235 protected:
238 GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
239 int32_t InitPhaseBeforeDevice();
241 virtual int32_t InitDevice() = 0;
242 int32_t InitPhasePermanentMemory();
243 int32_t InitPhaseAfterDevice();
244 void WriteConstantParams();
245 virtual int32_t ExitDevice() = 0;
246 virtual size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, gpu_reconstruction_kernels::deviceEvent* ev = nullptr) = 0;
247 void UpdateMaxMemoryUsed();
248 int32_t EnqueuePipeline(bool terminate = false);
250
251 virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
252 virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;
253
254 // Management for GPU thread contexts
255 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() = 0;
256
257 // Private helpers for library loading
258 static std::shared_ptr<LibraryLoader>* GetLibraryInstance(DeviceType type, bool verbose);
259
260 // Private helper functions for memory management
261 size_t AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*SetPointers)(void*), void*& memorypoolend, const char* device);
263
264 // Private helper functions for reading / writing / allocating IO buffer from/to file
265 template <class T, class S>
266 uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type);
267 template <class T, class S>
268 size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr);
269 template <class T>
270 T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u);
271
272 // Private helper functions to dump / load flat objects
273 template <class T>
274 void DumpFlatObjectToFile(const T* obj, const char* file);
275 template <class T>
276 std::unique_ptr<T> ReadFlatObjectFromFile(const char* file);
277 template <class T>
278 void DumpStructToFile(const T* obj, const char* file);
279 template <class T>
280 std::unique_ptr<T> ReadStructFromFile(const char* file);
281 template <class T>
282 int32_t ReadStructFromFile(const char* file, T* obj);
283
284 // Others
285 virtual RecoStepField AvailableGPURecoSteps() { return RecoStep::AllRecoSteps; }
286 virtual bool CanQueryMaxMemory() { return false; }
287
288 // Pointers to tracker classes
290 const GPUConstantMem* processors() const { return mHostConstantMem.get(); }
291 GPUParam& param() { return mHostConstantMem->param; }
292 std::unique_ptr<GPUConstantMem> mHostConstantMem;
294
295 // Settings
296 GPUSettingsGRP mGRPSettings; // Global Run Parameters
297 GPUSettingsDeviceBackend mDeviceBackendSettings; // Processing Parameters (at constructor level)
298 GPUSettingsProcessing mProcessingSettings; // Processing Parameters (at init level)
299 GPUOutputControl mOutputControl; // Controls the output of the individual components
300 GPUOutputControl mInputControl; // Prefefined input memory location for reading standalone dumps
301 std::unique_ptr<GPUMemorySizeScalers> mMemoryScalers; // Scalers how much memory will be needed
302
304
305 std::string mDeviceName = "CPU";
306
307 // Ptrs to host and device memory;
308 void* mHostMemoryBase = nullptr; // Ptr to begin of large host memory buffer
309 void* mHostMemoryPermanent = nullptr; // Ptr to large host memory buffer offset by permanently allocated memory
310 void* mHostMemoryPool = nullptr; // Ptr to next free location in host memory buffer
311 void* mHostMemoryPoolEnd = nullptr; // Ptr to end of pool
312 void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool
313 size_t mHostMemorySize = 0; // Size of host memory buffer
314 size_t mHostMemoryUsedMax = 0; // Maximum host memory size used over time
315 void* mDeviceMemoryBase = nullptr; //
316 void* mDeviceMemoryPermanent = nullptr; //
317 void* mDeviceMemoryPool = nullptr; //
318 void* mDeviceMemoryPoolEnd = nullptr; //
319 void* mDeviceMemoryPoolBlocked = nullptr; //
320 size_t mDeviceMemorySize = 0; //
321 void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
322 size_t mDeviceMemoryUsedMax = 0; //
323
324 std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU
325
326 GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc.
327 std::vector<GPUReconstruction*> mSlaves; // Ptr to slave GPUReconstructions
328
329 // Others
330 bool mInitialized = false;
331 uint32_t mStatNEvents = 0;
332 uint32_t mNEventsProcessed = 0;
333 double mStatKernelTime = 0.;
334 double mStatWallTime = 0.;
335 double mStatCPUTime = 0.;
336 std::shared_ptr<GPUROOTDumpCore> mROOTDump;
337 std::vector<std::array<uint32_t, 4>>* mOutputErrorCodes = nullptr;
338
339 int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU
340 int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events
341 int32_t mNStreams = 1; // Number of parallel GPU streams
342 int32_t mMaxHostThreads = 0; // Maximum number of OMP threads
343
344 // Management for GPUProcessors
352 std::vector<ProcessorData> mProcessors;
354 MemoryReuseMeta() = default;
355 MemoryReuseMeta(GPUProcessor* p, uint16_t r) : proc(p), res{r} {}
356 GPUProcessor* proc = nullptr;
357 std::vector<uint16_t> res;
358 };
359 std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
360 std::vector<std::tuple<void*, void*, size_t, uint64_t>> mNonPersistentMemoryStack;
361 std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;
362
363 std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;
364
365 // Helpers for loading device library via dlopen
367 {
368 public:
370 LibraryLoader(const LibraryLoader&) = delete;
371 const LibraryLoader& operator=(const LibraryLoader&) = delete;
372
373 private:
374 friend class GPUReconstruction;
375 LibraryLoader(const char* lib, const char* func);
376 int32_t LoadLibrary();
377 int32_t CloseLibrary();
379
380 const char* mLibName;
381 const char* mFuncName;
382 void* mGPULib;
383 void* mGPUEntry;
384 };
385 static std::shared_ptr<LibraryLoader> sLibCUDA, sLibHIP, sLibOCL;
386
388};
389
// Allocate (or bind) an IO buffer of n elements of type T, publishing the result through
// both the raw pointer reference `ptr` and the owning unique_ptr `u`.
// NOTE(review): this listing comes from a doc extraction; original source lines 398 and
// 400-401 are missing here (the branch taken when a predefined input-control region is
// used), so the condition opening the first branch and the assignment of retVal on that
// path are not visible in this view — confirm against the original header.
390template <class T>
391inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u)
392{
// Empty request: drop any previously owned buffer and return nullptr.
393 if (n == 0) {
394 u.reset(nullptr);
395 return nullptr;
396 }
397 T* retVal;
// (Missing lines: presumably `if (mInputControl.useExternal()) {` or similar, carving
// retVal out of the preconfigured region — TODO confirm.)
399 u.reset(nullptr);
// Guard: the external input region must not be overrun.
402 if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) {
403 throw std::bad_alloc();
404 }
405 } else {
// Standard path: heap-allocate and let `u` own the buffer.
406 u.reset(new T[n]);
407 retVal = u.get();
// Optionally pin/register the freshly allocated host memory with the GPU backend;
// a failed registration is treated as an allocation failure.
408 if (mProcessingSettings.registerStandaloneInputMemory) {
409 if (registerMemoryForGPU(u.get(), n * sizeof(T))) {
410 GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T)));
411 throw std::bad_alloc();
412 }
413 }
414 }
// Publish the buffer to the caller through both out-channels.
415 ptr = retVal;
416 return retVal;
417}
418
419template <class T, typename... Args>
420inline T* GPUReconstruction::AddChain(Args... args)
421{
422 mChains.emplace_back(new T(this, args...));
423 return (T*)mChains.back().get();
424}
425
// Register a memory resource for processor `proc`, resolved later through the member
// callback `setPtr`. Returns the index of the new resource in mMemoryResources.
// NOTE(review): this listing comes from a doc extraction; original source lines 429,
// 431 and 433 are missing (the bodies adjusting `type` for scratch allocations when
// display memory is kept), so the brace structure below is incomplete in this view —
// confirm against the original header.
426template <class T>
427inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re)
428{
430 if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !mProcessingSettings.keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory
432 } else {
434 }
435 }
// CPU-only processors never get a GPU-side allocation: strip the flag.
436 if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) {
437 type &= ~GPUMemoryResource::MEMORY_GPU;
438 }
439 mMemoryResources.emplace_back(proc, static_cast<void* (GPUProcessor::*)(void*)>(setPtr), (GPUMemoryResource::MemoryType)type, name);
// The resource index is returned as int16_t, so more than 32767 resources would overflow.
440 if (mMemoryResources.size() >= 32768) {
441 throw std::bad_alloc();
442 }
443 uint16_t retVal = mMemoryResources.size() - 1;
// Memory-reuse bookkeeping: the first resource registered under a reuse id becomes the
// owner; later ones record it in mReuse and are appended to the owner's reuse list.
444 if (re.type != GPUMemoryReuse::NONE && !mProcessingSettings.disableMemoryReuse) {
445 const auto& it = mMemoryReuse1to1.find(re.id);
446 if (it == mMemoryReuse1to1.end()) {
447 mMemoryReuse1to1[re.id] = {proc, retVal};
448 } else {
449 mMemoryResources[retVal].mReuse = it->second.res[0];
450 it->second.res.emplace_back(retVal);
451 }
452 }
453 return retVal;
454}
455
// Register processor `proc` with this reconstruction instance: record its member-function
// callbacks (memory registration, initialization, SetMaxData) and initialize it.
// NOTE(review): doc-extraction gap — original source line 460 is missing here; it
// presumably derives `processorType` from the `deviceSlave` flag (slave vs. CPU type).
// Confirm against the original header before relying on this.
456template <class T>
457inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
458{
459 mProcessors.emplace_back(proc, static_cast<void (GPUProcessor::*)()>(&T::RegisterMemoryAllocation), static_cast<void (GPUProcessor::*)()>(&T::InitializeProcessor), static_cast<void (GPUProcessor::*)(const GPUTrackingInOutPointers& io)>(&T::SetMaxData));
461 proc->InitGPUProcessor(this, processorType);
462}
463
// Finalize the setup of processor `proc`: optionally size it from the IO pointers,
// mirror its state into the linked device-side shadow processor, and (optionally)
// allocate its registered memory.
// NOTE(review): doc-extraction gap — original source line 478 (the body of the final
// `else` branch) is missing here; presumably it resets the registered memory pointers
// instead of allocating (cf. ResetRegisteredMemoryPointers) — confirm against the
// original header.
464template <class T>
465inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
466{
// Guard against being called on a bare GPUProcessor: T must be a derived class.
467 static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
468 if (allocate) {
469 proc->SetMaxData(mHostConstantMem->ioPtrs);
470 }
// Copy the host processor's state byte-for-byte into its linked device shadow, then
// re-initialize the shadow as a device-type processor pointing back at `proc`.
471 if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) {
472 std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc));
473 proc->mLinkedProcessor->InitGPUProcessor((GPUReconstruction*)this, GPUProcessor::PROCESSOR_TYPE_DEVICE, proc);
474 }
475 if (allocate) {
476 AllocateRegisteredMemory(proc, true);
477 } else {
479 }
480}
481
482} // namespace o2::gpu
483
484#endif
int32_t i
int32_t retVal
#define GPUCA_NSECTORS
uint32_t res
Definition RawData.h:0
TBranch * ptr
double num
bitfield< RecoStep, uint32_t > RecoStepField
bitfield< InOutType, uint32_t > InOutTypeField
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
const LibraryLoader & operator=(const LibraryLoader &)=delete
LibraryLoader(const LibraryLoader &)=delete
GPURecoStepConfiguration mRecoSteps
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
DeviceType GetDeviceType() const
void SetupGPUProcessor(T *proc, bool allocate)
static DeviceType GetDeviceType(const char *type)
std::unordered_set< const void * > mRegisteredMemoryPtrs
std::vector< std::unique_ptr< GPUChain > > mChains
GPUDataTypes::RecoStep RecoStep
void * AllocateVolatileMemory(size_t size, bool device)
std::unique_ptr< GPUMemorySizeScalers > mMemoryScalers
virtual void UpdateAutomaticProcessingSettings()
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr)
void SetInputControl(void *ptr, size_t size)
GPUConstantMem * mDeviceConstantMem
void ConstructGPUProcessor(GPUProcessor *proc)
virtual void * getGPUPointer(void *ptr)
std::shared_ptr< GPUROOTDumpCore > mROOTDump
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
static uint32_t getNIOTypeMultiplicity(InOutPointerType type)
size_t AllocateRegisteredMemoryHelper(GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *), void *&memorypoolend, const char *device)
void ComputeReuseMax(GPUProcessor *proc)
void SetMemoryExternalInput(int16_t res, void *ptr)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
void SetDebugLevelTmp(int32_t level)
static constexpr uint32_t NSECTORS
const GPUParam & GetParam() const
RecoStepField GetRecoStepsGPU() const
const GPUSettingsDeviceBackend & GetDeviceBackendSettings()
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::vector< GPUReconstruction * > mSlaves
static std::shared_ptr< LibraryLoader > sLibHIP
std::unique_ptr< T > ReadStructFromFile(const char *file)
virtual void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame > *timeFrame)
GPUDataTypes::DeviceType DeviceType
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
std::vector< std::tuple< void *, void *, size_t, uint64_t > > mNonPersistentMemoryStack
void UpdateDynamicSettings(const GPUSettingsRecDynamic *d)
std::vector< GPUMemoryResource > mMemoryResources
void RegisterGPUProcessor(T *proc, bool deviceSlave)
static std::shared_ptr< LibraryLoader > * GetLibraryInstance(DeviceType type, bool verbose)
std::unique_ptr< GPUReconstructionPipelineContext > mPipelineContext
std::unique_ptr< GPUConstantMem > mHostConstantMem
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
void DumpStructToFile(const T *obj, const char *file)
void AllocateRegisteredMemoryInternal(GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool)
InOutTypeField GetRecoStepsInputs() const
static bool CheckInstanceAvailable(DeviceType type, bool verbose)
virtual int32_t registerMemoryForGPU_internal(const void *ptr, size_t size)=0
virtual size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, gpu_reconstruction_kernels::deviceEvent *ev=nullptr)=0
std::unordered_map< GPUMemoryReuse::ID, MemoryReuseMeta > mMemoryReuse1to1
std::shared_ptr< LibraryLoader > mMyLib
std::vector< std::unique_ptr< char[]> > mUnmanagedChunks
std::vector< ProcessorData > mProcessors
void * AllocateVolatileDeviceMemory(size_t size)
virtual int32_t InitDevice()=0
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr)
const GPUConstantMem * processors() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()=0
GPUReconstruction(const GPUReconstruction &)=delete
static constexpr GeometryType geometryType
static std::shared_ptr< LibraryLoader > sLibOCL
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUSettingsProcessing mProcessingSettings
GPUDataTypes::GeometryType GeometryType
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
void setErrorCodeOutput(std::vector< std::array< uint32_t, 4 > > *v)
void FreeRegisteredMemory(GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false)
static GPUReconstruction * CreateInstance(const GPUSettingsDeviceBackend &cfg)
GPUMemoryResource & Res(int16_t num)
std::vector< std::unique_ptr< char[]> > mVolatileChunks
virtual RecoStepField AvailableGPURecoSteps()
static constexpr const char *const IOTYPENAMES[]
GPUReconstruction & operator=(const GPUReconstruction &)=delete
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
RecoStepField GetRecoSteps() const
virtual int32_t RunChains()=0
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
void ClearAllocatedMemory(bool clearOutputs=true)
static constexpr const char *const GEOMETRY_TYPE_NAMES[]
const GPUConstantMem & GetConstantMem() const
virtual int32_t ExitDevice()=0
const void * DeviceMemoryBase() const
void * AllocateUnmanagedMemory(size_t size, int32_t type)
void PushNonPersistentMemory(uint64_t tag)
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
virtual int32_t unregisterMemoryForGPU_internal(const void *ptr)=0
GPUMemorySizeScalers * MemoryScalers()
GPUDataTypes::GeneralStep GeneralStep
void BlockStackedMemory(GPUReconstruction *rec)
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpSettings(const char *dir="")
void DumpFlatObjectToFile(const T *obj, const char *file)
int32_t unregisterMemoryForGPU(const void *ptr)
static GPUReconstruction * CreateInstance(int32_t type, bool forceType, GPUReconstruction *master=nullptr)
int32_t registerMemoryForGPU(const void *ptr, size_t size)
static std::shared_ptr< LibraryLoader > sLibCUDA
const GPUSettingsGRP & GetGRPSettings() const
GPUSettingsDeviceBackend mDeviceBackendSettings
int32_t EnqueuePipeline(bool terminate=false)
std::shared_ptr< GPUReconstructionThreading > mThreading
std::vector< GPUMemoryResource * > mNonPersistentIndividualAllocations
GPUOutputControl & OutputControl()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int32_t ReadSettings(const char *dir="")
void SetOutputControl(const GPUOutputControl &v)
std::vector< std::array< uint32_t, 4 > > * mOutputErrorCodes
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
const GLdouble * v
Definition glcorearb.h:832
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLboolean GLboolean g
Definition glcorearb.h:1233
GLint level
Definition glcorearb.h:275
GLboolean r
Definition glcorearb.h:1233
GLuint GLuint stream
Definition glcorearb.h:1806
GPUReconstruction * rec
GPUDataTypes::RecoStepField stepsGPUMask
GPUDataTypes::InOutTypeField outputs
GPUDataTypes::RecoStepField steps
GPUDataTypes::InOutTypeField inputs
ProcessorData(GPUProcessor *p, void(GPUProcessor::*r)(), void(GPUProcessor::*i)(), void(GPUProcessor::*d)(const GPUTrackingInOutPointers &))
void(GPUProcessor::* SetMaxData)(const GPUTrackingInOutPointers &)