GPUReconstruction.h
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
#define GPURECONSTRUCTION_H

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string>
#include <memory>
#include <iosfwd>
#include <vector>
#include <unordered_map>
#include <unordered_set>

#include "GPUTRDDef.h"
#include "GPUParam.h"
#include "GPUSettings.h"
#include "GPUOutputControl.h"
#include "GPUMemoryResource.h"
#include "GPUConstantMem.h"
#include "GPULogging.h"

namespace o2::its
{
class TrackerTraits;
class VertexerTraits;
class TimeFrame;
} // namespace o2::its

namespace o2::gpu
{
class GPUChain;
struct GPUMemorySizeScalers;
struct GPUReconstructionPipelineContext;
struct GPUReconstructionThreading;
class GPUROOTDumpCore;
class ThrustVolatileAllocator;

namespace gpu_reconstruction_kernels
{
struct deviceEvent;
class threadContext;
} // namespace gpu_reconstruction_kernels

class GPUReconstruction
{
 protected:
  class LibraryLoader; // These must be the first members to ensure correct destructor order!
  std::shared_ptr<LibraryLoader> mMyLib = nullptr;
  std::vector<GPUMemoryResource> mMemoryResources;
  std::vector<std::unique_ptr<char[]>> mUnmanagedChunks;
  std::vector<std::unique_ptr<char[]>> mVolatileChunks;
  std::vector<std::unique_ptr<GPUChain>> mChains;

 public:
  virtual ~GPUReconstruction();
  GPUReconstruction(const GPUReconstruction&) = delete;
  GPUReconstruction& operator=(const GPUReconstruction&) = delete;

  // General definitions
  constexpr static uint32_t NSECTORS = GPUCA_NSECTORS;

  using GeometryType = GPUDataTypes::GeometryType;
  using DeviceType = GPUDataTypes::DeviceType;
  using RecoStep = GPUDataTypes::RecoStep;
  using GeneralStep = GPUDataTypes::GeneralStep;
  using RecoStepField = GPUDataTypes::RecoStepField;
  using InOutTypeField = GPUDataTypes::InOutTypeField;

  static constexpr const char* const GEOMETRY_TYPE_NAMES[] = {"INVALID", "ALIROOT", "O2"};
#ifdef GPUCA_TPC_GEOMETRY_O2
  static constexpr GeometryType geometryType = GeometryType::O2;
#else
  static constexpr GeometryType geometryType = GeometryType::ALIROOT;
#endif

  static DeviceType GetDeviceType(const char* type);
  enum InOutPointerType : uint32_t { CLUSTER_DATA = 0,
                                     /* ... */
                                     TPC_ZS = 14,
                                     /* ... */ };
  static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
                                                      "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labels",
                                                      "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"};

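  // The IOTYPENAMES array parallels InOutPointerType, so an enum value indexes its display name.
  // Minimal illustration (sketch only, not part of the original header):
  //   printf("%s\n", IOTYPENAMES[TPC_ZS]); // prints "TPC ZS Page"
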
  // Functionality to create an instance of GPUReconstruction for the desired device
  static GPUReconstruction* CreateInstance(const GPUSettingsDeviceBackend& cfg);
  static GPUReconstruction* CreateInstance(DeviceType type = DeviceType::CPU, bool forceType = true, GPUReconstruction* master = nullptr);
  static GPUReconstruction* CreateInstance(int32_t type, bool forceType, GPUReconstruction* master = nullptr) { return CreateInstance((DeviceType)type, forceType, master); }
  static GPUReconstruction* CreateInstance(const char* type, bool forceType, GPUReconstruction* master = nullptr);
  static bool CheckInstanceAvailable(DeviceType type, bool verbose);

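  // Example (illustrative sketch only, not part of the original header): pick a backend by name.
  // "CUDA" is an assumed device-type string accepted by GetDeviceType(); with forceType = false
  // the call may fall back to the CPU implementation if the requested backend is unavailable.
  //   GPUReconstruction* rec = GPUReconstruction::CreateInstance("CUDA", false);
  //   if (rec == nullptr) {
  //     // no usable backend could be instantiated
  //   }
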
  enum class krnlDeviceType : int32_t { CPU = 0,
                                        Device = 1,
                                        Auto = -1 };

  // Global steering functions
  template <class T, typename... Args>
  T* AddChain(Args... args);

  int32_t Init();
  int32_t Finalize();
  int32_t Exit();

  void DumpSettings(const char* dir = "");
  int32_t ReadSettings(const char* dir = "");

  void PrepareEvent();
  virtual int32_t RunChains() = 0;
  int32_t registerMemoryForGPU(const void* ptr, size_t size);
  int32_t unregisterMemoryForGPU(const void* ptr);
  virtual void* getGPUPointer(void* ptr) { return ptr; }
  virtual void startGPUProfiling() {}
  virtual void endGPUProfiling() {}
  int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
  int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
  void RunPipelineWorker();

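  // Typical steering sequence (illustrative sketch only; GPUChainTracking stands in for any chain
  // type from elsewhere in O2 and is not declared in this header):
  //   auto* rec = GPUReconstruction::CreateInstance(GPUReconstruction::DeviceType::CPU, true);
  //   auto* chain = rec->AddChain<GPUChainTracking>();
  //   rec->Init();          // after all chains are added and settings are set
  //   rec->PrepareEvent();
  //   rec->RunChains();     // processes the current event / time frame
  //   rec->Finalize();
  //   rec->Exit();
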
  // Helpers for memory allocation
  template <class T>
  int16_t RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name = "", const GPUMemoryReuse& re = GPUMemoryReuse());
  size_t AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom = false);

  size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
  void* AllocateUnmanagedMemory(size_t size, int32_t type);
  void* AllocateVolatileMemory(size_t size, bool device);
  void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);
  void FreeRegisteredMemory(int16_t res);
  void ClearAllocatedMemory(bool clearOutputs = true);
  void PushNonPersistentMemory(uint64_t tag);
  void PopNonPersistentMemory(RecoStep step, uint64_t tag);
  void ComputeReuseMax(GPUProcessor* proc);
  void PrintMemoryOverview();
  void PrintMemoryMax();
  void SetMemoryExternalInput(int16_t res, void* ptr);

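  // Memory-resource workflow (illustrative sketch only; "MyProcessor" and its SetPointers() member
  // are assumed examples of a GPUProcessor-derived class, not defined here): register once, then
  // allocate / free via the returned resource handle.
  //   int16_t res = rec->RegisterMemoryAllocation(&myProc, &MyProcessor::SetPointers,
  //                                               GPUMemoryResource::MEMORY_SCRATCH, "MyScratch");
  //   rec->AllocateRegisteredMemory(res);
  //   // ... use the memory during the reconstruction step ...
  //   rec->FreeRegisteredMemory(res);
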
  // Helpers to fetch processors from other shared libraries
  virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame);
  bool slavesExist() { return mSlaves.size() || mMaster; }

  // Getters / setters for parameters
  bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; }
  const GPUParam& GetParam() const { return mHostConstantMem->param; }
  const GPUSettingsGRP& GetGRPSettings() const { return mGRPSettings; }
  const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; }
  bool IsInitialized() const { return mInitialized; }
  void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr);
  void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr);
  void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May also be updated after Init()
  void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings()
  void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr);
  void UpdateDynamicSettings(const GPUSettingsRecDynamic* d);
  void SetOutputControl(void* ptr, size_t size);
  void SetInputControl(void* ptr, size_t size);
  uint32_t NStreams() const { return mNStreams; }
  const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }

  int32_t getRecoStepNum(RecoStep step, bool validCheck = true);
  int32_t getGeneralStepNum(GeneralStep step, bool validCheck = true);

  void setErrorCodeOutput(std::vector<std::array<uint32_t, 4>>* v) { mOutputErrorCodes = v; }
  std::vector<std::array<uint32_t, 4>>* getErrorCodeOutput() { return mOutputErrorCodes; }

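  // Configuration order (illustrative sketch only): settings are supplied before Init(); a subset
  // can still be adjusted later via UpdateSettings() / UpdateDynamicSettings().
  //   GPUSettingsGRP grp;          // e.g. filled from the current run conditions
  //   GPUSettingsProcessing proc;  // processing options such as debugLevel
  //   rec->SetSettings(&grp, nullptr, &proc);
  //   rec->Init();
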
  // Registration of GPU Processors
  template <class T>
  void RegisterGPUProcessor(T* proc, bool deviceSlave);
  template <class T>
  void SetupGPUProcessor(T* proc, bool allocate);
  void RegisterGPUDeviceProcessor(GPUProcessor* proc, GPUProcessor* slaveProcessor);

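  // Processor registration (illustrative sketch only; "myProc" is an assumed GPUProcessor-derived
  // instance owned by a chain): register the host-side processor, then set it up once the IO
  // pointers are known; deviceSlave selects registration as a device slave of a GPU backend.
  //   rec->RegisterGPUProcessor(&myProc, rec->IsGPU());
  //   rec->SetupGPUProcessor(&myProc, true); // allocate = true also allocates its registered memory
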
  // Support / Debugging
  virtual void PrintKernelOccupancies() {}
  double GetStatWallTime() { return mStatWallTime; }

  // Threading
  std::shared_ptr<GPUReconstructionThreading> mThreading;
  static int32_t getHostThreadIndex();
  int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; }

 protected:
  GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
  int32_t InitPhaseBeforeDevice();
  virtual int32_t InitDevice() = 0;
  int32_t InitPhasePermanentMemory();
  int32_t InitPhaseAfterDevice();
  void WriteConstantParams();
  virtual int32_t ExitDevice() = 0;
  virtual size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, gpu_reconstruction_kernels::deviceEvent* ev = nullptr) = 0;
  void UpdateMaxMemoryUsed();
  int32_t EnqueuePipeline(bool terminate = false);
  virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; }

  virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
  virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;

  // Management for GPU thread contexts
  virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() = 0;

  // Private helpers for library loading
  static std::shared_ptr<LibraryLoader>* GetLibraryInstance(DeviceType type, bool verbose);

  // Private helper functions for memory management
  size_t AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void*& ptr, void*& memorypool, void* memorybase, size_t memorysize, void* (GPUMemoryResource::*SetPointers)(void*), void*& memorypoolend, const char* device);

  // Private helper functions for reading / writing / allocating IO buffer from/to file
  template <class T, class S>
  uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type);
  template <class T, class S>
  size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr);
  template <class T>
  T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u);

  // Private helper functions to dump / load flat objects
  template <class T>
  void DumpFlatObjectToFile(const T* obj, const char* file);
  template <class T>
  std::unique_ptr<T> ReadFlatObjectFromFile(const char* file);
  template <class T>
  void DumpStructToFile(const T* obj, const char* file);
  template <class T>
  std::unique_ptr<T> ReadStructFromFile(const char* file);
  template <class T>
  int32_t ReadStructFromFile(const char* file, T* obj);

  // Others
  virtual RecoStepField AvailableGPURecoSteps() { return RecoStep::AllRecoSteps; }
  virtual bool CanQueryMaxMemory() { return false; }

  // Pointers to tracker classes
  const GPUConstantMem* processors() const { return mHostConstantMem.get(); }
  GPUParam& param() { return mHostConstantMem->param; }
  std::unique_ptr<GPUConstantMem> mHostConstantMem;

  // Settings
  GPUSettingsGRP mGRPSettings;                          // Global Run Parameters
  GPUSettingsDeviceBackend mDeviceBackendSettings;      // Processing Parameters (at constructor level)
  GPUSettingsProcessing mProcessingSettings;            // Processing Parameters (at init level)
  GPUOutputControl mOutputControl;                      // Controls the output of the individual components
  GPUOutputControl mInputControl;                       // Predefined input memory location for reading standalone dumps
  std::unique_ptr<GPUMemorySizeScalers> mMemoryScalers; // Scalers for how much memory will be needed

  std::string mDeviceName = "CPU";

  // Ptrs to host and device memory
  void* mHostMemoryBase = nullptr;        // Ptr to begin of large host memory buffer
  void* mHostMemoryPermanent = nullptr;   // Ptr to large host memory buffer offset by permanently allocated memory
  void* mHostMemoryPool = nullptr;        // Ptr to next free location in host memory buffer
  void* mHostMemoryPoolEnd = nullptr;     // Ptr to end of pool
  void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool
  size_t mHostMemorySize = 0;             // Size of host memory buffer
  size_t mHostMemoryUsedMax = 0;          // Maximum host memory size used over time
  void* mDeviceMemoryBase = nullptr;
  void* mDeviceMemoryPermanent = nullptr;
  void* mDeviceMemoryPool = nullptr;
  void* mDeviceMemoryPoolEnd = nullptr;
  void* mDeviceMemoryPoolBlocked = nullptr;
  size_t mDeviceMemorySize = 0;
  void* mVolatileMemoryStart = nullptr;   // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
  size_t mDeviceMemoryUsedMax = 0;

  std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU

  GPUReconstruction* mMaster = nullptr;    // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc.
  std::vector<GPUReconstruction*> mSlaves; // Ptr to slave GPUReconstructions

  // Others
  bool mInitialized = false;
  bool mInErrorHandling = false;
  uint32_t mStatNEvents = 0;
  uint32_t mNEventsProcessed = 0;
  double mStatKernelTime = 0.;
  double mStatWallTime = 0.;
  double mStatCPUTime = 0.;
  std::shared_ptr<GPUROOTDumpCore> mROOTDump;
  std::vector<std::array<uint32_t, 4>>* mOutputErrorCodes = nullptr;

  int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU
  int32_t mGPUStuck = 0;          // Marks that the GPU is stuck, skip future events
  int32_t mNStreams = 1;          // Number of parallel GPU streams
  int32_t mMaxHostThreads = 0;    // Maximum number of OMP threads

  // Management for GPUProcessors
  struct ProcessorData {
    ProcessorData(GPUProcessor* p, void (GPUProcessor::*r)(), void (GPUProcessor::*i)(), void (GPUProcessor::*d)(const GPUTrackingInOutPointers&)) : proc(p), RegisterMemoryAllocation(r), InitializeProcessor(i), SetMaxData(d) {}
    GPUProcessor* proc;
    void (GPUProcessor::*RegisterMemoryAllocation)();
    void (GPUProcessor::*InitializeProcessor)();
    void (GPUProcessor::*SetMaxData)(const GPUTrackingInOutPointers&);
  };
  std::vector<ProcessorData> mProcessors;
  struct MemoryReuseMeta {
    MemoryReuseMeta() = default;
    MemoryReuseMeta(GPUProcessor* p, uint16_t r) : proc(p), res{r} {}
    GPUProcessor* proc = nullptr;
    std::vector<uint16_t> res;
  };
  std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
  std::vector<std::tuple<void*, void*, size_t, uint64_t>> mNonPersistentMemoryStack;
  std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;

  std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;

  // Helpers for loading device library via dlopen
  class LibraryLoader
  {
   public:
    LibraryLoader(const LibraryLoader&) = delete;
    const LibraryLoader& operator=(const LibraryLoader&) = delete;

   private:
    friend class GPUReconstruction;
    LibraryLoader(const char* lib, const char* func);
    int32_t LoadLibrary();
    int32_t CloseLibrary();

    const char* mLibName;
    const char* mFuncName;
    void* mGPULib;
    void* mGPUEntry;
  };
  static std::shared_ptr<LibraryLoader> sLibCUDA, sLibHIP, sLibOCL;
};

template <class T>
inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr<T[]>& u)
{
  if (n == 0) {
    u.reset(nullptr);
    return nullptr;
  }
  T* retVal;
  if (mInputControl.useExternal()) { // an external input buffer was set via SetInputControl: carve the block out of it
    u.reset(nullptr);
    GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n);
    if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) {
      throw std::bad_alloc();
    }
  } else {
    u.reset(new T[n]);
    retVal = u.get();
    if (mProcessingSettings.registerStandaloneInputMemory) {
      if (registerMemoryForGPU(u.get(), n * sizeof(T))) {
        GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T)));
        throw std::bad_alloc();
      }
    }
  }
  ptr = retVal;
  return retVal;
}

template <class T, typename... Args>
inline T* GPUReconstruction::AddChain(Args... args)
{
  mChains.emplace_back(new T(this, args...));
  return (T*)mChains.back().get();
}

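// Illustrative use of AddChain (sketch only; GPUChainTracking stands in for any GPUChain-derived
// class whose constructor takes the owning GPUReconstruction* as its first argument). The returned
// raw pointer is owned by the GPUReconstruction instance and must not be deleted by the caller.
//   auto* chain = rec->AddChain<GPUChainTracking>();
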
template <class T>
inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re)
{
  if (!(type & (GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU))) {
    if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !mProcessingSettings.keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory
      type |= (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU ? GPUMemoryResource::MEMORY_HOST : GPUMemoryResource::MEMORY_GPU);
    } else {
      type |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU;
    }
  }
  if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) {
    type &= ~GPUMemoryResource::MEMORY_GPU;
  }
  mMemoryResources.emplace_back(proc, static_cast<void* (GPUProcessor::*)(void*)>(setPtr), (GPUMemoryResource::MemoryType)type, name);
  if (mMemoryResources.size() >= 32768) {
    throw std::bad_alloc();
  }
  uint16_t retVal = mMemoryResources.size() - 1;
  if (re.type != GPUMemoryReuse::NONE && !mProcessingSettings.disableMemoryReuse) {
    const auto& it = mMemoryReuse1to1.find(re.id);
    if (it == mMemoryReuse1to1.end()) {
      mMemoryReuse1to1[re.id] = {proc, retVal};
    } else {
      mMemoryResources[retVal].mReuse = it->second.res[0];
      it->second.res.emplace_back(retVal);
    }
  }
  return retVal;
}

template <class T>
inline void GPUReconstruction::RegisterGPUProcessor(T* proc, bool deviceSlave)
{
  mProcessors.emplace_back(proc, static_cast<void (GPUProcessor::*)()>(&T::RegisterMemoryAllocation), static_cast<void (GPUProcessor::*)()>(&T::InitializeProcessor), static_cast<void (GPUProcessor::*)(const GPUTrackingInOutPointers& io)>(&T::SetMaxData));
  GPUProcessor::ProcessorType processorType = deviceSlave ? GPUProcessor::PROCESSOR_TYPE_SLAVE : GPUProcessor::PROCESSOR_TYPE_CPU;
  proc->InitGPUProcessor(this, processorType);
}

template <class T>
inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
{
  static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class");
  if (allocate) {
    proc->SetMaxData(mHostConstantMem->ioPtrs);
  }
  if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) {
    std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc));
    proc->mLinkedProcessor->InitGPUProcessor((GPUReconstruction*)this, GPUProcessor::PROCESSOR_TYPE_DEVICE, proc);
  }
  if (allocate) {
    AllocateRegisteredMemory(proc, true);
  } else {
    ResetRegisteredMemoryPointers(proc);
  }
}

} // namespace o2::gpu

#endif