15#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
16#define GPURECONSTRUCTION_H
25#include <unordered_map>
26#include <unordered_set>
47struct GPUMemorySizeScalers;
48struct GPUReconstructionPipelineContext;
49struct GPUReconstructionThreading;
52namespace gpu_reconstruction_kernels
62 std::shared_ptr<LibraryLoader>
mMyLib =
nullptr;
66 std::vector<std::unique_ptr<GPUChain>>
mChains;
84#ifdef GPUCA_TPC_GEOMETRY_O2
// Human-readable names for the I/O data types, one string per entry.
// NOTE(review): presumably indexed by InOutPointerType, so the order of the
// entries must stay in sync with that enum - confirm against its definition.
// Fix: "TPC Digit MC Labeels" was a misspelling of "TPC Digit MC Labels".
static constexpr const char* const IOTYPENAMES[] = {
  "TPC HLT Clusters",
  "TPC Sector Tracks",
  "TPC Sector Track Clusters",
  "TPC Cluster MC Labels",
  "TPC Track MC Informations",
  "TPC Tracks",
  "TPC Track Clusters",
  "TRD Tracks",
  "TRD Tracklets",
  "TPC Raw Clusters",
  "TPC Native Clusters",
  "TRD Tracklet MC Labels",
  "TPC Compressed Clusters",
  "TPC Digit",
  "TPC ZS Page",
  "TPC Native Clusters MC Labels",
  "TPC Digit MC Labels",
  "TRD Spacepoints",
  "TRD Triggerrecords",
  "TF Settings"};
128 template <
class T,
typename... Args>
147 int32_t
CheckErrorCodes(
bool cpuOnly =
false,
bool forceShowErrors =
false, std::vector<std::array<uint32_t, 4>>* fillErrors =
nullptr);
182 virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame);
265 template <
class T,
class S>
267 template <
class T,
class S>
357 std::vector<uint16_t>
res;
376 int32_t LoadLibrary();
377 int32_t CloseLibrary();
380 const char* mLibName;
381 const char* mFuncName;
403 throw std::bad_alloc();
410 GPUError(
"Error registering memory for GPU: %p - %ld bytes\n", (
void*)u.get(), (int64_t)(
n *
sizeof(T)));
411 throw std::bad_alloc();
419template <
class T,
typename... Args>
422 mChains.emplace_back(
new T(
this, args...));
423 return (T*)
mChains.back().get();
441 throw std::bad_alloc();
450 it->second.res.emplace_back(
retVal);
461 proc->InitGPUProcessor(
this, processorType);
467 static_assert(
sizeof(T) >
sizeof(
GPUProcessor),
"Need to setup derived class");
472 std::memcpy((
void*)proc->mLinkedProcessor, (
const void*)proc,
sizeof(*proc));
bitfield< RecoStep, uint32_t > RecoStepField
bitfield< InOutType, uint32_t > InOutTypeField
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
const LibraryLoader & operator=(const LibraryLoader &)=delete
LibraryLoader(const LibraryLoader &)=delete
GPURecoStepConfiguration mRecoSteps
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
DeviceType GetDeviceType() const
int32_t InitPhaseBeforeDevice()
void SetupGPUProcessor(T *proc, bool allocate)
static DeviceType GetDeviceType(const char *type)
std::unordered_set< const void * > mRegisteredMemoryPtrs
int32_t InitPhasePermanentMemory()
std::vector< std::unique_ptr< GPUChain > > mChains
GPUDataTypes::RecoStep RecoStep
GPUReconstruction * mMaster
void * AllocateVolatileMemory(size_t size, bool device)
std::unique_ptr< GPUMemorySizeScalers > mMemoryScalers
virtual void UpdateAutomaticProcessingSettings()
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr)
void SetInputControl(void *ptr, size_t size)
GPUConstantMem * mDeviceConstantMem
void ConstructGPUProcessor(GPUProcessor *proc)
void TerminatePipelineWorker()
virtual void * getGPUPointer(void *ptr)
std::shared_ptr< GPUROOTDumpCore > mROOTDump
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
static uint32_t getNIOTypeMultiplicity(InOutPointerType type)
size_t AllocateRegisteredMemoryHelper(GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *), void *&memorypoolend, const char *device)
int32_t GetMaxBackendThreads() const
GPUConstantMem * processors()
void ReturnVolatileMemory()
virtual void startGPUProfiling()
void ComputeReuseMax(GPUProcessor *proc)
void SetMemoryExternalInput(int16_t res, void *ptr)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
void SetDebugLevelTmp(int32_t level)
static constexpr uint32_t NSECTORS
const GPUParam & GetParam() const
GPUOutputControl mInputControl
RecoStepField GetRecoStepsGPU() const
const GPUSettingsDeviceBackend & GetDeviceBackendSettings()
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
uint32_t mNEventsProcessed
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::vector< GPUReconstruction * > mSlaves
static std::shared_ptr< LibraryLoader > sLibHIP
std::unique_ptr< T > ReadStructFromFile(const char *file)
virtual void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame > *timeFrame)
GPUDataTypes::DeviceType DeviceType
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
std::vector< std::tuple< void *, void *, size_t, uint64_t > > mNonPersistentMemoryStack
void UpdateDynamicSettings(const GPUSettingsRecDynamic *d)
std::vector< GPUMemoryResource > mMemoryResources
void RegisterGPUProcessor(T *proc, bool deviceSlave)
static std::shared_ptr< LibraryLoader > * GetLibraryInstance(DeviceType type, bool verbose)
std::unique_ptr< GPUReconstructionPipelineContext > mPipelineContext
std::unique_ptr< GPUConstantMem > mHostConstantMem
size_t AllocateRegisteredPermanentMemory()
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
void DumpStructToFile(const T *obj, const char *file)
void AllocateRegisteredMemoryInternal(GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool)
InOutTypeField GetRecoStepsInputs() const
static bool CheckInstanceAvailable(DeviceType type, bool verbose)
virtual int32_t registerMemoryForGPU_internal(const void *ptr, size_t size)=0
bool IsInitialized() const
virtual size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, gpu_reconstruction_kernels::deviceEvent *ev=nullptr)=0
std::unordered_map< GPUMemoryReuse::ID, MemoryReuseMeta > mMemoryReuse1to1
std::shared_ptr< LibraryLoader > mMyLib
size_t mDeviceMemoryUsedMax
std::vector< std::unique_ptr< char[]> > mUnmanagedChunks
std::vector< ProcessorData > mProcessors
void ReturnVolatileDeviceMemory()
void * AllocateVolatileDeviceMemory(size_t size)
virtual int32_t InitDevice()=0
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr)
virtual ~GPUReconstruction()
int32_t mMaxBackendThreads
const GPUConstantMem * processors() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()=0
void UnblockStackedMemory()
GPUReconstruction(const GPUReconstruction &)=delete
T * AddChain(Args... args)
static constexpr GeometryType geometryType
static std::shared_ptr< LibraryLoader > sLibOCL
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUSettingsProcessing mProcessingSettings
GPUDataTypes::GeometryType GeometryType
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
void setErrorCodeOutput(std::vector< std::array< uint32_t, 4 > > *v)
void WriteConstantParams()
void FreeRegisteredMemory(GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false)
static GPUReconstruction * CreateInstance(const GPUSettingsDeviceBackend &cfg)
void UpdateMaxMemoryUsed()
GPUMemoryResource & Res(int16_t num)
std::vector< std::unique_ptr< char[]> > mVolatileChunks
virtual RecoStepField AvailableGPURecoSteps()
uint32_t getNEventsProcessed()
static constexpr const char *const IOTYPENAMES[]
GPUReconstruction & operator=(const GPUReconstruction &)=delete
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
RecoStepField GetRecoSteps() const
virtual int32_t RunChains()=0
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
void ClearAllocatedMemory(bool clearOutputs=true)
static constexpr const char *const GEOMETRY_TYPE_NAMES[]
GPUOutputControl mOutputControl
const GPUConstantMem & GetConstantMem() const
size_t mHostMemoryUsedMax
GPUSettingsGRP mGRPSettings
void * mDeviceMemoryPoolEnd
virtual int32_t ExitDevice()=0
void PrintMemoryOverview()
uint32_t getNEventsProcessedInStat()
const void * DeviceMemoryBase() const
virtual bool CanQueryMaxMemory()
void * AllocateUnmanagedMemory(size_t size, int32_t type)
void PrintMemoryStatistics()
void PushNonPersistentMemory(uint64_t tag)
double GetStatKernelTime()
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
virtual int32_t unregisterMemoryForGPU_internal(const void *ptr)=0
GPUMemorySizeScalers * MemoryScalers()
int32_t InitPhaseAfterDevice()
static int32_t getHostThreadIndex()
GPUDataTypes::GeneralStep GeneralStep
void * mDeviceMemoryPermanent
void BlockStackedMemory(GPUReconstruction *rec)
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpSettings(const char *dir="")
void * mHostMemoryPoolBlocked
size_t AllocateMemoryResources()
void DumpFlatObjectToFile(const T *obj, const char *file)
int32_t unregisterMemoryForGPU(const void *ptr)
static GPUReconstruction * CreateInstance(int32_t type, bool forceType, GPUReconstruction *master=nullptr)
int32_t registerMemoryForGPU(const void *ptr, size_t size)
static std::shared_ptr< LibraryLoader > sLibCUDA
const GPUSettingsGRP & GetGRPSettings() const
GPUSettingsDeviceBackend mDeviceBackendSettings
int32_t EnqueuePipeline(bool terminate=false)
virtual void PrintKernelOccupancies()
std::shared_ptr< GPUReconstructionThreading > mThreading
std::vector< GPUMemoryResource * > mNonPersistentIndividualAllocations
void * mHostMemoryPoolEnd
void * mDeviceMemoryPoolBlocked
void * mVolatileMemoryStart
void SetResetTimers(bool reset)
GPUChain * GetNextChainInQueue()
void * mHostMemoryPermanent
virtual void endGPUProfiling()
GPUOutputControl & OutputControl()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int32_t ReadSettings(const char *dir="")
void SetOutputControl(const GPUOutputControl &v)
std::vector< std::array< uint32_t, 4 > > * mOutputErrorCodes
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GPUDataTypes::RecoStepField stepsGPUMask
GPUDataTypes::InOutTypeField outputs
GPUDataTypes::RecoStepField steps
GPUDataTypes::InOutTypeField inputs
void(GPUProcessor::* InitializeProcessor)()
void(GPUProcessor::* RegisterMemoryAllocation)()
ProcessorData(GPUProcessor *p, void(GPUProcessor::*r)(), void(GPUProcessor::*i)(), void(GPUProcessor::*d)(const GPUTrackingInOutPointers &))
void(GPUProcessor::* SetMaxData)(const GPUTrackingInOutPointers &)