15#if !defined(GPURECONSTRUCTION_H) && !defined(__OPENCL__)
16#define GPURECONSTRUCTION_H
25#include <unordered_map>
26#include <unordered_set>
// Forward declarations: these types are only named here (pointers, references,
// smart-pointer members); their definitions are not part of this excerpt.
struct GPUMemorySizeScalers;
struct GPUReconstructionPipelineContext;
struct GPUReconstructionThreading;
class ThrustVolatileAllocator;
struct GPUDefParameters;
class GPUMemoryResource;
struct GPUSettingsDeviceBackend;
struct GPUSettingsProcessing;
struct GPUSettingsRecDynamic;
60namespace gpu_reconstruction_kernels
70 std::shared_ptr<LibraryLoader>
mMyLib =
nullptr;
72 std::vector<std::unique_ptr<GPUChain>>
mChains;
90#ifdef GPUCA_TPC_GEOMETRY_O2
// Human-readable labels for the I/O data types, one string per entry (20 entries).
// NOTE(review): presumably indexed by InOutPointerType - confirm that the order matches the enum.
static constexpr const char* const IOTYPENAMES[] = {
  "TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations",
  "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
  "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters",
  "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels",
  "TPC Digit MC Labels", // spelling fixed (was "Labeels")
  "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"};
134 template <
class T,
typename... Args>
// Declaration only; the definition is not part of this excerpt.
// Takes a 64-bit error value, the source file name and line it refers to, and a
// failOnError flag; returns an int32_t.
// NOTE(review): presumably reports a non-zero backend error together with the call
// site and aborts/throws when failOnError is set - confirm against the definition.
int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
// Declaration only; the definition is not part of this excerpt.
// All three parameters are optional: cpuOnly and forceShowErrors default to false,
// fillErrors defaults to nullptr. Returns an int32_t.
// NOTE(review): presumably fillErrors, when non-null, receives the collected error
// records (4 x uint32_t each) - confirm against the definition.
int32_t CheckErrorCodes(bool cpuOnly = false,
                        bool forceShowErrors = false,
                        std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
191 virtual void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame);
277 template <
class T,
class S>
279 template <
class T,
class S>
// List of 16-bit unsigned values; the enclosing scope is not visible in this excerpt.
// NOTE(review): presumably memory-resource indices - confirm against the users of this member.
std::vector<uint16_t> res;
// Declarations only; both take no arguments and return an int32_t.
// NOTE(review): presumably 0 signals success for both - confirm against the definitions.
int32_t LoadLibrary();
int32_t CloseLibrary();
// Non-owning C strings.
// NOTE(review): presumably the library file name and the name of the factory
// symbol looked up in it - confirm where these are set.
const char* mLibName;
const char* mFuncName;
411template <
class T,
typename... Args>
414 mChains.emplace_back(
new T(
this, args...));
415 return (T*)
mChains.back().get();
429 proc->InitGPUProcessor(
this, processorType);
435 static_assert(
sizeof(T) >
sizeof(
GPUProcessor),
"Need to setup derived class");
440 std::memcpy((
void*)proc->mLinkedProcessor, (
const void*)proc,
sizeof(*proc));
#define GPUCA_BUFFER_ALIGNMENT
bitfield< RecoStep, uint32_t > RecoStepField
bitfield< InOutType, uint32_t > InOutTypeField
const LibraryLoader & operator=(const LibraryLoader &)=delete
LibraryLoader(const LibraryLoader &)=delete
GPURecoStepConfiguration mRecoSteps
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
int32_t InitPhaseBeforeDevice()
void SetupGPUProcessor(T *proc, bool allocate)
static DeviceType GetDeviceType(const char *type)
std::unordered_set< const void * > mRegisteredMemoryPtrs
int32_t InitPhasePermanentMemory()
int16_t RegisterMemoryAllocationHelper(GPUProcessor *proc, void *(GPUProcessor::*setPtr)(void *), int32_t type, const char *name, const GPUMemoryReuse &re)
std::vector< std::unique_ptr< GPUChain > > mChains
GPUDataTypes::RecoStep RecoStep
GPUReconstruction * mMaster
void * AllocateVolatileMemory(size_t size, bool device)
ThrustVolatileAllocator getThrustVolatileDeviceAllocator()
std::unique_ptr< GPUMemorySizeScalers > mMemoryScalers
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr)
void SetInputControl(void *ptr, size_t size)
GPUConstantMem * mDeviceConstantMem
void ConstructGPUProcessor(GPUProcessor *proc)
void TerminatePipelineWorker()
virtual void * getGPUPointer(void *ptr)
std::shared_ptr< GPUROOTDumpCore > mROOTDump
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
static uint32_t getNIOTypeMultiplicity(InOutPointerType type)
size_t AllocateRegisteredMemoryHelper(GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *), void *&memorypoolend, const char *device)
int32_t GetMaxBackendThreads() const
GPUConstantMem * processors()
void ReturnVolatileMemory()
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
virtual void startGPUProfiling()
void ComputeReuseMax(GPUProcessor *proc)
void SetMemoryExternalInput(int16_t res, void *ptr)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static constexpr uint32_t NSECTORS
void MakeFutureDeviceMemoryAllocationsVolatile()
GPUOutputControl mInputControl
RecoStepField GetRecoStepsGPU() const
void SetResetTimers(bool reset)
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
uint32_t mNEventsProcessed
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::vector< GPUReconstruction * > mSlaves
static std::shared_ptr< LibraryLoader > sLibHIP
std::vector< std::tuple< void *, void *, size_t, size_t, uint64_t > > mNonPersistentMemoryStack
std::unique_ptr< T > ReadStructFromFile(const char *file)
virtual void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame > *timeFrame)
GPUDataTypes::DeviceType DeviceType
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void UpdateDynamicSettings(const GPUSettingsRecDynamic *d)
std::unique_ptr< GPUSettingsDeviceBackend > mDeviceBackendSettings
std::vector< GPUMemoryResource > mMemoryResources
void RegisterGPUProcessor(T *proc, bool deviceSlave)
static std::shared_ptr< LibraryLoader > * GetLibraryInstance(DeviceType type, bool verbose)
std::unique_ptr< GPUReconstructionPipelineContext > mPipelineContext
std::unique_ptr< GPUConstantMem > mHostConstantMem
size_t AllocateRegisteredPermanentMemory()
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
void DumpStructToFile(const T *obj, const char *file)
void AllocateRegisteredMemoryInternal(GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool)
InOutTypeField GetRecoStepsInputs() const
static bool CheckInstanceAvailable(DeviceType type, bool verbose)
virtual int32_t registerMemoryForGPU_internal(const void *ptr, size_t size)=0
bool IsInitialized() const
virtual size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, gpu_reconstruction_kernels::deviceEvent *ev=nullptr)=0
std::unordered_map< GPUMemoryReuse::ID, MemoryReuseMeta > mMemoryReuse1to1
std::shared_ptr< LibraryLoader > mMyLib
size_t mDeviceMemoryUsedMax
std::vector< ProcessorData > mProcessors
void ReturnVolatileDeviceMemory()
void * AllocateVolatileDeviceMemory(size_t size)
bool mDeviceMemoryAsVolatile
virtual int32_t InitDevice()=0
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr)
virtual ~GPUReconstruction()
int32_t mMaxBackendThreads
const GPUCalibObjectsConst & GetCalib() const
const GPUTrackingInOutPointers GetIOPtrs() const
const GPUConstantMem * processors() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()=0
void UnblockStackedMemory()
GPUReconstruction(const GPUReconstruction &)=delete
T * AddChain(Args... args)
static constexpr GeometryType geometryType
static std::shared_ptr< LibraryLoader > sLibOCL
std::vector< std::unique_ptr< char[], alignedDeleter > > mNonPersistentIndividualDirectAllocations
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUDataTypes::GeometryType GeometryType
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
void setErrorCodeOutput(std::vector< std::array< uint32_t, 4 > > *v)
void WriteConstantParams()
void FreeRegisteredMemory(GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false)
std::vector< std::unique_ptr< char[], alignedDeleter > > mVolatileChunks
static GPUReconstruction * CreateInstance(const GPUSettingsDeviceBackend &cfg)
void UpdateMaxMemoryUsed()
GPUMemoryResource & Res(int16_t num)
virtual RecoStepField AvailableGPURecoSteps()
uint32_t getNEventsProcessed()
static constexpr const char *const IOTYPENAMES[]
GPUReconstruction & operator=(const GPUReconstruction &)=delete
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
std::vector< std::unique_ptr< char[], alignedDeleter > > mDirectMemoryChunks
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
RecoStepField GetRecoSteps() const
uint32_t NStreams() const
virtual int32_t RunChains()=0
DeviceType GetDeviceType() const
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
const GPUParam & GetParam() const
void ClearAllocatedMemory(bool clearOutputs=true)
static constexpr const char *const GEOMETRY_TYPE_NAMES[]
GPUOutputControl mOutputControl
const GPUConstantMem & GetConstantMem() const
size_t mHostMemoryUsedMax
void * mDeviceMemoryPoolEnd
virtual int32_t ExitDevice()=0
void PrintMemoryOverview()
std::unique_ptr< GPUSettingsGRP > mGRPSettings
uint32_t getNEventsProcessedInStat()
std::unique_ptr< GPUSettingsProcessing > mProcessingSettings
const void * DeviceMemoryBase() const
virtual bool CanQueryMaxMemory()
void PrintMemoryStatistics()
void PushNonPersistentMemory(uint64_t tag)
double GetStatKernelTime()
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
virtual int32_t unregisterMemoryForGPU_internal(const void *ptr)=0
GPUMemorySizeScalers * MemoryScalers()
int32_t InitPhaseAfterDevice()
static int32_t getHostThreadIndex()
GPUDataTypes::GeneralStep GeneralStep
void * mDeviceMemoryPermanent
void BlockStackedMemory(GPUReconstruction *rec)
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpSettings(const char *dir="")
void * AllocateDirectMemory(size_t size, int32_t type)
void * mHostMemoryPoolBlocked
size_t AllocateMemoryResources()
void DumpFlatObjectToFile(const T *obj, const char *file)
int32_t unregisterMemoryForGPU(const void *ptr)
virtual const GPUDefParameters & getGPUParameters(bool doGPU) const =0
static GPUReconstruction * CreateInstance(int32_t type, bool forceType, GPUReconstruction *master=nullptr)
int32_t registerMemoryForGPU(const void *ptr, size_t size)
static std::shared_ptr< LibraryLoader > sLibCUDA
const GPUSettingsGRP & GetGRPSettings() const
void SetDebugLevelTmp(int32_t level)
int32_t EnqueuePipeline(bool terminate=false)
virtual void PrintKernelOccupancies()
std::shared_ptr< GPUReconstructionThreading > mThreading
std::vector< GPUMemoryResource * > mNonPersistentIndividualAllocations
void * mHostMemoryPoolEnd
void * mDeviceMemoryPoolBlocked
void * mVolatileMemoryStart
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const
GPUChain * GetNextChainInQueue()
void * mHostMemoryPermanent
virtual void endGPUProfiling()
int32_t GPUChkErrA(const int64_t error, const char *file, int32_t line, bool failOnError)
GPUOutputControl & OutputControl()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int32_t ReadSettings(const char *dir="")
void SetOutputControl(const GPUOutputControl &v)
std::vector< std::array< uint32_t, 4 > > * mOutputErrorCodes
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GPUDataTypes::RecoStepField stepsGPUMask
GPUDataTypes::InOutTypeField outputs
GPUDataTypes::RecoStepField steps
GPUDataTypes::InOutTypeField inputs
void(GPUProcessor::* InitializeProcessor)()
void(GPUProcessor::* RegisterMemoryAllocation)()
ProcessorData(GPUProcessor *p, void(GPUProcessor::*r)(), void(GPUProcessor::*i)(), void(GPUProcessor::*d)(const GPUTrackingInOutPointers &))
void(GPUProcessor::* SetMaxData)(const GPUTrackingInOutPointers &)
void operator()(void *ptr)