// Pure virtual hook (= 0): no implementation is provided here, so every concrete
// chain type has to supply one. Returns an int32_t; the meaning of the returned
// value is not visible in this listing.
44 virtual int32_t
Init() = 0;
// Pure virtual (= 0): implementations receive two non-const size_t references,
// gpuMem and pageLockedHostMem, which they are able to write to.
// NOTE(review): presumably used to report memory sizes back to the caller — confirm
// against an implementation; nothing here shows whether the values are set or added to.
48 virtual void MemorySize(
size_t& gpuMem,
size_t& pageLockedHostMem) = 0;
// Default implementation of the error-code check: none of the three arguments
// (cpuOnly, forceShowErrors, fillErrors) is read and the function always returns 0.
// Declared virtual, so a derived class may replace this behaviour.
// NOTE(review): 0 presumably means "no errors" — confirm against overriding implementations.
50 virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr)
{
  return 0;
}
// Step-aware wrapper around GPUReconstructionCPU::TransferMemoryResourceToGPU.
// The call is not made directly: it is handed to timeCpy() together with the
// reconstruction step and a "to GPU" direction flag. res, stream, ev, evList and
// nEvents are forwarded unchanged as the arguments of the wrapped member function.
115 inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
{
  constexpr bool copyToGPU = true; // direction flag passed as timeCpy's toGPU argument
  timeCpy(step, copyToGPU, &GPUReconstructionCPU::TransferMemoryResourceToGPU,
          res, stream, ev, evList, nEvents);
}
// Step-aware wrapper around GPUReconstructionCPU::TransferMemoryResourceToHost.
// The call is handed to timeCpy() together with the reconstruction step and a
// "to host" direction flag (toGPU = false). res, stream, ev, evList and nEvents
// are forwarded unchanged as the arguments of the wrapped member function.
116 inline void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
{
  constexpr bool copyToGPU = false; // host-bound copy: timeCpy's toGPU argument is false
  timeCpy(step, copyToGPU, &GPUReconstructionCPU::TransferMemoryResourceToHost,
          res, stream, ev, evList, nEvents);
}
// Same pattern as the pointer-based transfer wrappers, but the memory resource is
// identified by an int16_t value (res) instead of a GPUMemoryResource pointer.
// Dispatches GPUReconstructionCPU::TransferMemoryResourceLinkToGPU through timeCpy()
// with the "to GPU" direction flag; all remaining arguments are passed through as-is.
119 inline void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
{
  constexpr bool copyToGPU = true; // direction flag passed as timeCpy's toGPU argument
  timeCpy(step, copyToGPU, &GPUReconstructionCPU::TransferMemoryResourceLinkToGPU,
          res, stream, ev, evList, nEvents);
}
// Host-bound counterpart of TransferMemoryResourceLinkToGPU: the resource is
// identified by an int16_t value (res). Dispatches
// GPUReconstructionCPU::TransferMemoryResourceLinkToHost through timeCpy() with
// toGPU = false; all remaining arguments are passed through as-is.
120 inline void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
{
  constexpr bool copyToGPU = false; // host-bound copy: timeCpy's toGPU argument is false
  timeCpy(step, copyToGPU, &GPUReconstructionCPU::TransferMemoryResourceLinkToHost,
          res, stream, ev, evList, nEvents);
}
// Step-aware wrapper around GPUReconstructionCPU::WriteToConstantMemory.
// The write of `size` bytes from `src` at `offset` is dispatched through timeCpy()
// and is always flagged as a to-GPU operation (toGPU = true). stream and ev are
// forwarded unchanged; unlike the transfer wrappers there is no evList/nEvents pair.
122 inline void WriteToConstantMemory(RecoStep step, size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr)
{
  constexpr bool copyToGPU = true; // direction flag passed as timeCpy's toGPU argument
  timeCpy(step, copyToGPU, &GPUReconstructionCPU::WriteToConstantMemory,
          offset, src, size, stream, ev);
}
// Step-aware wrapper around GPUReconstructionCPU::GPUMemCpy.
// The caller chooses the direction: toGPU is used twice, once as timeCpy()'s own
// direction argument and once as part of the argument list forwarded to the wrapped
// member function. stream and toGPU have no defaults here; ev, evList and nEvents do.
123 inline void GPUMemCpy(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
{
  timeCpy(step, toGPU, &GPUReconstructionCPU::GPUMemCpy,
          dst, src, size,          // what to copy
          stream, toGPU,           // where / in which direction
          ev, evList, nEvents);    // event handling, passed through untouched
}
129 timeCpy<true>(step, toGPU, &
GPUReconstructionCPU::GPUMemCpyAlways,
GetRecoStepsGPU() & step,
dst,
src,
size,
stream, toGPU, ev, evList,
nEvents);
138 template <
class T,
class S>
143 template <
class T,
class S>
173 template <
class S, int32_t I = 0,
typename... Args>
176 return mRec->
runKernel<
S, I, Args...>(std::forward<gpu_reconstruction_kernels::krnlSetup&&>(setup), std::forward<Args>(args)...);
178 template <
class S,
int32_t I = 0>
184 template <
class T,
int32_t I = 0>
189 template <
class T, int32_t J = -1>
222 template <
class T,
class S,
typename... Args>
227 template <
class T,
class S,
typename... Args>
230 template <
class T,
class S,
typename... Args>
// Declaration of the helper that the copy/transfer wrappers in this class call.
//   Always - compile-time flag, defaults to false. In the out-of-class definition it
//            disables the check `step != RecoStep::NoRecoStep && !(GetRecoStepsGPU() & step)`;
//            what that check guards is not visible in this listing.
//   step   - reconstruction step the operation belongs to.
//   toGPU  - direction flag; the definition uses it to choose between the
//            to-GPU and to-host timer, byte counter and call counter.
//   func   - pointer to member (S T::*) identifying the operation to dispatch.
//   args   - arguments for func, taken by value.
// NOTE(review): how and on which object func is invoked is not shown in this extract — confirm
// against the full definition.
234 template <
bool Always =
false,
class T,
class S,
typename... Args>
235 void timeCpy(
RecoStep step, int32_t toGPU,
S T::*
func, Args... args);
238template <
bool Always,
class T,
class S,
typename... Args>
239inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU,
S T::*
func, Args... args)
241 if (!Always && step != RecoStep::NoRecoStep && !(
GetRecoStepsGPU() & step)) {
245 size_t* bytes =
nullptr;
250 timer = toGPU ? &tmp.timerToGPU : &tmp.timerToHost;
251 bytes = toGPU ? &tmp.bytesToGPU : &tmp.bytesToHost;
252 (toGPU ? tmp.countToGPU : tmp.countToHost)++;
264template <
class T,
class S,
typename... Args>
273 (processor.*
func)(args...);
281template <
class T,
class S,
typename... Args>
288 timer->timerTotal.
Start();
291 int32_t
retVal = (
reinterpret_cast<T*
>(
this)->*
func)(args...);
293 timer->timerTotal.
Stop();
294 timer->timerCPU += (double)(std::clock() -
c) / CLOCKS_PER_SEC;
void RecordMarker(deviceEvent *ev, int32_t stream)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const GPUConstantMem * processors() const
GPUChain(GPUReconstruction *rec)
virtual int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
const GPUSettingsGRP & GetGRPSettings() const
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const GPUReconstruction * rec() const
std::unique_ptr< T > ReadStructFromFile(const char *file)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
void CondWaitEvent(T &cond, deviceEvent *ev)
GPUConstantMem * processorsDevice()
virtual void RegisterGPUProcessors()=0
GPUReconstruction::RecoStep RecoStep
virtual bool SupportsDoublePipeline()
virtual int32_t Finalize()=0
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
GPUReconstruction::RecoStepField GetRecoSteps() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
virtual int32_t FinalizePipelinedProcessing()
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
uint32_t WarpSize() const
const GPUConstantMem * GetProcessors()
uint32_t ThreadCount() const
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
GPUChain * GetNextChainInQueue()
size_t AllocateRegisteredMemory(GPUProcessor *proc)
virtual int32_t PrepareEvent()=0
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
GPUConstantMem * processors()
static constexpr krnlRunRange krnlRunRangeNone
static constexpr krnlEvent krnlEventNone
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl *control=nullptr)
krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void DumpStructToFile(const T *obj, const char *file)
const GPUCalibObjectsConst & calib() const
virtual int32_t RunChain()=0
GPUReconstruction::GeneralStep GeneralStep
void SetupGPUProcessor(T *proc, bool allocate)
void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
GPUReconstruction::GeometryType GeometryType
gpu_reconstruction_kernels::krnlProperties getKernelProperties()
void ReadStructFromFile(const char *file, T *obj)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
virtual ~GPUChain()=default
GPUConstantMem * processorsShadow()
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
static constexpr int32_t NSECTORS
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
const GPUParam & GetParam() const
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
virtual void RegisterPermanentMemoryAndProcessors()=0
virtual void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem)=0
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
int32_t runKernel(gpu_reconstruction_kernels::krnlSetup &&setup, Args &&... args)
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
void DumpFlatObjectToFile(const T *obj, const char *file)
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
virtual int32_t PrepareTextures()
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
virtual int32_t EarlyConfigure()
HighResTimer & getTimer(const char *name, int32_t num=-1)
uint32_t BlockCount() const
virtual void DumpSettings(const char *dir="")
virtual void ReadSettings(const char *dir="")
GPUReconstruction * rec()
void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
gpu_reconstruction_kernels::krnlExec krnlExec
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual void PrintMemoryStatistics()
GPUConstantMem *& mProcessorsShadow
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
virtual void SynchronizeGPU()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t runKernel(krnlSetup &&setup, Args &&... args)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
auto & getRecoStepTimer(RecoStep step)
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetupGPUProcessor(T *proc, bool allocate)
GPUDataTypes::RecoStep RecoStep
GPUConstantMem * mDeviceConstantMem
GPUConstantMem * processors()
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::unique_ptr< T > ReadStructFromFile(const char *file)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
std::unique_ptr< GPUConstantMem > mHostConstantMem
void DumpStructToFile(const T *obj, const char *file)
InOutTypeField GetRecoStepsInputs() const
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUSettingsProcessing mProcessingSettings
GPUDataTypes::GeometryType GeometryType
RecoStepField GetRecoSteps() const
GPUSettingsGRP mGRPSettings
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
GPUDataTypes::GeneralStep GeneralStep
void DumpFlatObjectToFile(const T *obj, const char *file)
GPUSettingsDeviceBackend mDeviceBackendSettings
GPUChain * GetNextChainInQueue()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
GPUCalibObjectsConst calibObjects