47 virtual int32_t
Init() = 0;
51 virtual void MemorySize(
size_t& gpuMem,
size_t& pageLockedHostMem) = 0;
53 virtual int32_t
CheckErrorCodes(
bool cpuOnly =
false,
bool forceShowErrors =
false, std::vector<std::array<uint32_t, 4>>* fillErrors =
nullptr) {
return 0; }
119 inline void TransferMemoryResourceToGPU(
RecoStep step,
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) { timeCpy(step,
true, &
GPUReconstructionCPU::TransferMemoryResourceToGPU,
res,
stream, ev, evList,
nEvents); }
120 inline void TransferMemoryResourceToHost(
RecoStep step,
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) { timeCpy(step,
false, &
GPUReconstructionCPU::TransferMemoryResourceToHost,
res,
stream, ev, evList,
nEvents); }
123 inline void TransferMemoryResourceLinkToGPU(
RecoStep step, int16_t
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) { timeCpy(step,
true, &
GPUReconstructionCPU::TransferMemoryResourceLinkToGPU,
res,
stream, ev, evList,
nEvents); }
124 inline void TransferMemoryResourceLinkToHost(
RecoStep step, int16_t
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) { timeCpy(step,
false, &
GPUReconstructionCPU::TransferMemoryResourceLinkToHost,
res,
stream, ev, evList,
nEvents); }
126 inline void WriteToConstantMemory(
RecoStep step,
size_t offset,
const void*
src,
size_t size, int32_t
stream = -1,
deviceEvent* ev =
nullptr) { timeCpy(step,
true, &
GPUReconstructionCPU::WriteToConstantMemory,
offset,
src,
size,
stream, ev); }
127 inline void GPUMemCpy(
RecoStep step,
void*
dst,
const void*
src,
size_t size, int32_t
stream, int32_t toGPU,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) { timeCpy(step, toGPU, &
GPUReconstructionCPU::GPUMemCpy,
dst,
src,
size,
stream, toGPU, ev, evList,
nEvents); }
133 timeCpy<true>(step, toGPU, &
GPUReconstructionCPU::GPUMemCpyAlways,
GetRecoStepsGPU() & step,
dst,
src,
size,
stream, toGPU, ev, evList,
nEvents);
142 template <
class T,
class S>
147 template <
class T,
class S>
178 template <
class S, int32_t I = 0,
typename... Args>
179 requires(
sizeof(
S) >= 0)
182 runKernelCallInterface<S, I>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup), args...);
185 template <
class S,
int32_t I = 0>
191 template <
class T,
int32_t I = 0>
196 template <
class T, int32_t J = -1>
228 template <
class T,
class S,
typename... Args>
233 template <
class T,
class S,
typename... Args>
236 template <
class T,
class S,
typename... Args>
240 template <
bool Always =
false,
class T,
class S,
typename... Args>
241 void timeCpy(
RecoStep step, int32_t toGPU,
S T::*
func, Args... args);
243#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
244 template <class S, int32_t I> \
245 requires(std::is_same_v<S, GPUCA_M_FIRST(GPUCA_M_STRIP(x_class))> && I == S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel))) \
246 inline void runKernelCallInterface(GPUReconstructionProcessing::krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
248 mRec->runKernelInterface<GPUCA_M_FIRST(GPUCA_M_STRIP(x_class)), S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel)) GPUCA_M_STRIP(x_types)>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup) GPUCA_M_STRIP(x_forward)); \
250#include "GPUReconstructionKernelList.h"
254template <
bool Always,
class T,
class S,
typename... Args>
255inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU,
S T::*
func, Args... args)
257 if (!Always &&
step != RecoStep::NoRecoStep && !(GetRecoStepsGPU() &
step)) {
261 size_t* bytes =
nullptr;
262 if (mRec->GetProcessingSettings().debugLevel >= 1 && toGPU >= 0) {
263 int32_t
id = mRec->getRecoStepNum(step,
false);
265 auto& tmp = mRec->mTimersRecoSteps[
id];
266 timer = toGPU ? &tmp.timerToGPU : &tmp.timerToHost;
267 bytes = toGPU ? &tmp.bytesToGPU : &tmp.bytesToHost;
268 (toGPU ? tmp.countToGPU : tmp.countToHost)++;
272 size_t n = (mRec->*
func)(args...);
280template <
class T,
class S,
typename... Args>
289 (processor.*
func)(args...);
297template <
class T,
class S,
typename... Args>
304 timer->timerTotal.
Start();
307 int32_t
retVal = (
reinterpret_cast<T*
>(
this)->*
func)(args...);
309 timer->timerTotal.
Stop();
310 timer->timerCPU += (double)(std::clock() -
c) / CLOCKS_PER_SEC;
void RecordMarker(deviceEvent *ev, int32_t stream)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const GPUConstantMem * processors() const
GPUChain(GPUReconstruction *rec)
virtual int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
const GPUSettingsGRP & GetGRPSettings() const
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const GPUReconstruction * rec() const
std::unique_ptr< T > ReadStructFromFile(const char *file)
void CondWaitEvent(T &cond, deviceEvent *ev)
GPUConstantMem * processorsDevice()
virtual void RegisterGPUProcessors()=0
GPUReconstruction::RecoStep RecoStep
virtual bool SupportsDoublePipeline()
virtual int32_t Finalize()=0
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
GPUReconstruction::RecoStepField GetRecoSteps() const
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
virtual int32_t FinalizePipelinedProcessing()
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
void runKernel(GPUReconstructionProcessing::krnlSetup &&setup, Args const &... args)
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
uint32_t WarpSize() const
GPUReconstructionProcessing::krnlExec krnlExec
uint32_t ThreadCount() const
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
GPUChain * GetNextChainInQueue()
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
size_t AllocateRegisteredMemory(GPUProcessor *proc)
virtual int32_t PrepareEvent()=0
GPUReconstructionProcessing::krnlProperties getKernelProperties()
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
GPUConstantMem * processors()
static constexpr krnlRunRange krnlRunRangeNone
static constexpr krnlEvent krnlEventNone
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl *control=nullptr)
void SetONNXGPUStream(Ort::SessionOptions &opt, int32_t stream, int32_t *deviceId)
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void DumpStructToFile(const T *obj, const char *file)
virtual int32_t RunChain()=0
void SetupGPUProcessor(T *proc, bool allocate)
void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const GPUCalibObjectsConst & GetCalib() const
void ReadStructFromFile(const char *file, T *obj)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
virtual ~GPUChain()=default
GPUConstantMem * processorsShadow()
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
static constexpr int32_t NSECTORS
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
const GPUParam & GetParam() const
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
virtual void RegisterPermanentMemoryAndProcessors()=0
virtual void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem)=0
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
void DumpFlatObjectToFile(const T *obj, const char *file)
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
const GPUConstantMem * GetProcessors() const
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
virtual int32_t EarlyConfigure()
HighResTimer & getTimer(const char *name, int32_t num=-1)
uint32_t BlockCount() const
virtual void DumpSettings(const char *dir="")
virtual void ReadSettings(const char *dir="")
GPUReconstruction * rec()
void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual void PrintMemoryStatistics()
virtual void SetONNXGPUStream(Ort::SessionOptions &, int32_t, int32_t *)
GPUConstantMem *& mProcessorsShadow
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeGPU()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlProperties getKernelProperties(int gpu=-1)
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
auto & getRecoStepTimer(RecoStep step)
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
gpu_reconstruction_kernels::deviceEvent deviceEvent
virtual std::unique_ptr< threadContext > GetThreadContext() override
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetupGPUProcessor(T *proc, bool allocate)
GPUDataTypes::RecoStep RecoStep
GPUConstantMem * mDeviceConstantMem
GPUConstantMem * processors()
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::unique_ptr< T > ReadStructFromFile(const char *file)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void DumpStructToFile(const T *obj, const char *file)
InOutTypeField GetRecoStepsInputs() const
const GPUCalibObjectsConst & GetCalib() const
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUDataTypes::GeometryType GeometryType
RecoStepField GetRecoSteps() const
const GPUParam & GetParam() const
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
GPUDataTypes::GeneralStep GeneralStep
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpFlatObjectToFile(const T *obj, const char *file)
const GPUSettingsGRP & GetGRPSettings() const
GPUChain * GetNextChainInQueue()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type