15#ifndef GPURECONSTRUCTIONICPU_H
16#define GPURECONSTRUCTIONICPU_H
40 template <
class S,
int32_t I = 0>
42 template <
class T, int32_t I = 0,
typename... Args>
45 virtual int32_t
GPUDebug(
const char*
state =
"UNKNOWN", int32_t
stream = -1,
bool force =
false);
73 size_t TransferMemoryResourceToGPU(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
true,
res->Ptr(),
res->PtrDevice()); }
74 size_t TransferMemoryResourceToHost(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
false,
res->PtrDevice(),
res->Ptr()); }
102 size_t TransferMemoryResourcesHelper(
GPUProcessor* proc, int32_t
stream,
bool all,
bool toGPU);
103 template <
class S, int32_t I = 0,
typename... Args>
104 void runKernelInterface(
krnlSetup&& setup, Args
const&... args);
GPUReconstructionCPU(const GPUSettingsDeviceBackend &cfg)
~GPUReconstructionCPU() override
virtual void SetONNXGPUStream(Ort::SessionOptions &, int32_t, int32_t *)
GPUConstantMem *& mProcessorsShadow
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void runKernelBackend(const krnlSetupTime &_xyz, const Args &... args)
virtual void SynchronizeGPU()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
static constexpr krnlRunRange krnlRunRangeNone
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t InitDevice() override
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t RunChains() override
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
GPUProcessorProcessors mProcShadow
krnlProperties getKernelProperties(int gpu=-1)
void ResetDeviceProcessorTypes()
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
int32_t ExitDevice() override
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
static constexpr krnlEvent krnlEventNone
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual size_t TransferMemoryInternal(GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst)
gpu_reconstruction_kernels::deviceEvent deviceEvent
std::vector< GPUMemoryResource > mMemoryResources
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
GPUConstantMem * mProcessorsProc
void * SetPointersDeviceProcessor(void *mem)
int16_t mMemoryResProcessors