15#ifndef GPURECONSTRUCTIONICPU_H
16#define GPURECONSTRUCTIONICPU_H
24#include "GPUReconstructionKernelIncludes.h"
37 template <
class T, int32_t I = 0,
typename... Args>
39 template <
class T, int32_t I = 0,
typename... Args>
41 template <
class T,
int32_t I>
55 template <
class S, int32_t I = 0,
typename... Args>
57 template <
class S,
int32_t I = 0>
// Debug hook, declared virtual so that derived classes can provide their own version.
// Every parameter has a default: state = "UNKNOWN", stream = -1, force = false,
// so the function can be called without arguments.
// NOTE(review): the meaning of stream = -1, of `force`, and of the int32_t return
// value is not visible in this excerpt - confirm against the implementation.
// NOTE(review): the leading "63" looks like a listing line number fused into the
// text rather than part of the declaration - verify against the real header.
63 virtual int32_t
GPUDebug(
const char*
state =
"UNKNOWN", int32_t
stream = -1,
bool force =
false);
80#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \
81 inline int32_t runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
84 return GPUReconstructionCPU::runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
86 return runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
89#include "GPUReconstructionKernelList.h"
103 size_t TransferMemoryResourceToGPU(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
true,
res->Ptr(),
res->PtrDevice()); }
104 size_t TransferMemoryResourceToHost(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
false,
res->PtrDevice(),
res->Ptr()); }
// Helper declaration (no body in this excerpt): takes a processor, a stream
// index and two flags (all, toGPU) and returns a size_t. Unlike the public
// transfer functions it has no default arguments.
// NOTE(review): presumably the shared implementation behind
// TransferMemoryResourcesToGPU / TransferMemoryResourcesToHost, with toGPU
// selecting the direction - confirm against the definition.
// NOTE(review): the leading "130" looks like a listing line number fused into
// the text rather than part of the declaration - verify against the real header.
130 size_t TransferMemoryResourcesHelper(
GPUProcessor* proc, int32_t
stream,
bool all,
bool toGPU);
133template <
class S, int32_t I,
typename... Args>
137 GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep();
138 if (myStep == GPUCA_RECO_STEP::NoRecoStep) {
139 throw std::runtime_error(
"Failure running general kernel without defining RecoStep");
142 uint32_t& nThreads = setup.x.nThreads;
143 uint32_t& nBlocks = setup.x.nBlocks;
144 const uint32_t
stream = setup.x.stream;
145 auto prop = getKernelProperties<S, I>();
146 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
147 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks *
mBlockCount));
148 if (nBlocks == (uint32_t)-1) {
149 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
150 nThreads = autoThreads;
151 }
else if (nBlocks == (uint32_t)-2) {
153 nThreads = autoThreads;
154 }
else if (nBlocks == (uint32_t)-3) {
155 nBlocks = autoBlocks;
156 nThreads = autoThreads;
157 }
else if ((int32_t)nThreads < 0) {
158 nThreads = cpuFallback ? 1 : -nThreads;
161 throw std::runtime_error(
"GPUCA_MAX_THREADS exceeded");
164 GPUInfo(
"Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName<S, I>(),
stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ?
"CPU (forced)" : cpuFallback ?
"CPU (fallback)" :
mDeviceName.c_str());
166 if (nThreads == 0 || nBlocks == 0) {
175 double deviceTimerTime = 0.;
178 throw std::runtime_error(
"kernel failure");
182 if (deviceTimerTime != 0.) {
192 throw std::runtime_error(
"kernel error code");
#define GPUCA_MAX_THREADS
GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend &cfg)
int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime &_xyz, const Args &... args)
int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs< T, I, Args... > &args)
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend()
~GPUReconstructionCPUBackend() override=default
GPUReconstructionCPU(const GPUSettingsDeviceBackend &cfg)
~GPUReconstructionCPU() override
GPUConstantMem *& mProcessorsShadow
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
virtual void SynchronizeGPU()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
static constexpr krnlRunRange krnlRunRangeNone
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t InitDevice() override
int32_t runKernel(krnlSetup &&setup, Args &&... args)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t RunChains() override
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
GPUProcessorProcessors mProcShadow
void ResetDeviceProcessorTypes()
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
int32_t ExitDevice() override
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual int32_t PrepareTextures()
static constexpr krnlEvent krnlEventNone
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual size_t TransferMemoryInternal(GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst)
gpu_reconstruction_kernels::krnlSetup krnlSetup
gpu_reconstruction_kernels::deviceEvent deviceEvent
uint32_t mNActiveThreadsOuterLoop
GPURecoStepConfiguration mRecoSteps
std::vector< GPUMemoryResource > mMemoryResources
GPUSettingsProcessing mProcessingSettings
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
static int32_t getHostThreadIndex()
GPUDataTypes::RecoStepField stepsGPUMask
GPUConstantMem * mProcessorsProc
void * SetPointersDeviceProcessor(void *mem)
int16_t mMemoryResProcessors