15#ifndef GPURECONSTRUCTIONICPU_H
16#define GPURECONSTRUCTIONICPU_H
24#include "GPUReconstructionKernelIncludes.h"
37 template <
class T, int32_t I = 0,
typename... Args>
39 template <
class T, int32_t I = 0,
typename... Args>
53 template <
class S, int32_t I = 0,
typename... Args>
55 template <
class S,
int32_t I = 0>
// Declaration of the virtual GPUDebug hook. It returns an int32_t and every
// parameter is defaulted: state = "UNKNOWN", stream = -1, force = false.
// NOTE(review): the meaning of the return value and of stream = -1 is not
// visible in this excerpt — confirm against the implementation.
58 virtual int32_t
GPUDebug(
const char*
state =
"UNKNOWN", int32_t
stream = -1,
bool force =
false);
// GPUCA_KRNL: per-kernel macro that emits an inline runKernelImplWrapper
// overload, selected by a classArgument<...> tag built from x_class.
// The visible part of the generated body
//   - packs setup.x, setup.y, setup.z, the timer reference and the forwarded
//     kernel arguments (x_forward) into a krnlSetupArgs object,
//   - looks up the kernel number with GetKernelNum<...>(), and
//   - calls either GPUReconstructionCPU::runKernelImpl(num, &args) or the
//     unqualified runKernelImpl(num, &args).
// NOTE(review): the lines that choose between those two calls (and the braces
// of the generated function) are missing from this excerpt; presumably the
// choice depends on cpuFallback — confirm against the full header.
// NOTE(review): presumably expanded once per kernel by the kernel-list include
// that follows — confirm.
75#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
76 inline void runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
78 krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)> args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \
79 const uint32_t num = GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(); \
81 GPUReconstructionCPU::runKernelImpl(num, &args); \
83 runKernelImpl(num, &args); \
86#include "GPUReconstructionKernelList.h"
// Convenience wrapper: transfers one memory resource in the host -> device
// direction by forwarding to TransferMemoryInternal with toGPU = true,
// res->Ptr() as the source and res->PtrDevice() as the destination.
//   res      resource to transfer; dereferenced unconditionally, so it must
//            not be null
//   stream   passed through unchanged (default -1)
//   ev       passed through unchanged (default nullptr)
//   evList   passed through unchanged (default nullptr)
//   nEvents  passed through unchanged (default 1)
// Returns whatever TransferMemoryInternal returns.
100 size_t TransferMemoryResourceToGPU(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
true,
res->Ptr(),
res->PtrDevice()); }
// Convenience wrapper: transfers one memory resource in the device -> host
// direction by forwarding to TransferMemoryInternal with toGPU = false,
// res->PtrDevice() as the source and res->Ptr() as the destination.
//   res      resource to transfer; dereferenced unconditionally, so it must
//            not be null
//   stream   passed through unchanged (default -1)
//   ev       passed through unchanged (default nullptr)
//   evList   passed through unchanged (default nullptr)
//   nEvents  passed through unchanged (default 1)
// Returns whatever TransferMemoryInternal returns.
101 size_t TransferMemoryResourceToHost(
GPUMemoryResource*
res, int32_t
stream = -1,
deviceEvent* ev =
nullptr,
deviceEvent* evList =
nullptr, int32_t
nEvents = 1) {
return TransferMemoryInternal(
res,
stream, ev, evList,
nEvents,
false,
res->PtrDevice(),
res->Ptr()); }
// Declaration only: helper taking a processor, a stream index and two flags
// (all, toGPU), returning a size_t. No parameter has a default here.
// NOTE(review): presumably the shared implementation behind the
// TransferMemoryResourcesToGPU / TransferMemoryResourcesToHost entry points,
// with toGPU selecting the direction — confirm against the definition.
127 size_t TransferMemoryResourcesHelper(
GPUProcessor* proc, int32_t
stream,
bool all,
bool toGPU);
// Out-of-class template definition of which only part of the body is visible:
// the signature line, the braces and several statements are missing from this
// excerpt (the fused original line numbers jump).
// NOTE(review): presumably GPUReconstructionCPU::runKernel<S, I, Args...> — confirm.
130template <
class S, int32_t I,
typename... Args>
// Raised for a kernel without a RecoStep; the guarding condition is not visible here.
136 throw std::runtime_error(
"Failure running general kernel without defining RecoStep");
// nThreads and nBlocks are references into setup.x, so the adjustments below
// modify the launch configuration stored in setup in place.
139 uint32_t& nThreads = setup.x.nThreads;
140 uint32_t& nBlocks = setup.x.nBlocks;
141 const uint32_t
stream = setup.x.stream;
// Automatic launch sizes: both are 1 when cpuFallback is set; otherwise the
// thread count comes from the kernel properties and the block count is
// prop.forceBlocks when non-zero, else prop.minBlocks * mBlockCount.
142 auto prop = getKernelProperties<S, I>();
143 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
144 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks *
mBlockCount));
// nBlocks carries sentinel values (uint32_t)-1 / -2 / -3 that request an
// automatic configuration:
//   -1: nBlocks = requested nThreads divided by autoThreads, rounded up;
//       nThreads = autoThreads
//   -2: nThreads = autoThreads (see note inside the branch)
//   -3: nBlocks = autoBlocks and nThreads = autoThreads
// Otherwise, a negative nThreads (when read as int32_t) is replaced by 1 under
// cpuFallback, or by its magnitude.
145 if (nBlocks == (uint32_t)-1) {
146 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
147 nThreads = autoThreads;
148 }
else if (nBlocks == (uint32_t)-2) {
// NOTE(review): a statement between the condition and this assignment appears
// to be missing from the excerpt — check the full source before relying on
// what happens to nBlocks in this branch.
150 nThreads = autoThreads;
151 }
else if (nBlocks == (uint32_t)-3) {
152 nBlocks = autoBlocks;
153 nThreads = autoThreads;
154 }
else if ((int32_t)nThreads < 0) {
// Unary minus on the unsigned value wraps around, yielding the magnitude of
// the negative request.
155 nThreads = cpuFallback ? 1 : -nThreads;
// The condition guarding this throw is not visible in this excerpt.
158 throw std::runtime_error(
"GPUCA_MAX_THREADS exceeded");
// Log line with kernel name, stream, setup.y.index, grid (blocks/threads) and
// the execution target: "CPU (forced)" when cpuFallback == 2, "CPU (fallback)"
// for any other non-zero value, otherwise the device name. Any condition
// guarding the call is not visible here.
161 GPUInfo(
"Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName<S, I>(),
stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ?
"CPU (forced)" : cpuFallback ?
"CPU (fallback)" :
mDeviceName.c_str());
// Empty launch configuration; the body of this branch is not visible here.
163 if (nThreads == 0 || nBlocks == 0) {
// Starts at 0 and is compared against 0 further down; the code that may set
// it is not visible in this excerpt.
172 double deviceTimerTime = 0.;
// The condition guarding this throw is not visible in this excerpt.
175 throw std::runtime_error(
"kernel failure");
179 if (deviceTimerTime != 0.) {
// The condition guarding this throw is not visible in this excerpt.
189 throw std::runtime_error(
"kernel error code");
#define GPUCA_MAX_THREADS
void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs< T, I, Args... > &args)
void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime &_xyz, const Args &... args)
GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend &cfg)
~GPUReconstructionCPUBackend() override=default
GPUReconstructionCPU(const GPUSettingsDeviceBackend &cfg)
~GPUReconstructionCPU() override
GPUConstantMem *& mProcessorsShadow
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeGPU()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
static constexpr krnlRunRange krnlRunRangeNone
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t InitDevice() override
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu=-1)
int32_t RunChains() override
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
GPUProcessorProcessors mProcShadow
void ResetDeviceProcessorTypes()
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
int32_t ExitDevice() override
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual int32_t PrepareTextures()
static constexpr krnlEvent krnlEventNone
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual size_t TransferMemoryInternal(GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst)
void runKernel(krnlSetup &&setup, Args &&... args)
gpu_reconstruction_kernels::krnlSetup krnlSetup
gpu_reconstruction_kernels::deviceEvent deviceEvent
uint32_t mNActiveThreadsOuterLoop
GPURecoStepConfiguration mRecoSteps
std::vector< GPUMemoryResource > mMemoryResources
GPUSettingsProcessing mProcessingSettings
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
static int32_t getHostThreadIndex()
GPUDataTypes::RecoStepField stepsGPUMask
GPUConstantMem * mProcessorsProc
void * SetPointersDeviceProcessor(void *mem)
int16_t mMemoryResProcessors