15#ifndef GPURECONSTRUCTIONICPUKERNELS_H
16#define GPURECONSTRUCTIONICPUKERNELS_H
25template <
class S, int32_t I,
typename... Args>
26inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args
const&... args)
31 throw std::runtime_error(
"Failure running general kernel without defining RecoStep");
34 uint32_t& nThreads = setup.x.nThreads;
35 uint32_t& nBlocks = setup.x.nBlocks;
36 const uint32_t
stream = setup.x.stream;
37 auto prop = getKernelProperties<S, I>();
38 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
39 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks *
mBlockCount));
40 if (nBlocks == (uint32_t)-1) {
41 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
42 nThreads = autoThreads;
43 }
else if (nBlocks == (uint32_t)-2) {
45 nThreads = autoThreads;
46 }
else if (nBlocks == (uint32_t)-3) {
48 nThreads = autoThreads;
49 }
else if ((int32_t)nThreads < 0) {
50 nThreads = cpuFallback ? 1 : -nThreads;
53 throw std::runtime_error(
"GPUCA_MAX_THREADS exceeded");
56 GPUInfo(
"Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName<S, I>(),
stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ?
"CPU (forced)" : (cpuFallback ?
"CPU (fallback)" :
mDeviceName.c_str()));
58 if (nThreads == 0 || nBlocks == 0) {
67 double deviceTimerTime = 0.;
68 krnlSetupArgs<
S, I, Args...> argPack{{}, {{setup.
x, setup.y, setup.z}, deviceTimerTime}, {args...}};
69 const uint32_t
num = GetKernelNum<S, I>();
77 throw std::runtime_error(
"kernel failure");
81 if (deviceTimerTime != 0.) {
91 throw std::runtime_error(
"kernel error code");
#define GPUCA_MAX_THREADS
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void runKernelVirtual(const int num, const void *args)
uint32_t mNActiveThreadsOuterLoop
GPURecoStepConfiguration mRecoSteps
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
static int32_t getHostThreadIndex()
const GPUSettingsProcessing & GetProcessingSettings() const
GPUDataTypes::RecoStepField stepsGPUMask