Project
Loading...
Searching...
No Matches
GPUReconstructionCPUKernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPURECONSTRUCTIONICPUKERNELS_H
16#define GPURECONSTRUCTIONICPUKERNELS_H
17
19#include "GPUSettings.h"
20#include "GPULogging.h"
21
22namespace o2::gpu
23{
24
// Runs kernel <S, I> using the launch configuration in `setup` and the forwarded `args`.
// Resolves the reco step, computes the CPU-fallback mode, fills in automatic block/thread
// counts, optionally times the call, and throws std::runtime_error on the failures checked below.
25template <class S, int32_t I, typename... Args>
26inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args const&... args)
27{
28 HighResTimer* t = nullptr;
 // Use the step declared by the kernel class unless it reports NoRecoStep; then take it from the setup.
29 GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep();
 // NOTE(review): source line 30 is absent from this listing, so the condition guarding this throw is not visible here.
31 throw std::runtime_error("Failure running general kernel without defining RecoStep");
32 }
 // cpuFallback: 0 when not on a GPU backend; 2 when the setup explicitly requests the CPU device;
 // otherwise 1 if myStep is not fully contained in stepsGPUMask, else 0.
33 int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0;
 // nThreads / nBlocks are references: the resolved values are written back into setup.x.
34 uint32_t& nThreads = setup.x.nThreads;
35 uint32_t& nBlocks = setup.x.nBlocks;
36 const uint32_t stream = setup.x.stream;
37 auto prop = getKernelProperties<S, I>();
 // On CPU fallback the automatic grid collapses to 1 thread and 1 block.
38 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
39 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount));
 // Sentinel values of nBlocks select how the grid is derived:
 //  -1: nBlocks = ceil(nThreads / autoThreads), nThreads = autoThreads
 //  -2: nBlocks = incoming nThreads,            nThreads = autoThreads
 //  -3: nBlocks = autoBlocks,                   nThreads = autoThreads
 // Otherwise, a negative nThreads (viewed as signed) is replaced by its magnitude, or by 1 on CPU fallback.
40 if (nBlocks == (uint32_t)-1) {
41 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
42 nThreads = autoThreads;
43 } else if (nBlocks == (uint32_t)-2) {
44 nBlocks = nThreads;
45 nThreads = autoThreads;
46 } else if (nBlocks == (uint32_t)-3) {
47 nBlocks = autoBlocks;
48 nThreads = autoThreads;
49 } else if ((int32_t)nThreads < 0) {
50 nThreads = cpuFallback ? 1 : -nThreads;
51 }
52 if (nThreads > GPUCA_MAX_THREADS) {
53 throw std::runtime_error("GPUCA_MAX_THREADS exceeded");
54 }
55 if (GetProcessingSettings().debugLevel >= 3) {
56 GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName<S, I>(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : (cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()));
57 }
 // Empty grid: nothing to run. This return comes after the debug message above, so empty launches are still logged.
58 if (nThreads == 0 || nBlocks == 0) {
59 return;
60 }
61 if (GetProcessingSettings().debugLevel >= 1) {
 // The timer is looked up by host thread index when running on the CPU (or on fallback), by stream otherwise.
62 t = &getKernelTimer<S, I>(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream);
 // Start the host-side timer only if device timers are off or the kernel runs on the CPU,
 // and only when fewer than 2 outer-loop threads are active or this is host thread 0.
63 if ((!GetProcessingSettings().deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) {
64 t->Start();
65 }
66 }
 // NOTE(review): deviceTimerTime is handed to argPack and checked after the launch; presumably the
 // backend writes the measured device time into it — confirm against krnlSetupArgs.
67 double deviceTimerTime = 0.;
68 krnlSetupArgs<S, I, Args...> argPack{{}, {{setup.x, setup.y, setup.z}, deviceTimerTime}, {args...}};
69 const uint32_t num = GetKernelNum<S, I>();
 // NOTE(review): source line 71 is absent from this listing, so the body of the cpuFallback branch is not visible here.
70 if (cpuFallback) {
72 } else {
73 runKernelVirtual(num, &argPack);
74 }
75
 // GPUDebug receives the kernel name, the stream, and bit 0 of serializeGPU as its force flag; a non-zero result is fatal.
76 if (GPUDebug(GetKernelName<S, I>(), stream, GetProcessingSettings().serializeGPU & 1)) {
77 throw std::runtime_error("kernel failure");
78 }
79 if (GetProcessingSettings().debugLevel >= 1) {
80 if (t) {
 // If a device time was reported, record it and discard any running host measurement; otherwise stop the host timer.
81 if (deviceTimerTime != 0.) {
82 t->AddTime(deviceTimerTime);
83 if (t->IsRunning()) {
84 t->Abort();
85 }
86 } else if (t->IsRunning()) {
87 t->Stop();
88 }
89 }
 // Error codes are only checked at debugLevel >= 1; a reported error throws unless ignoreNonFatalGPUErrors is set.
90 if (CheckErrorCodes(cpuFallback) && !GetProcessingSettings().ignoreNonFatalGPUErrors) {
91 throw std::runtime_error("kernel error code");
92 }
93 }
94}
95
96} // namespace o2::gpu
97
98#endif
#define GPUCA_MAX_THREADS
double num
void AddTime(double t)
Definition timer.cxx:128
void Start()
Definition timer.cxx:57
void Abort()
Definition timer.cxx:81
void Stop()
Definition timer.cxx:69
int32_t IsRunning()
Definition timer.h:33
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
GPURecoStepConfiguration mRecoSteps
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
const GPUSettingsProcessing & GetProcessingSettings() const
GLuint GLuint stream
Definition glcorearb.h:1806
GPUDataTypes::RecoStepField stepsGPUMask