Project
Loading...
Searching...
No Matches
GPUReconstructionCPU.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPURECONSTRUCTIONICPU_H
16#define GPURECONSTRUCTIONICPU_H
17
19#include "GPUConstantMem.h"
20#include <stdexcept>
21#include <vector>
22
23#include "GPUGeneralKernels.h"
24#include "GPUReconstructionKernelIncludes.h"
26
27namespace o2::gpu
28{
29
31{
32 public:
33 ~GPUReconstructionCPUBackend() override = default;
34
35 protected:
37 template <class T, int32_t I = 0, typename... Args>
39 template <class T, int32_t I = 0, typename... Args>
40 int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
41 template <class T, int32_t I>
43};
44
// GPUReconstructionCPU: reconstruction class layered on GPUReconstructionKernels<GPUReconstructionCPUBackend>.
// It declares the runKernel() entry point, the generated runKernelImplWrapper overloads, and a set of
// stream / event / memory-transfer hooks whose implementations in this class are mostly no-ops.
// NOTE(review): this listing appears to have lost several declarations (e.g. the bare
// `template <class S, int32_t I = 0>` header below is not followed by a declaration) - confirm against the real header.
45class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>
46{
48 friend class GPUChain;
49
50 public:
51 ~GPUReconstructionCPU() override;
 // Predefined "none" values for a run range and an event set.
 // NOTE(review): the meaning of the individual fields is not visible in this file - confirm in the krnlRunRange / krnlEvent definitions.
52 static constexpr krnlRunRange krnlRunRangeNone{0, -1};
53 static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};
54
 // Launches kernel S (variant I) as described by `setup`, forwarding `args` to it; the definition follows the class.
55 template <class S, int32_t I = 0, typename... Args>
56 int32_t runKernel(krnlSetup&& setup, Args&&... args);
57 template <class S, int32_t I = 0>
62
 // Debug hook. runKernel() calls it after every kernel launch and throws if it returns non-zero.
63 virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false);
 // Returns the current value of mGPUStuck.
64 int32_t GPUStuck() { return mGPUStuck; }
66
67 int32_t RunChains() override;
68
69 void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream = -1);
70
71 protected:
77
79
 // Generates one runKernelImplWrapper overload per GPUCA_KRNL invocation (presumably one per kernel listed in
 // GPUReconstructionKernelList.h). Each overload packs setup.x/y/z, the timer reference and the kernel arguments
 // into a krnlSetupArgs and dispatches either to GPUReconstructionCPU::runKernelImpl (cpuFallback == true) or to
 // the unqualified runKernelImpl (cpuFallback == false).
80#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \
81 inline int32_t runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
82 { \
83 if (cpuFallback) { \
84 return GPUReconstructionCPU::runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
85 } else { \
86 return runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
87 } \
88 }
89#include "GPUReconstructionKernelList.h"
90#undef GPUCA_KRNL
91
 // Memory (un)registration: in this class both overrides do nothing and return 0.
92 int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; }
93 int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; }
94
 // Stream / event synchronisation hooks. The implementations here are empty (IsEventDone always returns true);
 // they are virtual, so derived classes can presumably replace them - verify against the device backends.
95 virtual void SynchronizeStream(int32_t stream) {}
96 virtual void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) {}
97 virtual void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) {}
98 virtual bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return true; }
99 virtual void RecordMarker(deviceEvent* ev, int32_t stream) {}
100 virtual void SynchronizeGPU() {}
101 virtual void ReleaseEvent(deviceEvent ev) {}
102
 // Memory transfer helpers.
 //  - TransferMemoryResourceToGPU / ToHost forward to TransferMemoryInternal with toGPU = true / false,
 //    using (res->Ptr(), res->PtrDevice()) as (src, dst) for ToGPU and the reverse for ToHost.
 //  - TransferMemoryResourcesToGPU / ToHost forward to TransferMemoryResourcesHelper for a whole processor.
 //  - The *Link* variants index mMemoryResources with `res` and call the single-resource variant.
103 size_t TransferMemoryResourceToGPU(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, true, res->Ptr(), res->PtrDevice()); }
104 size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); }
105 size_t TransferMemoryResourcesToGPU(GPUProcessor* proc, int32_t stream = -1, bool all = false) { return TransferMemoryResourcesHelper(proc, stream, all, true); }
106 size_t TransferMemoryResourcesToHost(GPUProcessor* proc, int32_t stream = -1, bool all = false) { return TransferMemoryResourcesHelper(proc, stream, all, false); }
107 size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryResourceToGPU(&mMemoryResources[res], stream, ev, evList, nEvents); }
108 size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryResourceToHost(&mMemoryResources[res], stream, ev, evList, nEvents); }
109 virtual size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
110 virtual size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
111 size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
112 virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst);
113
114 int32_t InitDevice() override;
115 int32_t ExitDevice() override;
116 int32_t GetThread();
117
 // Optional hooks; the implementations here do nothing and return 0.
118 virtual int32_t PrepareTextures() { return 0; }
119 virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }
120
121 // Pointers to tracker classes
122 GPUProcessorProcessors mProcShadow; // Host copy of tracker objects that will be used on the GPU
124
 // Launch-geometry parameters, all defaulting to 1. runKernel() multiplies a kernel's minBlocks by mBlockCount
 // when it sizes the grid automatically; mThreadCount and mWarpSize are not used in the code visible here.
125 uint32_t mBlockCount = 1;
126 uint32_t mThreadCount = 1;
127 uint32_t mWarpSize = 1;
128
129 private:
 // Shared implementation behind TransferMemoryResourcesToGPU / TransferMemoryResourcesToHost.
130 size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU);
131};
132
133template <class S, int32_t I, typename... Args>
134inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args)
135{
136 HighResTimer* t = nullptr;
137 GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep();
138 if (myStep == GPUCA_RECO_STEP::NoRecoStep) {
139 throw std::runtime_error("Failure running general kernel without defining RecoStep");
140 }
141 int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0;
142 uint32_t& nThreads = setup.x.nThreads;
143 uint32_t& nBlocks = setup.x.nBlocks;
144 const uint32_t stream = setup.x.stream;
145 auto prop = getKernelProperties<S, I>();
146 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
147 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount));
148 if (nBlocks == (uint32_t)-1) {
149 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
150 nThreads = autoThreads;
151 } else if (nBlocks == (uint32_t)-2) {
152 nBlocks = nThreads;
153 nThreads = autoThreads;
154 } else if (nBlocks == (uint32_t)-3) {
155 nBlocks = autoBlocks;
156 nThreads = autoThreads;
157 } else if ((int32_t)nThreads < 0) {
158 nThreads = cpuFallback ? 1 : -nThreads;
159 }
160 if (nThreads > GPUCA_MAX_THREADS) {
161 throw std::runtime_error("GPUCA_MAX_THREADS exceeded");
162 }
163 if (mProcessingSettings.debugLevel >= 3) {
164 GPUInfo("Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName<S, I>(), stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str());
165 }
166 if (nThreads == 0 || nBlocks == 0) {
167 return 0;
168 }
169 if (mProcessingSettings.debugLevel >= 1) {
170 t = &getKernelTimer<S, I>(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream);
171 if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) {
172 t->Start();
173 }
174 }
175 double deviceTimerTime = 0.;
176 int32_t retVal = runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<S, I>(), cpuFallback, deviceTimerTime, std::forward<krnlSetup&&>(setup), std::forward<Args>(args)...);
177 if (GPUDebug(GetKernelName<S, I>(), stream, mProcessingSettings.serializeGPU & 1)) {
178 throw std::runtime_error("kernel failure");
179 }
180 if (mProcessingSettings.debugLevel >= 1) {
181 if (t) {
182 if (deviceTimerTime != 0.) {
183 t->AddTime(deviceTimerTime);
184 if (t->IsRunning()) {
185 t->Abort();
186 }
187 } else if (t->IsRunning()) {
188 t->Stop();
189 }
190 }
191 if (CheckErrorCodes(cpuFallback) && !mProcessingSettings.ignoreNonFatalGPUErrors) {
192 throw std::runtime_error("kernel error code");
193 }
194 }
195 return retVal;
196}
197
198} // namespace o2::gpu
199
200#endif
benchmark::State & state
#define GPUCA_RECO_STEP
#define GPUCA_MAX_THREADS
int32_t retVal
uint32_t res
Definition RawData.h:0
TBranch * ptr
void AddTime(double t)
Definition timer.cxx:128
void Start()
Definition timer.cxx:57
void Abort()
Definition timer.cxx:81
void Stop()
Definition timer.cxx:69
int32_t IsRunning()
Definition timer.h:33
GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend &cfg)
int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime &_xyz, const Args &... args)
int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs< T, I, Args... > &args)
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend()
~GPUReconstructionCPUBackend() override=default
GPUReconstructionCPU(const GPUSettingsDeviceBackend &cfg)
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
static constexpr krnlRunRange krnlRunRangeNone
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t runKernel(krnlSetup &&setup, Args &&... args)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
static constexpr krnlEvent krnlEventNone
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual size_t TransferMemoryInternal(GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst)
GPURecoStepConfiguration mRecoSteps
std::vector< GPUMemoryResource > mMemoryResources
GPUSettingsProcessing mProcessingSettings
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
struct _cl_event * event
Definition glcorearb.h:2982
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GPUDataTypes::RecoStepField stepsGPUMask
const int nEvents
Definition test_Fifo.cxx:27