Project
Loading...
Searching...
No Matches
GPUReconstructionCPU.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPURECONSTRUCTIONICPU_H
16#define GPURECONSTRUCTIONICPU_H
17
19#include "GPUConstantMem.h"
20#include <stdexcept>
21#include <vector>
22
23#include "GPUGeneralKernels.h"
24#include "GPUReconstructionKernelIncludes.h"
26
27namespace o2::gpu
28{
29
31{
32 public:
33 ~GPUReconstructionCPUBackend() override = default;
34
35 protected:
37 template <class T, int32_t I = 0, typename... Args>
39 template <class T, int32_t I = 0, typename... Args>
40 void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
41};
42
43class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>
44{
46 friend class GPUChain;
47
48 public:
49 ~GPUReconstructionCPU() override;
50 static constexpr krnlRunRange krnlRunRangeNone{0};
51 static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};
52
53 template <class S, int32_t I = 0, typename... Args>
54 void runKernel(krnlSetup&& setup, Args&&... args);
55 template <class S, int32_t I = 0>
57
58 virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false);
59 int32_t GPUStuck() { return mGPUStuck; }
61
62 int32_t RunChains() override;
63
64 void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream = -1);
65
66 protected:
72
74
75#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
76 inline void runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
77 { \
78 krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)> args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \
79 const uint32_t num = GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(); \
80 if (cpuFallback) { \
81 GPUReconstructionCPU::runKernelImpl(num, &args); \
82 } else { \
83 runKernelImpl(num, &args); \
84 } \
85 }
86#include "GPUReconstructionKernelList.h"
87#undef GPUCA_KRNL
88
89 int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; }
90 int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; }
91
92 virtual void SynchronizeStream(int32_t stream) {}
93 virtual void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) {}
94 virtual void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) {}
95 virtual bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return true; }
96 virtual void RecordMarker(deviceEvent* ev, int32_t stream) {}
97 virtual void SynchronizeGPU() {}
98 virtual void ReleaseEvent(deviceEvent ev) {}
99
100 size_t TransferMemoryResourceToGPU(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, true, res->Ptr(), res->PtrDevice()); }
101 size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); }
102 size_t TransferMemoryResourcesToGPU(GPUProcessor* proc, int32_t stream = -1, bool all = false) { return TransferMemoryResourcesHelper(proc, stream, all, true); }
103 size_t TransferMemoryResourcesToHost(GPUProcessor* proc, int32_t stream = -1, bool all = false) { return TransferMemoryResourcesHelper(proc, stream, all, false); }
104 size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryResourceToGPU(&mMemoryResources[res], stream, ev, evList, nEvents); }
105 size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryResourceToHost(&mMemoryResources[res], stream, ev, evList, nEvents); }
106 virtual size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
107 virtual size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
108 size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
109 virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst);
110
111 int32_t InitDevice() override;
112 int32_t ExitDevice() override;
113 int32_t GetThread();
114
115 virtual int32_t PrepareTextures() { return 0; }
116 virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }
117
118 // Pointers to tracker classes
119 GPUProcessorProcessors mProcShadow; // Host copy of tracker objects that will be used on the GPU
121
122 uint32_t mBlockCount = 1;
123 uint32_t mThreadCount = 1;
124 uint32_t mWarpSize = 1;
125
126 private:
127 size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU);
128};
129
130template <class S, int32_t I, typename... Args>
131inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args)
132{
133 HighResTimer* t = nullptr;
134 GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep();
136 throw std::runtime_error("Failure running general kernel without defining RecoStep");
137 }
138 int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0;
139 uint32_t& nThreads = setup.x.nThreads;
140 uint32_t& nBlocks = setup.x.nBlocks;
141 const uint32_t stream = setup.x.stream;
142 auto prop = getKernelProperties<S, I>();
143 const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
144 const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount));
145 if (nBlocks == (uint32_t)-1) {
146 nBlocks = (nThreads + autoThreads - 1) / autoThreads;
147 nThreads = autoThreads;
148 } else if (nBlocks == (uint32_t)-2) {
149 nBlocks = nThreads;
150 nThreads = autoThreads;
151 } else if (nBlocks == (uint32_t)-3) {
152 nBlocks = autoBlocks;
153 nThreads = autoThreads;
154 } else if ((int32_t)nThreads < 0) {
155 nThreads = cpuFallback ? 1 : -nThreads;
156 }
157 if (nThreads > GPUCA_MAX_THREADS) {
158 throw std::runtime_error("GPUCA_MAX_THREADS exceeded");
159 }
160 if (mProcessingSettings.debugLevel >= 3) {
161 GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName<S, I>(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str());
162 }
163 if (nThreads == 0 || nBlocks == 0) {
164 return;
165 }
166 if (mProcessingSettings.debugLevel >= 1) {
167 t = &getKernelTimer<S, I>(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream);
168 if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) {
169 t->Start();
170 }
171 }
172 double deviceTimerTime = 0.;
173 runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<S, I>(), cpuFallback, deviceTimerTime, std::forward<krnlSetup&&>(setup), std::forward<Args>(args)...);
174 if (GPUDebug(GetKernelName<S, I>(), stream, mProcessingSettings.serializeGPU & 1)) {
175 throw std::runtime_error("kernel failure");
176 }
177 if (mProcessingSettings.debugLevel >= 1) {
178 if (t) {
179 if (deviceTimerTime != 0.) {
180 t->AddTime(deviceTimerTime);
181 if (t->IsRunning()) {
182 t->Abort();
183 }
184 } else if (t->IsRunning()) {
185 t->Stop();
186 }
187 }
188 if (CheckErrorCodes(cpuFallback) && !mProcessingSettings.ignoreNonFatalGPUErrors) {
189 throw std::runtime_error("kernel error code");
190 }
191 }
192}
193
194} // namespace o2::gpu
195
196#endif
benchmark::State & state
#define GPUCA_MAX_THREADS
uint32_t res
Definition RawData.h:0
TBranch * ptr
void AddTime(double t)
Definition timer.cxx:128
void Start()
Definition timer.cxx:57
void Abort()
Definition timer.cxx:81
void Stop()
Definition timer.cxx:69
int32_t IsRunning()
Definition timer.h:33
void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs< T, I, Args... > &args)
void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime &_xyz, const Args &... args)
GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend &cfg)
~GPUReconstructionCPUBackend() override=default
GPUReconstructionCPU(const GPUSettingsDeviceBackend &cfg)
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
static constexpr krnlRunRange krnlRunRangeNone
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu=-1)
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
static constexpr krnlEvent krnlEventNone
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual size_t TransferMemoryInternal(GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst)
void runKernel(krnlSetup &&setup, Args &&... args)
GPURecoStepConfiguration mRecoSteps
std::vector< GPUMemoryResource > mMemoryResources
GPUSettingsProcessing mProcessingSettings
static GPUReconstruction * GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend &cfg)
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
struct _cl_event * event
Definition glcorearb.h:2982
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GPUDataTypes::RecoStepField stepsGPUMask
const int nEvents
Definition test_Fifo.cxx:27