Project
Loading...
Searching...
No Matches
GPUChain.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCHAIN_H
16#define GPUCHAIN_H
17
19
20#include <ctime>
21
22namespace o2::gpu
23{
25{
26 friend class GPUReconstruction;
27
28 public:
37 static constexpr krnlRunRange krnlRunRangeNone{0, -1};
38 static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};
39
40 virtual ~GPUChain() = default;
42 virtual void RegisterGPUProcessors() = 0;
43 virtual int32_t EarlyConfigure() { return 0; };
44 virtual int32_t Init() = 0;
45 virtual int32_t PrepareEvent() = 0;
46 virtual int32_t Finalize() = 0;
47 virtual int32_t RunChain() = 0;
48 virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0;
49 virtual void PrintMemoryStatistics(){};
50 virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr) { return 0; }
51 virtual bool SupportsDoublePipeline() { return false; }
52 virtual int32_t FinalizePipelinedProcessing() { return 0; }
53
54 constexpr static int32_t NSECTORS = GPUReconstruction::NSECTORS;
55
56 virtual void DumpSettings(const char* dir = "") {}
57 virtual void ReadSettings(const char* dir = "") {}
58
59 const GPUParam& GetParam() const { return mRec->mHostConstantMem->param; }
60 const GPUSettingsGRP& GetGRPSettings() const { return mRec->mGRPSettings; }
61 const GPUCalibObjectsConst& calib() const { return processors()->calibObjects; }
62 GPUReconstruction* rec() { return mRec; }
63 const GPUReconstruction* rec() const { return mRec; }
64 inline const GPUConstantMem* GetProcessors() { return mRec->processors(); }
65
66 // Make functions from GPUReconstruction*** available
72 inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; }
73
74 protected:
77
78 int32_t GetThread();
79 // Make functions from GPUReconstruction*** available
80 inline GPUConstantMem* processors() { return mRec->processors(); }
83 inline GPUParam& param() { return mRec->param(); }
84 inline const GPUConstantMem* processors() const { return mRec->processors(); }
86 inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
// When doGPU is set, hands ev to ReleaseEvent(); when it is false nothing visible here is executed.
// NOTE(review): listing line 90 (the first statement inside the if) is missing from this extract.
// Presumably it is the synchronisation step the function name refers to - confirm against the repository.
87 inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
88 {
89 if (doGPU) {
91 ReleaseEvent(ev);
92 }
93 }
// If cond compares equal to true, cond is overwritten with 2; otherwise cond is left untouched.
// NOTE(review): listing line 98 (first statement inside the if) is missing from this extract.
// ev is not used by any visible statement, so the wait on ev presumably happens there - confirm.
94 template <class T>
95 inline void CondWaitEvent(T& cond, deviceEvent* ev)
96 {
97 if (cond == true) {
99 cond = 2; // marks the condition as handled so a later call does not enter the branch again
100 }
101 }
102 inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); }
103 inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); }
104 virtual inline std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() { return mRec->GetThreadContext(); }
105 inline void SynchronizeGPU() { mRec->SynchronizeGPU(); }
106 inline void ReleaseEvent(deviceEvent ev, bool doGPU = true)
107 {
108 if (doGPU) {
109 mRec->ReleaseEvent(ev);
110 }
111 }
112 inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); }
113 inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); }
114 // nEvents is forced to 0 if evList == nullptr
115 inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); }
116 inline void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceToHost, res, stream, ev, evList, nEvents); }
117 inline void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourcesToGPU, proc, stream, all); }
118 inline void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourcesToHost, proc, stream, all); }
119 inline void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceLinkToGPU, res, stream, ev, evList, nEvents); }
120 inline void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceLinkToHost, res, stream, ev, evList, nEvents); }
121 // Todo: retrieve step from proc, move kernelClass->GetStep to retrieve it from GetProcessor
122 inline void WriteToConstantMemory(RecoStep step, size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) { timeCpy(step, true, &GPUReconstructionCPU::WriteToConstantMemory, offset, src, size, stream, ev); }
123 inline void GPUMemCpy(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, toGPU, &GPUReconstructionCPU::GPUMemCpy, dst, src, size, stream, toGPU, ev, evList, nEvents); }
124 inline void GPUMemCpyAlways(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
125 {
126 if (toGPU == -1) {
127 memcpy(dst, src, size);
128 } else {
129 timeCpy<true>(step, toGPU, &GPUReconstructionCPU::GPUMemCpyAlways, GetRecoStepsGPU() & step, dst, src, size, stream, toGPU, ev, evList, nEvents);
130 }
131 }
132
133 template <class T>
134 inline void AllocateIOMemoryHelper(uint32_t n, const T*& ptr, std::unique_ptr<T[]>& u)
135 {
137 }
138 template <class T, class S>
139 inline uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type)
140 {
141 return mRec->DumpData<T>(fp, entries, num, type);
142 }
143 template <class T, class S>
144 inline size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr)
145 {
146 return mRec->ReadData<T>(fp, entries, num, mem, type, nonConstPtrs);
147 }
148 template <class T>
149 inline void DumpFlatObjectToFile(const T* obj, const char* file)
150 {
152 }
153 template <class T>
154 inline std::unique_ptr<T> ReadFlatObjectFromFile(const char* file)
155 {
156 return mRec->ReadFlatObjectFromFile<T>(file);
157 }
158 template <class T>
159 inline void DumpStructToFile(const T* obj, const char* file)
160 {
161 mRec->DumpStructToFile<T>(obj, file);
162 }
163 template <class T>
164 inline std::unique_ptr<T> ReadStructFromFile(const char* file)
165 {
166 return mRec->ReadStructFromFile<T>(file);
167 }
168 template <class T>
169 inline void ReadStructFromFile(const char* file, T* obj)
170 {
171 mRec->ReadStructFromFile<T>(file, obj);
172 }
173 template <class S, int32_t I = 0, typename... Args>
174 inline int32_t runKernel(gpu_reconstruction_kernels::krnlSetup&& setup, Args&&... args)
175 {
176 return mRec->runKernel<S, I, Args...>(std::forward<gpu_reconstruction_kernels::krnlSetup&&>(setup), std::forward<Args>(args)...);
177 }
178 template <class S, int32_t I = 0>
183
184 template <class T, int32_t I = 0>
185 HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true)
186 {
187 return mRec->getKernelTimer<T, I>(step, num, addMemorySize, increment);
188 }
189 template <class T, int32_t J = -1>
190 HighResTimer& getTimer(const char* name, int32_t num = -1)
191 {
192 return mRec->getTimer<T, J>(name, num);
193 }
194 // Get GRID with NBLOCKS minimal such that nThreads * NBLOCKS >= totalItems
195 krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep);
196 // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems
197 krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep);
198 // Get GRID with specified number of blocks, each block with ideal number of threads
200 krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep);
201 // Get GRID with ideal number of threads / blocks for GPU
203 krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep);
204
205 inline uint32_t BlockCount() const { return mRec->mBlockCount; }
206 inline uint32_t WarpSize() const { return mRec->mWarpSize; }
207 inline uint32_t ThreadCount() const { return mRec->mThreadCount; }
208
210 inline size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr) { return mRec->AllocateRegisteredMemory(res, control); }
211 template <class T>
212 inline void SetupGPUProcessor(T* proc, bool allocate)
213 {
214 mRec->SetupGPUProcessor<T>(proc, allocate);
215 }
216
218
219 virtual int32_t PrepareTextures() { return 0; }
220 virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }
221
222 template <class T, class S, typename... Args>
223 bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args)
224 {
225 return DoDebugAndDump(step, mask, true, processor, func, args...);
226 }
227 template <class T, class S, typename... Args>
228 bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args);
229
230 template <class T, class S, typename... Args>
231 int32_t runRecoStep(RecoStep step, S T::*func, Args... args);
232
233 private:
234 template <bool Always = false, class T, class S, typename... Args>
235 void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args);
236};
237
// Invokes a copy/transfer member function on mRec and optionally accounts it to the per-step transfer timers.
//  - Always: when false, the call is skipped entirely if step is not NoRecoStep and is not set in GetRecoStepsGPU().
//  - step:   reco step used for the skip check and for selecting the timer slot.
//  - toGPU:  non-zero selects the to-GPU timer/byte/call counters, zero the to-host ones; negative values are not timed.
//  - func, args: member function called on mRec with args; its result is taken as a byte count.
// NOTE(review): listing line 258 (first statement inside the final `if (timer)`) is missing from this extract.
238template <bool Always, class T, class S, typename... Args>
239inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args)
240{
241 if (!Always && step != RecoStep::NoRecoStep && !(GetRecoStepsGPU() & step)) {
242 return;
243 }
// timer/bytes stay null unless timing is active; the null check below relies on that.
244 HighResTimer* timer = nullptr;
245 size_t* bytes = nullptr;
246 if (mRec->mProcessingSettings.debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0
247 int32_t id = mRec->getRecoStepNum(step, false);
// id == -1 means no timer slot for this step: the transfer still runs, just untimed.
248 if (id != -1) {
249 auto& tmp = mRec->mTimersRecoSteps[id];
250 timer = toGPU ? &tmp.timerToGPU : &tmp.timerToHost;
251 bytes = toGPU ? &tmp.bytesToGPU : &tmp.bytesToHost;
252 (toGPU ? tmp.countToGPU : tmp.countToHost)++;
253 timer->Start();
254 }
255 }
// The transfer itself; executed whether or not timing is active.
256 size_t n = (mRec->*func)(args...);
257 if (timer) {
259 timer->Stop();
260 *bytes += n;
261 }
262}
263
264template <class T, class S, typename... Args>
265bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args)
266{
267 if (GetProcessingSettings().keepAllMemory) {
268 if (transfer) {
269 TransferMemoryResourcesToHost(step, &processor, -1, true);
270 }
271 if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) {
272 if (func) {
273 (processor.*func)(args...);
274 }
275 return true;
276 }
277 }
278 return false;
279}
280
281template <class T, class S, typename... Args>
282int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args)
283{
284 if (GetRecoSteps().isSet(step)) {
285 auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr;
286 std::clock_t c;
287 if (timer) {
288 timer->timerTotal.Start();
289 c = std::clock();
290 }
291 int32_t retVal = (reinterpret_cast<T*>(this)->*func)(args...);
292 if (timer) {
293 timer->timerTotal.Stop();
294 timer->timerCPU += (double)(std::clock() - c) / CLOCKS_PER_SEC;
295 }
296 return retVal;
297 }
298 return 0;
299}
300
301} // namespace o2::gpu
302
303#endif
benchmark::State & state
#define GPUCA_RECO_STEP
int32_t retVal
uint32_t res
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
TBranch * ptr
double num
benchmark::State & st
void Start()
Definition timer.cxx:57
void Stop()
Definition timer.cxx:69
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:103
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:119
const GPUConstantMem * processors() const
Definition GPUChain.h:84
GPUChain(GPUReconstruction *rec)
Definition GPUChain.h:76
virtual int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
Definition GPUChain.h:50
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:124
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:102
const GPUSettingsGRP & GetGRPSettings() const
Definition GPUChain.h:60
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
const GPUReconstruction * rec() const
Definition GPUChain.h:63
std::unique_ptr< T > ReadStructFromFile(const char *file)
Definition GPUChain.h:164
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:32
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:95
GPUConstantMem * processorsDevice()
Definition GPUChain.h:82
virtual void RegisterGPUProcessors()=0
GPUReconstruction::RecoStep RecoStep
Definition GPUChain.h:29
virtual bool SupportsDoublePipeline()
Definition GPUChain.h:51
void SynchronizeGPU()
Definition GPUChain.h:105
virtual int32_t Finalize()=0
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:42
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:68
GPUReconstruction::RecoStepField GetRecoSteps() const
Definition GPUChain.h:67
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
Definition GPUChain.h:104
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:122
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
Definition GPUChain.h:71
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
Definition GPUChain.h:113
virtual int32_t FinalizePipelinedProcessing()
Definition GPUChain.h:52
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:106
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
Definition GPUChain.h:139
uint32_t WarpSize() const
Definition GPUChain.h:206
const GPUConstantMem * GetProcessors()
Definition GPUChain.h:64
uint32_t ThreadCount() const
Definition GPUChain.h:207
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
Definition GPUChain.h:220
GPUChain * GetNextChainInQueue()
Definition GPUChain.h:217
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:209
virtual int32_t Init()=0
virtual int32_t PrepareEvent()=0
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
Definition GPUChain.h:70
GPUConstantMem * processors()
Definition GPUChain.h:80
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:37
static constexpr krnlEvent krnlEventNone
Definition GPUChain.h:38
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl *control=nullptr)
Definition GPUChain.h:210
krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:47
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:112
void DumpStructToFile(const T *obj, const char *file)
Definition GPUChain.h:159
const GPUCalibObjectsConst & calib() const
Definition GPUChain.h:61
GPUParam & param()
Definition GPUChain.h:83
virtual int32_t RunChain()=0
GPUReconstruction::GeneralStep GeneralStep
Definition GPUChain.h:30
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:212
void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:116
GPUReconstruction::GeometryType GeometryType
Definition GPUChain.h:32
gpu_reconstruction_kernels::krnlProperties getKernelProperties()
Definition GPUChain.h:179
void ReadStructFromFile(const char *file, T *obj)
Definition GPUChain.h:169
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:72
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:85
GPUReconstructionCPU * mRec
Definition GPUChain.h:75
virtual ~GPUChain()=default
GPUConstantMem * processorsShadow()
Definition GPUChain.h:81
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
Definition GPUChain.h:69
static constexpr int32_t NSECTORS
Definition GPUChain.h:54
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
Definition GPUChain.h:185
const GPUParam & GetParam() const
Definition GPUChain.h:59
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:120
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
Definition GPUChain.h:134
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:21
virtual void RegisterPermanentMemoryAndProcessors()=0
virtual void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem)=0
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:118
int32_t runKernel(gpu_reconstruction_kernels::krnlSetup &&setup, Args &&... args)
Definition GPUChain.h:174
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
Definition GPUChain.h:282
void DumpFlatObjectToFile(const T *obj, const char *file)
Definition GPUChain.h:149
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:223
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:86
virtual int32_t PrepareTextures()
Definition GPUChain.h:219
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
Definition GPUChain.h:144
virtual int32_t EarlyConfigure()
Definition GPUChain.h:43
HighResTimer & getTimer(const char *name, int32_t num=-1)
Definition GPUChain.h:190
uint32_t BlockCount() const
Definition GPUChain.h:205
virtual void DumpSettings(const char *dir="")
Definition GPUChain.h:56
virtual void ReadSettings(const char *dir="")
Definition GPUChain.h:57
GPUReconstruction * rec()
Definition GPUChain.h:62
void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:115
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:37
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:87
gpu_reconstruction_kernels::krnlExec krnlExec
Definition GPUChain.h:34
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
Definition GPUChain.h:154
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:117
virtual void PrintMemoryStatistics()
Definition GPUChain.h:49
int32_t GetThread()
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
int32_t runKernel(krnlSetup &&setup, Args &&... args)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetupGPUProcessor(T *proc, bool allocate)
GPUDataTypes::RecoStep RecoStep
GPUConstantMem * mDeviceConstantMem
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::unique_ptr< T > ReadStructFromFile(const char *file)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
std::unique_ptr< GPUConstantMem > mHostConstantMem
void DumpStructToFile(const T *obj, const char *file)
InOutTypeField GetRecoStepsInputs() const
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUSettingsProcessing mProcessingSettings
GPUDataTypes::GeometryType GeometryType
RecoStepField GetRecoSteps() const
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
GPUDataTypes::GeneralStep GeneralStep
void DumpFlatObjectToFile(const T *obj, const char *file)
GPUSettingsDeviceBackend mDeviceBackendSettings
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
struct _cl_event * event
Definition glcorearb.h:2982
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GLint GLuint mask
Definition glcorearb.h:291
GLuint id
Definition glcorearb.h:650
GPUCalibObjectsConst calibObjects
const int nEvents
Definition test_Fifo.cxx:27