Project
Loading...
Searching...
No Matches
GPUChain.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCHAIN_H
16#define GPUCHAIN_H
17
20#include "GPUKernelClassesFwd.h"
21
#include <cstring>
#include <ctime>
#include <functional>
#include <utility>
24
25namespace o2::gpu
26{
27
29{
30 friend class GPUReconstruction;
31
32 public:
// Constant krnlRunRange brace-initialized with 0.
// NOTE(review): presumably the "no run range" argument for kernel launches - confirm against krnlRunRange.
41 static constexpr krnlRunRange krnlRunRangeNone{0};
// Constant krnlEvent initialized from {nullptr, nullptr, 0}.
// NOTE(review): presumably means "no event to record / wait for" - confirm against krnlEvent.
42 static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};
43
44 virtual ~GPUChain() = default;
46 virtual void RegisterGPUProcessors() = 0;
47 virtual int32_t EarlyConfigure() { return 0; };
48 virtual int32_t Init() = 0;
49 virtual int32_t PrepareEvent() = 0;
50 virtual int32_t Finalize() = 0;
51 virtual int32_t RunChain() = 0;
52 virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0;
53 virtual void PrintMemoryStatistics() {};
54 virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr) { return 0; }
55 virtual bool SupportsDoublePipeline() { return false; }
56 virtual int32_t FinalizePipelinedProcessing() { return 0; }
57
58 constexpr static int32_t NSECTORS = GPUReconstruction::NSECTORS;
59
60 virtual void DumpSettings(const char* dir = "") {}
61 virtual void ReadSettings(const char* dir = "") {}
62
63 const GPUParam& GetParam() const { return mRec->GetParam(); }
64 const GPUSettingsGRP& GetGRPSettings() const { return mRec->GetGRPSettings(); }
65 const GPUCalibObjectsConst& GetCalib() const { return mRec->GetCalib(); }
66 GPUReconstruction* rec() { return mRec; }
67 const GPUReconstruction* rec() const { return mRec; }
68 inline const GPUConstantMem* GetProcessors() const { return mRec->processors(); }
69
70 // Make functions from GPUReconstruction*** available
76 inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->GetProcessingSettings(); }
77
78 protected:
81
82 int32_t GetThread();
83 // Make functions from GPUReconstruction*** available
84 inline GPUConstantMem* processors() { return mRec->processors(); }
87 inline GPUParam& param() { return mRec->param(); }
88 inline const GPUConstantMem* processors() const { return mRec->processors(); }
90 inline void SetONNXGPUStream(Ort::SessionOptions& opt, int32_t stream, int32_t* deviceId) { mRec->SetONNXGPUStream(opt, stream, deviceId); }
91 inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
92 inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
93 {
94 if (doGPU) {
96 ReleaseEvent(ev);
97 }
98 }
99 template <class T>
100 inline void CondWaitEvent(T& cond, deviceEvent* ev)
101 {
102 if (cond == true) {
104 cond = 2;
105 }
106 }
107 inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); }
108 inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); }
109 virtual inline std::unique_ptr<GPUReconstructionProcessing::threadContext> GetThreadContext() { return mRec->GetThreadContext(); }
110 inline void SynchronizeGPU() { mRec->SynchronizeGPU(); }
111 inline void ReleaseEvent(deviceEvent ev, bool doGPU = true)
112 {
113 if (doGPU) {
114 mRec->ReleaseEvent(ev);
115 }
116 }
117 inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); }
118 inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); }
119 // nEvents is forced to 0 if evList == nullptr
120 inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); }
121 inline void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceToHost, res, stream, ev, evList, nEvents); }
122 inline void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourcesToGPU, proc, stream, all); }
123 inline void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourcesToHost, proc, stream, all); }
124 inline void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceLinkToGPU, res, stream, ev, evList, nEvents); }
125 inline void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceLinkToHost, res, stream, ev, evList, nEvents); }
126 // Todo: retrieve step from proc, move kernelClass->GetStep to retrieve it from GetProcessor
127 inline void WriteToConstantMemory(RecoStep step, size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) { timeCpy(step, true, &GPUReconstructionCPU::WriteToConstantMemory, offset, src, size, stream, ev); }
128 inline void GPUMemCpy(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, toGPU, &GPUReconstructionCPU::GPUMemCpy, dst, src, size, stream, toGPU, ev, evList, nEvents); }
129 inline void GPUMemCpyAlways(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
130 {
131 if (toGPU == -1) {
132 memcpy(dst, src, size);
133 } else {
134 timeCpy<true>(step, toGPU, &GPUReconstructionCPU::GPUMemCpyAlways, GetRecoStepsGPU() & step, dst, src, size, stream, toGPU, ev, evList, nEvents);
135 }
136 }
137
138 template <class T>
139 inline void AllocateIOMemoryHelper(uint32_t n, const T*& ptr, std::unique_ptr<T[]>& u)
140 {
142 }
143 template <class T, class S>
144 inline uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type)
145 {
146 return mRec->DumpData<T>(fp, entries, num, type);
147 }
148 template <class T, class S>
149 inline size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr)
150 {
151 return mRec->ReadData<T>(fp, entries, num, mem, type, nonConstPtrs);
152 }
153 template <class T>
154 inline void DumpFlatObjectToFile(const T* obj, const char* file)
155 {
157 }
158 template <class T>
159 inline std::unique_ptr<T> ReadFlatObjectFromFile(const char* file)
160 {
161 return mRec->ReadFlatObjectFromFile<T>(file);
162 }
163 template <class T>
164 inline void DumpStructToFile(const T* obj, const char* file)
165 {
166 mRec->DumpStructToFile<T>(obj, file);
167 }
168 template <class T>
169 inline std::unique_ptr<T> ReadStructFromFile(const char* file)
170 {
171 return mRec->ReadStructFromFile<T>(file);
172 }
173 template <class T>
174 inline void ReadStructFromFile(const char* file, T* obj)
175 {
176 mRec->ReadStructFromFile<T>(file, obj);
177 }
178
179 template <class S, int32_t I = 0, typename... Args>
180 requires(sizeof(S) >= 0) // Yields better incomplete type errors than calling runKernelCallInterface directly
181 inline void runKernel(GPUReconstructionProcessing::krnlSetup&& setup, Args const&... args)
182 {
183 runKernelCallInterface<S, I>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup), args...);
184 }
185
186 template <class S, int32_t I = 0>
191
192 template <class T, int32_t I = 0>
193 HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true)
194 {
195 return mRec->getKernelTimer<T, I>(step, num, addMemorySize, increment);
196 }
197 template <class T, int32_t J = -1>
198 HighResTimer& getTimer(const char* name, int32_t num = -1)
199 {
200 return mRec->getTimer<T, J>(name, num);
201 }
202 // Get GRID with NBLOCKS minimal such that nThreads * NBLOCKS >= totalItems
204 // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems
206 // Get GRID with specified number of blocks, each block with ideal number of threads
209 // Get GRID with ideal number of threads / blocks for GPU
212
213 inline uint32_t BlockCount() const { return mRec->mBlockCount; }
214 inline uint32_t WarpSize() const { return mRec->mWarpSize; }
215 inline uint32_t ThreadCount() const { return mRec->mThreadCount; }
216
218 inline size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr) { return mRec->AllocateRegisteredMemory(res, control); }
219 template <class T>
220 inline void SetupGPUProcessor(T* proc, bool allocate)
221 {
222 mRec->SetupGPUProcessor<T>(proc, allocate);
223 }
224
226
227 virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }
228
229 template <class T, class S, typename... Args>
230 bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::*func, Args&&... args)
231 {
232 return DoDebugAndDump(step, mask, true, processor, func, args...);
233 }
234 template <class T, class S, typename... Args>
235 bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args);
236 template <typename... Args>
237 bool DoDebugDump(uint32_t mask, std::function<void(Args&...)> func, Args&... args);
// Overload taking a pointer to a callable: wraps a call of (*func) in a lambda, stores that lambda
// in a std::function and delegates to the DoDebugDump overload that takes a std::function.
// The lambda captures func by reference, and both the lambda and this function pass the arguments
// on as named (lvalue) expressions, i.e. without std::forward.
// NOTE(review): the wrapper type is std::function<void(Args&&...)> while the target overload is
// declared with std::function<void(Args&...)> - the match presumably relies on Args being deduced
// as lvalue reference types; confirm before changing either signature.
238 template <class S, typename... Args>
239 bool DoDebugDump(uint32_t mask, S* func, Args&&... args)
240 {
241 return DoDebugDump(mask, std::function<void(Args && ...)>([&func](Args&&... args_tmp) { (*func)(args_tmp...); }), args...);
242 }
243
244 template <class T, class S, typename... Args>
245 int32_t runRecoStep(RecoStep step, S T::*func, Args... args);
246
247 private:
248 template <bool Always = false, class T, class S, typename... Args>
249 void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args);
250
// X-macro section: GPUCA_KRNL is defined, GPUReconstructionKernelList.h is included, and the macro
// is undefined again right afterwards.
// Each expansion of GPUCA_KRNL declares one private member template runKernelCallInterface<S, I>
// whose requires-clause restricts it to the kernel class / kernel index derived from x_class, and
// whose body forwards the setup object plus the x_forward arguments to mRec->runKernelInterface<...>().
// NOTE(review): presumably GPUReconstructionKernelList.h invokes GPUCA_KRNL once per kernel, and
// GPUCA_M_FIRST / GPUCA_M_SHIFT / GPUCA_M_STRIP select the first element, the remaining elements and
// strip the parentheses of a macro argument list - confirm against their definitions.
251#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
252 template <class S, int32_t I> \
253 requires(std::is_same_v<S, GPUCA_M_FIRST(GPUCA_M_STRIP(x_class))> && I == S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel))) \
254 inline void runKernelCallInterface(GPUReconstructionProcessing::krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
255 { \
256 mRec->runKernelInterface<GPUCA_M_FIRST(GPUCA_M_STRIP(x_class)), S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel)) GPUCA_M_STRIP(x_types)>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup) GPUCA_M_STRIP(x_forward)); \
257 }
258#include "GPUReconstructionKernelList.h"
259#undef GPUCA_KRNL
260};
261
262template <bool Always, class T, class S, typename... Args>
263inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args)
264{
265 if (!Always && step != RecoStep::NoRecoStep && !(GetRecoStepsGPU() & step)) {
266 return;
267 }
268 HighResTimer* timer = nullptr;
269 size_t* bytes = nullptr;
270 if (mRec->GetProcessingSettings().debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0
271 int32_t id = mRec->getRecoStepNum(step, false);
272 if (id != -1) {
273 auto& tmp = mRec->mTimersRecoSteps[id];
274 timer = toGPU ? &tmp.timerToGPU : &tmp.timerToHost;
275 bytes = toGPU ? &tmp.bytesToGPU : &tmp.bytesToHost;
276 (toGPU ? tmp.countToGPU : tmp.countToHost)++;
277 timer->Start();
278 }
279 }
280 size_t n = (mRec->*func)(args...);
281 if (timer) {
282 SynchronizeGPU();
283 timer->Stop();
284 *bytes += n;
285 }
286}
287
288template <class T, class S, typename... Args>
289inline int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args)
290{
291 if (GetRecoSteps().isSet(step)) {
292 auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr;
293 std::clock_t c;
294 if (timer) {
295 timer->timerTotal.Start();
296 c = std::clock();
297 }
298 int32_t retVal = (reinterpret_cast<T*>(this)->*func)(args...);
299 if (timer) {
300 timer->timerTotal.Stop();
301 timer->timerCPU += (double)(std::clock() - c) / CLOCKS_PER_SEC;
302 }
303 return retVal;
304 }
305 return 0;
306}
307
308} // namespace o2::gpu
309
310#endif
benchmark::State & state
int32_t retVal
uint32_t res
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
TBranch * ptr
double num
benchmark::State & st
void Start()
Definition timer.cxx:57
void Stop()
Definition timer.cxx:69
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:108
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:124
const GPUConstantMem * processors() const
Definition GPUChain.h:88
GPUChain(GPUReconstruction *rec)
Definition GPUChain.h:80
virtual int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
Definition GPUChain.h:54
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:129
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:107
const GPUSettingsGRP & GetGRPSettings() const
Definition GPUChain.h:64
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:128
const GPUReconstruction * rec() const
Definition GPUChain.h:67
std::unique_ptr< T > ReadStructFromFile(const char *file)
Definition GPUChain.h:169
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:230
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:100
GPUConstantMem * processorsDevice()
Definition GPUChain.h:86
virtual void RegisterGPUProcessors()=0
GPUReconstruction::RecoStep RecoStep
Definition GPUChain.h:33
virtual bool SupportsDoublePipeline()
Definition GPUChain.h:55
void SynchronizeGPU()
Definition GPUChain.h:110
virtual int32_t Finalize()=0
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:72
GPUReconstruction::RecoStepField GetRecoSteps() const
Definition GPUChain.h:71
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:127
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
Definition GPUChain.h:75
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
Definition GPUChain.h:118
virtual int32_t FinalizePipelinedProcessing()
Definition GPUChain.h:56
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:111
void runKernel(GPUReconstructionProcessing::krnlSetup &&setup, Args const &... args)
Definition GPUChain.h:181
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
Definition GPUChain.h:144
uint32_t WarpSize() const
Definition GPUChain.h:214
GPUReconstructionProcessing::krnlExec krnlExec
Definition GPUChain.h:38
uint32_t ThreadCount() const
Definition GPUChain.h:215
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
Definition GPUChain.h:227
GPUChain * GetNextChainInQueue()
Definition GPUChain.h:225
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:37
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:217
virtual int32_t Init()=0
virtual int32_t PrepareEvent()=0
GPUReconstructionProcessing::krnlProperties getKernelProperties()
Definition GPUChain.h:187
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
Definition GPUChain.h:74
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:109
GPUConstantMem * processors()
Definition GPUChain.h:84
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:41
bool DoDebugDump(uint32_t mask, std::function< void(Args &...)> func, Args &... args)
static constexpr krnlEvent krnlEventNone
Definition GPUChain.h:42
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl *control=nullptr)
Definition GPUChain.h:218
void SetONNXGPUStream(Ort::SessionOptions &opt, int32_t stream, int32_t *deviceId)
Definition GPUChain.h:90
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:117
void DumpStructToFile(const T *obj, const char *file)
Definition GPUChain.h:164
GPUParam & param()
Definition GPUChain.h:87
virtual int32_t RunChain()=0
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:220
void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:121
const GPUCalibObjectsConst & GetCalib() const
Definition GPUChain.h:65
void ReadStructFromFile(const char *file, T *obj)
Definition GPUChain.h:174
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:76
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:89
GPUReconstructionCPU * mRec
Definition GPUChain.h:79
bool DoDebugDump(uint32_t mask, S *func, Args &&... args)
Definition GPUChain.h:239
virtual ~GPUChain()=default
GPUConstantMem * processorsShadow()
Definition GPUChain.h:85
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:47
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
Definition GPUChain.h:73
static constexpr int32_t NSECTORS
Definition GPUChain.h:58
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
Definition GPUChain.h:193
const GPUParam & GetParam() const
Definition GPUChain.h:63
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:125
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
Definition GPUChain.h:139
virtual void RegisterPermanentMemoryAndProcessors()=0
virtual void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem)=0
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:123
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
Definition GPUChain.h:289
void DumpFlatObjectToFile(const T *obj, const char *file)
Definition GPUChain.h:154
const GPUConstantMem * GetProcessors() const
Definition GPUChain.h:68
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:91
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
Definition GPUChain.h:149
virtual int32_t EarlyConfigure()
Definition GPUChain.h:47
HighResTimer & getTimer(const char *name, int32_t num=-1)
Definition GPUChain.h:198
uint32_t BlockCount() const
Definition GPUChain.h:213
virtual void DumpSettings(const char *dir="")
Definition GPUChain.h:60
virtual void ReadSettings(const char *dir="")
Definition GPUChain.h:61
GPUReconstruction * rec()
Definition GPUChain.h:66
void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:120
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:92
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
Definition GPUChain.h:159
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
virtual void PrintMemoryStatistics()
Definition GPUChain.h:53
int32_t GetThread()
virtual void SetONNXGPUStream(Ort::SessionOptions &, int32_t, int32_t *)
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlProperties getKernelProperties(int gpu=-1)
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
gpu_reconstruction_kernels::deviceEvent deviceEvent
virtual std::unique_ptr< threadContext > GetThreadContext() override
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetupGPUProcessor(T *proc, bool allocate)
GPUDataTypes::RecoStep RecoStep
GPUConstantMem * mDeviceConstantMem
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::unique_ptr< T > ReadStructFromFile(const char *file)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void DumpStructToFile(const T *obj, const char *file)
InOutTypeField GetRecoStepsInputs() const
const GPUCalibObjectsConst & GetCalib() const
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUDataTypes::GeometryType GeometryType
RecoStepField GetRecoSteps() const
const GPUParam & GetParam() const
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
GPUDataTypes::GeneralStep GeneralStep
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpFlatObjectToFile(const T *obj, const char *file)
const GPUSettingsGRP & GetGRPSettings() const
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
struct _cl_event * event
Definition glcorearb.h:2982
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GLint GLuint mask
Definition glcorearb.h:291
GLuint id
Definition glcorearb.h:650
value_T step
Definition TrackUtils.h:42
const int nEvents
Definition test_Fifo.cxx:27