Project
Loading...
Searching...
No Matches
GPUChain.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCHAIN_H
16#define GPUCHAIN_H
17
20#include "GPUKernelClassesFwd.h"
21
22#include <ctime>
23
24namespace o2::gpu
25{
26
28{
29 friend class GPUReconstruction;
30
31 public:
40 static constexpr krnlRunRange krnlRunRangeNone{0};
41 static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};
42
// Virtual interface of a chain. Members declared `= 0` have no implementation in this
// class; the remaining ones carry the default body shown on their own line.
43 virtual ~GPUChain() = default;
45 virtual void RegisterGPUProcessors() = 0;
// Default: does nothing and returns 0.
// NOTE(review): the ';' after the function body is redundant (same for PrintMemoryStatistics below).
46 virtual int32_t EarlyConfigure() { return 0; };
47 virtual int32_t Init() = 0;
48 virtual int32_t PrepareEvent() = 0;
49 virtual int32_t Finalize() = 0;
50 virtual int32_t RunChain() = 0;
// Both parameters are non-const references; presumably the implementation reports its
// memory requirements through them - confirm against the overriding classes.
51 virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0;
// Default: prints nothing.
52 virtual void PrintMemoryStatistics() {};
// Default: ignores all three arguments (fillErrors is left untouched) and returns 0.
53 virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr) { return 0; }
// Default: false, i.e. a chain has to override this to report double-pipeline support.
54 virtual bool SupportsDoublePipeline() { return false; }
// Default: does nothing and returns 0.
55 virtual int32_t FinalizePipelinedProcessing() { return 0; }
56
57 constexpr static int32_t NSECTORS = GPUReconstruction::NSECTORS;
58
59 virtual void DumpSettings(const char* dir = "") {}
60 virtual void ReadSettings(const char* dir = "") {}
61
// Read-only accessors. GetParam / GetGRPSettings / GetCalib / GetProcessors forward to the
// member function of the reconstruction object stored in mRec.
62 const GPUParam& GetParam() const { return mRec->GetParam(); }
63 const GPUSettingsGRP& GetGRPSettings() const { return mRec->GetGRPSettings(); }
64 const GPUCalibObjectsConst& GetCalib() const { return mRec->GetCalib(); }
// rec() hands out mRec itself, typed as GPUReconstruction (non-const and const overload).
65 GPUReconstruction* rec() { return mRec; }
66 const GPUReconstruction* rec() const { return mRec; }
// Const view on whatever mRec->processors() returns.
67 inline const GPUConstantMem* GetProcessors() const { return mRec->processors(); }
68
69 // Make functions from GPUReconstruction*** available
75 inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->GetProcessingSettings(); }
76
77 protected:
80
81 int32_t GetThread();
82 // Make functions from GPUReconstruction*** available
83 inline GPUConstantMem* processors() { return mRec->processors(); }
86 inline GPUParam& param() { return mRec->param(); }
87 inline const GPUConstantMem* processors() const { return mRec->processors(); }
89 inline void SetONNXGPUStream(Ort::SessionOptions& opt, int32_t stream, int32_t* deviceId) { mRec->SetONNXGPUStream(opt, stream, deviceId); }
90 inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
91 inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
92 {
93 if (doGPU) {
95 ReleaseEvent(ev);
96 }
97 }
98 template <class T>
99 inline void CondWaitEvent(T& cond, deviceEvent* ev)
100 {
101 if (cond == true) {
103 cond = 2;
104 }
105 }
106 inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); }
107 inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); }
108 virtual inline std::unique_ptr<GPUReconstructionProcessing::threadContext> GetThreadContext() { return mRec->GetThreadContext(); }
109 inline void SynchronizeGPU() { mRec->SynchronizeGPU(); }
110 inline void ReleaseEvent(deviceEvent ev, bool doGPU = true)
111 {
112 if (doGPU) {
113 mRec->ReleaseEvent(ev);
114 }
115 }
116 inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); }
117 inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); }
// Timed copy wrappers. Each one forwards its arguments (minus `step`) to the
// GPUReconstructionCPU member function of the same name via timeCpy(). The second
// timeCpy argument selects the to-GPU (true) or to-host (false) timer/byte counters.
// timeCpy() skips the call entirely when `step` is neither RecoStep::NoRecoStep nor
// contained in GetRecoStepsGPU().
118 // nEvents is forced to 0 if evList == nullptr
119 inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); }
120 inline void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceToHost, res, stream, ev, evList, nEvents); }
// Variants taking a whole GPUProcessor instead of a single resource.
121 inline void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourcesToGPU, proc, stream, all); }
122 inline void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor* proc, int32_t stream = -1, bool all = false) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourcesToHost, proc, stream, all); }
// Variants taking an int16_t `res` (presumably a registered-resource id - confirm against GPUReconstructionCPU).
123 inline void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceLinkToGPU, res, stream, ev, evList, nEvents); }
124 inline void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, false, &GPUReconstructionCPU::TransferMemoryResourceLinkToHost, res, stream, ev, evList, nEvents); }
125 // Todo: retrieve step from proc, move kernelClass->GetStep to retrieve it from GetProcessor
// Always accounted as a to-GPU transfer (timeCpy is called with `true`).
126 inline void WriteToConstantMemory(RecoStep step, size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) { timeCpy(step, true, &GPUReconstructionCPU::WriteToConstantMemory, offset, src, size, stream, ev); }
// Direction is chosen by the caller: `toGPU` is handed both to timeCpy and to GPUReconstructionCPU::GPUMemCpy.
127 inline void GPUMemCpy(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, toGPU, &GPUReconstructionCPU::GPUMemCpy, dst, src, size, stream, toGPU, ev, evList, nEvents); }
128 inline void GPUMemCpyAlways(RecoStep step, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
129 {
130 if (toGPU == -1) {
131 memcpy(dst, src, size);
132 } else {
133 timeCpy<true>(step, toGPU, &GPUReconstructionCPU::GPUMemCpyAlways, GetRecoStepsGPU() & step, dst, src, size, stream, toGPU, ev, evList, nEvents);
134 }
135 }
136
137 template <class T>
138 inline void AllocateIOMemoryHelper(uint32_t n, const T*& ptr, std::unique_ptr<T[]>& u)
139 {
141 }
// Forwards to mRec->DumpData<T>() with unchanged arguments and returns its result.
// Only T is passed explicitly; S is left to be deduced by the callee.
142 template <class T, class S>
143 inline uint32_t DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type)
144 {
145 return mRec->DumpData<T>(fp, entries, num, type);
146 }
// Forwards to mRec->ReadData<T>() with unchanged arguments and returns its result.
// nonConstPtrs is optional and defaults to nullptr.
147 template <class T, class S>
148 inline size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type, T** nonConstPtrs = nullptr)
149 {
150 return mRec->ReadData<T>(fp, entries, num, mem, type, nonConstPtrs);
151 }
152 template <class T>
153 inline void DumpFlatObjectToFile(const T* obj, const char* file)
154 {
156 }
// File helpers: each one forwards to the mRec member function template of the same name.
// Returns whatever mRec->ReadFlatObjectFromFile<T>(file) returns.
157 template <class T>
158 inline std::unique_ptr<T> ReadFlatObjectFromFile(const char* file)
159 {
160 return mRec->ReadFlatObjectFromFile<T>(file);
161 }
// Passes `obj` and `file` on to mRec->DumpStructToFile<T>().
162 template <class T>
163 inline void DumpStructToFile(const T* obj, const char* file)
164 {
165 mRec->DumpStructToFile<T>(obj, file);
166 }
// Overload returning the object in a std::unique_ptr<T>.
167 template <class T>
168 inline std::unique_ptr<T> ReadStructFromFile(const char* file)
169 {
170 return mRec->ReadStructFromFile<T>(file);
171 }
// Overload taking a caller-provided `obj`, which is passed on to mRec->ReadStructFromFile<T>(file, obj).
172 template <class T>
173 inline void ReadStructFromFile(const char* file, T* obj)
174 {
175 mRec->ReadStructFromFile<T>(file, obj);
176 }
177
178 template <class S, int32_t I = 0, typename... Args>
179 requires(sizeof(S) >= 0) // Yields better incomplete type errors than calling runKernelCallInterface directly
180 inline void runKernel(GPUReconstructionProcessing::krnlSetup&& setup, Args const&... args)
181 {
182 runKernelCallInterface<S, I>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup), args...);
183 }
184
185 template <class S, int32_t I = 0>
190
// Returns the HighResTimer reference obtained from mRec->getKernelTimer<T, I>();
// all four arguments are passed through unchanged.
191 template <class T, int32_t I = 0>
192 HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true)
193 {
194 return mRec->getKernelTimer<T, I>(step, num, addMemorySize, increment);
195 }
// Returns the HighResTimer reference obtained from mRec->getTimer<T, J>(name, num).
196 template <class T, int32_t J = -1>
197 HighResTimer& getTimer(const char* name, int32_t num = -1)
198 {
199 return mRec->getTimer<T, J>(name, num);
200 }
201 // Get GRID with NBLOCKS minimal such that nThreads * NBLOCKS >= totalItems
203 // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems
205 // Get GRID with specified number of blocks, each block with ideal number of threads
208 // Get GRID with ideal number of threads / blocks for GPU
211
212 inline uint32_t BlockCount() const { return mRec->mBlockCount; }
213 inline uint32_t WarpSize() const { return mRec->mWarpSize; }
214 inline uint32_t ThreadCount() const { return mRec->mThreadCount; }
215
217 inline size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr) { return mRec->AllocateRegisteredMemory(res, control); }
218 template <class T>
219 inline void SetupGPUProcessor(T* proc, bool allocate)
220 {
221 mRec->SetupGPUProcessor<T>(proc, allocate);
222 }
223
225
226 virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }
227
228 template <class T, class S, typename... Args>
229 bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args)
230 {
231 return DoDebugAndDump(step, mask, true, processor, func, args...);
232 }
233 template <class T, class S, typename... Args>
234 bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args);
235
236 template <class T, class S, typename... Args>
237 int32_t runRecoStep(RecoStep step, S T::*func, Args... args);
238
239 private:
240 template <bool Always = false, class T, class S, typename... Args>
241 void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args);
242
243#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
244 template <class S, int32_t I> \
245 requires(std::is_same_v<S, GPUCA_M_FIRST(GPUCA_M_STRIP(x_class))> && I == S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel))) \
246 inline void runKernelCallInterface(GPUReconstructionProcessing::krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
247 { \
248 mRec->runKernelInterface<GPUCA_M_FIRST(GPUCA_M_STRIP(x_class)), S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel)) GPUCA_M_STRIP(x_types)>(std::forward<GPUReconstructionProcessing::krnlSetup&&>(setup) GPUCA_M_STRIP(x_forward)); \
249 }
250#include "GPUReconstructionKernelList.h"
251#undef GPUCA_KRNL
252};
253
254template <bool Always, class T, class S, typename... Args>
255inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args)
256{
257 if (!Always && step != RecoStep::NoRecoStep && !(GetRecoStepsGPU() & step)) {
258 return;
259 }
260 HighResTimer* timer = nullptr;
261 size_t* bytes = nullptr;
262 if (mRec->GetProcessingSettings().debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0
263 int32_t id = mRec->getRecoStepNum(step, false);
264 if (id != -1) {
265 auto& tmp = mRec->mTimersRecoSteps[id];
266 timer = toGPU ? &tmp.timerToGPU : &tmp.timerToHost;
267 bytes = toGPU ? &tmp.bytesToGPU : &tmp.bytesToHost;
268 (toGPU ? tmp.countToGPU : tmp.countToHost)++;
269 timer->Start();
270 }
271 }
272 size_t n = (mRec->*func)(args...);
273 if (timer) {
274 SynchronizeGPU();
275 timer->Stop();
276 *bytes += n;
277 }
278}
279
280template <class T, class S, typename... Args>
281bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args)
282{
283 if (GetProcessingSettings().keepAllMemory) {
284 if (transfer) {
285 TransferMemoryResourcesToHost(step, &processor, -1, true);
286 }
287 if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) {
288 if (func) {
289 (processor.*func)(args...);
290 }
291 return true;
292 }
293 }
294 return false;
295}
296
297template <class T, class S, typename... Args>
298int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args)
299{
300 if (GetRecoSteps().isSet(step)) {
301 auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr;
302 std::clock_t c;
303 if (timer) {
304 timer->timerTotal.Start();
305 c = std::clock();
306 }
307 int32_t retVal = (reinterpret_cast<T*>(this)->*func)(args...);
308 if (timer) {
309 timer->timerTotal.Stop();
310 timer->timerCPU += (double)(std::clock() - c) / CLOCKS_PER_SEC;
311 }
312 return retVal;
313 }
314 return 0;
315}
316
317} // namespace o2::gpu
318
319#endif
benchmark::State & state
int32_t retVal
uint32_t res
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
TBranch * ptr
double num
benchmark::State & st
void Start()
Definition timer.cxx:57
void Stop()
Definition timer.cxx:69
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:107
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
const GPUConstantMem * processors() const
Definition GPUChain.h:87
GPUChain(GPUReconstruction *rec)
Definition GPUChain.h:79
virtual int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr)
Definition GPUChain.h:53
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:128
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:106
const GPUSettingsGRP & GetGRPSettings() const
Definition GPUChain.h:63
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:127
const GPUReconstruction * rec() const
Definition GPUChain.h:66
std::unique_ptr< T > ReadStructFromFile(const char *file)
Definition GPUChain.h:168
void CondWaitEvent(T &cond, deviceEvent *ev)
Definition GPUChain.h:99
GPUConstantMem * processorsDevice()
Definition GPUChain.h:85
virtual void RegisterGPUProcessors()=0
GPUReconstruction::RecoStep RecoStep
Definition GPUChain.h:32
virtual bool SupportsDoublePipeline()
Definition GPUChain.h:54
void SynchronizeGPU()
Definition GPUChain.h:109
virtual int32_t Finalize()=0
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:71
GPUReconstruction::RecoStepField GetRecoSteps() const
Definition GPUChain.h:70
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:126
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
Definition GPUChain.h:74
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
Definition GPUChain.h:117
virtual int32_t FinalizePipelinedProcessing()
Definition GPUChain.h:55
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
Definition GPUChain.h:110
void runKernel(GPUReconstructionProcessing::krnlSetup &&setup, Args const &... args)
Definition GPUChain.h:180
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
Definition GPUChain.h:143
uint32_t WarpSize() const
Definition GPUChain.h:213
GPUReconstructionProcessing::krnlExec krnlExec
Definition GPUChain.h:37
uint32_t ThreadCount() const
Definition GPUChain.h:214
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
Definition GPUChain.h:226
GPUChain * GetNextChainInQueue()
Definition GPUChain.h:224
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:37
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:216
virtual int32_t Init()=0
virtual int32_t PrepareEvent()=0
GPUReconstructionProcessing::krnlProperties getKernelProperties()
Definition GPUChain.h:186
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
Definition GPUChain.h:73
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:108
GPUConstantMem * processors()
Definition GPUChain.h:83
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:40
static constexpr krnlEvent krnlEventNone
Definition GPUChain.h:41
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl *control=nullptr)
Definition GPUChain.h:217
void SetONNXGPUStream(Ort::SessionOptions &opt, int32_t stream, int32_t *deviceId)
Definition GPUChain.h:89
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:116
void DumpStructToFile(const T *obj, const char *file)
Definition GPUChain.h:163
GPUParam & param()
Definition GPUChain.h:86
virtual int32_t RunChain()=0
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:219
void TransferMemoryResourceToHost(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:120
const GPUCalibObjectsConst & GetCalib() const
Definition GPUChain.h:64
void ReadStructFromFile(const char *file, T *obj)
Definition GPUChain.h:173
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:75
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:88
GPUReconstructionCPU * mRec
Definition GPUChain.h:78
virtual ~GPUChain()=default
GPUConstantMem * processorsShadow()
Definition GPUChain.h:84
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:47
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
Definition GPUChain.h:72
static constexpr int32_t NSECTORS
Definition GPUChain.h:57
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
Definition GPUChain.h:192
const GPUParam & GetParam() const
Definition GPUChain.h:62
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:124
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
Definition GPUChain.h:138
virtual void RegisterPermanentMemoryAndProcessors()=0
virtual void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem)=0
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
Definition GPUChain.h:298
void DumpFlatObjectToFile(const T *obj, const char *file)
Definition GPUChain.h:153
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:229
const GPUConstantMem * GetProcessors() const
Definition GPUChain.h:67
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
Definition GPUChain.h:90
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:21
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
Definition GPUChain.h:148
virtual int32_t EarlyConfigure()
Definition GPUChain.h:46
HighResTimer & getTimer(const char *name, int32_t num=-1)
Definition GPUChain.h:197
uint32_t BlockCount() const
Definition GPUChain.h:212
virtual void DumpSettings(const char *dir="")
Definition GPUChain.h:59
virtual void ReadSettings(const char *dir="")
Definition GPUChain.h:60
GPUReconstruction * rec()
Definition GPUChain.h:65
void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:119
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:91
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
Definition GPUChain.h:158
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:121
virtual void PrintMemoryStatistics()
Definition GPUChain.h:52
int32_t GetThread()
virtual void SetONNXGPUStream(Ort::SessionOptions &, int32_t, int32_t *)
virtual void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual size_t GPUMemCpyAlways(bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
size_t TransferMemoryResourceToHost(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
virtual void SynchronizeStream(int32_t stream)
virtual void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
size_t TransferMemoryResourcesToHost(GPUProcessor *proc, int32_t stream=-1, bool all=false)
size_t TransferMemoryResourceToGPU(GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlProperties getKernelProperties(int gpu=-1)
size_t TransferMemoryResourcesToGPU(GPUProcessor *proc, int32_t stream=-1, bool all=false)
virtual int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false)
virtual void ReleaseEvent(deviceEvent ev)
virtual void RecordMarker(deviceEvent *ev, int32_t stream)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
gpu_reconstruction_kernels::deviceEvent deviceEvent
virtual std::unique_ptr< threadContext > GetThreadContext() override
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetupGPUProcessor(T *proc, bool allocate)
GPUDataTypes::RecoStep RecoStep
GPUConstantMem * mDeviceConstantMem
const GPUSettingsDeviceBackend & GetDeviceBackendSettings() const
static constexpr uint32_t NSECTORS
RecoStepField GetRecoStepsGPU() const
uint32_t DumpData(FILE *fp, const T *const *entries, const S *num, InOutPointerType type)
std::unique_ptr< T > ReadStructFromFile(const char *file)
std::unique_ptr< T > ReadFlatObjectFromFile(const char *file)
void DumpStructToFile(const T *obj, const char *file)
InOutTypeField GetRecoStepsInputs() const
const GPUCalibObjectsConst & GetCalib() const
T * AllocateIOMemoryHelper(size_t n, const T *&ptr, std::unique_ptr< T[]> &u)
GPUDataTypes::GeometryType GeometryType
RecoStepField GetRecoSteps() const
const GPUParam & GetParam() const
InOutTypeField GetRecoStepsOutputs() const
size_t ReadData(FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr)
GPUDataTypes::GeneralStep GeneralStep
const GPUSettingsProcessing & GetProcessingSettings() const
void DumpFlatObjectToFile(const T *obj, const char *file)
const GPUSettingsGRP & GetGRPSettings() const
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
struct _cl_event * event
Definition glcorearb.h:2982
GLdouble n
Definition glcorearb.h:1982
GLenum func
Definition glcorearb.h:778
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
GLuint GLuint stream
Definition glcorearb.h:1806
GLint GLuint mask
Definition glcorearb.h:291
GLuint id
Definition glcorearb.h:650
value_T step
Definition TrackUtils.h:42
const int nEvents
Definition test_Fifo.cxx:27