Project
Loading...
Searching...
No Matches
GPUReconstructionProcessing.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#if !defined(GPURECONSTRUCTIONPROCESSING_H)
16#define GPURECONSTRUCTIONPROCESSING_H
17
18#include "GPUReconstruction.h"
19#include "GPUReconstructionKernelIncludes.h"
20
21#include "utils/timer.h"
22#include <functional>
23
24namespace o2::gpu
25{
26
27namespace gpu_reconstruction_kernels
28{
30 constexpr deviceEvent() = default;
31 constexpr deviceEvent(std::nullptr_t p) : v(nullptr){};
32 template <class T>
33 void set(T val)
34 {
35 v = reinterpret_cast<void*&>(val);
36 }
37 template <class T>
38 T& get()
39 {
40 return reinterpret_cast<T&>(v);
41 }
42 template <class T>
44 {
45 return reinterpret_cast<T*>(this);
46 }
47 bool isSet() const { return v; }
48
49 private:
50 void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way.
51};
52
54{
55 public:
57 virtual ~threadContext();
58};
59
60} // namespace gpu_reconstruction_kernels
61
63{
64 public:
65 ~GPUReconstructionProcessing() override = default;
66
67 // Threading
68 int32_t getNKernelHostThreads(bool splitCores);
71 uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max);
72 void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function<void(uint32_t)> lambda);
73 void SetNActiveThreads(int32_t n);
74
75 // Interface to query name of a kernel
76 template <class T, int32_t I>
77 constexpr static const char* GetKernelName();
78
79 // Public queries for timers
82
83 template <class T>
84 void AddGPUEvents(T*& events);
85
86 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
87
98
99 protected:
102
103 int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels
104 uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop
105
106 std::vector<std::vector<deviceEvent>> mEvents;
107
108 // Timer related stuff
109 struct timerMeta {
110 std::unique_ptr<HighResTimer[]> timer;
111 std::string name;
112 int32_t num; // How many parallel instances to sum up (CPU threads / GPU streams)
113 int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer
114 uint32_t count; // How often was the timer queried
115 RecoStep step; // Which RecoStep is this
116 size_t memSize; // Memory size for memory bandwidth computation
117 };
118
120
121 std::vector<std::unique_ptr<timerMeta>> mTimers;
124 template <class T, int32_t I = 0>
125 HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true);
126 template <class T, int32_t J = -1>
127 HighResTimer& getTimer(const char* name, int32_t num = -1);
128
129 private:
130 uint32_t getNextTimerId();
131 timerMeta* getTimerById(uint32_t id, bool increment = true);
132 timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step);
133};
134
135template <class T>
137{
138 mEvents.emplace_back(std::vector<deviceEvent>(sizeof(T) / sizeof(deviceEvent)));
139 events = (T*)mEvents.back().data();
140}
141
142template <class T, int32_t I>
143HighResTimer& GPUReconstructionProcessing::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment)
144{
145 static int32_t id = getNextTimerId();
146 timerMeta* timer = getTimerById(id, increment);
147 if (timer == nullptr) {
148 timer = insertTimer(id, GetKernelName<T, I>(), -1, NSECTORS, 0, step);
149 }
150 if (addMemorySize) {
151 timer->memSize += addMemorySize;
152 }
153 if (num < 0 || num >= timer->num) {
154 throw std::runtime_error("Invalid timer requested");
155 }
156 return timer->timer[num];
157}
158
159template <class T, int32_t J>
161{
162 static int32_t id = getNextTimerId();
163 timerMeta* timer = getTimerById(id);
164 if (timer == nullptr) {
165 int32_t max = std::max<int32_t>({mMaxHostThreads, mProcessingSettings.nStreams});
166 timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep);
167 }
168 if (num == -1) {
170 }
171 if (num < 0 || num >= timer->num) {
172 throw std::runtime_error("Invalid timer requested");
173 }
174 return timer->timer[num];
175}
176
// X-macro block: while GPUCA_KRNL is defined as below, every GPUCA_KRNL(x_class, ...) expansion
// emits an explicit specialization of GPUReconstructionProcessing::GetKernelName<...>() that
// returns the kernel name as a string literal (GPUCA_M_STR applied to GPUCA_M_KRNL_NAME(x_class)).
// NOTE(review): presumably GPUReconstructionKernelList.h consists of GPUCA_KRNL(...) entries, one
// per kernel - confirm in that header. The macro is undefined again right after the include.
177#define GPUCA_KRNL(x_class, ...) \
178 template <> \
179 constexpr const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
180 { \
181 return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
182 }
183#include "GPUReconstructionKernelList.h"
184#undef GPUCA_KRNL
185
186} // namespace o2::gpu
187
188#endif
double num
static constexpr int32_t N_RECO_STEPS
static constexpr int32_t N_GENERAL_STEPS
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
HighResTimer & getGeneralStepTimer(GeneralStep step)
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
std::vector< std::vector< deviceEvent > > mEvents
std::vector< std::unique_ptr< timerMeta > > mTimers
HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]
static constexpr const char * GetKernelName()
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
~GPUReconstructionProcessing() override=default
GPUReconstructionProcessing(const GPUSettingsDeviceBackend &cfg)
uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
HighResTimer & getTimer(const char *name, int32_t num=-1)
GPUDataTypes::RecoStep RecoStep
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static constexpr uint32_t NSECTORS
GPUSettingsProcessing mProcessingSettings
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
GPUDataTypes::GeneralStep GeneralStep
GLdouble n
Definition glcorearb.h:1982
const GLdouble * v
Definition glcorearb.h:832
GLuint const GLchar * name
Definition glcorearb.h:781
GLdouble f
Definition glcorearb.h:310
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLuint GLfloat * val
Definition glcorearb.h:1582
constexpr size_t max