Project
Loading...
Searching...
No Matches
GPUReconstructionProcessing.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#if !defined(GPURECONSTRUCTIONPROCESSING_H)
16#define GPURECONSTRUCTIONPROCESSING_H
17
18#include "GPUReconstruction.h"
19#include "GPUReconstructionKernelIncludes.h"
20
21#include "utils/timer.h"
22#include <functional>
23#include <atomic>
24
25namespace o2::gpu
26{
27
28struct GPUDefParameters;
29
30namespace gpu_reconstruction_kernels
31{
33 constexpr deviceEvent() = default;
34 constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {};
35 template <class T>
36 void set(T val)
37 {
38 v = reinterpret_cast<void*&>(val);
39 }
40 template <class T>
41 T& get()
42 {
43 return reinterpret_cast<T&>(v);
44 }
45 template <class T>
47 {
48 return reinterpret_cast<T*>(this);
49 }
50 bool isSet() const { return v; }
51
52 private:
53 void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way.
54};
55
57{
58 public:
60 virtual ~threadContext();
61};
62
63} // namespace gpu_reconstruction_kernels
64
66{
67 public:
69
70 // Threading
71 int32_t getNKernelHostThreads(bool splitCores);
74 uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max);
75 void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function<void(uint32_t)> lambda);
76 void SetNActiveThreads(int32_t n);
77
78 // Interface to query name of a kernel
79 template <class T, int32_t I>
80 static const char* GetKernelName();
81 const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; }
82 template <class T, int32_t I = 0>
83 static uint32_t GetKernelNum();
84
85 // Public queries for timers
88
89 template <class T>
90 void AddGPUEvents(T*& events);
91
92 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
93
104
105 protected:
108
109 static const std::vector<std::string> mKernelNames;
110
111 int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels
112 uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop
113
114 std::vector<std::vector<deviceEvent>> mEvents;
115
116 // Timer related stuff
117 struct timerMeta {
118 std::unique_ptr<HighResTimer[]> timer;
119 std::string name;
120 int32_t num; // How many parallel instances to sum up (CPU threads / GPU streams)
121 int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer
122 uint32_t count; // How often was the timer queried
123 RecoStep step; // Which RecoStep is this
124 size_t memSize; // Memory size for memory bandwidth computation
125 };
126
128
129 std::vector<std::unique_ptr<timerMeta>> mTimers;
132 template <class T, int32_t I = 0>
133 HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true);
134 template <class T, int32_t J = -1>
135 HighResTimer& getTimer(const char* name, int32_t num = -1);
136
139
140 private:
141 uint32_t getNextTimerId();
142 timerMeta* getTimerById(uint32_t id, bool increment = true);
143 timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step);
144
145 static std::atomic_flag mTimerFlag;
146};
147
148template <class T>
150{
151 mEvents.emplace_back(std::vector<deviceEvent>(sizeof(T) / sizeof(deviceEvent)));
152 events = (T*)mEvents.back().data();
153}
154
155template <class T, int32_t I>
156HighResTimer& GPUReconstructionProcessing::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment)
157{
158 static int32_t id = getNextTimerId();
159 timerMeta* timer = getTimerById(id, increment);
160 if (timer == nullptr) {
161 timer = insertTimer(id, GetKernelName<T, I>(), -1, NSECTORS, 0, step);
162 }
163 if (addMemorySize) {
164 timer->memSize += addMemorySize;
165 }
166 if (num < 0 || num >= timer->num) {
167 throw std::runtime_error("Invalid timer requested");
168 }
169 return timer->timer[num];
170}
171
172template <class T, int32_t J>
174{
175 static int32_t id = getNextTimerId();
176 timerMeta* timer = getTimerById(id);
177 if (timer == nullptr) {
178 int32_t max = std::max<int32_t>({mMaxHostThreads, mProcessingSettings.nStreams});
179 timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep);
180 }
181 if (num == -1) {
183 }
184 if (num < 0 || num >= timer->num) {
185 throw std::runtime_error("Invalid timer requested");
186 }
187 return timer->timer[num];
188}
189
190} // namespace o2::gpu
191
192#endif
int32_t i
double num
static constexpr int32_t N_RECO_STEPS
static constexpr int32_t N_GENERAL_STEPS
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
HighResTimer & getGeneralStepTimer(GeneralStep step)
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
static const char * GetKernelName()
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
std::vector< std::vector< deviceEvent > > mEvents
std::vector< std::unique_ptr< timerMeta > > mTimers
HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
const std::string & GetKernelName(int32_t i) const
static const std::vector< std::string > mKernelNames
uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
HighResTimer & getTimer(const char *name, int32_t num=-1)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static constexpr uint32_t NSECTORS
GPUSettingsProcessing mProcessingSettings
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
GLdouble n
Definition glcorearb.h:1982
const GLdouble * v
Definition glcorearb.h:832
GLuint const GLchar * name
Definition glcorearb.h:781
GLdouble f
Definition glcorearb.h:310
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLuint GLfloat * val
Definition glcorearb.h:1582
constexpr size_t max