15#if !defined(GPURECONSTRUCTIONPROCESSING_H)
16#define GPURECONSTRUCTIONPROCESSING_H
32struct GPUDefParameters;
34namespace gpu_reconstruction_kernels
42 v =
reinterpret_cast<void*&
>(
val);
47 return reinterpret_cast<T&
>(
v);
52 return reinterpret_cast<T*
>(
this);
88 template <
class T,
int32_t I = 0>
91 static constexpr int32_t
i = I;
135 template <
class T, int32_t I = 0,
typename... Args>
138 std::tuple<
typename std::conditional<(
sizeof(Args) >
sizeof(
void*)),
const Args&,
const Args>
::type...>
v;
141 template <
class T,
class S>
145 template <
typename... Args>
163 template <
class T,
int32_t I>
166 template <
class T,
int32_t I = 0>
188 std::vector<std::vector<deviceEvent>>
mEvents;
192 std::unique_ptr<HighResTimer[]>
timer;
203 std::vector<std::unique_ptr<timerMeta>>
mTimers;
206 template <
class T,
int32_t I = 0>
208 template <
class T, int32_t J = -1>
215 uint32_t getNextTimerId();
216 timerMeta* getTimerById(uint32_t
id,
bool increment =
true);
219 static std::atomic_flag mTimerFlag;
226 events = (T*)
mEvents.back().data();
229template <
class T,
int32_t I>
232 static int32_t
id = getNextTimerId();
233 timerMeta* timer = getTimerById(
id, increment);
234 if (timer ==
nullptr) {
235 timer = insertTimer(
id, GetKernelName<T, I>(), -1,
NSECTORS, 0, step);
238 timer->
memSize += addMemorySize;
240 if (num < 0 || num >= timer->
num) {
241 throw std::runtime_error(
"Invalid timer requested");
246template <
class T,
int32_t J>
249 static int32_t
id = getNextTimerId();
251 if (timer ==
nullptr) {
253 timer = insertTimer(
id,
name, J,
max, 1, RecoStep::NoRecoStep);
258 if (num < 0 || num >= timer->
num) {
259 throw std::runtime_error(
"Invalid timer requested");
#define GPUCA_MAX_STREAMS
static constexpr int32_t N_RECO_STEPS
static constexpr int32_t N_GENERAL_STEPS
KernelInterface(const Args &... args)
virtual void runKernelVirtual(const int num, const void *args)
auto & getRecoStepTimer(RecoStep step)
HighResTimer & getGeneralStepTimer(GeneralStep step)
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
static const char * GetKernelName()
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
std::vector< std::vector< deviceEvent > > mEvents
static uint32_t GetKernelNum()
int32_t mActiveHostKernelThreads
std::vector< std::unique_ptr< timerMeta > > mTimers
void SetNActiveThreads(int32_t n)
GPUDefParameters * mParCPU
HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]
uint32_t getNActiveThreadsOuterLoop() const
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
int32_t getNKernelHostThreads(bool splitCores)
const std::string & GetKernelName(int32_t i) const
static const std::vector< std::string > mKernelNames
uint32_t mNActiveThreadsOuterLoop
virtual std::unique_ptr< threadContext > GetThreadContext() override
GPUDefParameters * mParDevice
const GPUDefParameters & getGPUParameters(bool doGPU) const override
uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
HighResTimer & getTimer(const char *name, int32_t num=-1)
~GPUReconstructionProcessing() override
void SetNActiveThreadsOuterLoop(uint32_t f)
void AddGPUEvents(T *&events)
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static constexpr uint32_t NSECTORS
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
static int32_t getHostThreadIndex()
GLuint const GLchar * name
GLboolean GLboolean GLboolean b
GLint GLint GLsizei GLint GLenum GLenum type
GLdouble GLdouble GLdouble z
static constexpr int32_t i
constexpr krnlEvent(deviceEvent *e=nullptr, deviceEvent *el=nullptr, int32_t n=1)
GPUReconstruction::krnlDeviceType device
GPUDataTypes::RecoStep step
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st)
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto)
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st)
krnlProperties(int32_t t=0, int32_t b=1, int32_t b2=0)
constexpr krnlRunRange(uint32_t v)
constexpr krnlRunRange()=default
krnlSetup(const krnlExec &xx, const krnlRunRange &yy={0}, const krnlEvent &zz={nullptr, nullptr, 0})
constexpr deviceEvent()=default
constexpr deviceEvent(std::nullptr_t p)