15#if !defined(GPURECONSTRUCTIONPROCESSING_H)
16#define GPURECONSTRUCTIONPROCESSING_H
19#include "GPUReconstructionKernelIncludes.h"
27namespace gpu_reconstruction_kernels
35 v =
reinterpret_cast<void*&
>(
val);
40 return reinterpret_cast<T&
>(
v);
45 return reinterpret_cast<T*
>(
this);
76 template <
class T,
int32_t I>
86 virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext>
GetThreadContext()
override;
106 std::vector<std::vector<deviceEvent>>
mEvents;
110 std::unique_ptr<HighResTimer[]>
timer;
121 std::vector<std::unique_ptr<timerMeta>>
mTimers;
124 template <
class T,
int32_t I = 0>
126 template <
class T, int32_t J = -1>
130 uint32_t getNextTimerId();
131 timerMeta* getTimerById(uint32_t
id,
bool increment =
true);
139 events = (T*)
mEvents.back().data();
142template <
class T,
int32_t I>
145 static int32_t
id = getNextTimerId();
146 timerMeta* timer = getTimerById(
id, increment);
147 if (timer ==
nullptr) {
148 timer = insertTimer(
id, GetKernelName<T, I>(), -1,
NSECTORS, 0, step);
151 timer->
memSize += addMemorySize;
153 if (num < 0 || num >= timer->
num) {
154 throw std::runtime_error(
"Invalid timer requested");
159template <
class T,
int32_t J>
162 static int32_t
id = getNextTimerId();
164 if (timer ==
nullptr) {
166 timer = insertTimer(
id,
name, J,
max, 1, RecoStep::NoRecoStep);
171 if (num < 0 || num >= timer->
num) {
172 throw std::runtime_error(
"Invalid timer requested");
177#define GPUCA_KRNL(x_class, ...) \
179 constexpr const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
181 return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
183#include "GPUReconstructionKernelList.h"
static constexpr int32_t N_RECO_STEPS
static constexpr int32_t N_GENERAL_STEPS
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
auto & getRecoStepTimer(RecoStep step)
HighResTimer & getGeneralStepTimer(GeneralStep step)
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
std::vector< std::vector< deviceEvent > > mEvents
int32_t mActiveHostKernelThreads
std::vector< std::unique_ptr< timerMeta > > mTimers
void SetNActiveThreads(int32_t n)
HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]
uint32_t getNActiveThreadsOuterLoop() const
static constexpr const char * GetKernelName()
HighResTimer & getKernelTimer(RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true)
int32_t getNKernelHostThreads(bool splitCores)
~GPUReconstructionProcessing() override=default
uint32_t mNActiveThreadsOuterLoop
GPUReconstructionProcessing(const GPUSettingsDeviceBackend &cfg)
uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
HighResTimer & getTimer(const char *name, int32_t num=-1)
void SetNActiveThreadsOuterLoop(uint32_t f)
void AddGPUEvents(T *&events)
GPUDataTypes::RecoStep RecoStep
int32_t getGeneralStepNum(GeneralStep step, bool validCheck=true)
static constexpr uint32_t NSECTORS
GPUSettingsProcessing mProcessingSettings
int32_t getRecoStepNum(RecoStep step, bool validCheck=true)
static int32_t getHostThreadIndex()
GPUDataTypes::GeneralStep GeneralStep
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
constexpr deviceEvent()=default
constexpr deviceEvent(std::nullptr_t p)