17#ifndef GPURECONSTRUCTIONCUDAINTERNALS_H
18#define GPURECONSTRUCTIONCUDAINTERNALS_H
29#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__)
30#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__)
39 template <
typename T,
typename... Args>
40 static void getArgPtrs(
const void** pArgs,
const T& arg,
const Args&... args)
54 mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get<cudaEvent_t>(), mStreams[mXYZ.
x.
stream]));
62 if (mDo && mXYZ.
t == 0.) {
64 mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get<cudaEvent_t>(), mStreams[mXYZ.
x.
stream]));
65 mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get<cudaEvent_t>()));
67 mRec->GPUFailedMsg(cudaEventElapsedTime(&
v, mDeviceTimers[0].get<cudaEvent_t>(), mDeviceTimers[1].get<cudaEvent_t>()));
70 mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.
x.
stream]));
78 cudaStream_t* mStreams;
85static_assert(std::is_convertible<cudaEvent_t, void*>::value,
"CUDA event type incompatible to deviceEvent");
#define GPUCA_MAX_STREAMS
double GetCurrentElapsedTime(bool reset=false)
GPUDebugTiming(bool d, gpu_reconstruction_kernels::deviceEvent *t, cudaStream_t *s, const gpu_reconstruction_kernels::krnlSetupTime &x, GPUReconstructionCUDABackend *r)
static void getArgPtrs(const void **pArgs)
cudaStream_t Streams[GPUCA_MAX_STREAMS]
static void getArgPtrs(const void **pArgs, const T &arg, const Args &... args)
std::vector< std::unique_ptr< CUmodule > > kernelModules
std::vector< std::unique_ptr< CUfunction > > kernelFunctions
std::vector< std::string > kernelNames