16#ifndef ITSTRACKINGGPU_UTILS_H_
17#define ITSTRACKINGGPU_UTILS_H_
28#define THRUST_NAMESPACE thrust::cuda
30#define THRUST_NAMESPACE thrust::hip
// GPULog(...): expands to LOGP(info, ...), forwarding every argument unchanged.
34#define GPULog(...) LOGP(info, __VA_ARGS__)
42template <
typename T1,
typename T2>
// Element access: returns _data[idx] as `ref`.
// No range check on idx is performed in this function.
56 GPUd()
ref operator[](
unsigned int idx)
const {
return _data[idx]; }
// Element access: returns _data[idx] as `ref`.
// idx is used directly as the subscript; this function does not validate it.
78 GPUd()
ref operator[](
unsigned int idx)
const {
return _data[idx]; }
96 using Handle = hipStream_t;
98 static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
99 using Event = hipEvent_t;
100#elif defined(__CUDACC__)
101 using Handle = cudaStream_t;
103 static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
104 using Event = cudaEvent_t;
114#if defined(__HIPCC__)
116 GPUChkErrS(hipEventCreateWithFlags(&mEvent, hipEventDisableTiming));
117#elif defined(__CUDACC__)
119 GPUChkErrS(cudaEventCreateWithFlags(&mEvent, cudaEventDisableTiming));
127#if defined(__HIPCC__)
130#elif defined(__CUDACC__)
143#if defined(__HIPCC__)
145#elif defined(__CUDACC__)
151#if defined(__HIPCC__)
153#elif defined(__CUDACC__)
160 Event mEvent{
nullptr};
// Returns mStreams.size(), i.e. the number of elements currently held in mStreams.
167 size_t size() const noexcept {
return mStreams.size(); }
175#if defined(__HIPCC__)
177#elif defined(__CUDACC__)
181 for (
auto& s : mStreams) {
188#if defined(__HIPCC__)
189 GPUChkErrS(hipStreamWaitEvent(mStreams[iStream].
get(), mStreams[iEvent].getEvent()));
190#elif defined(__CUDACC__)
191 GPUChkErrS(cudaStreamWaitEvent(mStreams[iStream].
get(), mStreams[iEvent].getEvent()));
196 std::vector<Stream> mStreams;
199#ifdef ITS_MEASURE_GPU_TIME
203 GPUTimer(
const std::string&
name)
209 GPUTimer(Streams& streams,
const std::string&
name)
212 for (
size_t i{0};
i < streams.size(); ++
i) {
213 mStreams.push_back(streams[
i].
get());
220 for (
size_t sta{
start}; sta <
end; ++sta) {
221 mStreams.push_back(streams[sta].
get());
228 mStreams.push_back(
stream.get());
234 for (
size_t i{0};
i < mStreams.size(); ++
i) {
236#if defined(__HIPCC__)
239 GPUChkErrS(hipEventElapsedTime(&ms, mStarts[
i], mStops[
i]));
242#elif defined(__CUDACC__)
245 GPUChkErrS(cudaEventElapsedTime(&ms, mStarts[
i], mStops[
i]));
249 LOGP(info,
"Elapsed time for {}:{} {} ms", mName,
i, ms);
255 mStarts.resize(mStreams.size());
256 mStops.resize(mStreams.size());
257 for (
size_t i{0};
i < mStreams.size(); ++
i) {
258#if defined(__HIPCC__)
262#elif defined(__CUDACC__)
272 std::vector<Stream::Event> mStarts, mStops;
273 std::vector<Stream::Handle> mStreams;
279 template <
typename... Args>
Class for time synchronization of RawReader instances.
const Handle & getStream()
static constexpr Handle DefaultStream
static constexpr unsigned int DefaultFlag
Stream(unsigned int flags=DefaultFlag)
void push_back(const Stream &stream)
void waitEvent(size_t iStream, size_t iEvent)
void sync(bool device=true)
auto & operator[](size_t i)
size_t size() const noexcept
GLuint const GLchar * name
std::pair< T1, T2 > gpuPair
std::string to_string(gsl::span< T, Size > span)
GPUd() gpuSpan(const gpuSpan< T > &other)
GPUd() bool empty() const
GPUd() ref operator[](unsigned int idx) const
GPUd() unsigned int size() const
GPUd() bool empty() const
GPUd() ref operator[](unsigned int idx) const
GPUd() unsigned int size() const
VectorOfTObjectPtrs other