de/d31/GPUReconstructionCUDAInternals_8h_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


// All CUDA-header related stuff goes here, so we can run CINT over GPUReconstructionCUDA


#ifndef GPURECONSTRUCTIONCUDAINTERNALS_H

#define GPURECONSTRUCTIONCUDAINTERNALS_H


#include <cuda.h>

#include "GPULogging.h"

#include <vector>

#include <memory>

#include <string>

#include "GPUCommonHelpers.h"


namespace o2::gpu

{


// Bundles the members that need CUDA header types (RTC modules, RTC kernel
// handles and the CUDA streams) together with a small helper that collects
// argument addresses into a pointer array.
struct GPUReconstructionCUDAInternals {
  std::vector<std::unique_ptr<CUmodule>> kernelModules;     // modules used for RTC compilation
  std::vector<std::unique_ptr<CUfunction>> kernelFunctions; // handles of the RTC kernels
  cudaStream_t Streams[GPUCA_MAX_STREAMS];                  // fixed-size array of CUDA streams

  // Overload for an empty argument list: there is nothing to store.
  static void getArgPtrs(const void** pArgs) {}

  // Writes the addresses of arg, args... into pArgs[0], pArgs[1], ... in
  // argument order. The caller must provide at least 1 + sizeof...(args)
  // slots, and the referenced arguments must outlive any use of the pointers.
  template <typename T, typename... Args>
  static void getArgPtrs(const void** pArgs, const T& arg, const Args&... args)
  {
    const void** slot = pArgs;
    *slot = &arg;
    // The comma fold evaluates left to right, so the slots are filled in order.
    ((*++slot = &args), ...);
  }
};


class GPUDebugTiming

{

 public:


  GPUDebugTiming(bool d, GPUReconstructionProcessing::deviceEvent* t, cudaStream_t* s, const GPUReconstructionProcessing::krnlSetupTime& x, GPUReconstructionCUDA* r) : mDeviceTimers(t), mStreams(s), mXYZ(x), mRec(r), mDo(d)

  {

    if (mDo) {

      if (mDeviceTimers) {

        mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));

      } else {

        mTimer.ResetStart();

      }

    }

  }


  ~GPUDebugTiming()

  {

    if (mDo && mXYZ.t == 0.) {

      if (mDeviceTimers) {

        mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));

        mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get<cudaEvent_t>()));

        float v;

        mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get<cudaEvent_t>(), mDeviceTimers[1].get<cudaEvent_t>()));

        mXYZ.t = v * 1.e-3f;

      } else {

        mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream]));

        mXYZ.t = mTimer.GetCurrentElapsedTime();

      }

    }

  }


 private:

  GPUReconstructionProcessing::deviceEvent* mDeviceTimers;

  cudaStream_t* mStreams;

  const GPUReconstructionProcessing::krnlSetupTime& mXYZ;

  GPUReconstructionCUDA* mRec;

  HighResTimer mTimer;

  bool mDo;

};


// Compile-time check that a cudaEvent_t can be implicitly converted to void*.
// Presumably deviceEvent stores the backend event handle as a void* (its
// definition is not visible in this header - confirm there).
// NOTE(review): std::is_convertible_v is declared in <type_traits>, which this
// header does not include directly; it currently relies on a transitive include.
static_assert(std::is_convertible_v<cudaEvent_t, void*>, "CUDA event type incompatible to deviceEvent");


} // namespace o2::gpu


#endif

GPUCommonHelpers.h

GPUCA_MAX_STREAMS
#define GPUCA_MAX_STREAMS
Definition GPUDefParametersConstants.h:26

GPULogging.h

HighResTimer
Definition timer.h:21

HighResTimer::GetCurrentElapsedTime
double GetCurrentElapsedTime(bool reset=false)
Definition timer.cxx:110

HighResTimer::ResetStart
void ResetStart()
Definition timer.cxx:63

o2::gpu::GPUDebugTiming
Definition GPUReconstructionCUDAInternals.h:45

o2::gpu::GPUDebugTiming::~GPUDebugTiming
~GPUDebugTiming()
Definition GPUReconstructionCUDAInternals.h:57

o2::gpu::GPUDebugTiming::GPUDebugTiming
GPUDebugTiming(bool d, GPUReconstructionProcessing::deviceEvent *t, cudaStream_t *s, const GPUReconstructionProcessing::krnlSetupTime &x, GPUReconstructionCUDA *r)
Definition GPUReconstructionCUDAInternals.h:47

o2::gpu::GPUReconstructionCUDA
Definition GPUReconstructionCUDA.h:39

x
GLint GLenum GLint x
Definition glcorearb.h:403

v
const GLdouble * v
Definition glcorearb.h:832

r
GLboolean r
Definition glcorearb.h:1233

o2::gpu
Definition TrackTRD.h:35

o2::gpu::GPUReconstructionCUDAInternals
Definition GPUReconstructionCUDAInternals.h:30

o2::gpu::GPUReconstructionCUDAInternals::getArgPtrs
static void getArgPtrs(const void **pArgs)
Definition GPUReconstructionCUDAInternals.h:35

o2::gpu::GPUReconstructionCUDAInternals::Streams
cudaStream_t Streams[GPUCA_MAX_STREAMS]
Definition GPUReconstructionCUDAInternals.h:33

o2::gpu::GPUReconstructionCUDAInternals::getArgPtrs
static void getArgPtrs(const void **pArgs, const T &arg, const Args &... args)
Definition GPUReconstructionCUDAInternals.h:37

o2::gpu::GPUReconstructionCUDAInternals::kernelModules
std::vector< std::unique_ptr< CUmodule > > kernelModules
Definition GPUReconstructionCUDAInternals.h:31

o2::gpu::GPUReconstructionCUDAInternals::kernelFunctions
std::vector< std::unique_ptr< CUfunction > > kernelFunctions
Definition GPUReconstructionCUDAInternals.h:32

o2::gpu::GPUReconstructionProcessing::krnlExec::stream
int32_t stream
Definition GPUReconstructionProcessing.h:100

o2::gpu::GPUReconstructionProcessing::krnlSetupTime
Definition GPUReconstructionProcessing.h:131

o2::gpu::GPUReconstructionProcessing::krnlSetupTime::t
double & t
Definition GPUReconstructionProcessing.h:132

o2::gpu::GPUReconstructionProcessing::krnlSetup::x
krnlExec x
Definition GPUReconstructionProcessing.h:126

o2::gpu::gpu_reconstruction_kernels::deviceEvent
Definition GPUReconstructionProcessing.h:36