Project
Loading...
Searching...
No Matches
GPUReconstructionCUDA.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPURECONSTRUCTIONCUDA_H
16#define GPURECONSTRUCTIONCUDA_H
17
19#include "GPUCommonAlgorithm.h"
20#include <vector>
21#include <string>
22
23#ifdef _WIN32
25#else
27#endif
28
// Forward declaration of Ort::SessionOptions. In this header the type is only
// used as a reference parameter of GPUReconstructionCUDA::SetONNXGPUStream, so
// the full definition is not required here.
29namespace Ort
30{
31struct SessionOptions;
32}
33
34namespace o2::gpu
35{
36struct GPUReconstructionCUDAInternals;
37
// GPUReconstructionCUDA derives from GPUReconstructionProcessing::KernelInterface,
// instantiated with this class itself and GPUReconstructionDeviceBase as template
// arguments. The `#ifndef __HIPCC__` section near the end of the class shows that
// the same declaration is compiled for both CUDA and HIP.
// NOTE(review): the embedded line numbering of this listing skips 41-42, 52 and
// 57-58, so some declarations (for example the one belonging to the
// `template <class T, int32_t I>` header at 51) are not visible here.
38class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterface<GPUReconstructionCUDA, GPUReconstructionDeviceBase>
39{
40 public:
43
   // Overrides of base-class virtual functions.
   // NOTE(review): `virtual` on GPUChkErrInternal is redundant next to `override`.
44 void PrintKernelOccupancies() override;
45 virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override;
46
   // Kernel entry points, templated on a type T and an integer I (default 0), taking a
   // krnlSetupTime plus a variadic pack of const-reference arguments. Only declared here.
   // Presumably T selects the kernel class and I a variant of it - TODO confirm.
47 template <class T, int32_t I = 0, typename... Args>
48 void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args);
49 template <class T, int32_t I = 0, typename... Args>
50 void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... args);
   // NOTE(review): the declaration that follows this template header is not shown in this listing.
51 template <class T, int32_t I>
53
   // Grants GPUCommonAlgorithm::sortOnDevice access to this class's non-public members.
54 template <class T, class S>
55 friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp);
56
59
   // Device runtime init / exit overrides; both return an int32_t whose meaning is
   // defined by the base class (not visible here).
60 int32_t InitDevice_Runtime() override;
61 int32_t ExitDevice_Runtime() override;
62
   // Thread-context, synchronization, event and memory-registration overrides.
   // NOTE(review): several of these overriding functions declare default arguments.
   // C++ selects default arguments from the static type used at the call site, so
   // they should be kept identical to the base-class declarations.
63 std::unique_ptr<threadContext> GetThreadContext() override;
64 void SynchronizeGPU() override;
65 int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override;
66 void SynchronizeStream(int32_t stream) override;
67 void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) override;
68 void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) override;
69 bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) override;
70 int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override;
71 int32_t unregisterMemoryForGPU_internal(const void* ptr) override;
72
   // Memory transfer and event-marker overrides.
   // Presumably `toGPU` selects the copy direction and `stream = -1` means "no specific
   // stream" - TODO confirm against the base-class documentation.
73 size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
74 size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
75 void ReleaseEvent(deviceEvent ev) override;
76 void RecordMarker(deviceEvent* ev, int32_t stream) override;
   // Takes the Ort::SessionOptions type that is forward-declared at the top of this header.
77 void SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) override;
78
   // All three parameters are pointers to std::unique_ptr; presumably they are outputs
   // filled in by the implementation - TODO confirm.
79 void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame) override;
80
   // Backend-specific overrides: when __HIPCC__ is not defined (CUDA) the class reports
   // CanQueryMaxMemory() == true and declares the profiling start/end hooks; when it is
   // defined (HIP) the class overrides getGPUPointer instead.
81#ifndef __HIPCC__ // CUDA
82 bool CanQueryMaxMemory() override { return true; }
83 void startGPUProfiling() override;
84 void endGPUProfiling() override;
85#else // HIP
86 void* getGPUPointer(void* ptr) override;
87#endif
88
89 private:
   // Private helpers; judging by their names they deal with run-time compilation (RTC)
   // and kernel module loading - semantics are not visible in this header.
90 int32_t genRTC(std::string& filename, uint32_t& nCompile);
91 void getRTCKernelCalls(std::vector<std::string>& kernels);
92 void genAndLoadRTC();
93 void loadKernelModules(bool perKernel);
   // Two `const char*` members with in-class initializers ".src" and ".o"; presumably the
   // file extensions used for RTC source and binary files - TODO confirm in the .cxx/.cu file.
94 const char *mRtcSrcExtension = ".src", *mRtcBinExtension = ".o";
95};
96
97} // namespace o2::gpu
98
99#endif
benchmark::State & state
#define GPUh()
o2::gpu::GPUReconstruction * GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend &cfg)
#define protected
TBranch * ptr
int32_t ExitDevice_Runtime() override
GPUReconstructionCUDAInternals * mInternals
int32_t unregisterMemoryForGPU_internal(const void *ptr) override
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1) override
GPUReconstructionCUDA(const GPUSettingsDeviceBackend &cfg)
size_t WriteToConstantMemory(size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override
void startGPUProfiling() override
int32_t InitDevice_Runtime() override
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1) override
void runKernelBackend(const krnlSetupTime &_xyz, const Args &... args)
void SetONNXGPUStream(Ort::SessionOptions &session_options, int32_t stream, int32_t *deviceId) override
void RecordMarker(deviceEvent *ev, int32_t stream) override
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1) override
size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override
int32_t registerMemoryForGPU_internal(const void *ptr, size_t size) override
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1, bool force=false) override
void SynchronizeStream(int32_t stream) override
void GetITSTraits(std::unique_ptr< o2::its::TrackerTraits > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame > *timeFrame) override
void PrintKernelOccupancies() override
void runKernelBackendTimed(const krnlSetupTime &_xyz, const Args &... args)
virtual int32_t GPUChkErrInternal(const int64_t error, const char *file, int32_t line) const override
std::unique_ptr< threadContext > GetThreadContext() override
void ReleaseEvent(deviceEvent ev) override
virtual void * getGPUPointer(void *ptr)
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum dst
Definition glcorearb.h:1767
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLuint GLuint stream
Definition glcorearb.h:1806
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
std::string filename()
GPUReconstruction * rec
const int nEvents
Definition test_Fifo.cxx:27