Project
Loading...
Searching...
No Matches
GPUGeneralKernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUGENERALKERNELS_H
16#define GPUGENERALKERNELS_H
17
18#include "GPUDef.h"
19#include "GPUDataTypes.h"
20
21#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
22#if defined(__CUDACC__)
23#include <cub/cub.cuh>
24#elif defined(__HIPCC__)
25#include <hipcub/hipcub.hpp>
26#endif
27#endif
28
29#if defined(__HIPCC__)
30#define GPUCA_CUB hipcub
31#else
32#define GPUCA_CUB cub
33#endif
34
35namespace o2::gpu
36{
37struct GPUConstantMem;
38
40{
41 public:
// Kernel selector constants. defaultKernel and step0 share the value 0;
// step1..step5 are numbered 1..5. defaultKernel is used elsewhere in this
// header as the default value of the iKernel template parameter of Thread().
42 enum K { defaultKernel = 0,
43 step0 = 0,
44 step1 = 1,
45 step2 = 2,
46 step3 = 3,
47 step4 = 4,
48 step5 = 5 };
49
51 };
52
53 template <class T, int32_t I>
55 // Provides the shared memory resources for warp wide CUB collectives
56#if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY)
57 typedef GPUCA_CUB::WarpScan<T> WarpScan;
58 union {
59 typename WarpScan::TempStorage cubWarpTmpMem;
60 };
61#endif
62 };
63
64 template <class T, int32_t I>
66 // Provides the shared memory resources for CUB collectives
67#if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY)
68 typedef GPUCA_CUB::BlockScan<T, I> BlockScan;
69 typedef GPUCA_CUB::BlockReduce<T, I> BlockReduce;
70 typedef GPUCA_CUB::WarpScan<T> WarpScan;
71 union {
72 typename BlockScan::TempStorage cubTmpMem;
73 typename BlockReduce::TempStorage cubReduceTmpMem;
74 typename WarpScan::TempStorage cubWarpTmpMem;
75 int32_t tmpBroadcast;
76 int32_t warpPredicateSum[I / GPUCA_WARP_SIZE];
77 };
78#endif
79 };
80
81 typedef GPUconstantref() GPUConstantMem processorType;
// Compile-time constant: always returns GPUCA_RECO_STEP::NoRecoStep.
82 GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
// Returns the address of the GPUConstantMem object passed in, typed as
// processorType* (processorType is the GPUconstantref()-qualified
// GPUConstantMem typedef declared just above). No other work is done.
83 GPUhdi() static processorType* Processor(GPUConstantMem& processors)
84 {
85 return &processors;
86 }
// Generic kernel entry point with an intentionally empty body: this
// implementation does nothing for any iKernel / Args combination.
// NOTE(review): nBlocks/nThreads/iBlock/iThread presumably carry the launch
// dimensions and the calling thread's indices - confirm against the launcher.
// smem is a reference to this class's GPUSharedMemory; processors is the
// constant-memory object; args are forwarded by value and unused here.
87 template <int32_t iKernel, typename... Args>
88 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args)
89 {
90 }
91};
92
93// Clean memory, ptr multiple of 16, size will be extended to multiple of 16
95{
96 public:
97 GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
// Declaration only; the definition is not part of this header.
// iKernel defaults to defaultKernel. Takes an untyped pointer `ptr` and a
// 64-bit `size` in addition to the common Thread() arguments.
// NOTE(review): per the comment preceding this class, ptr is expected to be a
// multiple of 16 and size is extended to a multiple of 16 - the unit of
// `size` is not visible here; confirm in the implementation.
98 template <int32_t iKernel = defaultKernel>
99 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size);
100};
101
102// Fill with incrementing sequence of integers
104{
105 public:
106 GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
// Declaration only; the definition is not part of this header.
// iKernel defaults to defaultKernel. Takes an int32_t pointer `ptr` and a
// 64-bit `size` in addition to the common Thread() arguments.
// NOTE(review): presumably `size` is the number of int32_t elements to write
// and the sequence starts at 0 - neither is visible here; confirm in the
// implementation.
107 template <int32_t iKernel = defaultKernel>
108 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size);
109};
110
111} // namespace o2::gpu
112
113#undef GPUCA_CUB
114
115#endif
#define GPUsharedref()
#define GPUCA_WARP_SIZE
TBranch * ptr
GPUhdi() static processorType *Processor(GPUConstantMem &processors)
int32_t int32_t int32_t processorType & processors
int32_t int32_t int32_t iThread
GPUhdi() const expr static GPUDataTypes
GPUd() static void Thread(int32_t nBlocks
typedef GPUconstantref() GPUConstantMem processorType
int32_t int32_t int32_t processorType & processors
GPUhdi() const expr static GPUDataTypes
GPUd() static void Thread(int32_t nBlocks
int32_t int32_t int32_t iThread
int32_t int32_t int32_t processorType GPUglobalref() void *ptr
GPUhdi() const expr static GPUDataTypes
GPUd() static void Thread(int32_t nBlocks
GLsizeiptr size
Definition glcorearb.h:659