Project
Loading...
Searching...
No Matches
GPUGeneralKernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUGENERALKERNELS_H
16#define GPUGENERALKERNELS_H
17
18#include "GPUDef.h"
19#include "GPUDataTypesIO.h"
20#include "GPUDataTypesConfig.h"
21
22#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
23#if defined(__CUDACC__)
24#include <cub/cub.cuh>
25#elif defined(__HIPCC__)
26#include <hipcub/hipcub.hpp>
27#endif
28#endif
29
30#if defined(__HIPCC__)
31#define GPUCA_CUB_NAMESPACE hipcub
32#else
33#define GPUCA_CUB_NAMESPACE cub
34#endif
35
36namespace o2::gpu
37{
38struct GPUConstantMem;
39
41{
// NOTE(review): the declaration line that opens this scope is absent from this listing (numbering jumps 39 -> 41); confirm the type name against the real header.
42 public:
// Kernel selector values. defaultKernel and step0 are both 0, so they denote the same selector.
43 enum K { defaultKernel = 0,
44 step0 = 0,
45 step1 = 1,
46 step2 = 2,
47 step3 = 3,
48 step4 = 4,
49 step5 = 5 };
50
// NOTE(review): line 51 is missing from this listing; the closing brace below ends a nested definition whose opening line is not visible here.
52 };
53
54 template <class T, int32_t I>
// NOTE(review): line 55 (presumably the struct header this template applies to) is missing from this listing.
56 // Provides the shared memory resources for warp-wide CUB collectives
// The members below exist only when compiling with a CUDA or HIP compiler, with GPUCA_GPUCODE defined and GPUCA_GPUCODE_HOSTONLY not defined; otherwise the body is empty.
// In the visible lines the template parameter I is not used by this variant.
57#if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY)
58 typedef GPUCA_CUB_NAMESPACE::WarpScan<T> WarpScan;
59 union {
60 typename WarpScan::TempStorage cubWarpTmpMem;
61 };
62#endif
63 };
64
65 template <class T, int32_t I>
// NOTE(review): line 66 (presumably the struct header this template applies to) is missing from this listing.
67 // Provides the shared memory resources for CUB collectives
// Same compile-time guard as above: members exist only for CUDA/HIP device-code builds that are not host-only.
68#if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY)
69 typedef GPUCA_CUB_NAMESPACE::BlockScan<T, I> BlockScan;
70 typedef GPUCA_CUB_NAMESPACE::BlockReduce<T, I> BlockReduce;
71 typedef GPUCA_CUB_NAMESPACE::WarpScan<T> WarpScan;
// Anonymous union: all members below share the same storage, so only one of them holds valid data at a time.
72 union {
73 typename BlockScan::TempStorage cubTmpMem;
74 typename BlockReduce::TempStorage cubReduceTmpMem;
75 typename WarpScan::TempStorage cubWarpTmpMem;
76 int32_t tmpBroadcast;
// NOTE(review): the array length is the integer quotient I / GPUCA_WARP_SIZE, which is 0 when I < GPUCA_WARP_SIZE — confirm every instantiation uses I >= GPUCA_WARP_SIZE.
77 int32_t warpPredicateSum[I / GPUCA_WARP_SIZE];
78 };
79#endif
80 };
81
// processorType is GPUConstantMem qualified by the GPUconstantref() macro.
82 typedef GPUconstantref() GPUConstantMem processorType;
// Compile-time constant: this template reports RecoStep::NoRecoStep.
83 GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep() { return gpudatatypes::RecoStep::NoRecoStep; }
// Returns the address of the GPUConstantMem object passed in; no lookup or copy is performed.
84 GPUhdi() static processorType* Processor(GPUConstantMem& processors)
85 {
86 return &processors;
87 }
// Default kernel body: accepts any kernel index and any trailing arguments and does nothing.
// nBlocks/nThreads/iBlock/iThread are presumably the launch dimensions and this thread's indices — TODO confirm against the launcher.
88 template <int32_t iKernel, typename... Args>
89 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args)
90 {
91 }
92};
93
94// Clean memory: ptr must be a multiple of 16; size will be extended to a multiple of 16
96{
// NOTE(review): the declaration line that opens this scope is absent from this listing (numbering jumps 94 -> 96); confirm the class name and base against the real header.
97 public:
// Compile-time constant: this kernel reports RecoStep::NoRecoStep.
98 GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep() { return gpudatatypes::RecoStep::NoRecoStep; }
// Declaration only; the definition is not part of this listing.
// ptr is an untyped pointer marked GPUglobalref() and size is a 64-bit unsigned count (units not shown here — presumably bytes, TODO confirm).
// iKernel defaults to defaultKernel, so callers may omit the template argument.
99 template <int32_t iKernel = defaultKernel>
100 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size);
101};
102
103// Fill with incrementing sequence of integers
105{
// NOTE(review): the declaration line that opens this scope is absent from this listing (numbering jumps 103 -> 105); confirm the class name and base against the real header.
106 public:
// Compile-time constant: this kernel reports RecoStep::NoRecoStep.
107 GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep() { return gpudatatypes::RecoStep::NoRecoStep; }
// Declaration only; the definition is not part of this listing.
// ptr is an int32_t pointer marked GPUglobalref() and size is a 64-bit unsigned count (presumably the number of elements to fill — TODO confirm).
// iKernel defaults to defaultKernel, so callers may omit the template argument.
108 template <int32_t iKernel = defaultKernel>
109 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size);
110};
111
112} // namespace o2::gpu
113
114#undef GPUCA_CUB_NAMESPACE
115
116#endif
#define GPUsharedref()
TBranch * ptr
GPUhdi() static processorType *Processor(GPUConstantMem &processors)
int32_t int32_t int32_t processorType & processors
GPUhdi() const expr static gpudatatypes
int32_t int32_t int32_t iThread
GPUd() static void Thread(int32_t nBlocks
typedef GPUconstantref() GPUConstantMem processorType
int32_t int32_t int32_t processorType & processors
GPUhdi() const expr static gpudatatypes
GPUd() static void Thread(int32_t nBlocks
int32_t int32_t int32_t iThread
int32_t int32_t int32_t processorType GPUglobalref() void *ptr
GPUhdi() const expr static gpudatatypes
GPUd() static void Thread(int32_t nBlocks
GLsizeiptr size
Definition glcorearb.h:659