Project
Loading...
Searching...
No Matches
GPUCommonDefAPI.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCOMMONDEFAPI_H
16#define GPUCOMMONDEFAPI_H
17// clang-format off
18
19#ifndef GPUCOMMONDEF_H
20 #error Please include GPUCommonDef.h!
21#endif
22
23#ifndef GPUCA_GPUCODE_DEVICE
24#include <cstdint>
25#endif
26
27//Define macros for GPU keywords. i-version defines inline functions.
28//All host-functions in GPU code are automatically inlined, to avoid duplicate symbols.
29//For non-inline host only functions, use no keyword at all!
30#if !defined(GPUCA_GPUCODE) || defined(__OPENCL_HOST__) // For host / ROOT dictionary
31 #define GPUd() // device function
32 #define GPUdDefault() // default (constructor / operator) device function
33 #define GPUhdDefault() // default (constructor / operator) host device function
34 #define GPUdi() inline // to-be-inlined device function
35 #define GPUdii() // Only on GPU to-be-inlined (forced) device function
36 #define GPUdni() // Device function, not-to-be-inlined
37 #define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host
38 #define GPUh() // Host-only function
39 // NOTE: All GPUd*() functions are also compiled on the host during GCC compilation.
40 // The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation.
41 // Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()!
42 #define GPUhi() inline // to-be-inlined host-only function
43 #define GPUhd() // Host and device function, inlined during GPU compilation to avoid symbol clashes in host code
44 #define GPUhdi() inline // Host and device function, to-be-inlined on host and device
45 #define GPUhdni() // Host and device function, not to-be-inlined automatically
46 #define GPUg() INVALID_TRIGGER_ERROR_NO_HOST_CODE // GPU kernel
47 #define GPUshared() // shared memory variable declaration
48 #define GPUglobal() // global memory variable declaration (only used for kernel input pointers)
49 #define GPUconstant() // constant memory variable declaration
50 #define GPUconstexpr() static constexpr // constexpr on GPU that needs to be instantiated for dynamic access (e.g. arrays), becomes __constant on GPU
51 #define GPUprivate() // private memory variable declaration
52 #define GPUgeneric() // reference / ptr to generic address space
53 #define GPUbarrier() // synchronize all GPU threads in block
54 #define GPUbarrierWarp() // synchronize threads inside warp
55 #define GPUAtomic(type) type // atomic variable type
56 #define GPUsharedref() // reference / ptr to shared memory
57 #define GPUglobalref() // reference / ptr to global memory
58 #define GPUconstantref() // reference / ptr to constant memory
59 #define GPUconstexprref() // reference / ptr to variable declared as GPUconstexpr()
60
61 #ifndef __VECTOR_TYPES_H__ // FIXME: ROOT will pull in these CUDA definitions if built against CUDA, so we have to add an ugly protection here
62 struct float4 { float x, y, z, w; }; // host stand-in for CUDA's built-in float4; NOTE(review): CUDA's float4 is 16-byte aligned, this fallback has natural alignment — confirm layouts match where data crosses host/device
63 struct float3 { float x, y, z; }; // host stand-in for CUDA's built-in float3
64 struct float2 { float x; float y; }; // host stand-in for CUDA's built-in float2
65 struct uchar2 { uint8_t x, y; }; // two-component unsigned 8-bit vector
66 struct short2 { int16_t x, y; }; // two-component signed 16-bit vector
67 struct ushort2 { uint16_t x, y; }; // two-component unsigned 16-bit vector
68 struct int2 { int32_t x, y; }; // two-component signed 32-bit vector
69 struct int3 { int32_t x, y, z; }; // three-component signed 32-bit vector
70 struct int4 { int32_t x, y, z, w; }; // four-component signed 32-bit vector
71 struct uint1 { uint32_t x; }; // one-component unsigned 32-bit vector
72 struct uint2 { uint32_t x, y; }; // two-component unsigned 32-bit vector
73 struct uint3 { uint32_t x, y, z; }; // three-component unsigned 32-bit vector
74 struct uint4 { uint32_t x, y, z, w; }; // four-component unsigned 32-bit vector
75 struct dim3 { uint32_t x, y, z; }; // host stand-in for CUDA's kernel launch dimension type; NOTE(review): CUDA's dim3 default-constructs to (1,1,1), this plain aggregate does not — confirm no host code relies on that
76 #endif
77#elif defined(__OPENCL__) // Defines for OpenCL
78 #define GPUd()
79 #define GPUdDefault()
80 #define GPUhdDefault()
81 #define GPUdi() inline
82 #define GPUdii() inline
83 #define GPUdni()
84 #define GPUdnii()
85 #define GPUh() INVALID_TRIGGER_ERROR_NO_HOST_CODE
86 #define GPUhi() INVALID_TRIGGER_ERROR_NO_HOST_CODE
87 #define GPUhd() inline
88 #define GPUhdi() inline
89 #define GPUhdni()
90 #define GPUg() __kernel
91 #define GPUshared() __local
92 #define GPUglobal() __global
93 #define GPUconstant() __constant // TODO: possibly add const __restrict where possible later!
94 #define GPUconstexpr() __constant
95 #define GPUprivate() __private
96 #define GPUgeneric() __generic
97 #define GPUconstexprref() GPUconstexpr()
98 #if defined(__OPENCL__) && !defined(__clang__)
99 #define GPUbarrier() work_group_barrier(mem_fence::global | mem_fence::local);
100 #define GPUbarrierWarp()
101 #define GPUAtomic(type) atomic<type>
102 static_assert(sizeof(atomic<uint32_t>) == sizeof(uint32_t), "Invalid size of atomic type");
103 #else
104 #define GPUbarrier() barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)
105 #define GPUbarrierWarp()
106 #if defined(__OPENCL__) && defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)
107 namespace o2 { namespace gpu {
108 template <class T> struct oclAtomic;
109 template <> struct oclAtomic<uint32_t> {typedef atomic_uint t;};
110 static_assert(sizeof(oclAtomic<uint32_t>::t) == sizeof(uint32_t), "Invalid size of atomic type");
111 }}
112 #define GPUAtomic(type) o2::gpu::oclAtomic<type>::t
113 #else
114 #define GPUAtomic(type) volatile type
115 #endif
116 #endif
117 #if !defined(__OPENCL__) // Other special defines for OpenCL 1
118 #define GPUCA_USE_TEMPLATE_ADDRESS_SPACES // TODO: check if we can make this (partially, where it is already implemented) compatible with OpenCL CPP
119 #define GPUsharedref() GPUshared()
120 #define GPUglobalref() GPUglobal()
121 #undef GPUgeneric
122 #define GPUgeneric()
123 #endif
124 #if (!defined(__OPENCL__) || !defined(GPUCA_NO_CONSTANT_MEMORY))
125 #define GPUconstantref() GPUconstant()
126 #endif
127#elif defined(__HIPCC__) //Defines for HIP
128 #define GPUd() __device__
129 #define GPUdDefault() __device__
130 #define GPUhdDefault() __host__ __device__
131 #define GPUdi() __device__ inline
132 #define GPUdii() __device__ __forceinline__
133 #define GPUdni() __device__ __attribute__((noinline))
134 #define GPUdnii() __device__ __attribute__((noinline))
135 #define GPUh() __host__ inline
136 #define GPUhi() __host__ inline
137 #define GPUhd() __host__ __device__ inline
138 #define GPUhdi() __host__ __device__ inline
139 #define GPUhdni() __host__ __device__
140 #define GPUg() __global__
141 #define GPUshared() __shared__
142 #if defined(GPUCA_GPUCODE_DEVICE) && 0 // TODO: Fix for HIP
143 #define GPUCA_USE_TEMPLATE_ADDRESS_SPACES
144 #define GPUglobal() __attribute__((address_space(1)))
145 #define GPUglobalref() GPUglobal()
146 #define GPUconstantref() __attribute__((address_space(4)))
147 #define GPUsharedref() __attribute__((address_space(3)))
148 #else
149 #define GPUglobal()
150 #endif
151 #define GPUconstant() __constant__
152 #define GPUconstexpr() constexpr __constant__
153 #define GPUprivate()
154 #define GPUgeneric()
155 #define GPUbarrier() __syncthreads()
156 #define GPUbarrierWarp()
157 #define GPUAtomic(type) type
158#elif defined(__CUDACC__) //Defines for CUDA
159 #ifndef GPUCA_GPUCODE_DEVICE
160 #define GPUd() __device__ inline // FIXME: DR: Workaround: mark device function as inline such that nvcc does not create bogus host symbols
161 #else
162 #define GPUd() __device__
163 #endif
164 #define GPUdDefault()
165 #define GPUhdDefault()
166 #define GPUdi() __device__ inline
167 #define GPUdii() __device__ inline
168 #define GPUdni() __device__ __attribute__((noinline))
169 #define GPUdnii() __device__ __attribute__((noinline))
170 #define GPUh() __host__ inline
171 #define GPUhi() __host__ inline
172 #define GPUhd() __host__ __device__ inline
173 #define GPUhdi() __host__ __device__ inline
174 #define GPUhdni() __host__ __device__
175 #define GPUg() __global__
176 #define GPUshared() __shared__
177 #define GPUglobal()
178 #define GPUconstant() __constant__
179 #define GPUconstexpr() constexpr __constant__
180 #define GPUprivate()
181 #define GPUgeneric()
182 #define GPUbarrier() __syncthreads()
183 #define GPUbarrierWarp() __syncwarp()
184 #define GPUAtomic(type) type
185#endif
186
187#ifndef GPUdic // Takes different parameter for inlining: 0 = never, 1 = always, 2 = compiler-decision
188#define GPUdic(...) GPUd()
189#endif
190#define GPUCA_GPUdic_select_0() GPUdni()
191#define GPUCA_GPUdic_select_1() GPUdii()
192#define GPUCA_GPUdic_select_2() GPUd()
193
194#if defined(GPUCA_NO_CONSTANT_MEMORY)
195 #undef GPUconstant
196 #define GPUconstant() GPUglobal()
197#endif
198
199#ifndef GPUsharedref
200#define GPUsharedref()
201#endif
202#ifndef GPUglobalref
203#define GPUglobalref()
204#endif
205#ifndef GPUconstantref
206#define GPUconstantref()
207#endif
208#ifndef GPUconstexprref
209#define GPUconstexprref()
210#endif
211
212#define GPUrestrict() __restrict__
213
214// Macros for GRID dimension
215#if defined(__CUDACC__) || defined(__HIPCC__)
216 #define get_global_id(dim) (blockIdx.x * blockDim.x + threadIdx.x)
217 #define get_global_size(dim) (blockDim.x * gridDim.x)
218 #define get_num_groups(dim) (gridDim.x)
219 #define get_local_id(dim) (threadIdx.x)
220 #define get_local_size(dim) (blockDim.x)
221 #define get_group_id(dim) (blockIdx.x)
222#elif defined(__OPENCL__)
223 // Using OpenCL defaults
224#else
225 #define get_global_id(dim) iBlock
226 #define get_global_size(dim) nBlocks
227 #define get_num_groups(dim) nBlocks
228 #define get_local_id(dim) 0
229 #define get_local_size(dim) 1
230 #define get_group_id(dim) iBlock
231#endif
232
233 // clang-format on
234#endif
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
uint32_t y
uint32_t z
uint32_t x
int32_t x
int32_t y
int32_t z
int32_t y
int32_t x
int32_t y
int32_t z
int32_t x
int32_t w
int16_t y
int16_t x
uint8_t x
uint8_t y
uint32_t x
uint32_t x
uint32_t y
uint32_t y
uint32_t x
uint32_t z
uint32_t z
uint32_t w
uint32_t x
uint32_t y
uint16_t x
uint16_t y