Project
Loading...
Searching...
No Matches
GPUReconstructionKernelMacros.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15// clang-format off
16#ifndef O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H
17#define O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H
18
19#include "GPUDefMacros.h"
20
21#define GPUCA_M_KRNL_TEMPLATE_B(a, b, ...) a, a::b
22#define GPUCA_M_KRNL_TEMPLATE_A(...) GPUCA_M_KRNL_TEMPLATE_B(__VA_ARGS__, defaultKernel)
23#define GPUCA_M_KRNL_TEMPLATE(...) GPUCA_M_KRNL_TEMPLATE_A(GPUCA_M_STRIP(__VA_ARGS__))
24
25#define GPUCA_M_KRNL_NUM_B(a, b, ...) a::b
26#define GPUCA_M_KRNL_NUM_A(...) GPUCA_M_KRNL_NUM_B(__VA_ARGS__, defaultKernel)
27#define GPUCA_M_KRNL_NUM(...) GPUCA_M_KRNL_NUM_A(GPUCA_M_STRIP(__VA_ARGS__))
28
29#define GPUCA_M_KRNL_NAME_B0(a, b, ...) GPUCA_M_CAT3(a, _, b)
30#define GPUCA_M_KRNL_NAME_B1(a) a
31#define GPUCA_M_KRNL_NAME_A(...) GPUCA_M_CAT(GPUCA_M_KRNL_NAME_B, GPUCA_M_SINGLEOPT(__VA_ARGS__))(__VA_ARGS__)
32#define GPUCA_M_KRNL_NAME(...) GPUCA_M_KRNL_NAME_A(GPUCA_M_STRIP(__VA_ARGS__))
33
34#if defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_HOSTONLY)
35#ifndef GPUCA_KRNL_REG
36#define GPUCA_KRNL_REG(...)
37#endif
38#define GPUCA_KRNL_REG_INTERNAL_PROP(...) GPUCA_M_STRIP(__VA_ARGS__)
39#ifndef GPUCA_KRNL_CUSTOM
40#define GPUCA_KRNL_CUSTOM(...)
41#endif
42#define GPUCA_KRNL_CUSTOM_INTERNAL_PROP(...)
43#define GPUCA_ATTRRES_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES2(XX, __VA_ARGS__)
44#define GPUCA_ATTRRES2_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES3(XX, __VA_ARGS__)
45#define GPUCA_ATTRRES_CUSTOM(XX, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES2(XX, __VA_ARGS__)
46#define GPUCA_ATTRRES2_CUSTOM(XX, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES3(XX, __VA_ARGS__)
47#define GPUCA_ATTRRES_NONE(XX, ...)
48#define GPUCA_ATTRRES2_NONE(XX, ...)
49#define GPUCA_ATTRRES_(XX, ...)
50#define GPUCA_ATTRRES2_(XX, ...)
51#define GPUCA_ATTRRES3(XX) // 3 attributes not supported
52#define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__)
53#define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__)
54// GPU Kernel entry point for single sector
55#define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) \
56 GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments))
57#ifdef GPUCA_KRNL_DEFONLY
58#define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__);
59#else
60#define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \
61 { \
62 GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \
63 GPUCA_M_STRIP_FIRST(x_class)::template Thread<GPUCA_M_KRNL_NUM(x_class)>(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \
64 }
65#endif
66
67// GPU Kernel entry point for multiple sector
68#define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) \
69 GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments))
70#ifdef GPUCA_KRNL_DEFONLY
71#define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__);
72#else
73#define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \
74 { \
75 const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \
76 const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \
77 const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \
78 const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \
79 GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \
80 GPUCA_M_STRIP_FIRST(x_class)::template Thread<GPUCA_M_KRNL_NUM(x_class)>(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \
81 }
82#endif
83
84// GPU Host wrapper pre- and post-parts
85#define GPUCA_KRNL_PRE(x_class, ...) \
86 template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal<GPUCA_M_KRNL_TEMPLATE(x_class)> { \
87 public: \
88 template <typename T, typename... Args> \
89 static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \
90 { \
91 auto& x = _xyz.x; \
92 auto& y = _xyz.y;
93
94#define GPUCA_KRNL_POST() \
95 } \
96 };
97
98// GPU Host wrappers for single kernel, multi-sector, or auto-detection
99#define GPUCA_KRNL_single(...) \
100 GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \
101 GPUCA_KRNL_PRE(__VA_ARGS__) \
102 if (y.num > 1) { \
103 throw std::runtime_error("Kernel called with invalid number of sectors"); \
104 } else { \
105 GPUCA_KRNL_CALL_single(__VA_ARGS__) \
106 } \
107 GPUCA_KRNL_POST()
108
109#define GPUCA_KRNL_multi(...) \
110 GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \
111 GPUCA_KRNL_PRE(__VA_ARGS__) \
112 GPUCA_KRNL_CALL_multi(__VA_ARGS__) \
113 GPUCA_KRNL_POST()
114
115#define GPUCA_KRNL_(...) GPUCA_KRNL_single(__VA_ARGS__)
116#define GPUCA_KRNL_simple(...) GPUCA_KRNL_single(__VA_ARGS__)
117#define GPUCA_KRNL_both(...) \
118 GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \
119 GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \
120 GPUCA_KRNL_PRE(__VA_ARGS__) \
121 if (y.num <= 1) { \
122 GPUCA_KRNL_CALL_single(__VA_ARGS__) \
123 } else { \
124 GPUCA_KRNL_CALL_multi(__VA_ARGS__) \
125 } \
126 GPUCA_KRNL_POST()
127
128#define GPUCA_KRNL_LOAD_(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__)
129#define GPUCA_KRNL_LOAD_simple(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__)
130#define GPUCA_KRNL_LOAD_both(...) \
131 GPUCA_KRNL_LOAD_single(__VA_ARGS__) \
132 GPUCA_KRNL_LOAD_multi(__VA_ARGS__)
133
134#define GPUCA_KRNL_PROP(x_class, x_attributes) \
135 template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \
136 gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes)))}; \
137 return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \
138 }
139
140// Generate GPU kernel and host wrapper
141#define GPUCA_KRNL_WRAP(x_func, x_class, x_attributes, ...) GPUCA_M_CAT(x_func, GPUCA_M_STRIP_FIRST(x_attributes))(x_class, x_attributes, __VA_ARGS__)
142#endif // GPUCA_GPUCODE
143
144#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (GPUCA_M_STRIP(x_attributes), REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)))), __VA_ARGS__)
145
146#endif // O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H
147// clang-format on