Project
Loading...
Searching...
No Matches
GPUDefParametersDefault.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15// This file contains compile-time constants affecting the GPU performance.
16// Many of these constants are GPU-architecture specific.
17// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc.
18// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h
19
20#ifndef GPUDEFPARAMETERSDEFAULT_H
21#define GPUDEFPARAMETERSDEFAULT_H
22// clang-format off
23
24#include "GPUCommonDef.h"
25#include "GPUDefMacros.h"
26
27// Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds)
28
29// GPU Run Configuration
30#ifdef GPUCA_GPUCODE
31#if defined(GPUCA_GPUTYPE_MI2xx)
32 #define GPUCA_WARP_SIZE 64 // Divisor used by GPUCA_GET_WARP_COUNT
33 #define GPUCA_THREAD_COUNT 256
34 #define GPUCA_LB_GPUTPCCreateTrackingData 256 // GPUCA_LB_* = launch bounds: maxThreads per block[, minBlocks per multiprocessor[, forced number of blocks]]
35 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
36 #define GPUCA_LB_GPUTPCStartHitsFinder 1024
37 #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2
38 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3
39 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
40 #define GPUCA_LB_GPUTPCNeighboursCleaner 896
41 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
42 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
43 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
44 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
45 #define GPUCA_LB_GPUTPCCFGather 1024, 1
46 #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1
47 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
48 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
49 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
50 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
51 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512
52 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512
53 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512
54 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512
55 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512
56 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
57 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
58 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
59 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512
60 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512
61 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512
62 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
63 #define GPUCA_LB_GPUTPCGMMergerCollect 512
64 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
65 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
66 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
67 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
68 #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256 // Name must match the "#ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0" default guard, otherwise this setting is ignored
69 #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256 // Same: matches the _step1 default guard
70 #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256 // Same: matches the _step2 default guard
71 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
72 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
73 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
74 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
75 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
76 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
77 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
78 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
79 #define GPUCA_LB_GPUTPCCFPeakFinder 512
80 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
81 #define GPUCA_LB_GPUTPCCFDeconvolution 512
82 #define GPUCA_LB_GPUTPCCFClusterizer 448
83 #define GPUCA_LB_COMPRESSION_GATHER 1024
84 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 // Checked against GPUCA_MAXN by a static_assert further down
85 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 // Checked against GPUCA_ROW_COUNT by a static_assert further down
86 #define GPUCA_ALTERNATE_BORDER_SORT 1
87 #define GPUCA_SORT_BEFORE_FIT 1
88 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
89 #define GPUCA_NO_ATOMIC_PRECHECK 1
90 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
91 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
92 #define GPUCA_COMP_GATHER_KERNEL 4
93 #define GPUCA_COMP_GATHER_MODE 3
94#elif defined(GPUCA_GPUTYPE_VEGA)
95 #define GPUCA_WARP_SIZE 64
96 #define GPUCA_THREAD_COUNT 256
97 #define GPUCA_LB_GPUTPCCreateTrackingData 128
98 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2
99 #define GPUCA_LB_GPUTPCStartHitsFinder 1024
100 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
101 #define GPUCA_LB_GPUTPCTrackletSelector 256, 8
102 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
103 #define GPUCA_LB_GPUTPCNeighboursCleaner 896
104 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
105 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
106 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
107 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
108 #define GPUCA_LB_GPUTPCCFGather 1024, 1
109 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1
110 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200
111 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
112 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
113 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
114 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
115 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
116 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
117 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
118 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
119 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
120 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
121 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
122 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
123 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
124 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
125 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
126 #define GPUCA_LB_GPUTPCGMMergerCollect 512
127 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
128 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
129 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
130 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
131 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256
132 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
133 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
134 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2
135 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
136 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
137 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
138 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
139 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
140 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
141 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
142 #define GPUCA_LB_GPUTPCCFPeakFinder 512
143 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
144 #define GPUCA_LB_GPUTPCCFDeconvolution 512
145 #define GPUCA_LB_GPUTPCCFClusterizer 512
146 #define GPUCA_LB_COMPRESSION_GATHER 1024
147 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
148 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
149 #define GPUCA_ALTERNATE_BORDER_SORT 1
150 #define GPUCA_SORT_BEFORE_FIT 1
151 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
152 #define GPUCA_NO_ATOMIC_PRECHECK 1
153 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
154 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
155 #define GPUCA_COMP_GATHER_KERNEL 4
156 #define GPUCA_COMP_GATHER_MODE 3
157#elif defined(GPUCA_GPUTYPE_AMPERE)
158 #define GPUCA_WARP_SIZE 32
159 #define GPUCA_THREAD_COUNT 512
160 #define GPUCA_LB_GPUTPCCreateTrackingData 384
161 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
162 #define GPUCA_LB_GPUTPCStartHitsFinder 512
163 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4
164 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4
165 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1
166 #define GPUCA_LB_GPUTPCNeighboursCleaner 512
167 #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4
168 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10
169 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
170 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
171 #define GPUCA_LB_GPUTPCCFGather 1024, 1
172 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4
173 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
174 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6
175 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
176 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
177 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
178 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
179 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
180 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
181 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
182 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
183 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
184 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
185 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
186 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2
187 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
188 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
189 #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2
190 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
191 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
192 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
193 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
194 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256
195 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
196 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
197 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
198 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3
199 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
200 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
201 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8
202 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448
203 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448
204 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448
205 #define GPUCA_LB_GPUTPCCFPeakFinder 128
206 #define GPUCA_LB_GPUTPCCFNoiseSuppression 448
207 #define GPUCA_LB_GPUTPCCFDeconvolution 384
208 #define GPUCA_LB_GPUTPCCFClusterizer 448
209 #define GPUCA_LB_COMPRESSION_GATHER 1024
210 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
211 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
212 #define GPUCA_ALTERNATE_BORDER_SORT 1
213 #define GPUCA_SORT_BEFORE_FIT 1
214 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
215 #define GPUCA_NO_ATOMIC_PRECHECK 1
216 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
217 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
218 #define GPUCA_COMP_GATHER_KERNEL 4
219 #define GPUCA_COMP_GATHER_MODE 3
220#elif defined(GPUCA_GPUTYPE_TURING)
221 #define GPUCA_WARP_SIZE 32 // Divisor used by GPUCA_GET_WARP_COUNT
222 #define GPUCA_THREAD_COUNT 512
223 #define GPUCA_LB_GPUTPCCreateTrackingData 256 // GPUCA_LB_* = launch bounds: maxThreads per block[, minBlocks per multiprocessor[, forced number of blocks]]
224 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
225 #define GPUCA_LB_GPUTPCStartHitsFinder 512
226 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
227 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3
228 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1
229 #define GPUCA_LB_GPUTPCNeighboursCleaner 512
230 #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2
231 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8
232 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
233 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
234 #define GPUCA_LB_GPUTPCCFGather 1024, 1
235 #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8
236 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4
237 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5
238 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
239 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
240 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
241 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
242 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
243 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
244 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
245 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
246 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
247 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
248 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
249 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
250 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
251 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
252 #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2
253 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
254 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
255 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
256 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
257 #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256 // Name must match the "#ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0" default guard, otherwise this setting is ignored
258 #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256 // Same: matches the _step1 default guard
259 #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256 // Same: matches the _step2 default guard
260 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128
261 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
262 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
263 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
264 #define GPUCA_LB_COMPRESSION_GATHER 1024
265 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 // Checked against GPUCA_MAXN by a static_assert further down
266 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 // Checked against GPUCA_ROW_COUNT by a static_assert further down
267 #define GPUCA_ALTERNATE_BORDER_SORT 1
268 #define GPUCA_SORT_BEFORE_FIT 1
269 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
270 #define GPUCA_NO_ATOMIC_PRECHECK 1
271 #define GPUCA_COMP_GATHER_KERNEL 4
272 #define GPUCA_COMP_GATHER_MODE 3
273 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
274 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
275 // #define GPUCA_USE_TEXTURES
276#elif defined(GPUCA_GPUTYPE_OPENCL)
277#else
278 #error GPU TYPE NOT SET
279#endif
280#endif // GPUCA_GPUCODE
281
282#ifdef GPUCA_GPUCODE
283 // Default settings for GPU, if not already set for selected GPU type
284 #ifndef GPUCA_THREAD_COUNT // Each guard below supplies a fallback only if the selected GPU type left the macro undefined
285 #define GPUCA_THREAD_COUNT 256
286 #endif
287 #ifndef GPUCA_LB_GPUTPCCreateTrackingData
288 #define GPUCA_LB_GPUTPCCreateTrackingData 256
289 #endif
290 #ifndef GPUCA_LB_GPUTPCTrackletConstructor
291 #define GPUCA_LB_GPUTPCTrackletConstructor 256
292 #endif
293 #ifndef GPUCA_LB_GPUTPCTrackletSelector
294 #define GPUCA_LB_GPUTPCTrackletSelector 256
295 #endif
296 #ifndef GPUCA_LB_GPUTPCNeighboursFinder
297 #define GPUCA_LB_GPUTPCNeighboursFinder 256
298 #endif
299 #ifndef GPUCA_LB_GPUTPCNeighboursCleaner
300 #define GPUCA_LB_GPUTPCNeighboursCleaner 256
301 #endif
302 #ifndef GPUCA_LB_GPUTPCExtrapolationTracking
303 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
304 #endif
305 #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion
306 #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 512
307 #endif
308 #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill
309 #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 256
310 #endif
311 #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fold
312 #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 256
313 #endif
314 #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version
315 #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 512
316 #endif
317 #ifndef GPUCA_LB_GPUTPCConvertKernel
318 #define GPUCA_LB_GPUTPCConvertKernel 256
319 #endif
320 #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached
321 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 256
322 #endif
323 #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached
324 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 256 // First value is checked against GPUCA_TPC_COMP_CHUNK_SIZE by a static_assert further down
325 #endif
326 #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached
327 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 256
328 #endif
329 #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached
330 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 256
331 #endif
332 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow
333 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256
334 #endif
335 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters
336 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256
337 #endif
338 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters
339 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256
340 #endif
341 #ifndef GPUCA_LB_GPUTPCCFDecodeZS
342 #define GPUCA_LB_GPUTPCCFDecodeZS 128, 4 // maxThreads per block 128, minBlocks per multiprocessor 4
343 #endif
344 #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink
345 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE // maxThreads equals the warp size; GPUCA_WARP_SIZE may only be defined further down, which is fine because macros expand at the point of use
346 #endif
347 #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink
348 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
349 #endif
350 #ifndef GPUCA_LB_GPUTPCCFGather
351 #define GPUCA_LB_GPUTPCCFGather 1024, 1
352 #endif
353 #ifndef GPUCA_LB_COMPRESSION_GATHER
354 #define GPUCA_LB_COMPRESSION_GATHER 1024 // Shared by all GPUCA_LB_GPUTPCCompressionGatherKernels_* aliases below
355 #endif
356 #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit // Fallback launch bounds for the GPUTPCGMMerger* kernels, used when the GPU type did not set them
357 #define GPUCA_LB_GPUTPCGMMergerTrackFit 256
358 #endif
359 #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers
360 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256
361 #endif
362 #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit
363 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
364 #endif
365 #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds
366 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
367 #endif
368 #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal
369 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
370 #endif
371 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0
372 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
373 #endif
374 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1
375 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
376 #endif
377 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2
378 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
379 #endif
380 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3
381 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
382 #endif
383 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4
384 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
385 #endif
386 #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks
387 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
388 #endif
389 #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare
390 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
391 #endif
392 #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare
393 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
394 #endif
395 #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0
396 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
397 #endif
398 #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2
399 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
400 #endif
401 #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE
402 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
403 #endif
404 #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks
405 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
406 #endif
407 #ifndef GPUCA_LB_GPUTPCGMMergerCollect
408 #define GPUCA_LB_GPUTPCGMMergerCollect 256
409 #endif
410 #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksPrepare
411 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
412 #endif
413 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0
414 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
415 #endif
416 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1
417 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
418 #endif
419 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2
420 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
421 #endif
422 #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 // NOTE(review): a per-GPU section can only override this default by using this exact _stepN spelling
423 #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
424 #endif
425 #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1
426 #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
427 #endif
428 #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2
429 #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
430 #endif
431 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0
432 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 256
433 #endif
434 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1
435 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 256
436 #endif
437 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2
438 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 256
439 #endif
440 #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare
441 #define GPUCA_LB_GPUTPCGMO2Output_prepare 256
442 #endif
443 #ifndef GPUCA_LB_GPUTPCGMO2Output_output
444 #define GPUCA_LB_GPUTPCGMO2Output_output 256
445 #endif
446 #ifndef GPUCA_LB_GPUITSFitterKernels
447 #define GPUCA_LB_GPUITSFitterKernels 256
448 #endif
449 #ifndef GPUCA_LB_GPUTPCStartHitsFinder // Fallback launch bounds, used when the GPU type did not set them
450 #define GPUCA_LB_GPUTPCStartHitsFinder 256
451 #endif
452 #ifndef GPUCA_LB_GPUTPCStartHitsSorter
453 #define GPUCA_LB_GPUTPCStartHitsSorter 256
454 #endif
455 #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline
456 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
457 #endif
458 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap
459 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
460 #endif
461 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits
462 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
463 #endif
464 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart
465 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
466 #endif
467 #ifndef GPUCA_LB_GPUTPCCFPeakFinder
468 #define GPUCA_LB_GPUTPCCFPeakFinder 512
469 #endif
470 #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression
471 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 // Reused by the _noiseSuppression and _updatePeaks aliases defined after these defaults
472 #endif
473 #ifndef GPUCA_LB_GPUTPCCFDeconvolution
474 #define GPUCA_LB_GPUTPCCFDeconvolution 512
475 #endif
476 #ifndef GPUCA_LB_GPUTPCCFClusterizer
477 #define GPUCA_LB_GPUTPCCFClusterizer 512
478 #endif
479 #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels
480 #define GPUCA_LB_GPUTPCNNClusterizerKernels 512 // Reused by the GPUCA_LB_GPUTPCNNClusterizerKernels_* aliases (only defined with GPUCA_HAS_ONNX)
481 #endif
482 #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU
483 #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256
484 #endif
485 #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov
486 #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256
487 #endif
488 #ifndef GPUCA_LB_GPUMemClean16
489 #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 // maxThreads = GPUCA_THREAD_COUNT, minBlocks per multiprocessor = 1
490 #endif
491 #ifndef GPUCA_LB_GPUitoa
492 #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1
493 #endif
494 #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) // GPU build: presumably yields the first launch-bound value (maxThreads); GPUCA_M_FIRST is defined outside this file - confirm
495
496 // These kernel launch-bounds are derived from one of the constants set above
497 #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression
498 #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression
499
500 #ifdef GPUCA_HAS_ONNX
501 #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels
502 #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels
503 #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels
504 #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels
505 #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels
506 #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels
507 #endif
508
509 #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN // GPUCA_THREAD_COUNT_SCAN is defined further down; macros expand at the point of use, so the order is fine
510 #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN
511 #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN
512 #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN
513 #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN
514 #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER // All gather-kernel variants share GPUCA_LB_COMPRESSION_GATHER
515 #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER
516 #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER
517 #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER
518 #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER
519#else
520 #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block.
521#endif
522
523#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) // Thread count divided by the warp size (1 / 1 on the host)
524
525#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
526
527#if defined(__CUDACC__) || defined(__HIPCC__) // Only when compiling with the CUDA or HIP compiler
528 #define GPUCA_SPECIALIZE_THRUST_SORTS
529#endif
530
531#ifndef GPUCA_NEIGHBORSFINDER_REGS
532 #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0
533#endif
534#ifdef GPUCA_GPUCODE // GPU build: fill in every algorithm parameter the selected GPU type did not set
535 #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
536 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
537 #endif
538 #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
539 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
540 #endif
541 #ifndef GPUCA_ALTERNATE_BORDER_SORT
542 #define GPUCA_ALTERNATE_BORDER_SORT 0
543 #endif
544 #ifndef GPUCA_SORT_BEFORE_FIT
545 #define GPUCA_SORT_BEFORE_FIT 0
546 #endif
547 #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
548 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
549 #endif
550 #ifndef GPUCA_COMP_GATHER_KERNEL
551 #define GPUCA_COMP_GATHER_KERNEL 0
552 #endif
553 #ifndef GPUCA_COMP_GATHER_MODE
554 #define GPUCA_COMP_GATHER_MODE 2
555 #endif
556#else // Non-GPU build: the parameters are defined unconditionally
557 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
558 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
559 #define GPUCA_ALTERNATE_BORDER_SORT 0
560 #define GPUCA_SORT_BEFORE_FIT 0
561 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
562 #define GPUCA_THREAD_COUNT_FINDER 1
563 #define GPUCA_COMP_GATHER_KERNEL 0
564 #define GPUCA_COMP_GATHER_MODE 0
565#endif
566#ifndef GPUCA_DEDX_STORAGE_TYPE // float unless a GPU type selected another storage type above
567 #define GPUCA_DEDX_STORAGE_TYPE float
568#endif
569#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE // float unless a GPU type selected another type above
570 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
571#endif
572#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) // NOTE(review): presumably float in deterministic builds, the configured type otherwise - confirm against GPUCA_DETERMINISTIC_CODE
573#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE)
574
575#ifndef GPUCA_WARP_SIZE // Fallback when no GPU type set a warp size
576 #ifdef GPUCA_GPUCODE
577 #define GPUCA_WARP_SIZE 32
578 #else
579 #define GPUCA_WARP_SIZE 1
580 #endif
581#endif
582
583#define GPUCA_MAX_THREADS 1024
584#define GPUCA_MAX_STREAMS 36
585
586#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU
587#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid
588#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
589#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
590
591// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling
592
593// Default maximum numbers
594#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
595#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
596#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
597#define GPUCA_TRACKER_CONSTANT_MEM ((size_t) 63 * 1024) // Amount of Constant Memory to reserve
598#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
599#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
600#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
601#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // GPU heap size. NOTE(review): the factor 1025 looks like a typo for 1024 - confirm before changing
602
603#define GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format); evaluates to 2^25
604
605// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
606
607// Some assertions to make sure the parameters are not invalid
608#if defined(GPUCA_GPUCODE) // The checks only apply to GPU builds
609 static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); // Must not exceed GPUCA_MAXN (defined outside this file)
610 static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); // Must not exceed GPUCA_ROW_COUNT (defined outside this file)
611 static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); // Twice the first launch-bound value of the step1unattached kernel must fit into one chunk
612#endif
613
614// Derived parameters
615#ifdef GPUCA_USE_TEXTURES // Not set by anything visible in this file; the only occurrence above is commented out
616 #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache
617 #define GPUCA_TEXTURE_FETCH_NEIGHBORS // Fetch also in Neighbours Finder
618#endif
619#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) // Start-hit sorting is enabled only in GPU builds
620 #define GPUCA_SORT_STARTHITS
621#endif
622
623#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) // GPUCA_BUFFER_ALIGNMENT wrapped as std::align_val_t; users must have <new> included
624#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT // Leading comma so it can be appended to an argument list
625
626// clang-format on
627#endif // GPUDEFPARAMETERSDEFAULT_H
#define GPUCA_MAXN
#define GPUCA_TPC_COMP_CHUNK_SIZE
#define GPUCA_M_FIRST(...)
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
#define GPUCA_ROW_COUNT