Project
Loading...
Searching...
No Matches
GPUDefGPUParameters.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15// This file contains compile-time constants affecting the GPU performance.
16// Many of these constants are GPU-architecture specific.
17// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc.
18// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h
19
20#ifndef GPUDEFGPUPARAMETERS_H
21#define GPUDEFGPUPARAMETERS_H
22// clang-format off
23
24#ifndef GPUDEF_H
25#error Please include GPUDef.h
26#endif
27
28#include "GPUDefMacros.h"
29
30// GPU Run Configuration
31#ifdef GPUCA_GPUCODE
32#if defined(GPUCA_GPUTYPE_MI2xx)
33 #define GPUCA_WARP_SIZE 64
34 #define GPUCA_THREAD_COUNT 256
35 #define GPUCA_LB_GPUTPCCreateTrackingData 256
36 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
37 #define GPUCA_LB_GPUTPCStartHitsFinder 1024
38 #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2
39 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3
40 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
41 #define GPUCA_LB_GPUTPCNeighboursCleaner 896
42 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
43 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
44 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
45 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
46 #define GPUCA_LB_GPUTPCCFGather 1024, 1
47 #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1
48 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
49 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
50 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
51 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
52 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512
53 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512
54 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512
55 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512
56 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512
57 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
58 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
59 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
60 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512
61 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512
62 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512
63 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
64 #define GPUCA_LB_GPUTPCGMMergerCollect 512
65 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
66 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
67 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
68 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
69 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 // NOTE(review): other multi-step kernels in this file use _step0/_step1/_step2 suffixes; confirm that _0/_1/_2 is the name actually looked up
70 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
71 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
72 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
73 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
74 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
75 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
76 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
77 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
78 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
79 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
80 #define GPUCA_LB_GPUTPCCFPeakFinder 512
81 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
82 #define GPUCA_LB_GPUTPCCFDeconvolution 512
83 #define GPUCA_LB_GPUTPCCFClusterizer 448
84 #define GPUCA_LB_COMPRESSION_GATHER 1024
85 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
86 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
87 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
88 #define GPUCA_SELECTOR_IN_PIPELINE 1
89 #define GPUCA_ALTERNATE_BORDER_SORT 1
90 #define GPUCA_SORT_BEFORE_FIT 1
91 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
92 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1
93 #define GPUCA_NO_ATOMIC_PRECHECK 1
94 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
95 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
96 #define GPUCA_COMP_GATHER_KERNEL 4
97 #define GPUCA_COMP_GATHER_MODE 3
98#elif defined(GPUCA_GPUTYPE_VEGA)
99 #define GPUCA_WARP_SIZE 64
100 #define GPUCA_THREAD_COUNT 256
101 #define GPUCA_LB_GPUTPCCreateTrackingData 128
102 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2
103 #define GPUCA_LB_GPUTPCStartHitsFinder 1024
104 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
105 #define GPUCA_LB_GPUTPCTrackletSelector 256, 8
106 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
107 #define GPUCA_LB_GPUTPCNeighboursCleaner 896
108 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
109 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
110 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
111 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
112 #define GPUCA_LB_GPUTPCCFGather 1024, 1
113 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1
114 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200
115 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
116 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
117 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
118 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
119 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
120 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
121 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
122 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
123 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
124 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
125 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
126 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
127 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
128 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
129 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
130 #define GPUCA_LB_GPUTPCGMMergerCollect 512
131 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
132 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
133 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
134 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
135 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 // NOTE(review): other multi-step kernels in this file use _step0/_step1/_step2 suffixes; confirm that _0/_1/_2 is the name actually looked up
136 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
137 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
138 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2
139 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
140 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
141 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
142 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
143 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
144 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
145 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
146 #define GPUCA_LB_GPUTPCCFPeakFinder 512
147 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
148 #define GPUCA_LB_GPUTPCCFDeconvolution 512
149 #define GPUCA_LB_GPUTPCCFClusterizer 512
150 #define GPUCA_LB_COMPRESSION_GATHER 1024
151 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
152 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
153 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
154 #define GPUCA_SELECTOR_IN_PIPELINE 1
155 #define GPUCA_ALTERNATE_BORDER_SORT 1
156 #define GPUCA_SORT_BEFORE_FIT 1
157 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
158 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1
159 #define GPUCA_NO_ATOMIC_PRECHECK 1
160 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
161 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
162 #define GPUCA_COMP_GATHER_KERNEL 4
163 #define GPUCA_COMP_GATHER_MODE 3
164#elif defined(GPUCA_GPUTYPE_AMPERE)
165 #define GPUCA_WARP_SIZE 32
166 #define GPUCA_THREAD_COUNT 512
167 #define GPUCA_LB_GPUTPCCreateTrackingData 384
168 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
169 #define GPUCA_LB_GPUTPCStartHitsFinder 512
170 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4
171 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4
172 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1
173 #define GPUCA_LB_GPUTPCNeighboursCleaner 512
174 #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4
175 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10
176 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
177 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
178 #define GPUCA_LB_GPUTPCCFGather 1024, 1
179 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4
180 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
181 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6
182 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
183 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
184 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
185 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
186 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
187 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
188 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
189 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
190 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
191 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
193 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2
194 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
195 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
196 #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2
197 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
198 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
199 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
200 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
201 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 // NOTE(review): other multi-step kernels in this file use _step0/_step1/_step2 suffixes; confirm that _0/_1/_2 is the name actually looked up
202 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
203 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
204 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
205 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3
206 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
207 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
208 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8
209 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448
210 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448
211 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448
212 #define GPUCA_LB_GPUTPCCFPeakFinder 128
213 #define GPUCA_LB_GPUTPCCFNoiseSuppression 448
214 #define GPUCA_LB_GPUTPCCFDeconvolution 384
215 #define GPUCA_LB_GPUTPCCFClusterizer 448
216 #define GPUCA_LB_COMPRESSION_GATHER 1024
217 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
218 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
219 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
220 #define GPUCA_SELECTOR_IN_PIPELINE 1
221 #define GPUCA_ALTERNATE_BORDER_SORT 1
222 #define GPUCA_SORT_BEFORE_FIT 1
223 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
224 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1
225 #define GPUCA_NO_ATOMIC_PRECHECK 1
226 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
227 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
228 #define GPUCA_COMP_GATHER_KERNEL 4
229 #define GPUCA_COMP_GATHER_MODE 3
230#elif defined(GPUCA_GPUTYPE_TURING)
231 #define GPUCA_WARP_SIZE 32
232 #define GPUCA_THREAD_COUNT 512
233 #define GPUCA_LB_GPUTPCCreateTrackingData 256
234 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
235 #define GPUCA_LB_GPUTPCStartHitsFinder 512
236 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
237 #define GPUCA_LB_GPUTPCTrackletSelector 192, 3
238 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1
239 #define GPUCA_LB_GPUTPCNeighboursCleaner 512
240 #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2
241 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8
242 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
243 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
244 #define GPUCA_LB_GPUTPCCFGather 1024, 1
245 #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8
246 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4
247 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5
248 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
249 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
250 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
251 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
252 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
253 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
254 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
255 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
257 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
258 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
259 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
260 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
261 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
262 #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2
263 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
264 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
265 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
266 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
267 #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 // NOTE(review): other multi-step kernels in this file use _step0/_step1/_step2 suffixes; confirm that _0/_1/_2 is the name actually looked up
268 #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256
269 #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256
270 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128
271 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
272 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
273 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
274 #define GPUCA_LB_COMPRESSION_GATHER 1024
275 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
276 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
277 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
278 #define GPUCA_SELECTOR_IN_PIPELINE 1
279 #define GPUCA_ALTERNATE_BORDER_SORT 1
280 #define GPUCA_SORT_BEFORE_FIT 1
281 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
282 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1
283 #define GPUCA_NO_ATOMIC_PRECHECK 1
284 #define GPUCA_COMP_GATHER_KERNEL 4
285 #define GPUCA_COMP_GATHER_MODE 3
286 #define GPUCA_DEDX_STORAGE_TYPE uint16_t
287 #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
288 // #define GPUCA_USE_TEXTURES
289#elif defined(GPUCA_GPUTYPE_OPENCL)
290#else
291 #error GPU TYPE NOT SET
292#endif
293#endif // GPUCA_GPUCODE
294
295#ifdef GPUCA_GPUCODE
296 // Default settings, if not already set for selected GPU type
297 #ifndef GPUCA_THREAD_COUNT
298 #define GPUCA_THREAD_COUNT 256
299 #endif
300 #ifndef GPUCA_LB_GPUTPCCreateTrackingData
301 #define GPUCA_LB_GPUTPCCreateTrackingData 256
302 #endif
303 #ifndef GPUCA_LB_GPUTPCTrackletConstructor
304 #define GPUCA_LB_GPUTPCTrackletConstructor 256
305 #endif
306 #ifndef GPUCA_LB_GPUTPCTrackletSelector
307 #define GPUCA_LB_GPUTPCTrackletSelector 256
308 #endif
309 #ifndef GPUCA_LB_GPUTPCNeighboursFinder
310 #define GPUCA_LB_GPUTPCNeighboursFinder 256
311 #endif
312 #ifndef GPUCA_LB_GPUTPCNeighboursCleaner
313 #define GPUCA_LB_GPUTPCNeighboursCleaner 256
314 #endif
315 #ifndef GPUCA_LB_GPUTPCExtrapolationTracking
316 #define GPUCA_LB_GPUTPCExtrapolationTracking 256
317 #endif
318 #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion
319 #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 512
320 #endif
321 #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill
322 #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 256
323 #endif
324 #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fold
325 #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 256
326 #endif
327 #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version
328 #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 512
329 #endif
330 #ifndef GPUCA_LB_GPUTPCConvertKernel
331 #define GPUCA_LB_GPUTPCConvertKernel 256
332 #endif
333 #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached
334 #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 256
335 #endif
336 #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached
337 #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 256
338 #endif
339 #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached
340 #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 256
341 #endif
342 #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached
343 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 256
344 #endif
345 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow
346 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256
347 #endif
348 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters
349 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256
350 #endif
351 #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters
352 #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256
353 #endif
354 #ifndef GPUCA_LB_GPUTPCCFDecodeZS
355 #define GPUCA_LB_GPUTPCCFDecodeZS 128, 4
356 #endif
357 #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink
358 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
359 #endif
360 #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink
361 #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
362 #endif
363 #ifndef GPUCA_LB_GPUTPCCFGather
364 #define GPUCA_LB_GPUTPCCFGather 1024, 1
365 #endif
366 #ifndef GPUCA_LB_COMPRESSION_GATHER
367 #define GPUCA_LB_COMPRESSION_GATHER 1024
368 #endif
369 #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit
370 #define GPUCA_LB_GPUTPCGMMergerTrackFit 256
371 #endif
372 #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers
373 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256
374 #endif
375 #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit
376 #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
377 #endif
378 #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds
379 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
380 #endif
381 #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal
382 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
383 #endif
384 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0
385 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
386 #endif
387 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1
388 #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
389 #endif
390 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2
391 #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
392 #endif
393 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3
394 #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
395 #endif
396 #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4
397 #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
398 #endif
399 #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks
400 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
401 #endif
402 #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare
403 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
404 #endif
405 #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare
406 #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
407 #endif
408 #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0
409 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
410 #endif
411 #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2
412 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
413 #endif
414 #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE
415 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
416 #endif
417 #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks
418 #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
419 #endif
420 #ifndef GPUCA_LB_GPUTPCGMMergerCollect
421 #define GPUCA_LB_GPUTPCGMMergerCollect 256
422 #endif
423 #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksPrepare
424 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
425 #endif
426 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0
427 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
428 #endif
429 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1
430 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
431 #endif
432 #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2
433 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
434 #endif
// Launch bounds for the three GPUTPCGMMergerFinalize steps.
// The per-GPU tables earlier in this file spell these GPUCA_LB_GPUTPCGMMergerFinalize_0/_1/_2,
// whereas this section (like every other multi-step kernel here) uses the _step0/_step1/_step2
// suffix. Take the value from the short spelling when it is defined, so that per-GPU tuning is
// not silently replaced by the generic default of 256.
#ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0
  #ifdef GPUCA_LB_GPUTPCGMMergerFinalize_0
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 GPUCA_LB_GPUTPCGMMergerFinalize_0
  #else
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
  #endif
#endif
#ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1
  #ifdef GPUCA_LB_GPUTPCGMMergerFinalize_1
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 GPUCA_LB_GPUTPCGMMergerFinalize_1
  #else
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
  #endif
#endif
#ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2
  #ifdef GPUCA_LB_GPUTPCGMMergerFinalize_2
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 GPUCA_LB_GPUTPCGMMergerFinalize_2
  #else
    #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
  #endif
#endif
444 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0
445 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 256
446 #endif
447 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1
448 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 256
449 #endif
450 #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2
451 #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 256
452 #endif
453 #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare
454 #define GPUCA_LB_GPUTPCGMO2Output_prepare 256
455 #endif
456 #ifndef GPUCA_LB_GPUTPCGMO2Output_output
457 #define GPUCA_LB_GPUTPCGMO2Output_output 256
458 #endif
459 #ifndef GPUCA_LB_GPUITSFitterKernels
460 #define GPUCA_LB_GPUITSFitterKernels 256
461 #endif
462 #ifndef GPUCA_LB_GPUTPCStartHitsFinder
463 #define GPUCA_LB_GPUTPCStartHitsFinder 256
464 #endif
465 #ifndef GPUCA_LB_GPUTPCStartHitsSorter
466 #define GPUCA_LB_GPUTPCStartHitsSorter 256
467 #endif
468 #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline
469 #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
470 #endif
471 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap
472 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
473 #endif
474 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits
475 #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
476 #endif
477 #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart
478 #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
479 #endif
480 #ifndef GPUCA_LB_GPUTPCCFPeakFinder
481 #define GPUCA_LB_GPUTPCCFPeakFinder 512
482 #endif
483 #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression
484 #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
485 #endif
486 #ifndef GPUCA_LB_GPUTPCCFDeconvolution
487 #define GPUCA_LB_GPUTPCCFDeconvolution 512
488 #endif
489 #ifndef GPUCA_LB_GPUTPCCFClusterizer
490 #define GPUCA_LB_GPUTPCCFClusterizer 512
491 #endif
492 #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU
493 #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256
494 #endif
495 #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov
496 #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256
497 #endif
498 #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) // GPU code: the thread count is the first element of a GPUCA_LB_* list
499#else
500 // The following defaults are needed to compile the host code
501 #define GPUCA_GET_THREAD_COUNT(...) 1 // Host code: always evaluates to 1, the arguments are ignored
502#endif
503
504#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) // Thread count divided by GPUCA_WARP_SIZE
505
506#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
507
508#define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression
509#define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression
510#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN
511#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN
512#define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN
513#define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN
514#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN
515#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor
516#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor
517#define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER
518#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER
519#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER
520#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER
521#define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER
522
523#if defined(__CUDACC__) || defined(__HIPCC__)
524#define GPUCA_SPECIALIZE_THRUST_SORTS
525#endif
526
527#ifndef GPUCA_NEIGHBORSFINDER_REGS
528#define GPUCA_NEIGHBORSFINDER_REGS NONE, 0
529#endif
530#ifdef GPUCA_GPUCODE
531 #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
532 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
533 #endif
534 #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
535 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
536 #endif
537 #ifndef GPUCA_CONSTRUCTOR_IN_PIPELINE
538 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
539 #endif
540 #ifndef GPUCA_SELECTOR_IN_PIPELINE
541 #define GPUCA_SELECTOR_IN_PIPELINE 0
542 #endif
543 #ifndef GPUCA_ALTERNATE_BORDER_SORT
544 #define GPUCA_ALTERNATE_BORDER_SORT 0
545 #endif
546 #ifndef GPUCA_SORT_BEFORE_FIT
547 #define GPUCA_SORT_BEFORE_FIT 0
548 #endif
549 #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
550 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
551 #endif
552 #ifndef GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT
553 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 8 // Currently must be smaller than the number of available multiprocessors on the GPU, otherwise the results are wrong
554 #endif
555 #ifndef GPUCA_COMP_GATHER_KERNEL
556 #define GPUCA_COMP_GATHER_KERNEL 0
557 #endif
558 #ifndef GPUCA_COMP_GATHER_MODE
559 #define GPUCA_COMP_GATHER_MODE 2
560 #endif
561#else
562 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
563 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
564 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
565 #define GPUCA_SELECTOR_IN_PIPELINE 1
566 #define GPUCA_ALTERNATE_BORDER_SORT 0
567 #define GPUCA_SORT_BEFORE_FIT 0
568 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
569 #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1
570 #define GPUCA_THREAD_COUNT_FINDER 1
571 #define GPUCA_COMP_GATHER_KERNEL 0
572 #define GPUCA_COMP_GATHER_MODE 0
573#endif
574#ifndef GPUCA_DEDX_STORAGE_TYPE
575#define GPUCA_DEDX_STORAGE_TYPE float
576#endif
577#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
578#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
579#endif
580#ifdef GPUCA_NO_FAST_MATH // Without fast math both storage types are forced back to float, overriding any earlier definition
581#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
582#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
583#undef GPUCA_DEDX_STORAGE_TYPE
584#define GPUCA_DEDX_STORAGE_TYPE float
585#endif
586
587#ifndef GPUCA_WARP_SIZE // Fallback when no GPU type block above has set a warp size
588#ifdef GPUCA_GPUCODE
589#define GPUCA_WARP_SIZE 32 // GPU code
590#else
591#define GPUCA_WARP_SIZE 1 // Host code
592#endif
593#endif
594
595#define GPUCA_MAX_THREADS 1024
596#define GPUCA_MAX_STREAMS 36
597
598#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU
599#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid
600#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
601#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
602
603// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling
604
605// Default maximum numbers
606#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
607#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
608#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
609#define GPUCA_TRACKER_CONSTANT_MEM ((size_t) 63 * 1024) // Amount of Constant Memory to reserve
610#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
611#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
612#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
613#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // GPU heap size. NOTE(review): the factor 1025 (instead of 1024) looks like a typo - confirm before changing
614
615#define GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format)
616
617// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
618
619// Some assertions to make sure our parameters are not invalid
620 static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
621 static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
622 #ifdef GPUCA_GPUCODE
623 static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
624 #endif
625
626// Derived parameters
627#ifdef GPUCA_USE_TEXTURES
628 #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache
629 #define GPUCA_TEXTURE_FETCH_NEIGHBORS // Fetch also in Neighbours Finder
630#endif
631#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE)
632 #define GPUCA_SORT_STARTHITS
633#endif
634
635#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) // GPUCA_BUFFER_ALIGNMENT wrapped as std::align_val_t (declared in <new>)
636#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT // Same value with a leading comma; presumably meant to be appended after an existing argument - confirm at the use sites
637
638 // clang-format on
639#endif
#define GPUCA_MAXN
#define GPUCA_TPC_COMP_CHUNK_SIZE
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
#define GPUCA_M_FIRST(...)
#define GPUCA_ROW_COUNT