Project
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
GPUCommonMath.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCOMMONMATH_H
16#define GPUCOMMONMATH_H
17
18#include "GPUCommonDef.h"
19
20#if defined(__CUDACC__) && !defined(__clang__) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
21#include <sm_20_atomic_functions.h>
22#endif
23
24#if !defined(GPUCA_GPUCODE_DEVICE)
25#include <cmath>
26#include <algorithm>
27#include <atomic>
28#include <limits>
29#include <cstring>
30#endif
31
32#if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__))
33#include <cstdint>
34#endif
35
36// GPUCA_CHOICE Syntax: GPUCA_CHOICE(Host, CUDA&HIP, OpenCL)
37#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off
38 #define GPUCA_CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP
39#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__)
40 #define GPUCA_CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL
41#else
42 #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host
43#endif // clang-format on
44
45namespace o2::gpu
46{
47
49{
50 public:
51 GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better approach that is constexpr
52
53 template <class T>
54 GPUhd() constexpr static T Min(const T x, const T y)
55 {
56 return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y));
57 }
58 template <class T>
59 GPUhd() constexpr static T Max(const T x, const T y)
60 {
61 return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y));
62 }
63 template <class T, class S, class R>
64 GPUd() static T MinWithRef(T x, T y, S refX, S refY, R& r);
65 template <class T, class S, class R>
66 GPUd() static T MaxWithRef(T x, T y, S refX, S refY, R& r);
67 template <class T, class S, class R>
68 GPUd() static T MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r);
69 template <class T>
70 GPUdi() constexpr static T Clamp(const T v, const T lo, const T hi)
71 {
72 return Max(lo, Min(v, hi));
73 }
74 GPUhdni() constexpr static float Sqrt(float x);
75 GPUd() static float InvSqrt(float x);
76 template <class T>
77 GPUhd() constexpr static T Abs(T x);
78 GPUd() constexpr static float ASin(float x);
79 GPUd() constexpr static float ACos(float x);
80 GPUd() constexpr static float ATan(float x);
81 GPUhd() constexpr static float ATan2(float y, float x);
82 GPUd() constexpr static float Sin(float x);
83 GPUd() constexpr static float Cos(float x);
84 GPUhdni() static void SinCos(float x, float& s, float& c);
85 GPUhdni() static void SinCosd(double x, double& s, double& c);
86 GPUd() constexpr static float Tan(float x);
87 GPUd() constexpr static float Pow(float x, float y);
88 GPUd() constexpr static float Log(float x);
89 GPUd() constexpr static float Exp(float x);
90 GPUhdni() constexpr static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); }
91 GPUd() constexpr static float TwoPi() { return 6.2831853f; }
92 GPUd() constexpr static float Pi() { return 3.1415927f; }
93 GPUd() constexpr static float Round(float x);
94 GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); }
95 GPUd() static uint32_t Float2UIntReint(const float& x);
96 GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); }
97 GPUd() constexpr static int32_t Float2IntRn(float x);
98 GPUd() constexpr static float Modf(float x, float y);
99 GPUd() constexpr static bool Finite(float x);
100 GPUd() constexpr static bool IsNaN(float x);
101 GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits<float>::quiet_NaN(), __builtin_nanf(""), nan(0u)); }
102 GPUd() constexpr static uint32_t Clz(uint32_t val);
103 GPUd() constexpr static uint32_t Popcount(uint32_t val);
104
105 GPUd() static void memcpy(void* dst, const void* src, size_t size);
106
107 GPUhdi() constexpr static float Hypot(float x, float y) { return Sqrt(x * x + y * y); }
108 GPUhdi() constexpr static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); }
109 GPUhdi() constexpr static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); }
110
111 template <typename T>
112 GPUhd() constexpr static void Swap(T& a, T& b);
113
114 template <class T>
115 GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
116 {
117 return GPUCommonMath::AtomicExchInternal(addr, val);
118 }
119
120 template <class T>
121 GPUdi() static bool AtomicCAS(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
122 {
123 return GPUCommonMath::AtomicCASInternal(addr, cmp, val);
124 }
125
126 template <class T>
127 GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
128 {
129 return GPUCommonMath::AtomicAddInternal(addr, val);
130 }
131 template <class T>
132 GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
133 {
134 GPUCommonMath::AtomicMaxInternal(addr, val);
135 }
136 template <class T>
137 GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
138 {
139 GPUCommonMath::AtomicMinInternal(addr, val);
140 }
141 template <class T>
142 GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
143 {
144 return GPUCommonMath::AtomicExchInternal(addr, val);
145 }
146 template <class T>
147 GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
148 {
149 return GPUCommonMath::AtomicAddInternal(addr, val);
150 }
151 template <class T>
152 GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
153 {
154 GPUCommonMath::AtomicMaxInternal(addr, val);
155 }
156 template <class T>
157 GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
158 {
159 GPUCommonMath::AtomicMinInternal(addr, val);
160 }
161 GPUd() constexpr static int32_t Mul24(int32_t a, int32_t b);
162 GPUd() constexpr static float FMulRZ(float a, float b);
163
164 template <int32_t I, class T>
165 GPUd() constexpr static T nextMultipleOf(T val);
166
167 template <typename... Args>
168 GPUhdni() constexpr static float Sum2(float w, Args... args);
169
170 private:
171 template <class S, class T>
172 GPUd() static uint32_t AtomicExchInternal(S* addr, T val);
173 template <class S, class T>
174 GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
175 template <class S, class T>
176 GPUd() static uint32_t AtomicAddInternal(S* addr, T val);
177 template <class S, class T>
178 GPUd() static void AtomicMaxInternal(S* addr, T val);
179 template <class S, class T>
180 GPUd() static void AtomicMinInternal(S* addr, T val);
181};
182
184
185template <typename... Args>
186GPUhdi() constexpr float GPUCommonMath::Sum2(float w, Args... args)
187{
188 if constexpr (sizeof...(Args) == 0) {
189 return w * w;
190 } else {
191 return w * w + Sum2(args...);
192 }
193 return 0;
194}
195
196GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size)
197{
198#ifndef GPUCA_GPUCODE_DEVICE
199 std::memcpy(dst, src, size);
200#elif defined(__CUDACC__) || defined(__HIPCC__)
201 ::memcpy(dst, src, size);
202#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
203 __builtin_memcpy(dst, src, size);
204#else
205 char* d = (char*)dst;
206 const char* s = (const char*)src;
207 for (size_t i = 0; i < size; i++) {
208 d[i] = s[i];
209 }
210#endif
211}
212
213template <int32_t I, class T>
214GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val)
215{
216 if constexpr (I & (I - 1)) {
217 T tmp = val % I;
218 if (tmp) {
219 val += I - tmp;
220 }
221 return val;
222 } else {
223 return (val + I - 1) & ~(T)(I - 1);
224 }
225 return 0; // BUG: Cuda complains about missing return value with constexpr if
226}
227
228GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
229{
230#if !defined(GPUCA_GPUCODE) || defined(__OPENCL__) || defined(__OPENCL_HOST__)
231 float2 ret = {x, y};
232 return ret;
233#else
234 return make_float2(x, y);
235#endif // GPUCA_GPUCODE
236}
237
238GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }
239
240GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
241{
242#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
243 return __float_as_uint(x);
244#elif defined(GPUCA_GPUCODE_DEVICE) && defined(__OPENCL__)
245 return as_uint(x);
246#else
247 return reinterpret_cast<const uint32_t&>(x);
248#endif
249}
250
251GPUCA_DETERMINISTIC_CODE( // clang-format off
252GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
253GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
254GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
255GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
256GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); }
257GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); }
258GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); }
259GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); }
260GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); }
261GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); }
262GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); }
263GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); }
264GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
265GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); }
266GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); }
267, // !GPUCA_DETERMINISTIC_CODE
268GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); }
269GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
270GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
271GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); }
272GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); }
273GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); }
274GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); }
275GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); }
276GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); }
277GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); }
278GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); }
279GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); }
280GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); }
281GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; }
282GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }
283) // clang-format on
284
285GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
286{
287 GPUCA_DETERMINISTIC_CODE( // clang-format off
288 s = sin((double)x);
289 c = cos((double)x);
290 , // !GPUCA_DETERMINISTIC_CODE
291#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
292 __sincosf(x, &s, &c);
293#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
294 sincosf(x, &s, &c);
295#else
296 GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
297#endif
298 ) // clang-format on
299}
300
301GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
302{
303#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
304 __sincos(x, &s, &c);
305#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
306 sincos(x, &s, &c);
307#else
308 GPUCA_CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), s = sincos(x, &c));
309#endif
310}
311
312GPUdi() constexpr uint32_t GPUCommonMath::Clz(uint32_t x)
313{
314#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__))
315 return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available
316#else
317 for (int32_t i = 31; i >= 0; i--) {
318 if (x & (1u << i)) {
319 return (31 - i);
320 }
321 }
322 return 32;
323#endif
324}
325
326GPUdi() constexpr uint32_t GPUCommonMath::Popcount(uint32_t x)
327{
328#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed
329 // use builtin if available
330 return GPUCA_CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x));
331#else
332 x = x - ((x >> 1) & 0x55555555);
333 x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
334 return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
335#endif
336}
337
338template <typename T>
339GPUhdi() constexpr void GPUCommonMath::Swap(T& a, T& b)
340{
341#ifndef GPUCA_GPUCODE_DEVICE
342 std::swap(a, b);
343#else
344 T tmp = a;
345 a = b;
346 b = tmp;
347#endif
348}
349
350template <class T, class S, class R>
351GPUdi() T GPUCommonMath::MinWithRef(T x, T y, S refX, S refY, R& r)
352{
353 if (x < y) {
354 r = refX;
355 return x;
356 }
357 r = refY;
358 return y;
359}
360
361template <class T, class S, class R>
362GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, S refX, S refY, R& r)
363{
364 if (x > y) {
365 r = refX;
366 return x;
367 }
368 r = refY;
369 return y;
370}
371
372template <class T, class S, class R>
373GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r)
374{
375 T retVal = x;
376 S retRef = refX;
377 if (y > retVal) {
378 retVal = y;
379 retRef = refY;
380 }
381 if (z > retVal) {
382 retVal = z;
383 retRef = refZ;
384 }
385 if (w > retVal) {
386 retVal = w;
387 retRef = refW;
388 }
389 r = retRef;
390 return retVal;
391}
392
393GPUdi() float GPUCommonMath::InvSqrt(float _x)
394{
395 GPUCA_DETERMINISTIC_CODE( // clang-format off
396 return 1.f / Sqrt(_x);
397 , // !GPUCA_DETERMINISTIC_CODE
398#if defined(__CUDACC__) || defined(__HIPCC__)
399 return __frsqrt_rn(_x);
400#elif defined(__OPENCL__) && defined(__clang__)
401 return 1.f / sqrt(_x);
402#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__))
403 return 1.f / sqrtf(_x);
404#else
405 union {
406 float f;
407 int32_t i;
408 } x = {_x};
409 const float xhalf = 0.5f * x.f;
410 x.i = 0x5f3759df - (x.i >> 1);
411 x.f = x.f * (1.5f - xhalf * x.f * x.f);
412 return x.f;
413#endif
414 ) // clang-format on
415}
416
417template <>
418GPUhdi() constexpr float GPUCommonMath::Abs<float>(float x)
419{
420 return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x));
421}
422
423template <>
424GPUhdi() constexpr double GPUCommonMath::Abs<double>(double x)
425{
426 return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x));
427}
428
429template <>
430GPUhdi() constexpr int32_t GPUCommonMath::Abs<int32_t>(int32_t x)
431{
432 return GPUCA_CHOICE(abs(x), abs(x), abs(x));
433}
434
435template <class S, class T>
436GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val)
437{
438#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
439 return ::atomic_exchange(addr, val);
440#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
441 return ::atomic_xchg(addr, val);
442#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
443 return ::atomicExch(addr, val);
444#elif defined(WITH_OPENMP)
445 uint32_t old;
446 __atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
447 return old;
448#else
449 return reinterpret_cast<std::atomic<T>*>(addr)->exchange(val);
450#endif
451}
452
453template <class S, class T>
454GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
455{
456#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
457 return ::atomic_compare_exchange(addr, cmp, val) == cmp;
458#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
459 return ::atomic_cmpxchg(addr, cmp, val) == cmp;
460#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
461 return ::atomicCAS(addr, cmp, val) == cmp;
462#elif defined(WITH_OPENMP)
463 return __atomic_compare_exchange(addr, &cmp, &val, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
464#else
465 return reinterpret_cast<std::atomic<T>*>(addr)->compare_exchange_strong(cmp, val);
466#endif
467}
468
469template <class S, class T>
470GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val)
471{
472#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
473 return ::atomic_fetch_add(addr, val);
474#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
475 return ::atomic_add(addr, val);
476#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
477 return ::atomicAdd(addr, val);
478#elif defined(WITH_OPENMP)
479 return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST) - val;
480#else
481 return reinterpret_cast<std::atomic<T>*>(addr)->fetch_add(val);
482#endif
483}
484
485template <class S, class T>
486GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val)
487{
488#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
489 ::atomic_fetch_max(addr, val);
490#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
491 ::atomic_max(addr, val);
492#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
493 ::atomicMax(addr, val);
494#else
495 S current;
496 while ((current = *(volatile S*)addr) < val && !AtomicCASInternal(addr, current, val)) {
497 }
498#endif // GPUCA_GPUCODE
499}
500
501template <class S, class T>
502GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val)
503{
504#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
505 ::atomic_fetch_min(addr, val);
506#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
507 ::atomic_min(addr, val);
508#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
509 ::atomicMin(addr, val);
510#else
511 S current;
512 while ((current = *(volatile S*)addr) > val && !AtomicCASInternal(addr, current, val)) {
513 }
514#endif // GPUCA_GPUCODE
515}
516
517#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(G__ROOT)
518#define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
519template <>
520GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
521{
522 if (val == -0.f) {
523 val = 0.f;
524 }
525 if (val >= 0) {
526 AtomicMaxInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
527 } else {
528 AtomicMinInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
529 }
530}
531template <>
532GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
533{
534 if (val == -0.f) {
535 val = 0.f;
536 }
537 if (val >= 0) {
538 AtomicMinInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
539 } else {
540 AtomicMaxInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
541 }
542}
543#endif
544
545#undef GPUCA_CHOICE
546
547} // namespace o2::gpu
548
549#endif // GPUCOMMONMATH_H
uint64_t exp(uint64_t base, uint8_t exp) noexcept
int32_t i
#define GPUsharedref()
#define GPUdii()
#define GPUAtomic(type)
#define GPUgeneric()
#define GPUglobalref()
#define GPUCA_DETERMINISTIC_CODE(det, indet)
#define GPUCA_CHOICE(c1, c2, c3)
int32_t retVal
GPUd() const expr static float Pi()
GPUhd() const expr static T Min(const T x
GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUd() const expr static int32_t Float2IntRn(float x)
GPUd() const expr static float Round(float x)
GPUhdni() const expr static float Sqrt(float x)
GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() const expr static T Clamp(const T v
GPUd() static float2 MakeFloat2(float x
GPUhd() const expr static T Max(const T x
GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUd() const expr static float TwoPi()
const void size_t size
GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUd() static uint32_t Float2UIntReint(const float &x)
GPUd() static T MinWithRef(T x
GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUhdi() const expr static float Hypot(float x
GPUd() const expr static uint32_t Clz(uint32_t val)
GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GLint GLenum GLint x
Definition glcorearb.h:403
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
const GLdouble * v
Definition glcorearb.h:832
GLdouble f
Definition glcorearb.h:310
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLenum dst
Definition glcorearb.h:1767
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLuint GLfloat * val
Definition glcorearb.h:1582
GLboolean r
Definition glcorearb.h:1233
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLubyte GLubyte GLubyte GLubyte w
Definition glcorearb.h:852
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
bool isnan(float f)
constexpr size_t min
constexpr size_t max