Project
Loading...
Searching...
No Matches
GPUCommonMath.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCOMMONMATH_H
16#define GPUCOMMONMATH_H
17
18#include "GPUCommonDef.h"
19
20#if defined(__CUDACC__) && !defined(__clang__) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
21#include <sm_20_atomic_functions.h>
22#endif
23
24#if !defined(GPUCA_GPUCODE_DEVICE)
25#include <cmath>
26#include <algorithm>
27#include <atomic>
28#include <limits>
29#include <cstring>
30#endif
31
32#if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__))
33#include <cstdint>
34#endif
35
// GPUCA_CHOICE Syntax: GPUCA_CHOICE(Host, CUDA&HIP, OpenCL)
// Expands to exactly one of its three arguments (wrapped in parentheses). The selection is made
// by the preprocessor from the backend macros tested below, so the two unselected arguments are
// dropped before compilation and may name functions that do not exist on the current backend.
#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off
  #define GPUCA_CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP
#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__)
  #define GPUCA_CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL
#else
  #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host
#endif // clang-format on
44
45namespace o2::gpu
46{
47
49{
50 public:
// Builds a float2 from the two scalars x and y.
GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better approach that is constexpr
52
53 template <class T>
54 GPUhd() constexpr static T Min(const T x, const T y)
55 {
56 return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y));
57 }
58 template <class T>
59 GPUhd() constexpr static T Max(const T x, const T y)
60 {
61 return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y));
62 }
// Returns the smaller of x and y and stores the associated reference value
// (refX if x is chosen, refY otherwise) in r.
template <class T, class S, class R>
GPUd() static T MinWithRef(T x, T y, S refX, S refY, R& r);
// Returns the larger of x and y and stores the associated reference value
// (refX if x is chosen, refY otherwise) in r.
template <class T, class S, class R>
GPUd() static T MaxWithRef(T x, T y, S refX, S refY, R& r);
// Four-way variant: returns the largest of x, y, z, w and stores the reference
// value belonging to the chosen argument in r.
template <class T, class S, class R>
GPUd() static T MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r);
69 template <class T>
70 GPUdi() constexpr static T Clamp(const T v, const T lo, const T hi)
71 {
72 return Max(lo, Min(v, hi));
73 }
// Square root and reciprocal square root of x.
GPUhdni() constexpr static float Sqrt(float x);
GPUd() static float InvSqrt(float x);
// Absolute value; only declared here, the implementations are explicit specializations.
template <class T>
GPUhd() constexpr static T Abs(T x);
// Single-precision wrappers around the backend's math functions. The out-of-line
// definitions come in two variants selected through GPUCA_DETERMINISTIC_CODE.
GPUd() constexpr static float ASin(float x);
GPUd() constexpr static float ACos(float x);
GPUd() constexpr static float ATan(float x);
GPUhd() constexpr static float ATan2(float y, float x);
GPUd() constexpr static float Sin(float x);
GPUd() constexpr static float Cos(float x);
// Compute sine and cosine of x together; results are written to s and c.
GPUhdni() static void SinCos(float x, float& s, float& c);
GPUhdni() static void SinCosd(double x, double& s, double& c);
GPUd() constexpr static float Tan(float x);
GPUd() constexpr static float Pow(float x, float y);
GPUd() constexpr static float Log(float x);
GPUd() constexpr static float Exp(float x);
// Forwards to the backend's copysign function with arguments (x, y).
GPUhdni() constexpr static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); }
// Float constants for 2*pi and pi.
GPUd() constexpr static float TwoPi() { return 6.2831853f; }
GPUd() constexpr static float Pi() { return 3.1415927f; }
GPUd() constexpr static float Round(float x);
GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); }
// Returns the bit pattern of x as an unsigned 32-bit integer (no numeric conversion).
GPUd() static uint32_t Float2UIntReint(const float& x);
// Adds 0.5f to x, truncates to int32_t, then converts to uint32_t.
GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); }
GPUd() constexpr static int32_t Float2IntRn(float x);
// Modf is implemented with the backend's fmod (floating-point remainder), Remainderf with remainder.
GPUd() constexpr static float Modf(float x, float y);
GPUhdi() static float Remainderf(float x, float y);
// NOTE: in the second (non-deterministic) arm of the out-of-line definitions these two
// return the constants true and false respectively, without inspecting x.
GPUd() constexpr static bool Finite(float x);
GPUd() constexpr static bool IsNaN(float x);
#ifndef __FAST_MATH__
// Quiet NaN value; only declared when __FAST_MATH__ is not defined.
GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits<float>::quiet_NaN(), __builtin_nanf(""), nan(0u)); }
#endif
// Count of leading zero bits (32 for val == 0) and count of set bits of a 32-bit value.
GPUd() constexpr static uint32_t Clz(uint32_t val);
GPUd() constexpr static uint32_t Popcount(uint32_t val);
107
// Copies size bytes from src to dst using the copy primitive available on the current backend.
GPUd() static void memcpy(void* dst, const void* src, size_t size);

// Euclidean norm of 2, 3 or 4 components, computed as Sqrt of the plain sum of squares.
GPUhdi() constexpr static float Hypot(float x, float y) { return Sqrt(x * x + y * y); }
GPUhdi() constexpr static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); }
GPUhdi() constexpr static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); }

// Exchanges the values of a and b.
template <typename T>
GPUhd() constexpr static void Swap(T& a, T& b);
116
117 template <class T>
118 GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
119 {
120 return GPUCommonMath::AtomicExchInternal(addr, val);
121 }
122
123 template <class T>
124 GPUdi() static bool AtomicCAS(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
125 {
126 return GPUCommonMath::AtomicCASInternal(addr, cmp, val);
127 }
128
129 template <class T>
130 GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
131 {
132 return GPUCommonMath::AtomicAddInternal(addr, val);
133 }
134 template <class T>
135 GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
136 {
137 GPUCommonMath::AtomicMaxInternal(addr, val);
138 }
139 template <class T>
140 GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
141 {
142 GPUCommonMath::AtomicMinInternal(addr, val);
143 }
144 template <class T>
145 GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
146 {
147 return GPUCommonMath::AtomicExchInternal(addr, val);
148 }
149 template <class T>
150 GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
151 {
152 return GPUCommonMath::AtomicAddInternal(addr, val);
153 }
154 template <class T>
155 GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
156 {
157 GPUCommonMath::AtomicMaxInternal(addr, val);
158 }
159 template <class T>
160 GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
161 {
162 GPUCommonMath::AtomicMinInternal(addr, val);
163 }
// NOTE(review): the definitions of Mul24 and FMulRZ are not part of this excerpt; judging by the
// names they are a 24-bit integer multiply and a round-towards-zero float multiply - confirm.
GPUd() constexpr static int32_t Mul24(int32_t a, int32_t b);
GPUd() constexpr static float FMulRZ(float a, float b);

// Rounds val up to the next multiple of the compile-time constant I (val is returned
// unchanged if it already is a multiple).
template <int32_t I, class T>
GPUd() constexpr static T nextMultipleOf(T val);

// Sum of the squares of all arguments.
template <typename... Args>
GPUhdni() constexpr static float Sum2(float w, Args... args);
172
173 private:
// Backend-specific implementations behind the public Atomic* wrappers. S is the pointee type
// as seen by the backend (the GPUAtomic(T) of the wrappers), T the value type.
// NOTE: the exchange and add helpers return uint32_t regardless of T, so results for other
// value types are converted through uint32_t on the way back to the caller.
template <class S, class T>
GPUd() static uint32_t AtomicExchInternal(S* addr, T val);
template <class S, class T>
GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
template <class S, class T>
GPUd() static uint32_t AtomicAddInternal(S* addr, T val);
template <class S, class T>
GPUd() static void AtomicMaxInternal(S* addr, T val);
template <class S, class T>
GPUd() static void AtomicMinInternal(S* addr, T val);
184};
185
187
188template <typename... Args>
189GPUhdi() constexpr float GPUCommonMath::Sum2(float w, Args... args)
190{
191 if constexpr (sizeof...(Args) == 0) {
192 return w * w;
193 } else {
194 return w * w + Sum2(args...);
195 }
196 return 0;
197}
198
199GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size)
200{
201#ifndef GPUCA_GPUCODE_DEVICE
202 std::memcpy(dst, src, size);
203#elif defined(__CUDACC__) || defined(__HIPCC__)
204 ::memcpy(dst, src, size);
205#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
206 __builtin_memcpy(dst, src, size);
207#else
208 char* d = (char*)dst;
209 const char* s = (const char*)src;
210 for (size_t i = 0; i < size; i++) {
211 d[i] = s[i];
212 }
213#endif
214}
215
216template <int32_t I, class T>
217GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val)
218{
219 if constexpr (I & (I - 1)) {
220 T tmp = val % I;
221 if (tmp) {
222 val += I - tmp;
223 }
224 return val;
225 } else {
226 return (val + I - 1) & ~(T)(I - 1);
227 }
228 return 0; // BUG: Cuda complains about missing return value with constexpr if
229}
230
231GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
232{
233#if !defined(GPUCA_GPUCODE) || defined(__OPENCL__) || defined(__OPENCL_HOST__)
234 float2 ret = {x, y};
235 return ret;
236#else
237 return make_float2(x, y);
238#endif // GPUCA_GPUCODE
239}
240
241GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }
242GPUhdi() float GPUCommonMath::Remainderf(float x, float y) { return GPUCA_CHOICE(std::remainderf(x, y), remainderf(x, y), remainder(x, y)); }
243
244GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
245{
246#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
247 return __float_as_uint(x);
248#elif defined(GPUCA_GPUCODE_DEVICE) && defined(__OPENCL__)
249 return as_uint(x);
250#else
251 return reinterpret_cast<const uint32_t&>(x);
252#endif
253}
254
255GPUCA_DETERMINISTIC_CODE( // clang-format off
256GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
257GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
258GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
259GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
260GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); }
261GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); }
262GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); }
263GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); }
264GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); }
265GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); }
266GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); }
267GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); }
268GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
269GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); }
270GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); }
271, // !GPUCA_DETERMINISTIC_CODE
272GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); }
273GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
274GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
275GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); }
276GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); }
277GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); }
278GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); }
279GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); }
280GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); }
281GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); }
282GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); }
283GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); }
284GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); }
285GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; }
286GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }
287) // clang-format on
288
// Computes the sine and cosine of x in one call and writes them to s and c.
// Deterministic variant: separate double-precision sin and cos, narrowed on assignment.
// Otherwise: __sincosf on Apple hosts, sincosf on hosts where one of the GNU feature macros or
// GPUCA_GPUCODE is defined, and in all remaining cases the GPUCA_CHOICE line (separate
// sinf/cosf on host, sincosf on CUDA/HIP, OpenCL sincos which returns the sine and stores the cosine).
// NOTE(review): the #if directives sit inside the argument list of GPUCA_DETERMINISTIC_CODE;
// this relies on the compiler accepting preprocessor directives within macro arguments.
GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
{
  GPUCA_DETERMINISTIC_CODE( // clang-format off
    s = sin((double)x);
    c = cos((double)x);
  , // !GPUCA_DETERMINISTIC_CODE
#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
    __sincosf(x, &s, &c);
#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
    sincosf(x, &s, &c);
#else
    GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
#endif
  ) // clang-format on
}
304
// Double-precision counterpart of SinCos: writes sin(x) to s and cos(x) to c.
// Uses __sincos on Apple hosts, sincos on hosts where one of the GNU feature macros or
// GPUCA_GPUCODE is defined, and otherwise the GPUCA_CHOICE line (separate sin/cos on host,
// sincos on CUDA/HIP, OpenCL sincos which returns the sine and stores the cosine).
GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
{
#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
  __sincos(x, &s, &c);
#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
  sincos(x, &s, &c);
#else
  GPUCA_CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), s = sincos(x, &c));
#endif
}
315
316GPUdi() constexpr uint32_t GPUCommonMath::Clz(uint32_t x)
317{
318#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__))
319 return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available
320#else
321 for (int32_t i = 31; i >= 0; i--) {
322 if (x & (1u << i)) {
323 return (31 - i);
324 }
325 }
326 return 32;
327#endif
328}
329
330GPUdi() constexpr uint32_t GPUCommonMath::Popcount(uint32_t x)
331{
332#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed
333 // use builtin if available
334 return GPUCA_CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x));
335#else
336 x = x - ((x >> 1) & 0x55555555);
337 x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
338 return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
339#endif
340}
341
342template <typename T>
343GPUhdi() constexpr void GPUCommonMath::Swap(T& a, T& b)
344{
345#ifndef GPUCA_GPUCODE_DEVICE
346 std::swap(a, b);
347#else
348 T tmp = a;
349 a = b;
350 b = tmp;
351#endif
352}
353
354template <class T, class S, class R>
355GPUdi() T GPUCommonMath::MinWithRef(T x, T y, S refX, S refY, R& r)
356{
357 if (x < y) {
358 r = refX;
359 return x;
360 }
361 r = refY;
362 return y;
363}
364
365template <class T, class S, class R>
366GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, S refX, S refY, R& r)
367{
368 if (x > y) {
369 r = refX;
370 return x;
371 }
372 r = refY;
373 return y;
374}
375
376template <class T, class S, class R>
377GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r)
378{
379 T retVal = x;
380 S retRef = refX;
381 if (y > retVal) {
382 retVal = y;
383 retRef = refY;
384 }
385 if (z > retVal) {
386 retVal = z;
387 retRef = refZ;
388 }
389 if (w > retVal) {
390 retVal = w;
391 retRef = refW;
392 }
393 r = retRef;
394 return retVal;
395}
396
// Reciprocal square root of _x.
// Deterministic variant: 1 / Sqrt(_x). Otherwise the backend decides: __frsqrt_rn on CUDA/HIP,
// 1 / sqrt on OpenCL with clang, 1 / sqrtf on non-OpenCL builds with fast-math or clang, and as
// last resort a bit-level initial guess refined by one Newton-Raphson step (approximate result).
// NOTE(review): the #if directives sit inside the argument list of GPUCA_DETERMINISTIC_CODE;
// this relies on the compiler accepting preprocessor directives within macro arguments.
GPUdi() float GPUCommonMath::InvSqrt(float _x)
{
  GPUCA_DETERMINISTIC_CODE( // clang-format off
    return 1.f / Sqrt(_x);
  , // !GPUCA_DETERMINISTIC_CODE
#if defined(__CUDACC__) || defined(__HIPCC__)
    return __frsqrt_rn(_x);
#elif defined(__OPENCL__) && defined(__clang__)
    return 1.f / sqrt(_x);
#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__))
    return 1.f / sqrtf(_x);
#else
    // View the float's bits as an integer to form the initial estimate.
    union {
      float f;
      int32_t i;
    } x = {_x};
    const float xhalf = 0.5f * x.f;
    x.i = 0x5f3759df - (x.i >> 1); // magic-constant initial guess
    x.f = x.f * (1.5f - xhalf * x.f * x.f); // one Newton-Raphson refinement
    return x.f;
#endif
  ) // clang-format on
}
420
421template <>
422GPUhdi() constexpr float GPUCommonMath::Abs<float>(float x)
423{
424 return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x));
425}
426
427template <>
428GPUhdi() constexpr double GPUCommonMath::Abs<double>(double x)
429{
430 return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x));
431}
432
433template <>
434GPUhdi() constexpr int32_t GPUCommonMath::Abs<int32_t>(int32_t x)
435{
436 return GPUCA_CHOICE(abs(x), abs(x), abs(x));
437}
438
439template <class S, class T>
440GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val)
441{
442#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
443 return ::atomic_exchange(addr, val);
444#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
445 return ::atomic_xchg(addr, val);
446#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
447 return ::atomicExch(addr, val);
448#elif defined(WITH_OPENMP)
449 uint32_t old;
450 __atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
451 return old;
452#else
453 return reinterpret_cast<std::atomic<T>*>(addr)->exchange(val);
454#endif
455}
456
457template <class S, class T>
458GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
459{
460#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
461 return ::atomic_compare_exchange(addr, cmp, val) == cmp;
462#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
463 return ::atomic_cmpxchg(addr, cmp, val) == cmp;
464#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
465 return ::atomicCAS(addr, cmp, val) == cmp;
466#elif defined(WITH_OPENMP)
467 return __atomic_compare_exchange(addr, &cmp, &val, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
468#else
469 return reinterpret_cast<std::atomic<T>*>(addr)->compare_exchange_strong(cmp, val);
470#endif
471}
472
473template <class S, class T>
474GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val)
475{
476#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
477 return ::atomic_fetch_add(addr, val);
478#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
479 return ::atomic_add(addr, val);
480#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
481 return ::atomicAdd(addr, val);
482#elif defined(WITH_OPENMP)
483 return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST) - val;
484#else
485 return reinterpret_cast<std::atomic<T>*>(addr)->fetch_add(val);
486#endif
487}
488
// Atomically raises *addr to val if val is larger; returns nothing.
// GPU backends map to their native atomic max. The remaining path emulates it with a
// compare-and-swap loop.
template <class S, class T>
GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val)
{
#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
  ::atomic_fetch_max(addr, val);
#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
  ::atomic_max(addr, val);
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
  ::atomicMax(addr, val);
#else
  S current;
  // Re-read the stored value each round; stop once it is no longer smaller than val or the
  // CAS from the value just read to val succeeds.
  while ((current = *(volatile S*)addr) < val && !AtomicCASInternal(addr, current, val)) {
  }
#endif // GPUCA_GPUCODE
}
504
// Atomically lowers *addr to val if val is smaller; returns nothing.
// GPU backends map to their native atomic min. The remaining path emulates it with a
// compare-and-swap loop.
template <class S, class T>
GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val)
{
#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
  ::atomic_fetch_min(addr, val);
#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
  ::atomic_min(addr, val);
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
  ::atomicMin(addr, val);
#else
  S current;
  // Re-read the stored value each round; stop once it is no longer larger than val or the
  // CAS from the value just read to val succeeds.
  while ((current = *(volatile S*)addr) > val && !AtomicCASInternal(addr, current, val)) {
  }
#endif // GPUCA_GPUCODE
}
520
// Float atomic min/max for CUDA/HIP (not compiled under ROOT/Cling), built on the integer
// atomics by operating on the float's bit pattern. GPUCA_HAVE_ATOMIC_MINMAX_FLOAT is defined
// to signal their availability.
// Scheme used by both functions: -0.f is first normalized to +0.f. A non-negative val is
// handled by the same operation on the bits viewed as int32_t; a negative val by the
// opposite operation on the bits viewed as uint32_t.
// NOTE(review): this presumably relies on the IEEE-754 ordering of float bit patterns
// (negative floats order in reverse when read as unsigned) - confirm.
#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(G__ROOT) && !defined(__CLING__)
#define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
template <>
GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
{
  if (val == -0.f) { // true for both +0 and -0; forces the +0 bit pattern
    val = 0.f;
  }
  if (val >= 0) {
    AtomicMaxInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
  } else {
    AtomicMinInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
  }
}
template <>
GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
{
  if (val == -0.f) { // true for both +0 and -0; forces the +0 bit pattern
    val = 0.f;
  }
  if (val >= 0) {
    AtomicMinInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
  } else {
    AtomicMaxInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
  }
}
#endif
548
549#undef GPUCA_CHOICE
550
551} // namespace o2::gpu
552
553#endif // GPUCOMMONMATH_H
uint64_t exp(uint64_t base, uint8_t exp) noexcept
int32_t i
#define GPUsharedref()
#define GPUdii()
#define GPUAtomic(type)
#define GPUgeneric()
#define GPUglobalref()
#define GPUCA_DETERMINISTIC_CODE(det, indet)
#define GPUCA_CHOICE(c1, c2, c3)
int32_t retVal
GPUd() const expr static float Pi()
GPUhd() const expr static T Min(const T x
GPUd() const expr static float QuietNaN()
GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUd() const expr static int32_t Float2IntRn(float x)
GPUd() const expr static float Round(float x)
GPUhdni() const expr static float Sqrt(float x)
GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() const expr static T Clamp(const T v
GPUd() static float2 MakeFloat2(float x
GPUhd() const expr static T Max(const T x
GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUd() const expr static float TwoPi()
const void size_t size
GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUd() static uint32_t Float2UIntReint(const float &x)
GPUd() static T MinWithRef(T x
GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUhdi() static float Remainderf(float x
GPUhdi() const expr static float Hypot(float x
GPUd() const expr static uint32_t Clz(uint32_t val)
GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GLint GLenum GLint x
Definition glcorearb.h:403
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
const GLdouble * v
Definition glcorearb.h:832
GLdouble f
Definition glcorearb.h:310
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLenum dst
Definition glcorearb.h:1767
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLuint GLfloat * val
Definition glcorearb.h:1582
GLboolean r
Definition glcorearb.h:1233
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLubyte GLubyte GLubyte GLubyte w
Definition glcorearb.h:852
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
bool isnan(float f)
constexpr size_t min
constexpr size_t max