Project
Loading...
Searching...
No Matches
GPUCommonMath.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCOMMONMATH_H
16#define GPUCOMMONMATH_H
17
18#include "GPUCommonDef.h"
19
20#if defined(__CUDACC__) && !defined(__clang__) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
21#include <sm_20_atomic_functions.h>
22#endif
23
24#if !defined(GPUCA_GPUCODE_DEVICE)
25#include <cmath>
26#include <algorithm>
27#include <atomic>
28#endif
29
30#if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__))
31#include <cstdint>
32#endif
33
34namespace o2
35{
36namespace gpu
37{
38
40{
41 public:
42 GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr
43
44 template <class T>
45 GPUhd() static T Min(const T x, const T y);
46 template <class T>
47 GPUhd() static T Max(const T x, const T y);
48 template <class T, class S, class R>
49 GPUd() static T MinWithRef(T x, T y, S refX, S refY, R& r);
50 template <class T, class S, class R>
51 GPUd() static T MaxWithRef(T x, T y, S refX, S refY, R& r);
52 template <class T, class S, class R>
53 GPUd() static T MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r);
54 template <class T>
55 GPUdi() static T Clamp(const T v, const T lo, const T hi)
56 {
57 return Max(lo, Min(v, hi));
58 }
59 GPUhdni() static float Sqrt(float x);
60 GPUd() static float InvSqrt(float x);
61 template <class T>
62 GPUhd() static T Abs(T x);
63 GPUd() static float ASin(float x);
64 GPUd() static float ACos(float x);
65 GPUd() static float ATan(float x);
66 GPUhd() static float ATan2(float y, float x);
67 GPUd() static float Sin(float x);
68 GPUd() static float Cos(float x);
69 GPUhdni() static void SinCos(float x, float& s, float& c);
70 GPUhdni() static void SinCosd(double x, double& s, double& c);
71 GPUd() static float Tan(float x);
72 GPUd() static float Pow(float x, float y);
73 GPUd() static float Log(float x);
74 GPUd() static float Exp(float x);
75 GPUhdni() static float Copysign(float x, float y);
76 GPUd() static constexpr float TwoPi() { return 6.2831853f; }
77 GPUd() static constexpr float Pi() { return 3.1415927f; }
78 GPUd() static float Round(float x);
79 GPUd() static float Floor(float x);
80 GPUd() static uint32_t Float2UIntReint(const float& x);
81 GPUd() static uint32_t Float2UIntRn(float x);
82 GPUd() static int32_t Float2IntRn(float x);
83 GPUd() static float Modf(float x, float y);
84 GPUd() static bool Finite(float x);
85 GPUd() static uint32_t Clz(uint32_t val);
86 GPUd() static uint32_t Popcount(uint32_t val);
87
88 GPUhdni() static float Hypot(float x, float y);
89 GPUhdni() static float Hypot(float x, float y, float z);
90 GPUhdni() static float Hypot(float x, float y, float z, float w);
91
92 template <typename T>
93 GPUhd() static void Swap(T& a, T& b);
94
95 template <class T>
96 GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
97 {
98 return GPUCommonMath::AtomicExchInternal(addr, val);
99 }
100
101 template <class T>
102 GPUdi() static bool AtomicCAS(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
103 {
104 return GPUCommonMath::AtomicCASInternal(addr, cmp, val);
105 }
106
107 template <class T>
108 GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
109 {
110 return GPUCommonMath::AtomicAddInternal(addr, val);
111 }
112 template <class T>
113 GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
114 {
115 GPUCommonMath::AtomicMaxInternal(addr, val);
116 }
117 template <class T>
118 GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
119 {
120 GPUCommonMath::AtomicMinInternal(addr, val);
121 }
122 template <class T>
123 GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
124 {
125 return GPUCommonMath::AtomicExchInternal(addr, val);
126 }
127 template <class T>
128 GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
129 {
130 return GPUCommonMath::AtomicAddInternal(addr, val);
131 }
132 template <class T>
133 GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
134 {
135 GPUCommonMath::AtomicMaxInternal(addr, val);
136 }
137 template <class T>
138 GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
139 {
140 GPUCommonMath::AtomicMinInternal(addr, val);
141 }
142 GPUd() static int32_t Mul24(int32_t a, int32_t b);
143 GPUd() static float FMulRZ(float a, float b);
144
145 template <int32_t I, class T>
146 GPUd() constexpr static T nextMultipleOf(T val);
147
148 template <typename... Args>
149 GPUdi() static float Sum2(float w, Args... args)
150 {
151 if constexpr (sizeof...(Args) == 0) {
152 return w * w;
153 } else {
154 return w * w + Sum2(args...);
155 }
156 return 0;
157 }
158
159 private:
160 template <class S, class T>
161 GPUd() static uint32_t AtomicExchInternal(S* addr, T val);
162 template <class S, class T>
163 GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
164 template <class S, class T>
165 GPUd() static uint32_t AtomicAddInternal(S* addr, T val);
166 template <class S, class T>
167 GPUd() static void AtomicMaxInternal(S* addr, T val);
168 template <class S, class T>
169 GPUd() static void AtomicMinInternal(S* addr, T val);
170};
171
173
// CHOICE(host, cudaHip, openCl) expands to exactly one of its three arguments,
// selected by the backend the code is currently being compiled for.
// clang-format off
#if !defined(GPUCA_GPUCODE_DEVICE)
  #define CHOICE(c1, c2, c3) (c1) // Host code: first option
#elif defined(__CUDACC__) || defined(__HIPCC__)
  #define CHOICE(c1, c2, c3) (c2) // CUDA and HIP device code: second option
#elif defined(__OPENCL__)
  #define CHOICE(c1, c2, c3) (c3) // OpenCL device code: third option
#else
  #define CHOICE(c1, c2, c3) (c1) // Any other device build: same as host
#endif
// clang-format on
182
183template <int32_t I, class T>
184GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val)
185{
186 if constexpr (I & (I - 1)) {
187 T tmp = val % I;
188 if (tmp) {
189 val += I - tmp;
190 }
191 return val;
192 } else {
193 return (val + I - 1) & ~(T)(I - 1);
194 }
195 return 0; // BUG: Cuda complains about missing return value with constexpr if
196}
197
198GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
199{
200#if !defined(GPUCA_GPUCODE) || defined(__OPENCL__) || defined(__OPENCL_HOST__)
201 float2 ret = {x, y};
202 return ret;
203#else
204 return make_float2(x, y);
205#endif // GPUCA_GPUCODE
206}
207
208GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }
209
210GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
211{
212#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
213 return __float_as_uint(x);
214#elif defined(GPUCA_GPUCODE_DEVICE) && defined(__OPENCL__)
215 return as_uint(x);
216#else
217 return reinterpret_cast<const uint32_t&>(x);
218#endif
219}
220
221GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); }
222GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); }
223
224#ifdef GPUCA_NO_FAST_MATH
225GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); }
226GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
227GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); }
228GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
229GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
230GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); }
231GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); }
232GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); }
233GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); }
234GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); }
235GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); }
236GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); }
237GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); }
238GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
239#else
240GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); }
241GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
242GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); }
243GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
244GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); }
245GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); }
246GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE(sinf(x), sinf(x), sin(x)); }
247GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE(cosf(x), cosf(x), cos(x)); }
248GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE(tanf(x), tanf(x), tan(x)); }
249GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE(powf(x, y), powf(x, y), pow(x, y)); }
250GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), asin(x)); }
251GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); }
252GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); }
253GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); }
254#endif
255
256GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
257{
258#if defined(GPUCA_NO_FAST_MATH) && !defined(__OPENCL__)
259 s = sin((double)x);
260 c = cos((double)x);
261#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
262 __sincosf(x, &s, &c);
263#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
264 sincosf(x, &s, &c);
265#else
266 CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
267#endif
268}
269
270GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
271{
272#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
273 __sincos(x, &s, &c);
274#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
275 sincos(x, &s, &c);
276#else
277 CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), s = sincos(x, &c));
278#endif
279}
280
281GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x)
282{
283#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__))
284 return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available
285#else
286 for (int32_t i = 31; i >= 0; i--) {
287 if (x & (1u << i)) {
288 return (31 - i);
289 }
290 }
291 return 32;
292#endif
293}
294
295GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x)
296{
297#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed
298 // use builtin if available
299 return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x));
300#else
301 x = x - ((x >> 1) & 0x55555555);
302 x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
303 return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
304#endif
305}
306
307GPUhdi() float GPUCommonMath::Hypot(float x, float y)
308{
309 return Sqrt(x * x + y * y);
310}
311
312GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z)
313{
314 return Sqrt(x * x + y * y + z * z);
315}
316
317GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z, float w)
318{
319 return Sqrt(x * x + y * y + z * z + w * w);
320}
321
322template <typename T>
323GPUd() void _swap(T& a, T& b)
324{
325 T tmp = a;
326 a = b;
327 b = tmp;
328}
329
330template <typename T>
331GPUhdi() void GPUCommonMath::Swap(T& a, T& b)
332{
333 CHOICE(std::swap(a, b), _swap<T>(a, b), _swap<T>(a, b));
334}
335
336template <class T>
337GPUhdi() T GPUCommonMath::Min(const T x, const T y)
338{
339 return CHOICE(std::min(x, y), min(x, y), min(x, y));
340}
341
342template <class T>
343GPUhdi() T GPUCommonMath::Max(const T x, const T y)
344{
345 return CHOICE(std::max(x, y), max(x, y), max(x, y));
346}
347
348template <class T, class S, class R>
349GPUdi() T GPUCommonMath::MinWithRef(T x, T y, S refX, S refY, R& r)
350{
351 if (x < y) {
352 r = refX;
353 return x;
354 }
355 r = refY;
356 return y;
357}
358
359template <class T, class S, class R>
360GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, S refX, S refY, R& r)
361{
362 if (x > y) {
363 r = refX;
364 return x;
365 }
366 r = refY;
367 return y;
368}
369
370template <class T, class S, class R>
371GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r)
372{
373 T retVal = x;
374 S retRef = refX;
375 if (y > retVal) {
376 retVal = y;
377 retRef = refY;
378 }
379 if (z > retVal) {
380 retVal = z;
381 retRef = refZ;
382 }
383 if (w > retVal) {
384 retVal = w;
385 retRef = refW;
386 }
387 r = retRef;
388 return retVal;
389}
390
391GPUdi() float GPUCommonMath::InvSqrt(float _x)
392{
393#if defined(GPUCA_NO_FAST_MATH) || defined(__OPENCL__)
394 return 1.f / Sqrt(_x);
395#elif defined(__CUDACC__) || defined(__HIPCC__)
396 return __frsqrt_rn(_x);
397#elif defined(__FAST_MATH__)
398 return 1.f / sqrtf(_x);
399#else
400 union {
401 float f;
402 int32_t i;
403 } x = {_x};
404 const float xhalf = 0.5f * x.f;
405 x.i = 0x5f3759df - (x.i >> 1);
406 x.f = x.f * (1.5f - xhalf * x.f * x.f);
407 return x.f;
408#endif
409}
410
411template <>
412GPUhdi() float GPUCommonMath::Abs<float>(float x)
413{
414 return CHOICE(fabsf(x), fabsf(x), fabs(x));
415}
416
417#if !defined(__OPENCL__) || defined(cl_khr_fp64)
418template <>
419GPUhdi() double GPUCommonMath::Abs<double>(double x)
420{
421 return CHOICE(fabs(x), fabs(x), fabs(x));
422}
423#endif
424
425template <>
426GPUhdi() int32_t GPUCommonMath::Abs<int32_t>(int32_t x)
427{
428 return CHOICE(abs(x), abs(x), abs(x));
429}
430
431GPUhdi() float GPUCommonMath::Copysign(float x, float y)
432{
433#if defined(__OPENCL__)
434 return copysign(x, y);
435#elif defined(GPUCA_GPUCODE) && !defined(__OPENCL__)
436 return copysignf(x, y);
437#else
438 return std::copysignf(x, y);
439#endif // GPUCA_GPUCODE
440}
441
442template <class S, class T>
443GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val)
444{
445#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
446 return ::atomic_exchange(addr, val);
447#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
448 return ::atomic_xchg(addr, val);
449#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
450 return ::atomicExch(addr, val);
451#elif defined(WITH_OPENMP)
452 uint32_t old;
453 __atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
454 return old;
455#else
456 return reinterpret_cast<std::atomic<T>*>(addr)->exchange(val);
457#endif
458}
459
460template <class S, class T>
461GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
462{
463#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
464 return ::atomic_compare_exchange(addr, cmp, val) == cmp;
465#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
466 return ::atomic_cmpxchg(addr, cmp, val) == cmp;
467#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
468 return ::atomicCAS(addr, cmp, val) == cmp;
469#elif defined(WITH_OPENMP)
470 return __atomic_compare_exchange(addr, &cmp, &val, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
471#else
472 return reinterpret_cast<std::atomic<T>*>(addr)->compare_exchange_strong(cmp, val);
473#endif
474}
475
476template <class S, class T>
477GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val)
478{
479#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
480 return ::atomic_fetch_add(addr, val);
481#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
482 return ::atomic_add(addr, val);
483#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
484 return ::atomicAdd(addr, val);
485#elif defined(WITH_OPENMP)
486 return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST) - val;
487#else
488 return reinterpret_cast<std::atomic<T>*>(addr)->fetch_add(val);
489#endif
490}
491
492template <class S, class T>
493GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val)
494{
495#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
496 ::atomic_fetch_max(addr, val);
497#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
498 ::atomic_max(addr, val);
499#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
500 ::atomicMax(addr, val);
501#else
502 S current;
503 while ((current = *(volatile S*)addr) < val && !AtomicCASInternal(addr, current, val)) {
504 }
505#endif // GPUCA_GPUCODE
506}
507
508template <class S, class T>
509GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val)
510{
511#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS))
512 ::atomic_fetch_min(addr, val);
513#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
514 ::atomic_min(addr, val);
515#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
516 ::atomicMin(addr, val);
517#else
518 S current;
519 while ((current = *(volatile S*)addr) > val && !AtomicCASInternal(addr, current, val)) {
520 }
521#endif // GPUCA_GPUCODE
522}
523
524#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(G__ROOT)
525#define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
526template <>
527GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
528{
529 if (val == -0.f) {
530 val = 0.f;
531 }
532 if (val >= 0) {
533 AtomicMaxInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
534 } else {
535 AtomicMinInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
536 }
537}
538template <>
539GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
540{
541 if (val == -0.f) {
542 val = 0.f;
543 }
544 if (val >= 0) {
545 AtomicMinInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
546 } else {
547 AtomicMaxInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
548 }
549}
550#endif
551
552#undef CHOICE
553
554} // namespace gpu
555} // namespace o2
556
557#endif // GPUCOMMONMATH_H
uint64_t exp(uint64_t base, uint8_t exp) noexcept
int32_t i
#define GPUsharedref()
#define GPUdii()
#define GPUAtomic(type)
#define GPUgeneric()
#define GPUglobalref()
#define CHOICE(c1, c2, c3)
int32_t retVal
GPUhd() static T Min(const T x
GPUd() static const expr float Pi()
GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUhdni() static float Sqrt(float x)
GPUd() static float2 MakeFloat2(float x
GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) *addr
GPUd() static float Round(float x)
GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static T Clamp(const T v
GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) *addr
GLint GLenum GLint x
Definition glcorearb.h:403
const GLdouble * v
Definition glcorearb.h:832
GLdouble f
Definition glcorearb.h:310
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLuint GLfloat * val
Definition glcorearb.h:1582
GLboolean r
Definition glcorearb.h:1233
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLubyte GLubyte GLubyte GLubyte w
Definition glcorearb.h:852
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
GPUhdi() float GPUCommonMath
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
constexpr size_t min
constexpr size_t max