27#elif defined(__GNUC__) || defined(__clang__)
28 little = __ORDER_LITTLE_ENDIAN__,
29 big = __ORDER_BIG_ENDIAN__,
30 native = __BYTE_ORDER__,
32#error OrtDataType::detail::endian is not implemented in this environment.
37 endian::native == endian::little || endian::native == endian::big,
38 "Only little-endian or big-endian native byte orders are supported.");
45template <
class Derived>
67 return static_cast<uint16_t
>(
val & ~kSignMask);
103 return static_cast<int16_t
>(
val) < 0;
189 Derived
Abs() const noexcept {
return Derived::FromBits(
AbsImpl()); }
207 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
212 if (
IsNaN() || rhs.IsNaN()) {
216 return val == rhs.val;
223 if (
IsNaN() || rhs.IsNaN()) {
229 if (left_is_negative != rhs.IsNegative()) {
233 return left_is_negative && !
AreZero(*
this, rhs);
235 return (
val != rhs.val) && ((
val < rhs.val) ^ left_is_negative);
269template <
class Derived>
278 constexpr unsigned int sign_mask = 0x80000000u;
279 uint16_t
val =
static_cast<uint16_t
>(0x0u);
281 unsigned int sign =
f.
u & sign_mask;
289 if (
f.u >= f16max.
u) {
290 val = (
f.u > f32infty.
u) ? 0x7e00 : 0x7c00;
292 if (
f.u < (113 << 23)) {
296 f.f += denorm_magic.
f;
299 val =
static_cast<uint16_t
>(
f.u - denorm_magic.
u);
301 unsigned int mant_odd = (
f.u >> 13) & 1;
310 val =
static_cast<uint16_t
>(
f.u >> 13);
314 val |=
static_cast<uint16_t
>(sign >> 16);
318template <
class Derived>
322 constexpr unsigned int shifted_exp = 0x7c00 << 13;
325 o.
u = (
val & 0x7fff) << 13;
326 unsigned int exp = shifted_exp &
o.u;
327 o.u += (127 - 15) << 23;
330 if (
exp == shifted_exp) {
331 o.u += (128 - 16) << 23;
332 }
else if (
exp == 0) {
339#if (defined _MSC_VER) && (defined _M_ARM || defined _M_ARM64 || defined _M_ARM64EC)
345 o.u |= (
val & 0x8000U) << 16U;
351template <
class Derived>
373 return static_cast<uint16_t
>(
val & ~kSignMask);
411 return static_cast<int16_t
>(
val) < 0;
497 Derived
Abs() const noexcept {
return Derived::FromBits(
AbsImpl()); }
518 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
522template <
class Derived>
527 result = kPositiveQNaNBits;
529 auto get_msb_half = [](
float fl) {
531#ifdef __cpp_if_constexpr
532 if constexpr (detail::endian::native == detail::endian::little)
534 if (detail::endian::native == detail::endian::little)
537 std::memcpy(&
result,
reinterpret_cast<char*
>(&fl) +
sizeof(uint16_t),
sizeof(uint16_t));
539 std::memcpy(&
result, &fl,
sizeof(uint16_t));
544 uint16_t upper_bits = get_msb_half(
v);
550 U32 += (upper_bits & 1) + kRoundToNearest;
551 result = get_msb_half(F32);
556template <
class Derived>
560 return std::numeric_limits<float>::quiet_NaN();
563 char*
const first =
reinterpret_cast<char*
>(&
result);
564 char*
const second =
first +
sizeof(uint16_t);
565#ifdef __cpp_if_constexpr
566 if constexpr (detail::endian::native == detail::endian::little)
568 if (detail::endian::native == detail::endian::little)
571 std::memset(
first, 0,
sizeof(uint16_t));
572 std::memcpy(second, &
val,
sizeof(uint16_t));
574 std::memcpy(
first, &
val,
sizeof(uint16_t));
575 std::memset(second, 0,
sizeof(uint16_t));
713 explicit operator float() const noexcept {
return ToFloat(); }
715 using Base::operator==;
716 using Base::operator!=;
717 using Base::operator<;
720static_assert(
sizeof(Float16_t) ==
sizeof(uint16_t),
"Sizes must match");
854 explicit operator float() const noexcept {
return ToFloat(); }
863static_assert(
sizeof(
BFloat16_t) ==
sizeof(uint16_t),
"Sizes must match");
uint64_t exp(uint64_t base, uint8_t exp) noexcept
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Shared implementation between public and internal classes. CRTP pattern.
static constexpr uint16_t kMaxValueBits
static constexpr uint16_t kNegativeQNaNBits
static constexpr uint16_t kRoundToNearest
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kSignaling_NaNBits
bool IsFinite() const noexcept
Tests if the value is finite.
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
Derived Abs() const noexcept
Creates an instance that represents absolute value.
static constexpr uint16_t kSignMask
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
static constexpr uint16_t kPositiveInfinityBits
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kNegativeInfinityBits
static constexpr uint16_t kMinusOneBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
bool IsNaN() const noexcept
Tests if the value is NaN.
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation.
static constexpr uint16_t kEpsilonBits
static bool AreZero(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity.
float ToFloatImpl() const noexcept
Converts bfloat16 to float.
static constexpr uint16_t kOneBits
bool IsNegative() const noexcept
Checks if the value is negative.
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
static constexpr uint16_t kMinValueBits
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
bfloat16 (Brain Floating Point) data type
static constexpr BFloat16_t FromBits(uint16_t v) noexcept
Explicit conversion to uint16_t representation of bfloat16.
BFloat16_t(float v) noexcept
__ctor from float. Float is converted into bfloat16 16-bit representation.
float ToFloat() const noexcept
Converts bfloat16 to float.
bool operator!=(const BFloat16_t &rhs) const noexcept
bool operator==(const BFloat16_t &rhs) const noexcept
bool operator<(const BFloat16_t &rhs) const noexcept
Shared implementation between public and internal classes. CRTP pattern.
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
bool IsFinite() const noexcept
Tests if the value is finite.
static constexpr uint16_t kSignMask
static constexpr uint16_t kNegativeInfinityBits
float ToFloatImpl() const noexcept
Converts float16 to float.
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kNegativeQNaNBits
static constexpr uint16_t kPositiveInfinityBits
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
static constexpr uint16_t kMinusOneBits
bool operator==(const Float16Impl &rhs) const noexcept
bool IsNegative() const noexcept
Checks if the value is negative.
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kOneBits
static constexpr uint16_t kEpsilonBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
bool operator!=(const Float16Impl &rhs) const noexcept
static constexpr uint16_t kMaxValueBits
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation.
static constexpr uint16_t kMinValueBits
bool IsNaN() const noexcept
Tests if the value is NaN.
bool operator<(const Float16Impl &rhs) const noexcept
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity.
static bool AreZero(const Float16Impl &lhs, const Float16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
Derived Abs() const noexcept
Creates an instance that represents absolute value.
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
static constexpr uint16_t kPositiveQNaNBits
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
IEEE 754 half-precision floating point data type.
float ToFloat() const noexcept
Converts float16 to float.
Float16_t()=default
Default constructor.
static constexpr Float16_t FromBits(uint16_t v) noexcept
Explicit conversion to uint16_t representation of float16.
Float16_t(float v) noexcept
__ctor from float. Float is converted into float16 16-bit representation.