[libc][math][c++23] Add bfloat16 support in LLVM libc (#144463)

krishna2803 · overmighty · web-flow · commit bb7cea063782 · 2025-07-09T21:26:29.000+02:00
This PR enables support for BFloat16 type in LLVM libc along with
support for testing BFloat16 functions via MPFR.

---------

Signed-off-by: krishna2803 <kpandey81930@gmail.com>
Signed-off-by: Krishna Pandey <kpandey81930@gmail.com>
Co-authored-by: OverMighty <its.overmighty@gmail.com>
diff --git a/libc/src/__support/CPP/type_traits/is_floating_point.h b/libc/src/__support/CPP/type_traits/is_floating_point.h
@@ -36,7 +36,8 @@ template <typename T> struct is_floating_point {
                               ,
                               float128
 #endif
-                              >();
+                              ,
+                              bfloat16>();
 };
 template <typename T>
 LIBC_INLINE_VAR constexpr bool is_floating_point_v =
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -257,4 +257,17 @@ add_header_library(
     libc.src.__support.macros.properties.types
 )
 
+add_header_library(
+  bfloat16 # Header-only target for the BFloat16 type defined in bfloat16.h.
+  HDRS
+    bfloat16.h
+  DEPENDS # One entry per project header that bfloat16.h includes.
+    .cast
+    .dyadic_float
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.macros.config
+    libc.src.__support.macros.properties.types
+)
+
 add_subdirectory(generic)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
@@ -38,6 +38,7 @@ enum class FPType {
   IEEE754_Binary64,
   IEEE754_Binary128,
   X86_Binary80,
+  BFloat16
 };
 
 // The classes hierarchy is as follows:
@@ -138,6 +139,14 @@ template <> struct FPLayout<FPType::X86_Binary80> {
   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1;
 };
 
+template <> struct FPLayout<FPType::BFloat16> { // 1 + 8 + 7 = 16 bits
+  using StorageType = uint16_t;
+  LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
+  LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
+  LIBC_INLINE_VAR static constexpr int SIG_LEN = 7;
+  LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; // all sig bits
+};
+
 // FPStorage derives useful constants from the FPLayout above.
 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
   using UP = FPLayout<fp_type>;
@@ -801,6 +810,8 @@ template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
   else if constexpr (cpp::is_same_v<UnqualT, float128>)
     return FPType::IEEE754_Binary128;
 #endif
+  else if constexpr (cpp::is_same_v<UnqualT, bfloat16>)
+    return FPType::BFloat16;
   else
     static_assert(cpp::always_false<UnqualT>, "Unsupported type");
 }
diff --git a/libc/src/__support/FPUtil/bfloat16.h b/libc/src/__support/FPUtil/bfloat16.h
@@ -0,0 +1,65 @@
+//===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace fputil {
+
+struct BFloat16 {
+  uint16_t bits; // Raw encoding: 1 sign, 8 exponent and 7 fraction bits.
+  LIBC_INLINE BFloat16() = default;
+  // Wraps an existing bit pattern; no numeric conversion takes place.
+  LIBC_INLINE constexpr explicit BFloat16(uint16_t bits) : bits(bits) {}
+  // Numeric conversion from a floating-point, integral or other value.
+  template <typename T> LIBC_INLINE constexpr explicit BFloat16(T value) {
+    if constexpr (cpp::is_floating_point_v<T>) {
+      bits = fputil::cast<bfloat16>(value).bits;
+    } else if constexpr (cpp::is_integral_v<T>) {
+      using UnsignedT = cpp::make_unsigned_t<T>;
+      Sign sign = Sign::POS;
+      UnsignedT magnitude = static_cast<UnsignedT>(value);
+      if constexpr (cpp::is_signed_v<T>) {
+        if (value < 0) {
+          sign = Sign::NEG;
+          // Negate as unsigned: `-value` overflows (UB) for the minimum of T.
+          magnitude = static_cast<UnsignedT>(UnsignedT{0} - magnitude);
+        }
+      }
+      fputil::DyadicFloat<cpp::numeric_limits<UnsignedT>::digits> xd(
+          sign, 0, magnitude);
+      bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;
+    } else {
+      bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
+    }
+  }
+
+  // Widens to float: the 16 stored bits become the upper half of the 32.
+  template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
+                                 fputil::FPType::IEEE754_Binary32,
+                             int> = 0>
+  LIBC_INLINE constexpr operator float() const {
+    uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
+    return cpp::bit_cast<float>(x_bits);
+  }
+}; // struct BFloat16
+
+} // namespace fputil
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h
@@ -26,9 +26,18 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                            cpp::is_floating_point_v<InType>,
                                        OutType>
 cast(InType x) {
+  // Casting to the same type is a no-op.
+  if constexpr (cpp::is_same_v<InType, OutType>)
+    return x;
+
+  // bfloat16 is always defined (for now)
+  if constexpr (cpp::is_same_v<OutType, bfloat16> ||
+                cpp::is_same_v<InType, bfloat16>
 #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
-  if constexpr (cpp::is_same_v<OutType, float16> ||
-                cpp::is_same_v<InType, float16>) {
+                || cpp::is_same_v<OutType, float16> ||
+                cpp::is_same_v<InType, float16>
+#endif
+  ) {
     using InFPBits = FPBits<InType>;
     using InStorageType = typename InFPBits::StorageType;
     using OutFPBits = FPBits<OutType>;
@@ -58,7 +67,6 @@ cast(InType x) {
     DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x);
     return xd.template as<OutType, /*ShouldSignalExceptions=*/true>();
   }
-#endif
 
   return static_cast<OutType>(x);
 }
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
@@ -411,11 +411,14 @@ template <size_t Bits> struct DyadicFloat {
                                             (FPBits<T>::FRACTION_LEN < Bits),
                                         void>>
   LIBC_INLINE constexpr T as() const {
+    if constexpr (cpp::is_same_v<T, bfloat16>
 #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
-    if constexpr (cpp::is_same_v<T, float16>)
-      return generic_as<T, ShouldSignalExceptions>();
+                  || cpp::is_same_v<T, float16>
 #endif
-    return fast_as<T, ShouldSignalExceptions>();
+    )
+      return generic_as<T, ShouldSignalExceptions>();
+    else
+      return fast_as<T, ShouldSignalExceptions>();
   }
 
   template <typename T,
diff --git a/libc/src/__support/macros/properties/types.h b/libc/src/__support/macros/properties/types.h
@@ -10,9 +10,10 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H
 #define LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H
 
-#include "hdr/float_macros.h"                      // LDBL_MANT_DIG
+#include "hdr/float_macros.h"                        // LDBL_MANT_DIG
 #include "include/llvm-libc-macros/float16-macros.h" // LIBC_TYPES_HAS_FLOAT16
 #include "include/llvm-libc-types/float128.h"        // float128
+#include "src/__support/macros/config.h"             // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h"
 #include "src/__support/macros/properties/compiler.h"
 #include "src/__support/macros/properties/cpu_features.h"
@@ -58,4 +59,14 @@ using float16 = _Float16;
 // LIBC_TYPES_HAS_FLOAT128 and 'float128' type are provided by
 // "include/llvm-libc-types/float128.h"
 
+// -- bfloat16 support ---------------------------------------------------------
+
+namespace LIBC_NAMESPACE_DECL {
+namespace fputil {
+struct BFloat16; // Forward declaration only; defined in FPUtil/bfloat16.h.
+} // namespace fputil
+} // namespace LIBC_NAMESPACE_DECL
+
+using bfloat16 = LIBC_NAMESPACE::fputil::BFloat16; // file-scope alias
+
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H
diff --git a/libc/test/src/__support/FPUtil/CMakeLists.txt b/libc/test/src/__support/FPUtil/CMakeLists.txt
@@ -38,3 +38,14 @@ add_fp_unittest(
   DEPENDS
     libc.src.__support.FPUtil.rounding_mode
 )
+
+add_fp_unittest(
+  bfloat16_test # Conversion tests for BFloat16 (bfloat16_test.cpp).
+  NEED_MPFR # The test source includes utils/MPFRWrapper/MPCommon.h.
+  SUITE
+    libc-fputil-tests
+  SRCS
+    bfloat16_test.cpp
+  DEPENDS
+    libc.src.__support.FPUtil.bfloat16
+)
diff --git a/libc/test/src/__support/FPUtil/bfloat16_test.cpp b/libc/test/src/__support/FPUtil/bfloat16_test.cpp
@@ -0,0 +1,69 @@
+//===-- Unit tests for bfloat16 type --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPCommon.h"
+
+using BFloat16 = LIBC_NAMESPACE::fputil::BFloat16;
+using LlvmLibcBfloat16ConversionTest =
+    LIBC_NAMESPACE::testing::FPTest<BFloat16>;
+
+// Raw bfloat16 encodings from +0 (0x0000) to +inf (0x7f80), both inclusive.
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7f80U;
+
+// Raw bfloat16 encodings from -0 (0x8000) to -inf (0xff80), both inclusive.
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xff80U;
+
+using MPFRNumber = LIBC_NAMESPACE::testing::mpfr::MPFRNumber;
+
+TEST_F(LlvmLibcBfloat16ConversionTest, ToFloatPositiveRange) {
+  for (uint16_t bits = POS_START; bits <= POS_STOP; bits++) {
+    BFloat16 bf16_num{bits}; // uint16_t overload: raw bit pattern
+    MPFRNumber mpfr_num{bf16_num};
+
+    // bfloat16 -> float: operator float() must agree with MPFR's float value.
+    float mpfr_float = mpfr_num.as<float>();
+    EXPECT_FP_EQ_ALL_ROUNDING(mpfr_float, static_cast<float>(bf16_num));
+
+    // float -> bfloat16: the templated constructor must agree with MPFR.
+    BFloat16 bf16_from_float{mpfr_float};
+    MPFRNumber mpfr_num_2{mpfr_float};
+    BFloat16 mpfr_bfloat = mpfr_num_2.as<BFloat16>();
+    EXPECT_FP_EQ_ALL_ROUNDING(mpfr_bfloat, bf16_from_float);
+  }
+}
+
+TEST_F(LlvmLibcBfloat16ConversionTest, ToFloatNegativeRange) {
+  for (uint16_t bits = NEG_START; bits <= NEG_STOP; bits++) {
+    BFloat16 bf16_num{bits}; // uint16_t overload: raw bit pattern
+    MPFRNumber mpfr_num{bf16_num};
+
+    // bfloat16 -> float: operator float() must agree with MPFR's float value.
+    float mpfr_float = mpfr_num.as<float>();
+    EXPECT_FP_EQ_ALL_ROUNDING(mpfr_float, static_cast<float>(bf16_num));
+
+    // float -> bfloat16: the templated constructor must agree with MPFR.
+    BFloat16 bf16_from_float{mpfr_float};
+    MPFRNumber mpfr_num_2{mpfr_float};
+    BFloat16 mpfr_bfloat = mpfr_num_2.as<BFloat16>();
+    EXPECT_FP_EQ_ALL_ROUNDING(mpfr_bfloat, bf16_from_float);
+  }
+}
+
+TEST_F(LlvmLibcBfloat16ConversionTest, FromInteger) {
+  constexpr int RANGE = 100'000; // every int in [-RANGE, RANGE] is checked
+  for (int i = -RANGE; i <= RANGE; i++) {
+    BFloat16 mpfr_bfloat = MPFRNumber(i).as<BFloat16>(); // reference value
+    BFloat16 libc_bfloat{i}; // integral branch of the templated constructor
+    EXPECT_FP_EQ_ALL_ROUNDING(mpfr_bfloat, libc_bfloat);
+  }
+}
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -535,3 +535,19 @@ add_fp_unittest(
   LINK_LIBRARIES
     -lpthread
 )
+
+add_fp_unittest(
+  bfloat16_test # Exhaustive float -> bfloat16 conversion test.
+  NO_RUN_POSTBUILD
+  NEED_MPFR # bfloat16_test.cpp checks results against MPFRNumber.
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    bfloat16_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.src.__support.FPUtil.bfloat16
+    libc.src.__support.FPUtil.fp_bits
+  LINK_LIBRARIES
+    -lpthread
+)
diff --git a/libc/test/src/math/exhaustive/bfloat16_test.cpp b/libc/test/src/math/exhaustive/bfloat16_test.cpp
@@ -0,0 +1,70 @@
+//===-- Exhaustive tests for float -> bfloat16 conversion -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "utils/MPFRWrapper/MPCommon.h"
+
+using BFloat16 = LIBC_NAMESPACE::fputil::BFloat16;
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using MPFRNumber = LIBC_NAMESPACE::testing::mpfr::MPFRNumber;
+
+template <typename InType>
+struct Bfloat16ConversionChecker
+    : public virtual LIBC_NAMESPACE::testing::Test {
+  using FloatType = InType;
+  using FPBits = LIBC_NAMESPACE::fputil::FPBits<FloatType>;
+  using StorageType = typename FPBits::StorageType;
+
+  // Converts every bit pattern in [start, stop] to BFloat16 under `rounding`
+  // and returns how many results differ from the MPFR reference.
+  // Slightly modified version of UnaryOpChecker.
+  uint64_t check(StorageType start, StorageType stop,
+                 mpfr::RoundingMode rounding) {
+    mpfr::ForceRoundingMode forced_mode(rounding);
+    // The mode could not be applied: report 1 if stop > start, otherwise 0.
+    if (!forced_mode.success)
+      return stop > start ? 1 : 0;
+
+    uint64_t mismatches = 0;
+    StorageType current = start;
+    // Post-increment in the loop condition keeps `stop` itself in the range.
+    do {
+      const FloatType input = FPBits(current).get_val();
+      const BFloat16 actual{input};
+      const BFloat16 expected = MPFRNumber(input).as<BFloat16>();
+
+      auto matcher = LIBC_NAMESPACE::testing::getMatcher<
+          LIBC_NAMESPACE::testing::TestCond::EQ>(expected);
+      if (!matcher.match(actual))
+        ++mismatches;
+    } while (current++ < stop);
+    return mismatches;
+  }
+};
+
+template <typename FloatType>
+using LlvmLibcBfloat16ExhaustiveTest =
+    LlvmLibcExhaustiveMathTest<Bfloat16ConversionChecker<FloatType>>;
+using LlvmLibcBfloat16FromFloatTest = LlvmLibcBfloat16ExhaustiveTest<float>;
+
+// Positive range: [+0, +Inf], expressed as float bit patterns.
+constexpr uint32_t POS_START = 0x0000'0000U;
+constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+// Negative range: [-0, -Inf]; -0.0f is 0x8000'0000, the first negative value.
+constexpr uint32_t NEG_START = 0x8000'0000U;
+constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcBfloat16FromFloatTest, PostiveRange) { // NOTE(review): "Postive" is a typo for "Positive"; renaming changes the test filter name
+  test_full_range_all_roundings(POS_START, POS_STOP); // helper not shown here; presumably from exhaustive_test.h
+}
+
+TEST_F(LlvmLibcBfloat16FromFloatTest, NegativeRange) {
+  test_full_range_all_roundings(NEG_START, NEG_STOP); // helper not shown here; presumably from exhaustive_test.h
+}
diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt
@@ -13,6 +13,7 @@ if(LIBC_TESTS_CAN_USE_MPFR OR LIBC_TESTS_CAN_USE_MPC)
     libc.src.__support.CPP.string
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.bfloat16
     libc.src.__support.FPUtil.cast
     libc.src.__support.FPUtil.fp_bits
   )
diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp
diff --git a/libc/utils/MPFRWrapper/MPCommon.h b/libc/utils/MPFRWrapper/MPCommon.h