WIP f16 fma

tgross35 · tgross35 · commit 3289b058d70b · 2025-01-23T07:23:05.000Z
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
@@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["copysignf128", "fdimf128"],
     ),
+    (
+        // `(f16, f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["fmaf16"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
@@ -188,7 +188,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
-        fma | fmaf => mul_add,
+        fma | fmaf | fmaf16 => mul_add,
         fmax | fmaxf => max,
         fmin | fminf => min,
         lgamma | lgammaf => ln_gamma,
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
@@ -588,6 +588,9 @@ fn int_float_common<F1: Float, F2: Float>(
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
+
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f32, f32, f32),
@@ -609,6 +612,9 @@ impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
     }
 }
 
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
+
 // F1 and F2 are always the same type, this is just to please generics
 fn ternop_common<F1: Float, F2: Float>(
     input: (F1, F1, F1),
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
@@ -89,6 +89,7 @@ libm_macros::for_each_function! {
         fdimf16,
         floorf128,
         floorf16,
+        fmaf16,
         rintf128,
         rintf16,
         sqrtf128,
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
@@ -376,6 +376,12 @@
         ],
         "type": "f32"
     },
+    "fmaf16": {
+        "sources": [
+            "src/math/fmaf16.rs"
+        ],
+        "type": "f16"
+    },
     "fmax": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
@@ -53,6 +53,7 @@ floorf128
 floorf16
 fma
 fmaf
+fmaf16
 fmax
 fmaxf
 fmin
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
@@ -47,6 +47,10 @@ use super::fenv::{
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
+    if true {
+        return super::generic::fma_big::<f32, f64>(x, y, z);
+    }
+
     let xy: f64;
     let mut result: f64;
     let mut ui: u64;
diff --git a/src/math/fmaf16.rs b/src/math/fmaf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
+    super::generic::fma_big::<f16, f32>(x, y, z)
+}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
@@ -0,0 +1,53 @@
+use super::super::fenv::{
+    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
+};
+use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, IntTy, MinInt};
+
+/// Fused multiply-add `x * y + z` for `F`, computed in `B`, the float type of twice the width.
+///
+/// The sum is formed in `B` and narrowed; exact halfway results are first nudged by one ULP
+/// toward the residual error of the `B` addition so the narrowing does not round twice.
+pub fn fma_big<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let zb: B = z.widen();
+    let xy: B = x.widen() * y.widen();
+    let result: B = xy + zb;
+    let mut ui: B::Int = result.to_bits();
+
+    // Biased exponent of `result`; `ui` is a `B` bit pattern, so use `B`'s field layout.
+    let e: i32 = i32::cast_from(ui >> B::SIG_BITS) & B::EXP_MAX as i32;
+
+    // Bits of `result`'s significand that do not fit in `F`, and their exact-halfway pattern.
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: the larger precision is fine if this is not a halfway case, the result is
+    // infinite or NaN, the addition was exact, or the mode is not round-to-nearest.
+    if excess_prec != halfway
+        || e == i32::cast_from(B::EXP_MAX)
+        || (result - xy == zb && result - zb == xy)
+        || fegetround() != FE_TONEAREST
+    {
+        // TODO: feclearexcept
+        return result.narrow();
+    }
+
+    // Halfway case with an inexact sum: step one ULP toward the discarded error.
+    let neg = ui & B::SIGN_MASK > IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    B::from_bits(ui).narrow()
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
@@ -3,6 +3,7 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fma;
 mod rint;
 mod sqrt;
 mod trunc;
@@ -12,6 +13,7 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fma::fma_big;
 pub use rint::rint;
 pub use sqrt::sqrt;
 pub use trunc::trunc;
diff --git a/src/math/mod.rs b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
+use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
@@ -346,6 +346,7 @@ cfg_if! {
         mod fabsf16;
         mod fdimf16;
         mod floorf16;
+        mod fmaf16;
         mod rintf16;
         mod sqrtf16;
         mod truncf16;
@@ -355,6 +356,7 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
+        pub use self::fmaf16::fmaf16;
         pub use self::rintf16::rintf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
@@ -1,4 +1,5 @@
-use core::{fmt, mem, ops};
+use core::ops::{self, Neg};
+use core::{fmt, mem};
 
 use super::int_traits::{CastFrom, Int, MinInt};
 
@@ -23,7 +24,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
@@ -155,7 +158,6 @@ pub trait Float:
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
-#[allow(dead_code)]
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
@@ -355,3 +357,63 @@ mod tests {
         assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
     }
 }
+
+/// Trait for floats twice the bit width of another float.
+#[allow(unused)]
+pub trait DFloat: Float {
+    /// Float that is half the bit width of the float this trait is implemented for.
+    type H: HFloat<D = Self>;
+
+    /// Narrow `self` to the half-width float type `Self::H`.
+    fn narrow(self) -> Self::H;
+}
+
+/// Trait for floats half the bit width of another float.
+#[allow(unused)]
+pub trait HFloat: Float {
+    /// Float that is double the bit width of the float this trait is implemented for.
+    type D: DFloat<H = Self>;
+
+    /// Widen `self` to the double-width float type `Self::D`.
+    fn widen(self) -> Self::D;
+}
+
+macro_rules! impl_d_float {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DFloat for $D {
+                type H = $X;
+
+                fn narrow(self) -> Self::H {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_float {
+    ($($H:ident $X:ident),*) => {
+        $(
+            impl HFloat for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+impl_d_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_d_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_d_float!(f64 f128);
+
+impl_h_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_h_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_h_float!(f64 f128);
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
@@ -90,6 +90,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -148,6 +149,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -397,6 +402,20 @@ macro_rules! cast_into {
     )*};
 }
 
+macro_rules! cast_lossy{
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                unimplemented!("precise casting not available, use `cast_lossy` instead")
+            }
+
+            fn cast_lossy(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
 cast_into!(usize);
 cast_into!(isize);
 cast_into!(u8);
@@ -409,3 +428,28 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+cast_into!(bool; u16);
+cast_into!(bool; u32);
+cast_into!(bool; u64);
+cast_into!(bool; u128);
+
+cast_lossy!(i64; f32, f64);
+cast_lossy!(f32; f64);
+cast_lossy!(f64; f32);
+
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        cast_lossy!(f16; f32, f64);
+        cast_lossy!(f32; f16);
+        cast_lossy!(f64; f16);
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        cast_lossy!(f128; f32, f64);
+        cast_lossy!(f32; f128);
+        cast_lossy!(f64; f128);
+    }
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
@@ -6,7 +6,7 @@ mod hex_float;
 mod int_traits;
 
 #[allow(unused_imports)]
-pub use float_traits::{Float, IntTy};
+pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]
 pub use hex_float::hf16;

-Original file line number
+Diff line change
 floorf16
 fma
 fmaf
 +fmaf16
 fmax
 fmaxf
 fmin