Merge pull request #249 from plugwash/master

Amanieu · web-flow · commit 75e1b59001db · 2022-01-04T22:58:47.000+01:00
diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs
@@ -1,3 +1,4 @@
+#![allow(unreachable_code)]
 use core::f64;
 
 const TOINT: f64 = 1. / f64::EPSILON;
@@ -15,6 +16,24 @@ pub fn ceil(x: f64) -> f64 {
             return unsafe { ::core::intrinsics::ceilf64(x) }
         }
     }
+    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+    {
+        //use an alternative implementation on x86, because the
+        //main implementation fails with the x87 FPU used by
+        //debian i386, probablly due to excess precision issues.
+        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
+        use super::fabs;
+        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
+            let truncated = x as i64 as f64;
+            if truncated < x {
+                return truncated + 1.0;
+            } else {
+                return truncated;
+            }
+        } else {
+            return x;
+        }
+    }
     let u: u64 = x.to_bits();
     let e: i64 = (u >> 52 & 0x7ff) as i64;
     let y: f64;
diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs
@@ -1,3 +1,4 @@
+#![allow(unreachable_code)]
 use core::f64;
 
 const TOINT: f64 = 1. / f64::EPSILON;
@@ -15,6 +16,24 @@ pub fn floor(x: f64) -> f64 {
             return unsafe { ::core::intrinsics::floorf64(x) }
         }
     }
+    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+    {
+        //use an alternative implementation on x86, because the
+        //main implementation fails with the x87 FPU used by
+        //debian i386, probablly due to excess precision issues.
+        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
+        use super::fabs;
+        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
+            let truncated = x as i64 as f64;
+            if truncated > x {
+                return truncated - 1.0;
+            } else {
+                return truncated;
+            }
+        } else {
+            return x;
+        }
+    }
     let ui = x.to_bits();
     let e = ((ui >> 52) & 0x7ff) as i32;
 
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
@@ -218,7 +218,11 @@ mod tests {
             -0.00000000000000022204460492503126,
         );
 
-        assert_eq!(fma(-0.992, -0.992, -0.992), -0.007936000000000007,);
+        let result = fma(-0.992, -0.992, -0.992);
+        //force rounding to storage format on x87 to prevent superious errors.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let result = force_eval!(result);
+        assert_eq!(result, -0.007936000000000007,);
     }
 
     #[test]
diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs
@@ -369,6 +369,12 @@ mod tests {
     }
     #[test]
     fn test_y1f_2002() {
-        assert_eq!(y1f(2.0000002_f32), -0.10703229_f32);
+        //allow slightly different result on x87
+        let res = y1f(2.0000002_f32);
+        if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32)
+        {
+            return;
+        }
+        assert_eq!(res, -0.10703229_f32);
     }
 }
diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
@@ -1,8 +1,6 @@
 macro_rules! force_eval {
     ($e:expr) => {
-        unsafe {
-            ::core::ptr::read_volatile(&$e);
-        }
+        unsafe { ::core::ptr::read_volatile(&$e) }
     };
 }
 
diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs
@@ -484,6 +484,10 @@ mod tests {
                 let exp = expected(*val);
                 let res = computed(*val);
 
+                #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+                let exp = force_eval!(exp);
+                #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+                let res = force_eval!(res);
                 assert!(
                     if exp.is_nan() {
                         res.is_nan()
diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
@@ -50,7 +50,12 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
 
     fn medium(x: f64, ix: u32) -> (i32, f64, f64) {
         /* rint(x/(pi/2)), Assume round-to-nearest. */
-        let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT;
+        let tmp = x as f64 * INV_PIO2 + TO_INT;
+        // force rounding of tmp to it's storage format on x87 to avoid
+        // excess precision issues.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let tmp = force_eval!(tmp);
+        let f_n = tmp - TO_INT;
         let n = f_n as i32;
         let mut r = x - f_n * PIO2_1;
         let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */
@@ -190,20 +195,28 @@ mod tests {
 
     #[test]
     fn test_near_pi() {
+        let arg = 3.141592025756836;
+        let arg = force_eval!(arg);
         assert_eq!(
-            rem_pio2(3.141592025756836),
+            rem_pio2(arg),
             (2, -6.278329573009626e-7, -2.1125998133974653e-23)
         );
+        let arg = 3.141592033207416;
+        let arg = force_eval!(arg);
         assert_eq!(
-            rem_pio2(3.141592033207416),
+            rem_pio2(arg),
             (2, -6.20382377148128e-7, -2.1125998133974653e-23)
         );
+        let arg = 3.141592144966125;
+        let arg = force_eval!(arg);
         assert_eq!(
-            rem_pio2(3.141592144966125),
+            rem_pio2(arg),
             (2, -5.086236681942706e-7, -2.1125998133974653e-23)
         );
+        let arg = 3.141592979431152;
+        let arg = force_eval!(arg);
         assert_eq!(
-            rem_pio2(3.141592979431152),
+            rem_pio2(arg),
             (2, 3.2584135866119817e-7, -2.1125998133974653e-23)
         );
     }
diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs
@@ -43,7 +43,12 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) {
     if ix < 0x4dc90fdb {
         /* |x| ~< 2^28*(pi/2), medium size */
         /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-        let f_n = x64 * INV_PIO2 + TOINT - TOINT;
+        let tmp = x64 * INV_PIO2 + TOINT;
+        // force rounding of tmp to it's storage format on x87 to avoid
+        // excess precision issues.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let tmp = force_eval!(tmp);
+        let f_n = tmp - TOINT;
         return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T);
     }
     if ix >= 0x7f800000 {
diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs
@@ -81,5 +81,8 @@ pub fn sin(x: f64) -> f64 {
 fn test_near_pi() {
     let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
     let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
-    assert_eq!(sin(x), sx);
+    let result = sin(x);
+    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+    let result = force_eval!(result);
+    assert_eq!(result, sx);
 }
diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs
@@ -57,3 +57,77 @@ pub fn sincos(x: f64) -> (f64, f64) {
         _ => (0.0, 1.0),
     }
 }
+
+// These tests are based on those from sincosf.rs
+#[cfg(test)]
+mod tests {
+    use super::sincos;
+
+    const TOLERANCE: f64 = 1e-6;
+
+    #[test]
+    fn with_pi() {
+        let (s, c) = sincos(core::f64::consts::PI);
+        assert!(
+            (s - 0.0).abs() < TOLERANCE,
+            "|{} - {}| = {} >= {}",
+            s,
+            0.0,
+            (s - 0.0).abs(),
+            TOLERANCE
+        );
+        assert!(
+            (c + 1.0).abs() < TOLERANCE,
+            "|{} + {}| = {} >= {}",
+            c,
+            1.0,
+            (s + 1.0).abs(),
+            TOLERANCE
+        );
+    }
+
+    #[test]
+    fn rotational_symmetry() {
+        use core::f64::consts::PI;
+        const N: usize = 24;
+        for n in 0..N {
+            let theta = 2. * PI * (n as f64) / (N as f64);
+            let (s, c) = sincos(theta);
+            let (s_plus, c_plus) = sincos(theta + 2. * PI);
+            let (s_minus, c_minus) = sincos(theta - 2. * PI);
+
+            assert!(
+                (s - s_plus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                s,
+                s_plus,
+                (s - s_plus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (s - s_minus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                s,
+                s_minus,
+                (s - s_minus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (c - c_plus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                c,
+                c_plus,
+                (c - c_plus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (c - c_minus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                c,
+                c_minus,
+                (c - c_minus).abs(),
+                TOLERANCE
+            );
+        }
+    }
+}
diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs
@@ -147,10 +147,38 @@ mod tests {
             let (s_minus, c_minus) = sincosf(theta - 2. * PI);
 
             const TOLERANCE: f32 = 1e-6;
-            assert!((s - s_plus).abs() < TOLERANCE);
-            assert!((s - s_minus).abs() < TOLERANCE);
-            assert!((c - c_plus).abs() < TOLERANCE);
-            assert!((c - c_minus).abs() < TOLERANCE);
+            assert!(
+                (s - s_plus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                s,
+                s_plus,
+                (s - s_plus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (s - s_minus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                s,
+                s_minus,
+                (s - s_minus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (c - c_plus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                c,
+                c_plus,
+                (c - c_plus).abs(),
+                TOLERANCE
+            );
+            assert!(
+                (c - c_minus).abs() < TOLERANCE,
+                "|{} - {}| = {} >= {}",
+                c,
+                c_minus,
+                (c - c_minus).abs(),
+                TOLERANCE
+            );
         }
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -369,6 +369,12 @@ mod tests {`
`369`	`369`	`}`
`370`	`370`	`#[test]`
`371`	`371`	`fn test_y1f_2002() {`
`372`		`- assert_eq!(y1f(2.0000002_f32), -0.10703229_f32);`
	`372`	`+ //allow slightly different result on x87`
	`373`	`+ let res = y1f(2.0000002_f32);`
	`374`	`+ if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32)`
	`375`	`+ {`
	`376`	`+ return;`
	`377`	`+ }`
	`378`	`+ assert_eq!(res, -0.10703229_f32);`
`373`	`379`	`}`
`374`	`380`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,6 @@`
`1`	`1`	`macro_rules! force_eval {`
`2`	`2`	`($e:expr) => {`
`3`		`- unsafe {`
`4`		`- ::core::ptr::read_volatile(&$e);`
`5`		`- }`
	`3`	`+ unsafe { ::core::ptr::read_volatile(&$e) }`
`6`	`4`	`};`
`7`	`5`	`}`
`8`	`6`