Moved the platform-specific code to adc and sbb, added a build.rs entry

ejmahler · cuviper · commit 0be00d814741 · 2020-10-30T11:52:38.000-07:00
diff --git a/build.rs b/build.rs
@@ -6,14 +6,19 @@ use std::path::Path;
 
 fn main() {
     let pointer_width = env::var("CARGO_CFG_TARGET_POINTER_WIDTH");
-    if pointer_width.as_ref().map(String::as_str) == Ok("64") {
+    let u64_digit = pointer_width.as_ref().map(String::as_str) == Ok("64");
+    if u64_digit {
         autocfg::emit("u64_digit");
     }
     let ac = autocfg::new();
     if ac.probe_path("std::convert::TryFrom") || ac.probe_path("core::convert::TryFrom") {
         autocfg::emit("has_try_from");
     }
 
+    if u64_digit && (ac.probe_path("core::arch::x86_64::_addcarry_u64") || ac.probe_path("std::arch::x86_64::_addcarry_u64")) {
+        autocfg::emit("use_addcarry_u64");
+    }
+
     autocfg::rerun_path("build.rs");
 
     write_radix_bases().unwrap();
diff --git a/src/algorithms.rs b/src/algorithms.rs
@@ -12,30 +12,42 @@ use crate::bigint::BigInt;
 use crate::bigint::Sign;
 use crate::bigint::Sign::{Minus, NoSign, Plus};
 
-use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
+use crate::big_digit::{self, BigDigit, DoubleBigDigit};
+
+#[cfg(not(use_addcarry_u64))] // only needed for the fallback implementation of `sbb`
+use crate::big_digit::SignedDoubleBigDigit;
 
 // Generic functions for add/subtract/multiply with carry/borrow:
 
 // Add with carry:
-#[allow(unused)]
+#[cfg(use_addcarry_u64)]
 #[inline]
-fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit {
-    *acc += DoubleBigDigit::from(a);
-    *acc += DoubleBigDigit::from(b);
-    let lo = *acc as BigDigit;
-    *acc >>= big_digit::BITS;
-    lo
+fn adc(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 {
+    unsafe { core::arch::x86_64::_addcarry_u64(carry, a, b, out) }
+}
+
+#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic
+#[inline]
+fn adc(mut carry: DoubleBigDigit, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> DoubleBigDigit {
+    carry += DoubleBigDigit::from(a);
+    carry += DoubleBigDigit::from(b);
+    *out = carry as BigDigit;
+    carry >> big_digit::BITS
 }
 
 // Subtract with borrow:
-#[allow(unused)]
+#[cfg(use_addcarry_u64)]
 #[inline]
-fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit {
-    *acc += SignedDoubleBigDigit::from(a);
-    *acc -= SignedDoubleBigDigit::from(b);
-    let lo = *acc as BigDigit;
-    *acc >>= big_digit::BITS;
-    lo
+fn sbb(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 {
+    unsafe { core::arch::x86_64::_subborrow_u64(carry, a, b, out) }
+}
+#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic
+#[inline]
+fn sbb(mut carry: SignedDoubleBigDigit, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> SignedDoubleBigDigit {
+    carry += SignedDoubleBigDigit::from(a);
+    carry -= SignedDoubleBigDigit::from(b);
+    *out = carry as BigDigit;
+    carry >> big_digit::BITS
 }
 
 #[inline]
@@ -134,41 +146,6 @@ pub(crate) fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit {
 /// the addition first hoping that it will fit.
 ///
 /// The caller _must_ ensure that `a` is at least as long as `b`.
-#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits
-#[inline]
-pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
-    debug_assert!(a.len() >= b.len());
-
-    use core::arch::x86_64::_addcarry_u64;
-
-    let mut carry = 0;
-    let (a_lo, a_hi) = a.split_at_mut(b.len());
-
-    for (a, b) in a_lo.iter_mut().zip(b) {
-        // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics
-        carry = unsafe { _addcarry_u64(carry, *a, *b, a) };
-    }
-
-    if carry != 0 {
-        for a in a_hi {
-            // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics
-            carry = unsafe { _addcarry_u64(carry, *a, 0, a) };
-            if carry == 0 {
-                break;
-            }
-        }
-    }
-
-    carry as BigDigit
-}
-
-/// Two argument addition of raw slices, `a += b`, returning the carry.
-///
-/// This is used when the data `Vec` might need to resize to push a non-zero carry, so we perform
-/// the addition first hoping that it will fit.
-///
-/// The caller _must_ ensure that `a` is at least as long as `b`.
-#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64
 #[inline]
 pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
     debug_assert!(a.len() >= b.len());
@@ -177,12 +154,12 @@ pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
     let (a_lo, a_hi) = a.split_at_mut(b.len());
 
     for (a, b) in a_lo.iter_mut().zip(b) {
-        *a = adc(*a, *b, &mut carry);
+        carry = adc(carry, *a, *b, a);
     }
 
     if carry != 0 {
         for a in a_hi {
-            *a = adc(*a, 0, &mut carry);
+            carry = adc(carry, *a, 0, a);
             if carry == 0 {
                 break;
             }
@@ -203,39 +180,6 @@ pub(crate) fn add2(a: &mut [BigDigit], b: &[BigDigit]) {
     debug_assert!(carry == 0);
 }
 
-#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits
-pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
-    use core::arch::x86_64::_subborrow_u64;
-
-    let mut borrow = 0;
-
-    let len = cmp::min(a.len(), b.len());
-    let (a_lo, a_hi) = a.split_at_mut(len);
-    let (b_lo, b_hi) = b.split_at(len);
-
-    for (a, b) in a_lo.iter_mut().zip(b_lo) {
-        // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics
-        borrow = unsafe { _subborrow_u64(borrow, *a, *b, a) };
-    }
-
-    if borrow != 0 {
-        for a in a_hi {
-            // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics
-            borrow = unsafe { _subborrow_u64(borrow, *a, 0, a) };
-            if borrow == 0 {
-                break;
-            }
-        }
-    }
-
-    // note: we're _required_ to fail on underflow
-    assert!(
-        borrow == 0 && b_hi.iter().all(|x| *x == 0),
-        "Cannot subtract b from a because b is larger than a."
-    );
-}
-
-#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64
 pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
     let mut borrow = 0;
 
@@ -244,12 +188,12 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
     let (b_lo, b_hi) = b.split_at(len);
 
     for (a, b) in a_lo.iter_mut().zip(b_lo) {
-        *a = sbb(*a, *b, &mut borrow);
+        borrow = sbb(borrow, *a, *b, a);
     }
 
     if borrow != 0 {
         for a in a_hi {
-            *a = sbb(*a, 0, &mut borrow);
+            borrow = sbb(borrow, *a, 0, a);
             if borrow == 0 {
                 break;
             }
@@ -264,32 +208,14 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
 }
 
 // Only for the Sub impl. `a` and `b` must have same length.
-#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits
-#[inline]
-pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit {
-    use core::arch::x86_64::_subborrow_u64;
-    debug_assert!(b.len() == a.len());
-
-    let mut borrow = 0;
-
-    for (ai, bi) in a.iter().zip(b) {
-        // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics
-        borrow = unsafe { _subborrow_u64(borrow, *ai, *bi, bi) };
-    }
-
-    borrow as BigDigit
-}
-
-// Only for the Sub impl. `a` and `b` must have same length.
-#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64
 #[inline]
 pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit {
     debug_assert!(b.len() == a.len());
 
     let mut borrow = 0;
 
     for (ai, bi) in a.iter().zip(b) {
-        *bi = sbb(*ai, *bi, &mut borrow);
+        borrow = sbb(borrow, *ai, *bi, bi);
     }
 
     borrow as BigDigit