Skip to content

Commit b3d48f4

Browse files
bors[bot], ejmahler, cuviper
authored
Merge #141
141: Use the _addcarry and _subborrow intrinsics when available r=cuviper a=ejmahler When compiling for x86_64, with "u64_digit" enabled, some benchmarks are improved by using `_addcarry_u64` instead of the custom-written `adc` function, and using `_subborrow_u64` instead of the custom-written `sbb` function. The fib and fib2 benchmarks improved the most, most benchmarks improved a little, and a few were worse within the margin of error. The only benchmark that did legitimately worse was the `gcd_euclid` family, but there's a comment after those benchmarks saying `// Integer for BigUint now uses Stein for gcd`. The Stein benchmarks showed improvements with this change. Looking at the generated assembly, it was generating adcq instructions both before and after the change, but post-change the code using adc is a little shorter. It's possible that the intrinsic provided just enough of a hint to the compiler that it was able to optimize some things away. The compiler wasn't generating sbb instructions at all, so this adds them -- and one nice thing is that this change eliminates signed->unsigned conversions. Let me know if you'd prefer a different way to organize the platform-specific code. Co-authored-by: Elliott Mahler <join.together@gmail.com> Co-authored-by: Josh Stone <cuviper@gmail.com>
2 parents cf299cf + e03bbc1 commit b3d48f4

File tree

2 files changed

+93
-28
lines changed

2 files changed

+93
-28
lines changed

build.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,31 @@ use std::path::Path;
66

77
fn main() {
88
let pointer_width = env::var("CARGO_CFG_TARGET_POINTER_WIDTH");
9-
if pointer_width.as_ref().map(String::as_str) == Ok("64") {
9+
let u64_digit = pointer_width.as_ref().map(String::as_str) == Ok("64");
10+
if u64_digit {
1011
autocfg::emit("u64_digit");
1112
}
1213
let ac = autocfg::new();
13-
if ac.probe_path("std::convert::TryFrom") || ac.probe_path("core::convert::TryFrom") {
14+
let std = if ac.probe_sysroot_crate("std") {
15+
"std"
16+
} else {
17+
"core"
18+
};
19+
if ac.probe_path(&format!("{}::convert::TryFrom", std)) {
1420
autocfg::emit("has_try_from");
1521
}
1622

23+
if let Ok(target_arch) = env::var("CARGO_CFG_TARGET_ARCH") {
24+
if target_arch == "x86_64" || target_arch == "x86" {
25+
let digit = if u64_digit { "u64" } else { "u32" };
26+
27+
let addcarry = format!("{}::arch::{}::_addcarry_{}", std, target_arch, digit);
28+
if ac.probe_path(&addcarry) {
29+
autocfg::emit("use_addcarry");
30+
}
31+
}
32+
}
33+
1734
autocfg::rerun_path("build.rs");
1835

1936
write_radix_bases().unwrap();

src/algorithms.rs

Lines changed: 74 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,80 @@ use core::iter::repeat;
55
use core::mem;
66
use num_traits::{One, PrimInt, Zero};
77

8+
#[cfg(all(use_addcarry, target_arch = "x86_64"))]
9+
use core::arch::x86_64 as arch;
10+
11+
#[cfg(all(use_addcarry, target_arch = "x86"))]
12+
use core::arch::x86 as arch;
13+
814
use crate::biguint::biguint_from_vec;
915
use crate::biguint::BigUint;
1016

1117
use crate::bigint::BigInt;
1218
use crate::bigint::Sign;
1319
use crate::bigint::Sign::{Minus, NoSign, Plus};
1420

15-
use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
21+
use crate::big_digit::{self, BigDigit, DoubleBigDigit};
1622

17-
// Generic functions for add/subtract/multiply with carry/borrow:
23+
// only needed for the fallback implementation of `sbb`
24+
#[cfg(not(use_addcarry))]
25+
use crate::big_digit::SignedDoubleBigDigit;
26+
27+
// Generic functions for add/subtract/multiply with carry/borrow. These are specialized
28+
// for some platforms to take advantage of intrinsics, etc.
1829

1930
// Add with carry:
31+
#[cfg(all(use_addcarry, u64_digit))]
2032
#[inline]
21-
fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit {
22-
*acc += DoubleBigDigit::from(a);
23-
*acc += DoubleBigDigit::from(b);
24-
let lo = *acc as BigDigit;
25-
*acc >>= big_digit::BITS;
26-
lo
33+
fn adc(carry: u8, a: u64, b: u64, out: &mut u64) -> u8 {
34+
// Safety: There are absolutely no safety concerns with calling `_addcarry_u64`.
35+
// It's just unsafe for API consistency with other intrinsics.
36+
unsafe { arch::_addcarry_u64(carry, a, b, out) }
37+
}
38+
39+
#[cfg(all(use_addcarry, not(u64_digit)))]
40+
#[inline]
41+
fn adc(carry: u8, a: u32, b: u32, out: &mut u32) -> u8 {
42+
// Safety: There are absolutely no safety concerns with calling `_addcarry_u32`.
43+
// It's just unsafe for API consistency with other intrinsics.
44+
unsafe { arch::_addcarry_u32(carry, a, b, out) }
45+
}
46+
47+
// fallback for environments where we don't have an addcarry intrinsic
48+
#[cfg(not(use_addcarry))]
49+
#[inline]
50+
fn adc(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 {
51+
let sum = DoubleBigDigit::from(a) + DoubleBigDigit::from(b) + DoubleBigDigit::from(carry);
52+
*out = sum as BigDigit;
53+
(sum >> big_digit::BITS) as u8
2754
}
2855

2956
// Subtract with borrow:
57+
#[cfg(all(use_addcarry, u64_digit))]
3058
#[inline]
31-
fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit {
32-
*acc += SignedDoubleBigDigit::from(a);
33-
*acc -= SignedDoubleBigDigit::from(b);
34-
let lo = *acc as BigDigit;
35-
*acc >>= big_digit::BITS;
36-
lo
59+
fn sbb(borrow: u8, a: u64, b: u64, out: &mut u64) -> u8 {
60+
// Safety: There are absolutely no safety concerns with calling `_subborrow_u64`.
61+
// It's just unsafe for API consistency with other intrinsics.
62+
unsafe { arch::_subborrow_u64(borrow, a, b, out) }
63+
}
64+
65+
#[cfg(all(use_addcarry, not(u64_digit)))]
66+
#[inline]
67+
fn sbb(borrow: u8, a: u32, b: u32, out: &mut u32) -> u8 {
68+
// Safety: There are absolutely no safety concerns with calling `_subborrow_u32`.
69+
// It's just unsafe for API consistency with other intrinsics.
70+
unsafe { arch::_subborrow_u32(borrow, a, b, out) }
71+
}
72+
73+
// fallback for environments where we don't have a subborrow intrinsic
74+
#[cfg(not(use_addcarry))]
75+
#[inline]
76+
fn sbb(borrow: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 {
77+
let difference = SignedDoubleBigDigit::from(a)
78+
- SignedDoubleBigDigit::from(b)
79+
- SignedDoubleBigDigit::from(borrow);
80+
*out = difference as BigDigit;
81+
u8::from(difference < 0)
3782
}
3883

3984
#[inline]
@@ -140,12 +185,12 @@ pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
140185
let (a_lo, a_hi) = a.split_at_mut(b.len());
141186

142187
for (a, b) in a_lo.iter_mut().zip(b) {
143-
*a = adc(*a, *b, &mut carry);
188+
carry = adc(carry, *a, *b, a);
144189
}
145190

146191
if carry != 0 {
147192
for a in a_hi {
148-
*a = adc(*a, 0, &mut carry);
193+
carry = adc(carry, *a, 0, a);
149194
if carry == 0 {
150195
break;
151196
}
@@ -174,12 +219,12 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
174219
let (b_lo, b_hi) = b.split_at(len);
175220

176221
for (a, b) in a_lo.iter_mut().zip(b_lo) {
177-
*a = sbb(*a, *b, &mut borrow);
222+
borrow = sbb(borrow, *a, *b, a);
178223
}
179224

180225
if borrow != 0 {
181226
for a in a_hi {
182-
*a = sbb(*a, 0, &mut borrow);
227+
borrow = sbb(borrow, *a, 0, a);
183228
if borrow == 0 {
184229
break;
185230
}
@@ -195,16 +240,16 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
195240

196241
// Only for the Sub impl. `a` and `b` must have same length.
197242
#[inline]
198-
pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit {
243+
pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> u8 {
199244
debug_assert!(b.len() == a.len());
200245

201246
let mut borrow = 0;
202247

203248
for (ai, bi) in a.iter().zip(b) {
204-
*bi = sbb(*ai, *bi, &mut borrow);
249+
borrow = sbb(borrow, *ai, *bi, bi);
205250
}
206251

207-
borrow as BigDigit
252+
borrow
208253
}
209254

210255
pub(crate) fn sub2rev(a: &[BigDigit], b: &mut [BigDigit]) {
@@ -259,11 +304,14 @@ pub(crate) fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
259304
*a = mac_with_carry(*a, b, c, &mut carry);
260305
}
261306

262-
let mut a = a_hi.iter_mut();
263-
while carry != 0 {
264-
let a = a.next().expect("carry overflow during multiplication!");
265-
*a = adc(*a, 0, &mut carry);
266-
}
307+
let (carry_hi, carry_lo) = big_digit::from_doublebigdigit(carry);
308+
309+
let final_carry = if carry_hi == 0 {
310+
__add2(a_hi, &[carry_lo])
311+
} else {
312+
__add2(a_hi, &[carry_hi, carry_lo])
313+
};
314+
assert_eq!(final_carry, 0, "carry overflow during multiplication!");
267315
}
268316

269317
/// Subtract a multiple.

0 commit comments

Comments
 (0)