Skip to content

Commit 2fab5e4

Browse files
bors[bot]cuviper
andauthored
Merge #167 #168
167: Rewrite the internal `unsigned_abs` r=cuviper a=cuviper This was motivated by the new nightly `unsigned_abs()` inherent method on primitives, which is currently raising a future-compatibility warning that it will take precedence in the future. That would actually be fine, since they do the same thing, but the warning is annoying in the meantime. So ours is now `uabs()`, and I also added a new `checked_uabs()` that cleans up the branching a bit. 168: Normalize the comment style r=cuviper a=cuviper Co-authored-by: Josh Stone <cuviper@gmail.com>
3 parents da996d1 + 7da4610 + 8040acf commit 2fab5e4

File tree

3 files changed

+219
-255
lines changed

3 files changed

+219
-255
lines changed

src/algorithms.rs

Lines changed: 74 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -292,81 +292,75 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
292292
mac_digit(&mut acc[i..], y, *xi);
293293
}
294294
} else if x.len() <= 256 {
295-
/*
296-
* Karatsuba multiplication:
297-
*
298-
* The idea is that we break x and y up into two smaller numbers that each have about half
299-
* as many digits, like so (note that multiplying by b is just a shift):
300-
*
301-
* x = x0 + x1 * b
302-
* y = y0 + y1 * b
303-
*
304-
* With some algebra, we can compute x * y with three smaller products, where the inputs to
305-
* each of the smaller products have only about half as many digits as x and y:
306-
*
307-
* x * y = (x0 + x1 * b) * (y0 + y1 * b)
308-
*
309-
* x * y = x0 * y0
310-
* + x0 * y1 * b
311-
* + x1 * y0 * b
312-
* + x1 * y1 * b^2
313-
*
314-
* Let p0 = x0 * y0 and p2 = x1 * y1:
315-
*
316-
* x * y = p0
317-
* + (x0 * y1 + x1 * y0) * b
318-
* + p2 * b^2
319-
*
320-
* The real trick is that middle term:
321-
*
322-
* x0 * y1 + x1 * y0
323-
*
324-
* = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
325-
*
326-
* = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
327-
*
328-
* Now we complete the square:
329-
*
330-
* = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
331-
*
332-
* = -((x1 - x0) * (y1 - y0)) + p0 + p2
333-
*
334-
* Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
335-
*
336-
* x * y = p0
337-
* + (p0 + p2 - p1) * b
338-
* + p2 * b^2
339-
*
340-
* Where the three intermediate products are:
341-
*
342-
* p0 = x0 * y0
343-
* p1 = (x1 - x0) * (y1 - y0)
344-
* p2 = x1 * y1
345-
*
346-
* In doing the computation, we take great care to avoid unnecessary temporary variables
347-
* (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
348-
* bit so we can use the same temporary variable for all the intermediate products:
349-
*
350-
* x * y = p2 * b^2 + p2 * b
351-
* + p0 * b + p0
352-
* - p1 * b
353-
*
354-
* The other trick we use is instead of doing explicit shifts, we slice acc at the
355-
* appropriate offset when doing the add.
356-
*/
357-
358-
/*
359-
* When x is smaller than y, it's significantly faster to pick b such that x is split in
360-
* half, not y:
361-
*/
295+
// Karatsuba multiplication:
296+
//
297+
// The idea is that we break x and y up into two smaller numbers that each have about half
298+
// as many digits, like so (note that multiplying by b is just a shift):
299+
//
300+
// x = x0 + x1 * b
301+
// y = y0 + y1 * b
302+
//
303+
// With some algebra, we can compute x * y with three smaller products, where the inputs to
304+
// each of the smaller products have only about half as many digits as x and y:
305+
//
306+
// x * y = (x0 + x1 * b) * (y0 + y1 * b)
307+
//
308+
// x * y = x0 * y0
309+
// + x0 * y1 * b
310+
// + x1 * y0 * b
311+
// + x1 * y1 * b^2
312+
//
313+
// Let p0 = x0 * y0 and p2 = x1 * y1:
314+
//
315+
// x * y = p0
316+
// + (x0 * y1 + x1 * y0) * b
317+
// + p2 * b^2
318+
//
319+
// The real trick is that middle term:
320+
//
321+
// x0 * y1 + x1 * y0
322+
//
323+
// = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
324+
//
325+
// = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
326+
//
327+
// Now we complete the square:
328+
//
329+
// = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
330+
//
331+
// = -((x1 - x0) * (y1 - y0)) + p0 + p2
332+
//
333+
// Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
334+
//
335+
// x * y = p0
336+
// + (p0 + p2 - p1) * b
337+
// + p2 * b^2
338+
//
339+
// Where the three intermediate products are:
340+
//
341+
// p0 = x0 * y0
342+
// p1 = (x1 - x0) * (y1 - y0)
343+
// p2 = x1 * y1
344+
//
345+
// In doing the computation, we take great care to avoid unnecessary temporary variables
346+
// (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
347+
// bit so we can use the same temporary variable for all the intermediate products:
348+
//
349+
// x * y = p2 * b^2 + p2 * b
350+
// + p0 * b + p0
351+
// - p1 * b
352+
//
353+
// The other trick we use is instead of doing explicit shifts, we slice acc at the
354+
// appropriate offset when doing the add.
355+
356+
// When x is smaller than y, it's significantly faster to pick b such that x is split in
357+
// half, not y:
362358
let b = x.len() / 2;
363359
let (x0, x1) = x.split_at(b);
364360
let (y0, y1) = y.split_at(b);
365361

366-
/*
367-
* We reuse the same BigUint for all the intermediate multiplies and have to size p
368-
* appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
369-
*/
362+
// We reuse the same BigUint for all the intermediate multiplies and have to size p
363+
// appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
370364
let len = x1.len() + y1.len() + 1;
371365
let mut p = BigUint { data: vec![0; len] };
372366

@@ -676,28 +670,24 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
676670
};
677671

678672
for j in (0..q_len).rev() {
679-
/*
680-
* When calculating our next guess q0, we don't need to consider the digits below j
681-
* + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
682-
* digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
683-
* two numbers will be zero in all digits up to (j + b.data.len() - 1).
684-
*/
673+
// When calculating our next guess q0, we don't need to consider the digits below j
674+
// + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
675+
// digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
676+
// two numbers will be zero in all digits up to (j + b.data.len() - 1).
685677
let offset = j + b.data.len() - 1;
686678
if offset >= a.data.len() {
687679
continue;
688680
}
689681

690-
/* just avoiding a heap allocation: */
682+
// just avoiding a heap allocation:
691683
let mut a0 = tmp;
692684
a0.data.truncate(0);
693685
a0.data.extend(a.data[offset..].iter().cloned());
694686

695-
/*
696-
* q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
697-
* implicitly at the end, when adding and subtracting to a and q. Not only do we
698-
* save the cost of the shifts, the rest of the arithmetic gets to work with
699-
* smaller numbers.
700-
*/
687+
// q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
688+
// implicitly at the end, when adding and subtracting to a and q. Not only do we
689+
// save the cost of the shifts, the rest of the arithmetic gets to work with
690+
// smaller numbers.
701691
let (mut q0, _) = div_rem_digit(a0, bn);
702692
let mut prod = b * &q0;
703693

0 commit comments

Comments
 (0)