@@ -292,81 +292,75 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
292
292
mac_digit ( & mut acc[ i..] , y, * xi) ;
293
293
}
294
294
} else if x. len ( ) <= 256 {
295
- /*
296
- * Karatsuba multiplication:
297
- *
298
- * The idea is that we break x and y up into two smaller numbers that each have about half
299
- * as many digits, like so (note that multiplying by b is just a shift):
300
- *
301
- * x = x0 + x1 * b
302
- * y = y0 + y1 * b
303
- *
304
- * With some algebra, we can compute x * y with three smaller products, where the inputs to
305
- * each of the smaller products have only about half as many digits as x and y:
306
- *
307
- * x * y = (x0 + x1 * b) * (y0 + y1 * b)
308
- *
309
- * x * y = x0 * y0
310
- * + x0 * y1 * b
311
- * + x1 * y0 * b
312
- * + x1 * y1 * b^2
313
- *
314
- * Let p0 = x0 * y0 and p2 = x1 * y1:
315
- *
316
- * x * y = p0
317
- * + (x0 * y1 + x1 * y0) * b
318
- * + p2 * b^2
319
- *
320
- * The real trick is that middle term:
321
- *
322
- * x0 * y1 + x1 * y0
323
- *
324
- * = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
325
- *
326
- * = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
327
- *
328
- * Now we complete the square:
329
- *
330
- * = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
331
- *
332
- * = -((x1 - x0) * (y1 - y0)) + p0 + p2
333
- *
334
- * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
335
- *
336
- * x * y = p0
337
- * + (p0 + p2 - p1) * b
338
- * + p2 * b^2
339
- *
340
- * Where the three intermediate products are:
341
- *
342
- * p0 = x0 * y0
343
- * p1 = (x1 - x0) * (y1 - y0)
344
- * p2 = x1 * y1
345
- *
346
- * In doing the computation, we take great care to avoid unnecessary temporary variables
347
- * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
348
- * bit so we can use the same temporary variable for all the intermediate products:
349
- *
350
- * x * y = p2 * b^2 + p2 * b
351
- * + p0 * b + p0
352
- * - p1 * b
353
- *
354
- * The other trick we use is instead of doing explicit shifts, we slice acc at the
355
- * appropriate offset when doing the add.
356
- */
357
-
358
- /*
359
- * When x is smaller than y, it's significantly faster to pick b such that x is split in
360
- * half, not y:
361
- */
295
+ // Karatsuba multiplication:
296
+ //
297
+ // The idea is that we break x and y up into two smaller numbers that each have about half
298
+ // as many digits, like so (note that multiplying by b is just a shift):
299
+ //
300
+ // x = x0 + x1 * b
301
+ // y = y0 + y1 * b
302
+ //
303
+ // With some algebra, we can compute x * y with three smaller products, where the inputs to
304
+ // each of the smaller products have only about half as many digits as x and y:
305
+ //
306
+ // x * y = (x0 + x1 * b) * (y0 + y1 * b)
307
+ //
308
+ // x * y = x0 * y0
309
+ // + x0 * y1 * b
310
+ // + x1 * y0 * b
311
+ // + x1 * y1 * b^2
312
+ //
313
+ // Let p0 = x0 * y0 and p2 = x1 * y1:
314
+ //
315
+ // x * y = p0
316
+ // + (x0 * y1 + x1 * y0) * b
317
+ // + p2 * b^2
318
+ //
319
+ // The real trick is that middle term:
320
+ //
321
+ // x0 * y1 + x1 * y0
322
+ //
323
+ // = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
324
+ //
325
+ // = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
326
+ //
327
+ // Now we complete the square:
328
+ //
329
+ // = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
330
+ //
331
+ // = -((x1 - x0) * (y1 - y0)) + p0 + p2
332
+ //
333
+ // Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
334
+ //
335
+ // x * y = p0
336
+ // + (p0 + p2 - p1) * b
337
+ // + p2 * b^2
338
+ //
339
+ // Where the three intermediate products are:
340
+ //
341
+ // p0 = x0 * y0
342
+ // p1 = (x1 - x0) * (y1 - y0)
343
+ // p2 = x1 * y1
344
+ //
345
+ // In doing the computation, we take great care to avoid unnecessary temporary variables
346
+ // (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
347
+ // bit so we can use the same temporary variable for all the intermediate products:
348
+ //
349
+ // x * y = p2 * b^2 + p2 * b
350
+ // + p0 * b + p0
351
+ // - p1 * b
352
+ //
353
+ // The other trick we use is instead of doing explicit shifts, we slice acc at the
354
+ // appropriate offset when doing the add.
355
+
356
+ // When x is smaller than y, it's significantly faster to pick b such that x is split in
357
+ // half, not y:
362
358
let b = x. len ( ) / 2 ;
363
359
let ( x0, x1) = x. split_at ( b) ;
364
360
let ( y0, y1) = y. split_at ( b) ;
365
361
366
- /*
367
- * We reuse the same BigUint for all the intermediate multiplies and have to size p
368
- * appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
369
- */
362
+ // We reuse the same BigUint for all the intermediate multiplies and have to size p
363
+ // appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
370
364
let len = x1. len ( ) + y1. len ( ) + 1 ;
371
365
let mut p = BigUint { data : vec ! [ 0 ; len] } ;
372
366
@@ -676,28 +670,24 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
676
670
} ;
677
671
678
672
for j in ( 0 ..q_len) . rev ( ) {
679
- /*
680
- * When calculating our next guess q0, we don't need to consider the digits below j
681
- * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
682
- * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
683
- * two numbers will be zero in all digits up to (j + b.data.len() - 1).
684
- */
673
+ // When calculating our next guess q0, we don't need to consider the digits below j
674
+ // + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
675
+ // digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
676
+ // two numbers will be zero in all digits up to (j + b.data.len() - 1).
685
677
let offset = j + b. data . len ( ) - 1 ;
686
678
if offset >= a. data . len ( ) {
687
679
continue ;
688
680
}
689
681
690
- /* just avoiding a heap allocation: */
682
+ // just avoiding a heap allocation:
691
683
let mut a0 = tmp;
692
684
a0. data . truncate ( 0 ) ;
693
685
a0. data . extend ( a. data [ offset..] . iter ( ) . cloned ( ) ) ;
694
686
695
- /*
696
- * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
697
- * implicitly at the end, when adding and subtracting to a and q. Not only do we
698
- * save the cost of the shifts, the rest of the arithmetic gets to work with
699
- * smaller numbers.
700
- */
687
+ // q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
688
+ // implicitly at the end, when adding and subtracting to a and q. Not only do we
689
+ // save the cost of the shifts, the rest of the arithmetic gets to work with
690
+ // smaller numbers.
701
691
let ( mut q0, _) = div_rem_digit ( a0, bn) ;
702
692
let mut prod = b * & q0;
703
693
0 commit comments