@@ -483,77 +483,132 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
: "=g"(c)
: "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
: "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
+
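+ /* This overflow-based final reduction is now kept only on this asm path;
+  * the rewritten C path below produces a fully reduced result by itself. */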
+ /* Final reduction of r. */
+ secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
#else
- uint128_t c;
- uint64_t c0, c1, c2;
- uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
- uint64_t m0, m1, m2, m3, m4, m5;
- uint32_t m6;
- uint64_t p0, p1, p2, p3;
- uint32_t p4;
+ uint128_t c, u, v;
+ uint64_t n4 = l[4], n5 = l[5], n6 = l[6], n7 = l[7];
+ uint64_t m0, m1, m2, m3, m4, m5, m6;
+ uint64_t p0, p1, p2, p3, p4;

/* Reduce 512 bits into 385. */
/* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
- c0 = l[0]; c1 = 0; c2 = 0;
- muladd_fast(n0, SECP256K1_N_C_0);
- extract_fast(m0);
- sumadd_fast(l[1]);
- muladd(n1, SECP256K1_N_C_0);
- muladd(n0, SECP256K1_N_C_1);
- extract(m1);
- sumadd(l[2]);
- muladd(n2, SECP256K1_N_C_0);
- muladd(n1, SECP256K1_N_C_1);
- sumadd(n0);
- extract(m2);
- sumadd(l[3]);
- muladd(n3, SECP256K1_N_C_0);
- muladd(n2, SECP256K1_N_C_1);
- sumadd(n1);
- extract(m3);
- muladd(n3, SECP256K1_N_C_1);
- sumadd(n2);
- extract(m4);
- sumadd_fast(n3);
- extract_fast(m5);
- VERIFY_CHECK(c0 <= 1);
- m6 = c0;
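+ /* The schoolbook products are spread over three 128-bit accumulators:
+  * c builds the current output limb, while u and v defer the
+  * SECP256K1_N_C_1 and SECP256K1_N_C_0 product chains. Only the low 64
+  * bits of u and v are folded into c per limb, so no accumulator can
+  * overflow. */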
+ c = (uint128_t)n4 * SECP256K1_N_C_0;
+ c += l[0];
+ m0 = (uint64_t)c; c >>= 64;
+
+ u = (uint128_t)n4 * SECP256K1_N_C_1;
+ u += l[1];
+ c += (uint128_t)n5 * SECP256K1_N_C_0;
+ c += (uint64_t)u; u >>= 64;
+ m1 = (uint64_t)c; c >>= 64;
+
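+ /* SECP256K1_N_C_2 == 1, so the 2^128 limb of SECP256K1_N_C contributes
+  * each n[k] directly two columns up; here that term is n4. */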
+ c += n4;
+ u += (uint128_t)n5 * SECP256K1_N_C_1;
+ u += l[2];
+ v = (uint128_t)n6 * SECP256K1_N_C_0;
+ c += (uint64_t)u; u >>= 64;
+ c += (uint64_t)v; v >>= 64;
+ m2 = (uint64_t)c; c >>= 64;
+
+ c += n5;
+ u += (uint128_t)n6 * SECP256K1_N_C_1;
+ u += l[3];
+ v += (uint128_t)n7 * SECP256K1_N_C_0;
+ c += (uint64_t)u; u >>= 64;
+ c += (uint64_t)v; v >>= 64;
+ m3 = (uint64_t)c; c >>= 64;
+
+ c += n6;
+ u += (uint128_t)n7 * SECP256K1_N_C_1;
+ c += (uint64_t)u; u >>= 64;
+ c += (uint64_t)v;
+ m4 = (uint64_t)c; c >>= 64;
+
+ c += n7;
+ c += (uint64_t)u;
+ m5 = (uint64_t)c; c >>= 64;
+
+ /* The carry to m6 is 0 or 1; we negate it for use as a mask. */
+ m6 = -(uint64_t)c;
+ VERIFY_CHECK(-m6 <= 1);

/* Reduce 385 bits into 258. */
/* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
- c0 = m0; c1 = 0; c2 = 0;
- muladd_fast(m4, SECP256K1_N_C_0);
- extract_fast(p0);
- sumadd_fast(m1);
- muladd(m5, SECP256K1_N_C_0);
- muladd(m4, SECP256K1_N_C_1);
- extract(p1);
- sumadd(m2);
- muladd(m6, SECP256K1_N_C_0);
- muladd(m5, SECP256K1_N_C_1);
- sumadd(m4);
- extract(p2);
- sumadd_fast(m3);
- muladd_fast(m6, SECP256K1_N_C_1);
- sumadd_fast(m5);
- extract_fast(p3);
- p4 = c0 + m6;
- VERIFY_CHECK(p4 <= 2);
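+ /* m6 is either 0 or UINT64_MAX, so masking with it stands in for the
+  * multiplications by the 0-or-1 top limb m[6]. */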
+ c = (uint128_t)m4 * SECP256K1_N_C_0;
+ c += m0;
+ p0 = (uint64_t)c; c >>= 64;
+
+ u = (uint128_t)m4 * SECP256K1_N_C_1;
+ u += m1;
+ c += (uint128_t)m5 * SECP256K1_N_C_0;
+ c += (uint64_t)u; u >>= 64;
+ p1 = (uint64_t)c; c >>= 64;
+
+ c += m4;
+ u += (uint128_t)m5 * SECP256K1_N_C_1;
+ u += m2;
+ c += (m6 & SECP256K1_N_C_0);
+ c += (uint64_t)u; u >>= 64;
+ p2 = (uint64_t)c; c >>= 64;
+
+ c += m5;
+ c += (m6 & SECP256K1_N_C_1);
+ c += m3;
+ c += (uint64_t)u;
+ p3 = (uint64_t)c; c >>= 64;
+
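+ /* Subtracting the mask m6 adds its 0-or-1 value back into the carry. */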
+ p4 = (uint64_t)c - m6;
+ VERIFY_CHECK(p4 <= 3);
+
+ /* Effectively add an extra SECP256K1_N_C during the next pass.
+  * Values that would have landed in the range [SECP256K1_N, 2^256)
+  * will instead "wrap" and carry back to p4. */
+ ++p4;

/* Reduce 258 bits into 256. */
/* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
- c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
- r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
- c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
- r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
- c += p2 + (uint128_t)p4;
- r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
- c += p3;
- r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+ c = (uint128_t)SECP256K1_N_C_0 * p4;
+ c += p0;
+ p0 = (uint64_t)c; c >>= 64;
+ c += (uint128_t)SECP256K1_N_C_1 * p4;
+ c += p1;
+ p1 = (uint64_t)c; c >>= 64;
+ c += p4;
+ c += p2;
+ p2 = (uint64_t)c; c >>= 64;
+ c += p3;
+ p3 = (uint64_t)c; c >>= 64;
+ VERIFY_CHECK((uint64_t)c <= 1);
+ p4 = (uint64_t)c;
+
+ /* Recover the extra SECP256K1_N_C from the previous pass.
+  * If p4 is 1, it becomes a 0 mask and the final pass is a no-op.
+  * If p4 is 0, the decrement creates a UINT64_MAX mask that enables the
+  * addition of SECP256K1_N in the final pass, which must result in a
+  * final carry, which balances the accounts. */
+ --p4;
+
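+ /* Conditionally add SECP256K1_N: each limb is masked by p4, so the
+  * addition happens only when the mask is all ones. */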
+ c = p4 & SECP256K1_N_0;
+ c += p0;
+ p0 = (uint64_t)c; c >>= 64;
+ c += p4 & SECP256K1_N_1;
+ c += p1;
+ p1 = (uint64_t)c; c >>= 64;
+ c += p4 & SECP256K1_N_2;
+ c += p2;
+ p2 = (uint64_t)c; c >>= 64;
+ c += p4 & SECP256K1_N_3;
+ c += p3;
+ p3 = (uint64_t)c;
+ VERIFY_CHECK((uint64_t)(c >> 64) + p4 == 0);
+
+ r->d[0] = p0;
+ r->d[1] = p1;
+ r->d[2] = p2;
+ r->d[3] = p3;
#endif
-
- /* Final reduction of r. */
- secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}

static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {