@@ -503,77 +503,132 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     : "=g"(c)
     : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
     : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
+
+    /* Final reduction of r. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 #else
-    uint128_t c;
-    uint64_t c0, c1, c2;
-    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
-    uint64_t m0, m1, m2, m3, m4, m5;
-    uint32_t m6;
-    uint64_t p0, p1, p2, p3;
-    uint32_t p4;
+    uint128_t c, u, v;
+    uint64_t n4 = l[4], n5 = l[5], n6 = l[6], n7 = l[7];
+    uint64_t m0, m1, m2, m3, m4, m5, m6;
+    uint64_t p0, p1, p2, p3, p4;
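+    /* c, u and v are staggered 128-bit accumulators: each step below folds
+     * the low 64 bits of u and v into c, extracts one 64-bit limb from c,
+     * and shifts the remaining carries along. */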
 
     /* Reduce 512 bits into 385. */
     /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
-    c0 = l[0]; c1 = 0; c2 = 0;
-    muladd_fast(n0, SECP256K1_N_C_0);
-    extract_fast(m0);
-    sumadd_fast(l[1]);
-    muladd(n1, SECP256K1_N_C_0);
-    muladd(n0, SECP256K1_N_C_1);
-    extract(m1);
-    sumadd(l[2]);
-    muladd(n2, SECP256K1_N_C_0);
-    muladd(n1, SECP256K1_N_C_1);
-    sumadd(n0);
-    extract(m2);
-    sumadd(l[3]);
-    muladd(n3, SECP256K1_N_C_0);
-    muladd(n2, SECP256K1_N_C_1);
-    sumadd(n1);
-    extract(m3);
-    muladd(n3, SECP256K1_N_C_1);
-    sumadd(n2);
-    extract(m4);
-    sumadd_fast(n3);
-    extract_fast(m5);
-    VERIFY_CHECK(c0 <= 1);
-    m6 = c0;
+    c = (uint128_t)n4 * SECP256K1_N_C_0;
+    c += l[0];
+    m0 = (uint64_t)c; c >>= 64;
+
+    u = (uint128_t)n4 * SECP256K1_N_C_1;
+    u += l[1];
+    c += (uint128_t)n5 * SECP256K1_N_C_0;
+    c += (uint64_t)u; u >>= 64;
+    m1 = (uint64_t)c; c >>= 64;
+
+    c += n4;
+    u += (uint128_t)n5 * SECP256K1_N_C_1;
+    u += l[2];
+    v = (uint128_t)n6 * SECP256K1_N_C_0;
+    c += (uint64_t)u; u >>= 64;
+    c += (uint64_t)v; v >>= 64;
+    m2 = (uint64_t)c; c >>= 64;
+
+    c += n5;
+    u += (uint128_t)n6 * SECP256K1_N_C_1;
+    u += l[3];
+    v += (uint128_t)n7 * SECP256K1_N_C_0;
+    c += (uint64_t)u; u >>= 64;
+    c += (uint64_t)v; v >>= 64;
+    m3 = (uint64_t)c; c >>= 64;
+
+    c += n6;
+    u += (uint128_t)n7 * SECP256K1_N_C_1;
+    c += (uint64_t)u; u >>= 64;
+    c += (uint64_t)v;
+    m4 = (uint64_t)c; c >>= 64;
+
+    c += n7;
+    c += (uint64_t)u;
+    m5 = (uint64_t)c; c >>= 64;
+
+    /* The carry to m6 is 0 or 1; negate it for use as a mask. */
+    m6 = -(uint64_t)c;
+    VERIFY_CHECK(-m6 <= 1);
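+    /* m6 is consumed below as (m6 & SECP256K1_N_C_0) and
+     * (m6 & SECP256K1_N_C_1), adding SECP256K1_N_C exactly when the carry
+     * was set, without a branch; the "- m6" in the computation of p4
+     * supplies the third limb, since SECP256K1_N_C_2 == 1. */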
 
     /* Reduce 385 bits into 258. */
     /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
-    c0 = m0; c1 = 0; c2 = 0;
-    muladd_fast(m4, SECP256K1_N_C_0);
-    extract_fast(p0);
-    sumadd_fast(m1);
-    muladd(m5, SECP256K1_N_C_0);
-    muladd(m4, SECP256K1_N_C_1);
-    extract(p1);
-    sumadd(m2);
-    muladd(m6, SECP256K1_N_C_0);
-    muladd(m5, SECP256K1_N_C_1);
-    sumadd(m4);
-    extract(p2);
-    sumadd_fast(m3);
-    muladd_fast(m6, SECP256K1_N_C_1);
-    sumadd_fast(m5);
-    extract_fast(p3);
-    p4 = c0 + m6;
-    VERIFY_CHECK(p4 <= 2);
+    c = (uint128_t)m4 * SECP256K1_N_C_0;
+    c += m0;
+    p0 = (uint64_t)c; c >>= 64;
+
+    u = (uint128_t)m4 * SECP256K1_N_C_1;
+    u += m1;
+    c += (uint128_t)m5 * SECP256K1_N_C_0;
+    c += (uint64_t)u; u >>= 64;
+    p1 = (uint64_t)c; c >>= 64;
+
+    c += m4;
+    u += (uint128_t)m5 * SECP256K1_N_C_1;
+    u += m2;
+    c += (m6 & SECP256K1_N_C_0);
+    c += (uint64_t)u; u >>= 64;
+    p2 = (uint64_t)c; c >>= 64;
+
+    c += m5;
+    c += (m6 & SECP256K1_N_C_1);
+    c += m3;
+    c += (uint64_t)u;
+    p3 = (uint64_t)c; c >>= 64;
+
+    p4 = (uint64_t)c - m6;
+    VERIFY_CHECK(p4 <= 3);
+
+    /* Effectively add an extra SECP256K1_N_C during the next pass.
+     * Values that would have landed in the range [SECP256K1_N, 2^256)
+     * will instead "wrap" and carry back to p4. */
+    ++p4;
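+    /* The matching --p4 after the next pass undoes this increment. */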
 
     /* Reduce 258 bits into 256. */
     /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
-    c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
-    r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
-    c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
-    r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
-    c += p2 + (uint128_t)p4;
-    r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
-    c += p3;
-    r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
-#endif
+    c = (uint128_t)SECP256K1_N_C_0 * p4;
+    c += p0;
+    p0 = (uint64_t)c; c >>= 64;
+    c += (uint128_t)SECP256K1_N_C_1 * p4;
+    c += p1;
+    p1 = (uint64_t)c; c >>= 64;
+    c += p4;
+    c += p2;
+    p2 = (uint64_t)c; c >>= 64;
+    c += p3;
+    p3 = (uint64_t)c; c >>= 64;
+    VERIFY_CHECK((uint64_t)c <= 1);
+    p4 = (uint64_t)c;
 
-    /* Final reduction of r. */
-    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+    /* Recover the extra SECP256K1_N_C from the previous pass.
+     * If p4 is 1, the decrement turns it into a zero mask and the final
+     * pass is a no-op. If p4 is 0, the decrement creates a UINT64_MAX
+     * mask that enables the addition of SECP256K1_N in the final pass,
+     * which must then produce a final carry, balancing the accounts. */
+    --p4;
+
+    c = p4 & SECP256K1_N_0;
+    c += p0;
+    p0 = (uint64_t)c; c >>= 64;
+    c += p4 & SECP256K1_N_1;
+    c += p1;
+    p1 = (uint64_t)c; c >>= 64;
+    c += p4 & SECP256K1_N_2;
+    c += p2;
+    p2 = (uint64_t)c; c >>= 64;
+    c += p4 & SECP256K1_N_3;
+    c += p3;
+    p3 = (uint64_t)c;
+    VERIFY_CHECK((uint64_t)(c >> 64) + p4 == 0);
+
+    r->d[0] = p0;
+    r->d[1] = p1;
+    r->d[2] = p2;
+    r->d[3] = p3;
+#endif
 }
 
 static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {
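A side note on the carry-to-mask idiom used for m6 and p4 above: negating a 0/1 carry in unsigned 64-bit arithmetic yields an all-zeros or all-ones word, so ANDing a constant against it adds that constant conditionally without a branch. A minimal standalone sketch of the idiom (illustration only, not code from this patch; the constant is the value of SECP256K1_N_C_0):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const uint64_t K = 0x402DA1732FC9BEBFULL; /* value of SECP256K1_N_C_0 */
        uint64_t carry;
        for (carry = 0; carry <= 1; carry++) {
            uint64_t mask = -carry;      /* 0 -> 0, 1 -> 0xFFFFFFFFFFFFFFFF */
            uint64_t term = mask & K;    /* 0 or K, selected branch-free */
            printf("carry=%llu adds %016llx\n",
                   (unsigned long long)carry, (unsigned long long)term);
        }
        return 0;
    }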
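More broadly, all three passes in the #else branch rely on the same identity: with N_C = 2^256 - n, we have 2^256 ≡ N_C (mod n), so the limbs above bit 256 can be folded back into the low limbs by multiplying them by N_C, shrinking the value from 512 to 385 to 258 to 256 bits. A toy analogue of that folding with an 8-bit modulus (a hypothetical sketch for illustration, not code from the library):

    #include <stdint.h>
    #include <stdio.h>

    /* Reduce a 16-bit x modulo an 8-bit n (n close to 2^8), folding the
     * high byte via 2^8 ≡ n_c (mod n), where n_c = 2^8 - n. */
    static uint8_t reduce16(uint16_t x, uint8_t n) {
        uint8_t n_c = (uint8_t)(0u - n);              /* 2^8 - n, mod 2^8 */
        while (x >> 8) {                              /* fold the high byte */
            x = (uint16_t)((x & 0xFF) + (x >> 8) * n_c);
        }
        while (x >= n) x -= n;                        /* final subtraction(s) */
        return (uint8_t)x;
    }

    int main(void) {
        printf("%u\n", reduce16(0xFEDC, 0xEF));       /* 236 == 0xFEDC mod 0xEF */
        return 0;
    }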