Skip to content

Commit 5c70a50

Browse files
committed
Rewrite _scalar_reduce_512
1 parent f371201 commit 5c70a50

File tree

1 file changed

+115
-60
lines changed

1 file changed

+115
-60
lines changed

src/scalar_4x64_impl.h

Lines changed: 115 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -503,77 +503,132 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
503503
: "=g"(c)
504504
: "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
505505
: "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
506+
507+
/* Final reduction of r. */
508+
secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
506509
#else
507-
uint128_t c;
508-
uint64_t c0, c1, c2;
509-
uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
510-
uint64_t m0, m1, m2, m3, m4, m5;
511-
uint32_t m6;
512-
uint64_t p0, p1, p2, p3;
513-
uint32_t p4;
510+
uint128_t c, u, v;
511+
uint64_t n4 = l[4], n5 = l[5], n6 = l[6], n7 = l[7];
512+
uint64_t m0, m1, m2, m3, m4, m5, m6;
513+
uint64_t p0, p1, p2, p3, p4;
514514

515515
/* Reduce 512 bits into 385. */
516516
/* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
517-
c0 = l[0]; c1 = 0; c2 = 0;
518-
muladd_fast(n0, SECP256K1_N_C_0);
519-
extract_fast(m0);
520-
sumadd_fast(l[1]);
521-
muladd(n1, SECP256K1_N_C_0);
522-
muladd(n0, SECP256K1_N_C_1);
523-
extract(m1);
524-
sumadd(l[2]);
525-
muladd(n2, SECP256K1_N_C_0);
526-
muladd(n1, SECP256K1_N_C_1);
527-
sumadd(n0);
528-
extract(m2);
529-
sumadd(l[3]);
530-
muladd(n3, SECP256K1_N_C_0);
531-
muladd(n2, SECP256K1_N_C_1);
532-
sumadd(n1);
533-
extract(m3);
534-
muladd(n3, SECP256K1_N_C_1);
535-
sumadd(n2);
536-
extract(m4);
537-
sumadd_fast(n3);
538-
extract_fast(m5);
539-
VERIFY_CHECK(c0 <= 1);
540-
m6 = c0;
517+
c = (uint128_t)n4 * SECP256K1_N_C_0;
518+
c += l[0];
519+
m0 = (uint64_t)c; c >>= 64;
520+
521+
u = (uint128_t)n4 * SECP256K1_N_C_1;
522+
u += l[1];
523+
c += (uint128_t)n5 * SECP256K1_N_C_0;
524+
c += (uint64_t)u; u >>= 64;
525+
m1 = (uint64_t)c; c >>= 64;
526+
527+
c += n4;
528+
u += (uint128_t)n5 * SECP256K1_N_C_1;
529+
u += l[2];
530+
v = (uint128_t)n6 * SECP256K1_N_C_0;
531+
c += (uint64_t)u; u >>= 64;
532+
c += (uint64_t)v; v >>= 64;
533+
m2 = (uint64_t)c; c >>= 64;
534+
535+
c += n5;
536+
u += (uint128_t)n6 * SECP256K1_N_C_1;
537+
u += l[3];
538+
v += (uint128_t)n7 * SECP256K1_N_C_0;
539+
c += (uint64_t)u; u >>= 64;
540+
c += (uint64_t)v; v >>= 64;
541+
m3 = (uint64_t)c; c >>= 64;
542+
543+
c += n6;
544+
u += (uint128_t)n7 * SECP256K1_N_C_1;
545+
c += (uint64_t)u; u >>= 64;
546+
c += (uint64_t)v;
547+
m4 = (uint64_t)c; c >>= 64;
548+
549+
c += n7;
550+
c += (uint64_t)u;
551+
m5 = (uint64_t)c; c >>= 64;
552+
553+
/* The carry to m6 is 0 or 1, we negate it for use as a mask. */
554+
m6 = -(uint64_t)c;
555+
VERIFY_CHECK(-m6 <= 1);
541556

542557
/* Reduce 385 bits into 258. */
543558
/* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
544-
c0 = m0; c1 = 0; c2 = 0;
545-
muladd_fast(m4, SECP256K1_N_C_0);
546-
extract_fast(p0);
547-
sumadd_fast(m1);
548-
muladd(m5, SECP256K1_N_C_0);
549-
muladd(m4, SECP256K1_N_C_1);
550-
extract(p1);
551-
sumadd(m2);
552-
muladd(m6, SECP256K1_N_C_0);
553-
muladd(m5, SECP256K1_N_C_1);
554-
sumadd(m4);
555-
extract(p2);
556-
sumadd_fast(m3);
557-
muladd_fast(m6, SECP256K1_N_C_1);
558-
sumadd_fast(m5);
559-
extract_fast(p3);
560-
p4 = c0 + m6;
561-
VERIFY_CHECK(p4 <= 2);
559+
c = (uint128_t)m4 * SECP256K1_N_C_0;
560+
c += m0;
561+
p0 = (uint64_t)c; c >>= 64;
562+
563+
u = (uint128_t)m4 * SECP256K1_N_C_1;
564+
u += m1;
565+
c += (uint128_t)m5 * SECP256K1_N_C_0;
566+
c += (uint64_t)u; u >>= 64;
567+
p1 = (uint64_t)c; c >>= 64;
568+
569+
c += m4;
570+
u += (uint128_t)m5 * SECP256K1_N_C_1;
571+
u += m2;
572+
c += (m6 & SECP256K1_N_C_0);
573+
c += (uint64_t)u; u >>= 64;
574+
p2 = (uint64_t)c; c >>= 64;
575+
576+
c += m5;
577+
c += (m6 & SECP256K1_N_C_1);
578+
c += m3;
579+
c += (uint64_t)u;
580+
p3 = (uint64_t)c; c >>= 64;
581+
582+
p4 = (uint64_t)c - m6;;
583+
VERIFY_CHECK(p4 <= 3);
584+
585+
/* Effectively add an extra SECP256K1_N_C during the next pass.
586+
* Values that would have landed in the range [SECP256K1_N, 2^256)
587+
* will instead "wrap" and carry back to p4 */
588+
++p4;
562589

563590
/* Reduce 258 bits into 256. */
564591
/* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
565-
c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
566-
r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
567-
c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
568-
r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
569-
c += p2 + (uint128_t)p4;
570-
r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
571-
c += p3;
572-
r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
573-
#endif
592+
c = (uint128_t)SECP256K1_N_C_0 * p4;
593+
c += p0;
594+
p0 = (uint64_t)c; c >>= 64;
595+
c += (uint128_t)SECP256K1_N_C_1 * p4;
596+
c += p1;
597+
p1 = (uint64_t)c; c >>= 64;
598+
c += p4;
599+
c += p2;
600+
p2 = (uint64_t)c; c >>= 64;
601+
c += p3;
602+
p3 = (uint64_t)c; c >>= 64;
603+
VERIFY_CHECK((uint64_t)c <= 1);
604+
p4 = (uint64_t)c;
574605

575-
/* Final reduction of r. */
576-
secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
606+
/* Recover the extra SECP256K1_N_C from the previous pass.
607+
* If p4 is 1, it becomes a 0 mask - the final pass is a no-op
608+
* If p4 is 0, the decrement creates a UINT64_MAX mask that enables the
609+
* addition of SECP256K1_N in the final pass, which must result
610+
* in a final carry, which balances the accounts. */
611+
--p4;
612+
613+
c = p4 & SECP256K1_N_0;
614+
c += p0;
615+
p0 = (uint64_t)c; c >>= 64;
616+
c += p4 & SECP256K1_N_1;
617+
c += p1;
618+
p1 = (uint64_t)c; c >>= 64;
619+
c += p4 & SECP256K1_N_2;
620+
c += p2;
621+
p2 = (uint64_t)c; c >>= 64;
622+
c += p4 & SECP256K1_N_3;
623+
c += p3;
624+
p3 = (uint64_t)c;
625+
VERIFY_CHECK((uint64_t)(c >> 64) + p4 == 0);
626+
627+
r->d[0] = p0;
628+
r->d[1] = p1;
629+
r->d[2] = p2;
630+
r->d[3] = p3;
631+
#endif
577632
}
578633

579634
static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {

0 commit comments

Comments
 (0)