Skip to content

Commit 46eaa95

Browse files
committed
Native jacobi symbol algorithm
This introduces variants of the divsteps-based GCD algorithm used for modular inverses to compute Jacobi symbols. Changes compared to the normal vartime divsteps: * Only positive matrices are used, guaranteeing that f and g remain positive. * An additional jac variable is updated to track sign changes during matrix computation. * There is (so far) no proof that this algorithm terminates within reasonable amount of time for every input, but experimentally it appears to almost always need less than 900 iterations. To account for that, only a bounded number of iterations is performed (1500), after which failure is returned. The field logic then falls back to using square roots to determining the result. * The algorithm converges to f=g=gcd(f0,g0) rather than g=0. To keep this test simple, the end condition is f=1, which won't be reached if started with g=0. That case is dealt with specially.
1 parent 74c34e7 commit 46eaa95

File tree

9 files changed

+433
-19
lines changed

9 files changed

+433
-19
lines changed

src/bench_internal.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,17 @@ void bench_field_sqrt(void* arg, int iters) {
209209
CHECK(j <= iters);
210210
}
211211

212+
void bench_field_jacobi_var(void* arg, int iters) {
213+
int i, j = 0;
214+
bench_inv *data = (bench_inv*)arg;
215+
216+
for (i = 0; i < iters; i++) {
217+
j += secp256k1_fe_jacobi_var(&data->fe[0]);
218+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
219+
}
220+
CHECK(j <= iters);
221+
}
222+
212223
void bench_group_double_var(void* arg, int iters) {
213224
int i;
214225
bench_inv *data = (bench_inv*)arg;
@@ -360,6 +371,7 @@ int main(int argc, char **argv) {
360371
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, iters*10);
361372
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, iters);
362373
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, iters);
374+
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "jacobi")) run_benchmark("field_jacobi_var", bench_field_jacobi_var, bench_setup, NULL, &data, 10, iters);
363375
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt", bench_field_sqrt, bench_setup, NULL, &data, 10, iters);
364376

365377
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, iters*10);

src/field.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,7 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f
124124
/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/
125125
static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);
126126

127+
/** Compute the Jacobi symbol of a / p. 0 if a=0; 1 if a square; -1 if a non-square. */
128+
static int secp256k1_fe_jacobi_var(const secp256k1_fe *a);
129+
127130
#endif /* SECP256K1_FIELD_H */

src/field_10x26_impl.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,4 +1255,32 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
12551255
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
12561256
}
12571257

1258+
static int secp256k1_fe_jacobi_var(const secp256k1_fe *x) {
1259+
secp256k1_fe tmp;
1260+
secp256k1_modinv32_signed30 s;
1261+
int ret;
1262+
1263+
tmp = *x;
1264+
secp256k1_fe_normalize_var(&tmp);
1265+
secp256k1_fe_to_signed30(&s, &tmp);
1266+
ret = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe);
1267+
if (ret == -2) {
1268+
/* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back
1269+
* to computing a square root. This should be extremely rare with random
1270+
* input. */
1271+
secp256k1_fe dummy;
1272+
ret = 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1;
1273+
#ifdef VERIFY
1274+
} else {
1275+
secp256k1_fe dummy;
1276+
if (secp256k1_fe_is_zero(&tmp)) {
1277+
VERIFY_CHECK(ret == 0);
1278+
} else {
1279+
VERIFY_CHECK(ret == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1);
1280+
}
1281+
#endif
1282+
}
1283+
return ret;
1284+
}
1285+
12581286
#endif /* SECP256K1_FIELD_REPR_IMPL_H */

src/field_5x52_impl.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,4 +577,32 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
577577
#endif
578578
}
579579

580+
static int secp256k1_fe_jacobi_var(const secp256k1_fe *x) {
581+
secp256k1_fe tmp;
582+
secp256k1_modinv64_signed62 s;
583+
int ret;
584+
585+
tmp = *x;
586+
secp256k1_fe_normalize_var(&tmp);
587+
secp256k1_fe_to_signed62(&s, &tmp);
588+
ret = secp256k1_jacobi64_maybe_var(&s, &secp256k1_const_modinfo_fe);
589+
if (ret == -2) {
590+
/* secp256k1_jacobi64_maybe_var failed to compute the Jacobi symbol. Fall back
591+
* to computing a square root. This should be extremely rare with random
592+
* input. */
593+
secp256k1_fe dummy;
594+
ret = 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1;
595+
#ifdef VERIFY
596+
} else {
597+
secp256k1_fe dummy;
598+
if (secp256k1_fe_is_zero(&tmp)) {
599+
VERIFY_CHECK(ret == 0);
600+
} else {
601+
VERIFY_CHECK(ret == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1);
602+
}
603+
#endif
604+
}
605+
return ret;
606+
}
607+
580608
#endif /* SECP256K1_FIELD_REPR_IMPL_H */

src/modinv32.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,7 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
3939
/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
4040
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
4141

42+
/* Compute the Jacobi symbol for x (which must be normalized). Returns -2 if the result cannot be computed. */
43+
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
44+
4245
#endif /* SECP256K1_MODINV32_H */

src/modinv32_impl.h

Lines changed: 158 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
232232
return zeta;
233233
}
234234

235+
/* inv256[i] = -(2*i+1)^-1 (mod 256) */
236+
static const uint8_t secp256k1_modinv32_inv256[128] = {
237+
0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
238+
0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
239+
0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
240+
0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
241+
0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
242+
0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
243+
0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
244+
0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
245+
0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
246+
0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
247+
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
248+
};
249+
235250
/* Compute the transition matrix and eta for 30 divsteps (variable time).
236251
*
237252
* Input: eta: initial eta
@@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
243258
* Implements the divsteps_n_matrix_var function from the explanation.
244259
*/
245260
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
246-
/* inv256[i] = -(2*i+1)^-1 (mod 256) */
247-
static const uint8_t inv256[128] = {
248-
0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
249-
0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
250-
0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
251-
0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
252-
0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
253-
0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
254-
0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
255-
0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
256-
0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
257-
0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
258-
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
259-
};
260-
261261
/* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
262262
uint32_t u = 1, v = 0, q = 0, r = 1;
263263
uint32_t f = f0, g = g0, m;
@@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
297297
VERIFY_CHECK(limit > 0 && limit <= 30);
298298
m = (UINT32_MAX >> (32 - limit)) & 255U;
299299
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
300-
w = (g * inv256[(f >> 1) & 127]) & m;
300+
w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
301301
/* Do so. */
302302
g += f * w;
303303
q += u * w;
@@ -317,6 +317,83 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
317317
return eta;
318318
}
319319

320+
/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
321+
* of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
322+
* Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
323+
*
324+
* Input: eta: initial eta
325+
* f0: bottom limb of initial f
326+
* g0: bottom limb of initial g
327+
* Output: t: transition matrix
328+
* Return: final eta
329+
*/
330+
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
331+
/* Transformation matrix. */
332+
uint32_t u = 1, v = 0, q = 0, r = 1;
333+
uint32_t f = f0, g = g0, m;
334+
uint16_t w;
335+
int i = 30, limit, zeros;
336+
int jac = *jacp;
337+
338+
for (;;) {
339+
/* Use a sentinel bit to count zeros only up to i. */
340+
zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
341+
/* Perform zeros divsteps at once; they all just divide g by two. */
342+
g >>= zeros;
343+
u <<= zeros;
344+
v <<= zeros;
345+
eta -= zeros;
346+
i -= zeros;
347+
/* Update the bottom bit of jac: when dividing g by an odd power of 2,
348+
* if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
349+
jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
350+
/* We're done once we've done 30 posdivsteps. */
351+
if (i == 0) break;
352+
VERIFY_CHECK((f & 1) == 1);
353+
VERIFY_CHECK((g & 1) == 1);
354+
VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
355+
VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
356+
/* If eta is negative, negate it and replace f,g with g,f. */
357+
if (eta < 0) {
358+
uint32_t tmp;
359+
eta = -eta;
360+
/* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
361+
* if both f and g are 3 mod 4. */
362+
jac ^= ((f & g) >> 1);
363+
tmp = f; f = g; g = tmp;
364+
tmp = u; u = q; q = tmp;
365+
tmp = v; v = r; r = tmp;
366+
}
367+
/* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
368+
* than i can be cancelled out (as we'd be done before that point), and no more than eta+1
369+
* can be done as its sign will flip once that happens. */
370+
limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
371+
/* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
372+
VERIFY_CHECK(limit > 0 && limit <= 30);
373+
m = (UINT32_MAX >> (32 - limit)) & 255U;
374+
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
375+
w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
376+
/* Do so. */
377+
g += f * w;
378+
q += u * w;
379+
r += v * w;
380+
VERIFY_CHECK((g & m) == 0);
381+
}
382+
/* Return data in t and return value. */
383+
t->u = (int32_t)u;
384+
t->v = (int32_t)v;
385+
t->q = (int32_t)q;
386+
t->r = (int32_t)r;
387+
/* The determinant of t must be a power of two. This guarantees that multiplication with t
388+
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
389+
* will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
390+
* the aggregate of 30 of them will have determinant 2^30 or -2^30. */
391+
VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
392+
(int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
393+
*jacp = jac;
394+
return eta;
395+
}
396+
320397
/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
321398
*
322399
* On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
@@ -584,4 +661,69 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
584661
*x = d;
585662
}
586663

664+
/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
665+
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
666+
/* Start with f=modulus, g=x, eta=-1. */
667+
secp256k1_modinv32_signed30 f = modinfo->modulus;
668+
secp256k1_modinv32_signed30 g = *x;
669+
int j, len = 9;
670+
int32_t eta = -1; /* eta = -delta; delta is initially 1 */
671+
int32_t cond, fn, gn;
672+
int jac = 0;
673+
int count;
674+
675+
VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
676+
677+
/* The loop below does not converge for input g=0. Deal with this case specifically. */
678+
if (!(g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8])) return 0;
679+
680+
/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
681+
* In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
682+
#ifdef VERIFY
683+
for (count = 0; count < 25; ++count) {
684+
#else
685+
for (count = 0; count < 50; ++count) {
686+
#endif
687+
/* Compute transition matrix and new eta after 30 posdivsteps. */
688+
secp256k1_modinv32_trans2x2 t;
689+
eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
690+
/* Update f,g using that transition matrix. */
691+
#ifdef VERIFY
692+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
693+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
694+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
695+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
696+
#endif
697+
secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
698+
/* If the bottom limb of f is 1, there is a chance that f=1. */
699+
if (f.v[0] == 1) {
700+
cond = 0;
701+
/* Check if the other limbs are also 0. */
702+
for (j = 1; j < len; ++j) {
703+
cond |= f.v[j];
704+
}
705+
/* If so, we're done. */
706+
if (cond == 0) return 1 - 2*(jac & 1);
707+
}
708+
709+
/* Determine if len>1 and limb (len-1) of both f and g is 0. */
710+
fn = f.v[len - 1];
711+
gn = g.v[len - 1];
712+
cond = ((int32_t)len - 2) >> 31;
713+
cond |= fn;
714+
cond |= gn;
715+
/* If so, reduce length. */
716+
if (cond == 0) --len;
717+
#ifdef VERIFY
718+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
719+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
720+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
721+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
722+
#endif
723+
}
724+
725+
/* The loop failed to converge to f=g after 1500 iterations. Return -2, indicating unknown result. */
726+
return -2;
727+
}
728+
587729
#endif /* SECP256K1_MODINV32_IMPL_H */

src/modinv64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,7 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
4343
/* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */
4444
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
4545

46+
/* Compute the Jacobi symbol for x (which must be normalized). Returns -2 if the result cannot be computed. */
47+
static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
48+
4649
#endif /* SECP256K1_MODINV64_H */

0 commit comments

Comments
 (0)