From ad6370c19fdf991301902d377904866fe3821993 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Sun, 29 Nov 2015 17:16:04 +0700 Subject: [PATCH] Reciprocal square root, parallel r-sqrt/inv - Factor out common exponentiation prelude for _fe_sqrt_var/fe_inv - Allow _fe_sqrt_var output to overwrite input - Make argument for _fe_normalizes_to_zero(_var) const - Add _fe_rsqrt_var for calculating reciprocal square root (1/sqrt(x)) - Add _fe_par_rsqrt_inv_var to calculate the r-sqrt of one input and the inverse of a second input, with a single exponentiation. --- src/bench_internal.c | 22 +++++ src/field.h | 28 +++++-- src/field_10x26_impl.h | 4 +- src/field_5x52_impl.h | 4 +- src/field_impl.h | 182 ++++++++++++++++++++--------------------- src/tests.c | 110 ++++++++++++++++++++++++- 6 files changed, 241 insertions(+), 109 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index 7809f5f8cf..d4efd2f8a6 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -191,6 +191,26 @@ void bench_field_sqrt_var(void* arg) { } } +void bench_field_rsqrt_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_fe_rsqrt_var(&data->fe_x, &data->fe_y, &data->fe_x); + secp256k1_fe_add(&data->fe_x, &data->fe_y); + } +} + +void bench_field_par_rsqrt_inv_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_fe_par_rsqrt_inv_var(&data->fe_x, &data->fe_y, &data->fe_x, &data->fe_y); + secp256k1_fe_add(&data->fe_x, &data->fe_y); + } +} + void bench_group_double_var(void* arg) { int i; bench_inv_t *data = (bench_inv_t*)arg; @@ -334,6 +354,8 @@ int main(int argc, char **argv) { if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt_var", bench_field_sqrt_var, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_rsqrt_var", bench_field_rsqrt_var, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt") || have_flag(argc, argv, "inverse")) run_benchmark("field_par_rsqrt_inv_var", bench_field_par_rsqrt_inv_var, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, 200000); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, 200000); diff --git a/src/field.h b/src/field.h index 2d52af5e36..e28fdf71b1 100644 --- a/src/field.h +++ b/src/field.h @@ -39,13 +39,11 @@ static void secp256k1_fe_normalize_weak(secp256k1_fe *r); /** Normalize a field element, without constant-time guarantee. */ static void secp256k1_fe_normalize_var(secp256k1_fe *r); -/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field - * implementation may optionally normalize the input, but this should not be relied upon. */ -static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r); +/** Verify whether a field element represents zero i.e. would normalize to a zero value. */ +static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r); -/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field - * implementation may optionally normalize the input, but this should not be relied upon. */ -static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r); +/** Verify whether a field element represents zero i.e. would normalize to a zero value. */ +static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r); /** Set a field element equal to a small integer. Resulting field element is normalized. */ static void secp256k1_fe_set_int(secp256k1_fe *r, int a); @@ -88,12 +86,28 @@ static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp2 static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a); /** If a has a square root, it is computed in r and 1 is returned. If a does not - * have a square root, the root of its negation is computed and 0 is returned. + * have a square root, the root of -a is computed and 0 is returned. * The input's magnitude can be at most 8. The output magnitude is 1 (but not * guaranteed to be normalized). The result in r will always be a square * itself. */ static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a); +/** If a has a square root, the square root is computed in rs, its reciprocal square root is + * calculated in rr, and 1 is returned. If a does not have a square root, the root (and recip. root) + * of -a are computed and 0 is returned. The input's magnitude can be at most 8. The + * outputs' magnitudes are 1 (but not guaranteed to be normalized). The result in rs will always be + * a square itself. The result in rr will be a square if, and only if, a is a square. + */ +static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a); + +/** Parallel reciprocal square root and inverse. Sets ri to be the (modular) inverse of b. If a has a + * square root, the reciprocal of its square root is computed in rr and 1 is returned. If a does not + * have a square root, the reciprocal root of -a is computed and 0 is returned. The inputs' + * magnitudes can be at most 8. The outputs' magnitudes are 1 (but not guaranteed to be normalized). + * The result in rr will be a square if, and only if, a is a square. + */ +static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b); + /** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be * at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */ static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a); diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 212cc5396a..8751bb3cc0 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -188,7 +188,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) { #endif } -static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) { +static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) { uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; @@ -217,7 +217,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) { return (z0 == 0) | (z1 == 0x3FFFFFFUL); } -static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) { +static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) { uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; uint32_t z0, z1; uint32_t x; diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index b31e24ab81..3d14962e02 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -167,7 +167,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) { #endif } -static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) { +static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) { uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ @@ -190,7 +190,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) { return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); } -static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) { +static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) { uint64_t t0, t1, t2, t3, t4; uint64_t z0, z1; uint64_t x; diff --git a/src/field_impl.h b/src/field_impl.h index 77f4aae2f9..b9608bb8a6 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -28,29 +28,21 @@ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe *a, const return secp256k1_fe_normalizes_to_zero_var(&na); } -static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) { - /** Given that p is congruent to 3 mod 4, we can compute the square root of - * a mod p as the (p+1)/4'th power of a. - * - * As (p+1)/4 is an even number, it will have the same result for a and for - * (-a). Only one of these two numbers actually has a square root however, - * so we test at the end by squaring and comparing to the input. - * Also because (p+1)/4 is an even number, the computed square root is - * itself always a square (a ** ((p+1)/4) is the square of a ** ((p+1)/8)). - */ - secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1; +static void secp256k1_fe_common_exp(secp256k1_fe *r1, secp256k1_fe *r2, const secp256k1_fe *a) { + secp256k1_fe t, x, x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223; int j; - /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in - * { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: - * 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] - */ + CHECK(r1 != r2); + + x = *a; + + secp256k1_fe_sqr(&x2, &x); + secp256k1_fe_mul(&x2, &x2, &x); - secp256k1_fe_sqr(&x2, a); - secp256k1_fe_mul(&x2, &x2, a); + *r2 = x2; secp256k1_fe_sqr(&x3, &x2); - secp256k1_fe_mul(&x3, &x3, a); + secp256k1_fe_mul(&x3, &x3, &x); x6 = x3; for (j=0; j<3; j++) { @@ -108,112 +100,112 @@ static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) { /* The final result is then assembled using a sliding window over the blocks. */ - t1 = x223; + t = x223; for (j=0; j<23; j++) { - secp256k1_fe_sqr(&t1, &t1); + secp256k1_fe_sqr(&t, &t); } - secp256k1_fe_mul(&t1, &t1, &x22); - for (j=0; j<6; j++) { - secp256k1_fe_sqr(&t1, &t1); + secp256k1_fe_mul(&t, &t, &x22); + + for (j=0; j<5; j++) { + secp256k1_fe_sqr(&t, &t); } - secp256k1_fe_mul(&t1, &t1, &x2); - secp256k1_fe_sqr(&t1, &t1); - secp256k1_fe_sqr(r, &t1); + *r1 = t; +} + +static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) { + secp256k1_fe t, x, x2; + + x = *a; + + secp256k1_fe_common_exp(&t, &x2, &x); + + secp256k1_fe_sqr(&t, &t); + secp256k1_fe_mul(&t, &t, &x2); + secp256k1_fe_sqr(&t, &t); + secp256k1_fe_sqr(&t, &t); + + *r = t; /* Check that a square root was actually calculated */ - secp256k1_fe_sqr(&t1, r); - return secp256k1_fe_equal_var(&t1, a); + secp256k1_fe_sqr(&t, &t); + return secp256k1_fe_equal_var(&t, &x); } -static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) { - secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1; +static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a) { + secp256k1_fe t, x, x2; int j; - /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in - * { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: - * [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] - */ + CHECK(rs != rr); - secp256k1_fe_sqr(&x2, a); - secp256k1_fe_mul(&x2, &x2, a); + x = *a; - secp256k1_fe_sqr(&x3, &x2); - secp256k1_fe_mul(&x3, &x3, a); + secp256k1_fe_common_exp(&t, &x2, &x); - x6 = x3; + secp256k1_fe_mul(&t, &t, &x); for (j=0; j<3; j++) { - secp256k1_fe_sqr(&x6, &x6); + secp256k1_fe_sqr(&t, &t); } - secp256k1_fe_mul(&x6, &x6, &x3); + secp256k1_fe_mul(&t, &t, &x2); - x9 = x6; - for (j=0; j<3; j++) { - secp256k1_fe_sqr(&x9, &x9); - } - secp256k1_fe_mul(&x9, &x9, &x3); + *rr = t; - x11 = x9; - for (j=0; j<2; j++) { - secp256k1_fe_sqr(&x11, &x11); - } - secp256k1_fe_mul(&x11, &x11, &x2); + secp256k1_fe_mul(&t, &t, &x); - x22 = x11; - for (j=0; j<11; j++) { - secp256k1_fe_sqr(&x22, &x22); - } - secp256k1_fe_mul(&x22, &x22, &x11); + *rs = t; - x44 = x22; - for (j=0; j<22; j++) { - secp256k1_fe_sqr(&x44, &x44); - } - secp256k1_fe_mul(&x44, &x44, &x22); + /* Check that a square root was actually calculated */ - x88 = x44; - for (j=0; j<44; j++) { - secp256k1_fe_sqr(&x88, &x88); - } - secp256k1_fe_mul(&x88, &x88, &x44); + secp256k1_fe_sqr(&t, &t); + return secp256k1_fe_equal_var(&t, &x); +} - x176 = x88; - for (j=0; j<88; j++) { - secp256k1_fe_sqr(&x176, &x176); - } - secp256k1_fe_mul(&x176, &x176, &x88); +static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b) { - x220 = x176; - for (j=0; j<44; j++) { - secp256k1_fe_sqr(&x220, &x220); - } - secp256k1_fe_mul(&x220, &x220, &x44); + secp256k1_fe b2, ab2, ab4, sqrt, recip, t; + int ret; - x223 = x220; - for (j=0; j<3; j++) { - secp256k1_fe_sqr(&x223, &x223); - } - secp256k1_fe_mul(&x223, &x223, &x3); + CHECK(rr != ri); - /* The final result is then assembled using a sliding window over the blocks. */ + /* Zero inputs could possibly be handled with conditional moves, if necessary */ + CHECK(!secp256k1_fe_normalizes_to_zero(a) && !secp256k1_fe_normalizes_to_zero(b)); - t1 = x223; - for (j=0; j<23; j++) { - secp256k1_fe_sqr(&t1, &t1); - } - secp256k1_fe_mul(&t1, &t1, &x22); - for (j=0; j<5; j++) { - secp256k1_fe_sqr(&t1, &t1); - } - secp256k1_fe_mul(&t1, &t1, a); + /* Calculate the reciprocal sqrt of a.b^4 */ + + secp256k1_fe_sqr(&b2, b); + secp256k1_fe_mul(&ab2, &b2, a); + secp256k1_fe_mul(&ab4, &ab2, &b2); + + ret = secp256k1_fe_rsqrt_var(&sqrt, &recip, &ab4); + + /* Inverse */ + secp256k1_fe_sqr(&t, &recip); + secp256k1_fe_mul(&t, &t, &ab2); + secp256k1_fe_mul(&t, &t, b); + secp256k1_fe_sqr(&t, &t); + secp256k1_fe_mul(ri, b, &t); + + /* Reciprocal */ + secp256k1_fe_mul(rr, &recip, &b2); + + return ret; +} + +static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) { + secp256k1_fe t, x2; + int j; + + secp256k1_fe_common_exp(&t, &x2, a); + + secp256k1_fe_mul(&t, &t, a); for (j=0; j<3; j++) { - secp256k1_fe_sqr(&t1, &t1); + secp256k1_fe_sqr(&t, &t); } - secp256k1_fe_mul(&t1, &t1, &x2); + secp256k1_fe_mul(&t, &t, &x2); for (j=0; j<2; j++) { - secp256k1_fe_sqr(&t1, &t1); + secp256k1_fe_sqr(&t, &t); } - secp256k1_fe_mul(r, a, &t1); + secp256k1_fe_mul(r, a, &t); } static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a) { diff --git a/src/tests.c b/src/tests.c index 3abfe1254c..49e96af24a 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1647,9 +1647,10 @@ void test_sqrt(const secp256k1_fe *a, const secp256k1_fe *k) { if (k != NULL) { /* Check that the returned root is +/- the given known answer */ secp256k1_fe_negate(&r2, &r1, 1); - secp256k1_fe_add(&r1, k); secp256k1_fe_add(&r2, k); - secp256k1_fe_normalize(&r1); secp256k1_fe_normalize(&r2); - CHECK(secp256k1_fe_is_zero(&r1) || secp256k1_fe_is_zero(&r2)); + CHECK(check_fe_equal(k, &r1) || check_fe_equal(k, &r2)); + + /* Our sqrt guarantees to return the root that is itself a square */ + CHECK(secp256k1_fe_sqrt_var(&r1, &r1) != 0); } } @@ -1687,6 +1688,107 @@ void run_sqrt(void) { } } +void test_rsqrt(const secp256k1_fe *a, const secp256k1_fe *k) { + secp256k1_fe r1, r2; + int v = secp256k1_fe_rsqrt_var(&r1, &r2, a); + CHECK((v == 0) == (k == NULL)); + + secp256k1_fe_mul(&r2, &r2, a); + CHECK(check_fe_equal(&r1, &r2)); + + if (k != NULL) { + /* Check that the returned root is +/- the given known answer */ + secp256k1_fe_negate(&r2, &r1, 1); + CHECK(check_fe_equal(k, &r1) || check_fe_equal(k, &r2)); + + /* Our sqrt guarantees to return the root that is itself a square */ + CHECK(secp256k1_fe_rsqrt_var(&r1, &r2, &r1) != 0); + } +} + +void run_rsqrt(void) { + secp256k1_fe ns, x, s, t; + int i; + + /* Check sqrt(0) is 0 */ + secp256k1_fe_set_int(&x, 0); + secp256k1_fe_sqr(&s, &x); + test_rsqrt(&s, &x); + + /* Check sqrt of small squares (and their negatives) */ + for (i = 1; i <= 100; i++) { + secp256k1_fe_set_int(&x, i); + secp256k1_fe_sqr(&s, &x); + test_rsqrt(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_rsqrt(&t, NULL); + } + + /* Consistency checks for large random values */ + for (i = 0; i < 10; i++) { + int j; + random_fe_non_square(&ns); + for (j = 0; j < count; j++) { + random_fe(&x); + secp256k1_fe_sqr(&s, &x); + test_rsqrt(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_rsqrt(&t, NULL); + secp256k1_fe_mul(&t, &s, &ns); + test_rsqrt(&t, NULL); + } + } +} + +void test_par_rsqrt_inv(const secp256k1_fe *a, const secp256k1_fe *k) { + secp256k1_fe r1, r2, x, xi, t; + int v; + + random_fe_non_zero(&x); + v = secp256k1_fe_par_rsqrt_inv_var(&r2, &xi, a, &x); + CHECK((v == 0) == (k == NULL)); + + /* Derive the square root from the reciprocal square root */ + secp256k1_fe_mul(&r1, &r2, a); + + if (k != NULL) { + /* Check that the calculated root is +/- the given known answer */ + secp256k1_fe_negate(&t, &r1, 1); + CHECK(check_fe_equal(k, &r1) || check_fe_equal(k, &t)); + } + + CHECK(check_fe_inverse(&x, &xi)); +} + +void run_par_rsqrt_inv(void) { + secp256k1_fe ns, x, s, t; + int i; + + /* Check sqrt of small squares (and their negatives) */ + for (i = 1; i <= 100; i++) { + secp256k1_fe_set_int(&x, i); + secp256k1_fe_sqr(&s, &x); + test_par_rsqrt_inv(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_par_rsqrt_inv(&t, NULL); + } + + /* Consistency checks for large random values */ + for (i = 0; i < 10; i++) { + int j; + random_fe_non_square(&ns); + for (j = 0; j < count; j++) { + random_fe_non_zero(&x); + secp256k1_fe_sqr(&s, &x); + test_par_rsqrt_inv(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_par_rsqrt_inv(&t, NULL); + secp256k1_fe_mul(&t, &s, &ns); + test_par_rsqrt_inv(&t, NULL); + } + } +} + /***** GROUP TESTS *****/ void ge_equals_ge(const secp256k1_ge *a, const secp256k1_ge *b) { @@ -4323,6 +4425,8 @@ int main(int argc, char **argv) { run_field_convert(); run_sqr(); run_sqrt(); + run_rsqrt(); + run_par_rsqrt_inv(); /* group tests */ run_ge();