
Signed-digit based ecmult_const algorithm #1184


Merged · 7 commits · Nov 7, 2023
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

#### Changed
- The point multiplication algorithm used for ECDH operations (module `ecdh`) was replaced with a slightly faster one.

## [0.4.0] - 2023-09-04

#### Added
27 changes: 13 additions & 14 deletions src/bench_internal.c
@@ -14,7 +14,6 @@
#include "field_impl.h"
#include "group_impl.h"
#include "scalar_impl.h"
#include "ecmult_const_impl.h"
#include "ecmult_impl.h"
#include "bench.h"

@@ -98,6 +97,18 @@ static void bench_scalar_negate(void* arg, int iters) {
}
}

static void bench_scalar_half(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
secp256k1_scalar s = data->scalar[0];

for (i = 0; i < iters; i++) {
secp256k1_scalar_half(&s, &s);
}

data->scalar[0] = s;
}

static void bench_scalar_mul(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
@@ -309,18 +320,6 @@ static void bench_ecmult_wnaf(void* arg, int iters) {
CHECK(bits <= 256*iters);
}

static void bench_wnaf_const(void* arg, int iters) {
int i, bits = 0, overflow = 0;
bench_inv *data = (bench_inv*)arg;

for (i = 0; i < iters; i++) {
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
}
CHECK(overflow >= 0);
CHECK(bits <= 256*iters);
}

static void bench_sha256(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
@@ -370,6 +369,7 @@ int main(int argc, char **argv) {
int d = argc == 1; /* default */
print_output_table_header_row();

if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "half")) run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
@@ -394,7 +394,6 @@ int main(int argc, char **argv) {
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_zinv_var", bench_group_add_zinv_var, bench_setup, NULL, &data, 10, iters*10);
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);

if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);

if (d || have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, iters);
377 changes: 215 additions & 162 deletions src/ecmult_const_impl.h

Large diffs are not rendered by default.
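
Since the main file of this PR is not rendered above, a brief note on the idea its title refers to. `ecmult_const` computes q*P in constant time, and a signed-digit representation lets every iteration of the double-and-add loop add a nonzero term, avoiding secret-dependent branches and point-at-infinity special cases. Any odd q with 0 < q < 2^L can be written as q = Σ d_i·2^i with all digits d_i ∈ {−1, +1}: taking b = (q + 2^L − 1)/2, digit d_i is +1 exactly where bit i of b is set. Computing such a b requires dividing by 2 modulo the group order, which is presumably what the new `secp256k1_scalar_half` below enables. The following is a minimal standalone sketch of that digit recombination only, not the library's actual code (which operates on 256-bit scalars against precomputed tables):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define L 8 /* toy digit count; the real algorithm uses 256-bit scalars */

int main(void) {
    uint32_t q;
    for (q = 1; q < (1u << L); q += 2) { /* every odd q < 2^L */
        /* Encode: bit i of b says whether digit d_i is +1 (set) or -1 (clear). */
        uint32_t b = (q + (1u << L) - 1u) >> 1;
        int32_t acc = 0;
        int i;
        for (i = L - 1; i >= 0; i--) {
            acc = 2 * acc;                   /* the "double" step */
            acc += ((b >> i) & 1u) ? 1 : -1; /* always an "add", never a no-op */
        }
        assert(acc == (int32_t)q); /* the signed digits recombine to q exactly */
    }
    printf("signed-digit recombination holds for all odd q < 2^%d\n", L);
    return 0;
}
```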

9 changes: 4 additions & 5 deletions src/scalar.h
@@ -25,7 +25,7 @@ static void secp256k1_scalar_clear(secp256k1_scalar *r);
/** Access bits from a scalar. All requested bits must belong to the same 32-bit limb. */
static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar *a, unsigned int offset, unsigned int count);

/** Access bits from a scalar. Not constant time. */
/** Access bits from a scalar. Not constant time in offset and count. */
static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count);

/** Set a scalar from a big endian byte array. The scalar will be reduced modulo group order `n`.
@@ -54,10 +54,6 @@ static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int
/** Multiply two scalars (modulo the group order). */
static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b);

/** Shift a scalar right by some amount strictly between 0 and 16, returning
* the low bits that were shifted off */
static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n);

/** Compute the inverse of a scalar (modulo the group order). */
static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a);

@@ -67,6 +63,9 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
/** Compute the complement of a scalar (modulo the group order). */
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a);

/** Multiply a scalar by the multiplicative inverse of 2 (modulo the group order). */
static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a);

/** Check whether a scalar equals zero. */
static int secp256k1_scalar_is_zero(const secp256k1_scalar *a);

57 changes: 41 additions & 16 deletions src/scalar_4x64_impl.h
@@ -199,6 +199,47 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
secp256k1_scalar_verify(r);
}

static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
/* Writing `/` for field division and `//` for integer division, we compute
*
* a/2 = (a - (a&1))/2 + (a&1)/2
* = (a >> 1) + (a&1 ? 1/2 : 0)
* = (a >> 1) + (a&1 ? n//2+1 : 0),
*
* where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
* For n//2, we have the constants SECP256K1_N_H_0, ...
*
* This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
* - the left summand is: a >> 1 = (a - a&1)/2 = (n-2-1)//2 = (n-3)//2
* - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
* Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
*/
uint64_t mask = -(uint64_t)(a->d[0] & 1U);
secp256k1_uint128 t;
secp256k1_scalar_verify(a);

secp256k1_u128_from_u64(&t, (a->d[0] >> 1) | (a->d[1] << 63));
secp256k1_u128_accum_u64(&t, (SECP256K1_N_H_0 + 1U) & mask);
r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, (a->d[1] >> 1) | (a->d[2] << 63));
secp256k1_u128_accum_u64(&t, SECP256K1_N_H_1 & mask);
r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, (a->d[2] >> 1) | (a->d[3] << 63));
secp256k1_u128_accum_u64(&t, SECP256K1_N_H_2 & mask);
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
r->d[3] = secp256k1_u128_to_u64(&t) + (a->d[3] >> 1) + (SECP256K1_N_H_3 & mask);
#ifdef VERIFY
/* The line above only computed the bottom 64 bits of r->d[3]; redo the computation
* in full 128 bits to make sure the top 64 bits are indeed zero. */
secp256k1_u128_accum_u64(&t, a->d[3] >> 1);
secp256k1_u128_accum_u64(&t, SECP256K1_N_H_3 & mask);
secp256k1_u128_rshift(&t, 64);
VERIFY_CHECK(secp256k1_u128_to_u64(&t) == 0);

secp256k1_scalar_verify(r);
#endif
}
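
A standalone sanity check, not library code, of the identity derived in the comment above: for odd n, 1/2 = n//2 + 1 (mod n), so halving reduces to a shift plus a conditional add, and doubling the result must round-trip. A toy 64-bit modulus stands in for the 256-bit group order:

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t n = 101; /* any odd modulus works; the real n is 256 bits */
    uint64_t a;
    for (a = 0; a < n; a++) {
        /* half = (a >> 1) + (a & 1 ? n//2 + 1 : 0); stays below n, no overflow */
        uint64_t half = (a >> 1) + ((a & 1) ? n / 2 + 1 : 0);
        assert(half < n);
        assert(2 * half % n == a); /* half really is a * (1/2) mod n */
    }
    return 0;
}
```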

SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
secp256k1_scalar_verify(a);

@@ -809,22 +850,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
secp256k1_scalar_verify(r);
}

static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
int ret;
secp256k1_scalar_verify(r);
VERIFY_CHECK(n > 0);
VERIFY_CHECK(n < 16);

ret = r->d[0] & ((1 << n) - 1);
r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n));
r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n));
r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n));
r->d[3] = (r->d[3] >> n);

secp256k1_scalar_verify(r);
return ret;
}

static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
secp256k1_scalar_verify(k);

69 changes: 49 additions & 20 deletions src/scalar_8x32_impl.h
@@ -245,6 +245,55 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
secp256k1_scalar_verify(r);
}

static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
/* Writing `/` for field division and `//` for integer division, we compute
*
* a/2 = (a - (a&1))/2 + (a&1)/2
* = (a >> 1) + (a&1 ? 1/2 : 0)
* = (a >> 1) + (a&1 ? n//2+1 : 0),
*
* where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
* For n//2, we have the constants SECP256K1_N_H_0, ...
*
* This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
* - the left summand is: a >> 1 = (a - a&1)/2 = (n-2-1)//2 = (n-3)//2
* - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
* Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
*/
uint32_t mask = -(uint32_t)(a->d[0] & 1U);
uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31));
secp256k1_scalar_verify(a);

t += (SECP256K1_N_H_0 + 1U) & mask;
r->d[0] = t; t >>= 32;
t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31));
t += SECP256K1_N_H_1 & mask;
r->d[1] = t; t >>= 32;
t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31));
t += SECP256K1_N_H_2 & mask;
r->d[2] = t; t >>= 32;
t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31));
t += SECP256K1_N_H_3 & mask;
r->d[3] = t; t >>= 32;
t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31));
t += SECP256K1_N_H_4 & mask;
r->d[4] = t; t >>= 32;
t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31));
t += SECP256K1_N_H_5 & mask;
r->d[5] = t; t >>= 32;
t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31));
t += SECP256K1_N_H_6 & mask;
r->d[6] = t; t >>= 32;
r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask);
#ifdef VERIFY
/* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation
* in full 64 bits to make sure the top 32 bits are indeed zero. */
VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0);

secp256k1_scalar_verify(r);
#endif
}
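
Same derivation as the 4x64 code, with one detail worth spelling out: the conditional add of n//2+1 is branchless. `mask = -(uint32_t)(a->d[0] & 1)` is all-ones when a is odd and zero when it is even, so `x & mask` selects x or 0 without a secret-dependent branch. A minimal standalone illustration of the mask trick (not library code):

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t x = 0xdeadbeefu;
    uint32_t mask_odd  = -(uint32_t)(7u & 1u); /* 7 is odd:  mask = 0xffffffff */
    uint32_t mask_even = -(uint32_t)(8u & 1u); /* 8 is even: mask = 0 */
    assert((x & mask_odd) == x);  /* selects x */
    assert((x & mask_even) == 0); /* selects 0 */
    return 0;
}
```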

SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
secp256k1_scalar_verify(a);

@@ -613,26 +662,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
secp256k1_scalar_verify(r);
}

static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
int ret;
secp256k1_scalar_verify(r);
VERIFY_CHECK(n > 0);
VERIFY_CHECK(n < 16);

ret = r->d[0] & ((1 << n) - 1);
r->d[0] = (r->d[0] >> n) + (r->d[1] << (32 - n));
r->d[1] = (r->d[1] >> n) + (r->d[2] << (32 - n));
r->d[2] = (r->d[2] >> n) + (r->d[3] << (32 - n));
r->d[3] = (r->d[3] >> n) + (r->d[4] << (32 - n));
r->d[4] = (r->d[4] >> n) + (r->d[5] << (32 - n));
r->d[5] = (r->d[5] >> n) + (r->d[6] << (32 - n));
r->d[6] = (r->d[6] >> n) + (r->d[7] << (32 - n));
r->d[7] = (r->d[7] >> n);

secp256k1_scalar_verify(r);
return ret;
}

static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
secp256k1_scalar_verify(k);

11 changes: 9 additions & 2 deletions src/scalar_low.h
@@ -1,5 +1,5 @@
/***********************************************************************
* Copyright (c) 2015 Andrew Poelstra *
* Copyright (c) 2015, 2022 Andrew Poelstra, Pieter Wuille *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
***********************************************************************/
@@ -12,6 +12,13 @@
/** A scalar modulo the group order of the secp256k1 curve. */
typedef uint32_t secp256k1_scalar;

#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) (d0)
/* A compile-time constant equal to 2^32 (modulo order). */
#define SCALAR_2P32 ((0xffffffffUL % EXHAUSTIVE_TEST_ORDER) + 1U)

/* Compute a*2^32 + b (modulo order). */
#define SCALAR_HORNER(a, b) (((uint64_t)(a) * SCALAR_2P32 + (b)) % EXHAUSTIVE_TEST_ORDER)

/* Evaluates to the provided 256-bit constant reduced modulo order. */
#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER((d7), (d6)), (d5)), (d4)), (d3)), (d2)), (d1)), (d0))

#endif /* SECP256K1_SCALAR_REPR_H */
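
A standalone sanity check of the Horner reduction, with the macros copied out for a toy order of 13 rather than including the header (the value 13 is only an assumption for illustration). The 256-bit constant with digits (0,0,0,0,0,0,1,0) is 2^32, which must reduce to 2^32 mod 13 = 9, matching SCALAR_2P32:

```c
#include <assert.h>
#include <stdint.h>

#define EXHAUSTIVE_TEST_ORDER 13
#define SCALAR_2P32 ((0xffffffffUL % EXHAUSTIVE_TEST_ORDER) + 1U)
#define SCALAR_HORNER(a, b) (((uint64_t)(a) * SCALAR_2P32 + (b)) % EXHAUSTIVE_TEST_ORDER)

int main(void) {
    /* d7..d0 = (0,0,0,0,0,0,1,0): the 256-bit constant 2^32 */
    uint64_t r = SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(
        SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(0, 0), 0), 0), 0), 0), 1), 0);
    assert(r == 9);           /* 2^32 = 330382099*13 + 9 */
    assert(SCALAR_2P32 == 9); /* consistent: (2^32 - 1) mod 13 + 1 = 8 + 1 */
    return 0;
}
```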
21 changes: 8 additions & 13 deletions src/scalar_low_impl.h
@@ -139,19 +139,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
secp256k1_scalar_verify(r);
}

static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
int ret;
secp256k1_scalar_verify(r);
VERIFY_CHECK(n > 0);
VERIFY_CHECK(n < 16);

ret = *r & ((1 << n) - 1);
*r >>= n;

secp256k1_scalar_verify(r);
return ret;
}

static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
secp256k1_scalar_verify(a);

@@ -205,4 +192,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
secp256k1_scalar_verify(r);
}

static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
secp256k1_scalar_verify(a);

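/* Make a even by conditionally adding the (odd) group order, then shift right;
 * the result is a * (1/2) modulo the order. */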
*r = (*a + ((-(uint32_t)(*a & 1)) & EXHAUSTIVE_TEST_ORDER)) >> 1;

secp256k1_scalar_verify(r);
}

#endif /* SECP256K1_SCALAR_REPR_IMPL_H */