Optimization: use Nx32 representation for recoded bits

peterdettman · sipa · commit a0d32b597d4f · 2024-04-19T11:43:46.000-04:00
The existing code needs to deal with the edge case that bit_pos &gt;= 256,
which would lead to an out-of-bounds read from secp256k1_scalar.

Instead, recode the scalar into an array of uint32_t with enough zero
padding at the end to alleviate the issue. This also simplifies the
code, and is necessary for a security improvement in a follow-up
commit.

Original code by Peter Dettman, with modifications by Pieter Wuille.
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
@@ -56,7 +56,11 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     secp256k1_fe neg;
     secp256k1_ge_storage adds;
     secp256k1_scalar d;
-    int first = 1;
+    /* Array of uint32_t values large enough to store COMB_BITS bits. Only the bottom
+     * 8 are ever nonzero, but having the zero padding at the end if COMB_BITS>256
+     * avoids the need to deal with out-of-bounds reads from a scalar. */
+    uint32_t recoded[(COMB_BITS + 31) >> 5] = {0};
+    int first = 1, i;
 
     memset(&adds, 0, sizeof(adds));
 
@@ -103,6 +107,11 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
 
     /* Compute the scalar d = (gn + ctx->scalar_offset). */
     secp256k1_scalar_add(&d, &ctx->scalar_offset, gn);
+    /* Convert to recoded array. */
+    for (i = 0; i < 8; ++i) {
+        recoded[i] = secp256k1_scalar_get_bits_limb32(&d, 32 * i, 32);
+    }
+    secp256k1_scalar_clear(&d);
 
     /* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
      * (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions.
@@ -188,8 +197,8 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
             /* Gather the mask(block)-selected bits of d into bits. They're packed:
              * bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */
             uint32_t bits = 0, sign, abs, index, tooth;
-            for (tooth = 0; tooth < COMB_TEETH && bit_pos < 256; ++tooth) {
-                uint32_t bit = secp256k1_scalar_get_bits_limb32(&d, bit_pos, 1);
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                uint32_t bit = (recoded[bit_pos >> 5] >> (bit_pos & 0x1f)) & 1;
                 bits |= bit << tooth;
                 bit_pos += COMB_SPACING;
             }
@@ -243,7 +252,7 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     secp256k1_fe_clear(&neg);
     secp256k1_ge_clear(&add);
     memset(&adds, 0, sizeof(adds));
-    secp256k1_scalar_clear(&d);
+    memset(&recoded, 0, sizeof(recoded));
 }
 
 /* Setup blinding values for secp256k1_ecmult_gen. */