Skip to content

Commit 8932956

Browse files
committed
Faster const-time normalization
1 parent 485f608 commit 8932956

File tree

2 files changed

+68
-72
lines changed

2 files changed

+68
-72
lines changed

src/field_10x26_impl.h

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -73,33 +73,15 @@ static void secp256k1_fe_normalize(secp256k1_fe *r) {
7373
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
7474
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
7575

76-
/* Reduce t9 at the start so there will be at most a single carry from the first pass */
77-
uint32_t m;
78-
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
76+
/* Reduce t9 at the start so there will be at most a single carry from the first pass.
77+
* x is incremented before the first pass and then decremented before the second pass
78+
* to ensure that the result doesn't fall into the range [P, 2^256). */
79+
uint32_t x = (t9 >> 22) + 1; t9 &= 0x03FFFFFUL;
7980

8081
/* The first pass ensures the magnitude is 1, ... */
8182
t0 += x * 0x3D1UL; t1 += (x << 6);
8283
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
8384
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
84-
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
85-
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
86-
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
87-
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
88-
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
89-
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
90-
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
91-
92-
/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
93-
VERIFY_CHECK(t9 >> 23 == 0);
94-
95-
/* At most a single final reduction is needed; check if the value is >= the field characteristic */
96-
x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
97-
& ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
98-
99-
/* Apply the final reduction (for constant-time behaviour, we do it always) */
100-
t0 += x * 0x3D1UL; t1 += (x << 6);
101-
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
102-
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
10385
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
10486
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
10587
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
@@ -108,11 +90,24 @@ static void secp256k1_fe_normalize(secp256k1_fe *r) {
10890
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
10991
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
11092

111-
/* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
112-
VERIFY_CHECK(t9 >> 22 == x);
93+
/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
94+
VERIFY_CHECK(t9 >> 23 == 0);
11395

114-
/* Mask off the possible multiple of 2^256 from the final reduction */
115-
t9 &= 0x03FFFFFUL;
96+
/* The second pass subtracts (2^256 - P) from (t0..t9) iff there was no carry.
97+
* No underflow is possible as we just added at least that amount in the first pass. */
98+
x = (t9 >> 22) - 1; t9 &= 0x03FFFFFUL;
99+
VERIFY_CHECK(x == 0 || x == -(uint32_t)1);
100+
101+
t0 -= x & 0x3D1UL; t1 -= x & 0x40UL;
102+
t1 -= (t0 >> 31); t0 &= 0x3FFFFFFUL;
103+
t2 -= (t1 >> 31); t1 &= 0x3FFFFFFUL;
104+
t3 -= (t2 >> 31); t2 &= 0x3FFFFFFUL;
105+
t4 -= (t3 >> 31); t3 &= 0x3FFFFFFUL;
106+
t5 -= (t4 >> 31); t4 &= 0x3FFFFFFUL;
107+
t6 -= (t5 >> 31); t5 &= 0x3FFFFFFUL;
108+
t7 -= (t6 >> 31); t6 &= 0x3FFFFFFUL;
109+
t8 -= (t7 >> 31); t7 &= 0x3FFFFFFUL;
110+
t9 -= (t8 >> 31); t8 &= 0x3FFFFFFUL;
116111

117112
r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
118113
r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
@@ -215,29 +210,32 @@ static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
215210
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
216211
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
217212

218-
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
219-
uint32_t z0, z1;
213+
/* z0 collects the limbs (t2..t8) that must be zero in both cases; z1 tracks a possible raw value of 0, z2 tracks a possible raw value of P */
214+
uint32_t z0, z1, z2;
220215

221-
/* Reduce t9 at the start so there will be at most a single carry from the first pass */
222-
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
216+
/* Reduce t9 at the start so there will be at most a single carry from the first pass.
217+
* x is incremented before the first pass so that both match values (for raw 0 and raw P) have all-zero internal limbs */
218+
uint32_t x = (t9 >> 22) + 1; t9 &= 0x03FFFFFUL;
223219

224220
/* The first pass ensures the magnitude is 1, ... */
225221
t0 += x * 0x3D1UL; t1 += (x << 6);
226-
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
227-
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
228-
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
229-
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
230-
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
231-
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
232-
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
233-
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
234-
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
235-
z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
222+
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
223+
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
224+
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 = t2;
225+
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3;
226+
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4;
227+
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5;
228+
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6;
229+
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7;
230+
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8;
231+
232+
z1 = z0 | (t0 ^ 0x3D1UL) | (t1 ^ 0x40UL) | t9;
233+
z2 = z0 | t0 | t1 | (t9 ^ 0x400000UL);
236234

237235
/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
238236
VERIFY_CHECK(t9 >> 23 == 0);
239237

240-
return (z0 == 0) | (z1 == 0x3FFFFFFUL);
238+
return (z1 == 0) | (z2 == 0);
241239
}
242240

243241
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {

src/field_5x52_impl.h

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -76,36 +76,31 @@ static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
7676
static void secp256k1_fe_normalize(secp256k1_fe *r) {
7777
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
7878

79-
/* Reduce t4 at the start so there will be at most a single carry from the first pass */
80-
uint64_t m;
81-
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
79+
/* Reduce t4 at the start so there will be at most a single carry from the first pass.
80+
* x is incremented before the first pass and then decremented before the second pass
81+
* to ensure that the result doesn't fall into the range [P, 2^256). */
82+
uint64_t x = (t4 >> 48) + 1; t4 &= 0x0FFFFFFFFFFFFULL;
8283

8384
/* The first pass ensures the magnitude is 1, ... */
8485
t0 += x * 0x1000003D1ULL;
8586
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
86-
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
87-
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
88-
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
89-
90-
/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
91-
VERIFY_CHECK(t4 >> 49 == 0);
92-
93-
/* At most a single final reduction is needed; check if the value is >= the field characteristic */
94-
x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
95-
& (t0 >= 0xFFFFEFFFFFC2FULL));
96-
97-
/* Apply the final reduction (for constant-time behaviour, we do it always) */
98-
t0 += x * 0x1000003D1ULL;
99-
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
10087
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
10188
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
10289
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
10390

104-
/* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
105-
VERIFY_CHECK(t4 >> 48 == x);
91+
/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element). */
92+
VERIFY_CHECK(t4 >> 49 == 0);
93+
94+
/* The second pass subtracts (2^256 - P) from (t0..t4) iff there was no carry.
95+
* No underflow is possible as we just added at least that amount in the first pass. */
96+
x = (t4 >> 48) - 1; t4 &= 0x0FFFFFFFFFFFFULL;
97+
VERIFY_CHECK(x == 0 || x == -(uint64_t)1);
10698

107-
/* Mask off the possible multiple of 2^256 from the final reduction */
108-
t4 &= 0x0FFFFFFFFFFFFULL;
99+
t0 -= x & 0x1000003D1ULL;
100+
t1 -= (t0 >> 63); t0 &= 0xFFFFFFFFFFFFFULL;
101+
t2 -= (t1 >> 63); t1 &= 0xFFFFFFFFFFFFFULL;
102+
t3 -= (t2 >> 63); t2 &= 0xFFFFFFFFFFFFFULL;
103+
t4 -= (t3 >> 63); t3 &= 0xFFFFFFFFFFFFFULL;
109104

110105
r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
111106

@@ -187,24 +182,27 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
187182
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
188183
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
189184

190-
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
191-
uint64_t z0, z1;
185+
/* z0 collects the limbs (t1..t3) that must be zero in both cases; z1 tracks a possible raw value of 0, z2 tracks a possible raw value of P */
186+
uint64_t z0, z1, z2;
192187

193-
/* Reduce t4 at the start so there will be at most a single carry from the first pass */
194-
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
188+
/* Reduce t4 at the start so there will be at most a single carry from the first pass.
189+
* x is incremented before the first pass so that both match values (for raw 0 and raw P) have all-zero internal limbs */
190+
uint64_t x = (t4 >> 48) + 1; t4 &= 0x0FFFFFFFFFFFFULL;
195191

196192
/* The first pass ensures the magnitude is 1, ... */
197193
t0 += x * 0x1000003D1ULL;
198-
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL;
199-
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
200-
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
201-
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
202-
z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
194+
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
195+
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 = t1;
196+
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2;
197+
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3;
198+
199+
z1 = z0 | (t0 ^ 0x1000003D1ULL) | t4;
200+
z2 = z0 | t0 | (t4 ^ 0x1000000000000ULL);
203201

204202
/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
205203
VERIFY_CHECK(t4 >> 49 == 0);
206204

207-
return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL);
205+
return (z1 == 0) | (z2 == 0);
208206
}
209207

210208
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {

0 commit comments

Comments
 (0)