Skip to content

Faster const-time normalization #1028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 39 additions & 41 deletions src/field_10x26_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,33 +73,15 @@ static void secp256k1_fe_normalize(secp256k1_fe *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

/* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t m;
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* Reduce t9 at the start so there will be at most a single carry from the first pass.
* x is incremented before the first pass and then decremented before the second pass
* to ensure that the result doesn't fall into the range [P, 2^256). */
uint32_t x = (t9 >> 22) + 1; t9 &= 0x03FFFFFUL;

/* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;

/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t9 >> 23 == 0);

/* At most a single final reduction is needed; check if the value is >= the field characteristic */
x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
& ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));

/* Apply the final reduction (for constant-time behaviour, we do it always) */
t0 += x * 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
Expand All @@ -108,11 +90,24 @@ static void secp256k1_fe_normalize(secp256k1_fe *r) {
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

/* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
VERIFY_CHECK(t9 >> 22 == x);
/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t9 >> 23 == 0);

/* Mask off the possible multiple of 2^256 from the final reduction */
t9 &= 0x03FFFFFUL;
/* The second pass subtracts (2^256 - P) from (t0..t9) iff there was no carry.
* No underflow is possible as we just added at least that amount in the first pass. */
x = (t9 >> 22) - 1; t9 &= 0x03FFFFFUL;
VERIFY_CHECK(x == 0 || x == -(uint32_t)1);

t0 -= x & 0x3D1UL; t1 -= x & 0x40UL;
t1 -= (t0 >> 31); t0 &= 0x3FFFFFFUL;
t2 -= (t1 >> 31); t1 &= 0x3FFFFFFUL;
t3 -= (t2 >> 31); t2 &= 0x3FFFFFFUL;
t4 -= (t3 >> 31); t3 &= 0x3FFFFFFUL;
t5 -= (t4 >> 31); t4 &= 0x3FFFFFFUL;
t6 -= (t5 >> 31); t5 &= 0x3FFFFFFUL;
t7 -= (t6 >> 31); t6 &= 0x3FFFFFFUL;
t8 -= (t7 >> 31); t7 &= 0x3FFFFFFUL;
t9 -= (t8 >> 31); t8 &= 0x3FFFFFFUL;

r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
Expand Down Expand Up @@ -215,29 +210,32 @@ static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint32_t z0, z1;
/* z1 tracks a possible raw value of 0, z2 tracks a possible raw value of P */
uint32_t z0, z1, z2;

/* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* Reduce t9 at the start so there will be at most a single carry from the first pass.
* x is incremented before the first pass so that both match values have internal zeros. */
uint32_t x = (t9 >> 22) + 1; t9 &= 0x03FFFFFUL;

/* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 = t2;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4;
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5;
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6;
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8;

z1 = z0 | (t0 ^ 0x3D1UL) | (t1 ^ 0x40UL) | t9;
z2 = z0 | t0 | t1 | (t9 ^ 0x400000UL);

/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t9 >> 23 == 0);

return (z0 == 0) | (z1 == 0x3FFFFFFUL);
return (z1 == 0) | (z2 == 0);
}

static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
Expand Down
60 changes: 29 additions & 31 deletions src/field_5x52_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,36 +76,31 @@ static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
static void secp256k1_fe_normalize(secp256k1_fe *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];

/* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t m;
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* Reduce t4 at the start so there will be at most a single carry from the first pass.
* x is incremented before the first pass and then decremented before the second pass
* to ensure that the result doesn't fall into the range [P, 2^256). */
uint64_t x = (t4 >> 48) + 1; t4 &= 0x0FFFFFFFFFFFFULL;

/* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that x is bigger than before, how do we know that t0 += x * 0x1000003D1ULL won't overflow the uint64_t?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most, if not all, callers use field elements with a maximum magnitude of 8, so there's plenty of room. The practical limit is actually around 31 (because field_10x26 can only represent magnitude 32 and several methods assume there's a free bit available for carries on entry). I think it would be a good idea to actually enforce some exact magnitude limit on input to field methods though (probably 16).

t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;

/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t4 >> 49 == 0);

/* At most a single final reduction is needed; check if the value is >= the field characteristic */
x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
& (t0 >= 0xFFFFEFFFFFC2FULL));

/* Apply the final reduction (for constant-time behaviour, we do it always) */
t0 += x * 0x1000003D1ULL;
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;

/* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
VERIFY_CHECK(t4 >> 48 == x);
/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element). */
VERIFY_CHECK(t4 >> 49 == 0);

/* The second pass subtracts (2^256 - P) from (t0..t4) iff there was no carry.
* No underflow is possible as we just added at least that amount in the first pass. */
x = (t4 >> 48) - 1; t4 &= 0x0FFFFFFFFFFFFULL;
VERIFY_CHECK(x == 0 || x == -(uint64_t)1);

/* Mask off the possible multiple of 2^256 from the final reduction */
t4 &= 0x0FFFFFFFFFFFFULL;
t0 -= x & 0x1000003D1ULL;
t1 -= (t0 >> 63); t0 &= 0xFFFFFFFFFFFFFULL;
t2 -= (t1 >> 63); t1 &= 0xFFFFFFFFFFFFFULL;
t3 -= (t2 >> 63); t2 &= 0xFFFFFFFFFFFFFULL;
t4 -= (t3 >> 63); t3 &= 0xFFFFFFFFFFFFFULL;

r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;

Expand Down Expand Up @@ -187,24 +182,27 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];

/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint64_t z0, z1;
/* z1 tracks a possible raw value of 0, z2 tracks a possible raw value of P */
uint64_t z0, z1, z2;

/* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* Reduce t4 at the start so there will be at most a single carry from the first pass.
* x is incremented before the first pass so that both match values have internal zeros. */
uint64_t x = (t4 >> 48) + 1; t4 &= 0x0FFFFFFFFFFFFULL;

/* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL;
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL;
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 = t1;
t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2;
t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3;

z1 = z0 | (t0 ^ 0x1000003D1ULL) | t4;
z2 = z0 | t0 | (t4 ^ 0x1000000000000ULL);

/* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t4 >> 49 == 0);

return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL);
return (z1 == 0) | (z2 == 0);
}

static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
Expand Down