Skip to content

Commit ad6370c

Browse files
committed
Reciprocal square root, parallel r-sqrt/inv
- Factor out common exponentiation prelude for _fe_sqrt_var/_fe_inv
- Allow _fe_sqrt_var output to overwrite input
- Make argument for _fe_normalizes_to_zero(_var) const
- Add _fe_rsqrt_var for calculating reciprocal square root (1/sqrt(x))
- Add _fe_par_rsqrt_inv_var to calculate the r-sqrt of one input and the inverse of a second input, with a single exponentiation.
1 parent 3026daa commit ad6370c

File tree

6 files changed

+241
-109
lines changed

6 files changed

+241
-109
lines changed

src/bench_internal.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,26 @@ void bench_field_sqrt_var(void* arg) {
191191
}
192192
}
193193

194+
/* Benchmark body for secp256k1_fe_rsqrt_var.
 * arg is the bench_inv_t set up by the harness. Each of the 20000 iterations
 * uses fe_x as the input and stores the square root back into fe_x (input and
 * first output alias) and the reciprocal square root into fe_y. The two are
 * then combined with secp256k1_fe_add (presumably fe_x += fe_y; its definition
 * is not visible here) so that the next iteration's input depends on this
 * iteration's outputs. */
void bench_field_rsqrt_var(void* arg) {
195+
int i;
196+
bench_inv_t *data = (bench_inv_t*)arg;
197+
198+
for (i = 0; i < 20000; i++) {
199+
secp256k1_fe_rsqrt_var(&data->fe_x, &data->fe_y, &data->fe_x);
200+
secp256k1_fe_add(&data->fe_x, &data->fe_y);
201+
}
202+
}
203+
204+
/* Benchmark body for secp256k1_fe_par_rsqrt_inv_var.
 * arg is the bench_inv_t set up by the harness. Each of the 20000 iterations
 * passes fe_x as the value to reciprocal-square-root and fe_y as the value to
 * invert, and writes the results back over the same two fields (rr aliases a,
 * ri aliases b). The outputs are then combined with secp256k1_fe_add
 * (presumably fe_x += fe_y; its definition is not visible here) so that each
 * iteration feeds the next. */
void bench_field_par_rsqrt_inv_var(void* arg) {
205+
int i;
206+
bench_inv_t *data = (bench_inv_t*)arg;
207+
208+
for (i = 0; i < 20000; i++) {
209+
secp256k1_fe_par_rsqrt_inv_var(&data->fe_x, &data->fe_y, &data->fe_x, &data->fe_y);
210+
secp256k1_fe_add(&data->fe_x, &data->fe_y);
211+
}
212+
}
213+
194214
void bench_group_double_var(void* arg) {
195215
int i;
196216
bench_inv_t *data = (bench_inv_t*)arg;
@@ -334,6 +354,8 @@ int main(int argc, char **argv) {
334354
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, 20000);
335355
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, 20000);
336356
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt_var", bench_field_sqrt_var, bench_setup, NULL, &data, 10, 20000);
357+
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_rsqrt_var", bench_field_rsqrt_var, bench_setup, NULL, &data, 10, 20000);
358+
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt") || have_flag(argc, argv, "inverse")) run_benchmark("field_par_rsqrt_inv_var", bench_field_par_rsqrt_inv_var, bench_setup, NULL, &data, 10, 20000);
337359

338360
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, 200000);
339361
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, 200000);

src/field.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,11 @@ static void secp256k1_fe_normalize_weak(secp256k1_fe *r);
3939
/** Normalize a field element, without constant-time guarantee. */
4040
static void secp256k1_fe_normalize_var(secp256k1_fe *r);
4141

42-
/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
43-
* implementation may optionally normalize the input, but this should not be relied upon. */
44-
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r);
42+
/** Verify whether a field element represents zero i.e. would normalize to a zero value. */
43+
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r);
4544

46-
/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
47-
* implementation may optionally normalize the input, but this should not be relied upon. */
48-
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r);
45+
/** Verify whether a field element represents zero i.e. would normalize to a zero value. */
46+
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r);
4947

5048
/** Set a field element equal to a small integer. Resulting field element is normalized. */
5149
static void secp256k1_fe_set_int(secp256k1_fe *r, int a);
@@ -88,12 +86,28 @@ static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp2
8886
static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a);
8987

9088
/** If a has a square root, it is computed in r and 1 is returned. If a does not
91-
* have a square root, the root of its negation is computed and 0 is returned.
89+
* have a square root, the root of -a is computed and 0 is returned.
9290
* The input's magnitude can be at most 8. The output magnitude is 1 (but not
9391
* guaranteed to be normalized). The result in r will always be a square
9492
* itself. */
9593
static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a);
9694

95+
/** If a has a square root, the square root is computed in rs, its reciprocal square root is
96+
* calculated in rr, and 1 is returned. If a does not have a square root, the root (and recip. root)
97+
* of -a are computed and 0 is returned. The input's magnitude can be at most 8. The
98+
* outputs' magnitudes are 1 (but not guaranteed to be normalized). The result in rs will always be
99+
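* a square itself. The result in rr will be a square if, and only if, a is a square. rs and rr must be distinct objects; a may alias either output, since the input is copied before any output is written.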
100+
*/
101+
static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a);
102+
103+
/** Parallel reciprocal square root and inverse. Sets ri to be the (modular) inverse of b. If a has a
104+
* square root, the reciprocal of its square root is computed in rr and 1 is returned. If a does not
105+
* have a square root, the reciprocal root of -a is computed and 0 is returned. The inputs'
106+
* magnitudes can be at most 8. The outputs' magnitudes are 1 (but not guaranteed to be normalized).
107+
* The result in rr will be a square if, and only if, a is a square. Neither a nor b may be zero, and rr and ri must be distinct objects.
108+
*/
109+
static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b);
110+
97111
/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
98112
* at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
99113
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a);

src/field_10x26_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
188188
#endif
189189
}
190190

191-
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
191+
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
192192
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
193193
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
194194

@@ -217,7 +217,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
217217
return (z0 == 0) | (z1 == 0x3FFFFFFUL);
218218
}
219219

220-
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
220+
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
221221
uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
222222
uint32_t z0, z1;
223223
uint32_t x;

src/field_5x52_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
167167
#endif
168168
}
169169

170-
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
170+
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
171171
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
172172

173173
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
@@ -190,7 +190,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
190190
return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL);
191191
}
192192

193-
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
193+
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
194194
uint64_t t0, t1, t2, t3, t4;
195195
uint64_t z0, z1;
196196
uint64_t x;

src/field_impl.h

Lines changed: 87 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,21 @@ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe *a, const
2828
return secp256k1_fe_normalizes_to_zero_var(&na);
2929
}
3030

31-
static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {
32-
/** Given that p is congruent to 3 mod 4, we can compute the square root of
33-
* a mod p as the (p+1)/4'th power of a.
34-
*
35-
* As (p+1)/4 is an even number, it will have the same result for a and for
36-
* (-a). Only one of these two numbers actually has a square root however,
37-
* so we test at the end by squaring and comparing to the input.
38-
* Also because (p+1)/4 is an even number, the computed square root is
39-
* itself always a square (a ** ((p+1)/4) is the square of a ** ((p+1)/8)).
40-
*/
41-
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
31+
static void secp256k1_fe_common_exp(secp256k1_fe *r1, secp256k1_fe *r2, const secp256k1_fe *a) {
32+
secp256k1_fe t, x, x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223;
4233
int j;
4334

44-
/** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
45-
* { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
46-
* 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
47-
*/
35+
CHECK(r1 != r2);
36+
37+
x = *a;
38+
39+
secp256k1_fe_sqr(&x2, &x);
40+
secp256k1_fe_mul(&x2, &x2, &x);
4841

49-
secp256k1_fe_sqr(&x2, a);
50-
secp256k1_fe_mul(&x2, &x2, a);
42+
*r2 = x2;
5143

5244
secp256k1_fe_sqr(&x3, &x2);
53-
secp256k1_fe_mul(&x3, &x3, a);
45+
secp256k1_fe_mul(&x3, &x3, &x);
5446

5547
x6 = x3;
5648
for (j=0; j<3; j++) {
@@ -108,112 +100,112 @@ static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {
108100

109101
/* The final result is then assembled using a sliding window over the blocks. */
110102

111-
t1 = x223;
103+
t = x223;
112104
for (j=0; j<23; j++) {
113-
secp256k1_fe_sqr(&t1, &t1);
105+
secp256k1_fe_sqr(&t, &t);
114106
}
115-
secp256k1_fe_mul(&t1, &t1, &x22);
116-
for (j=0; j<6; j++) {
117-
secp256k1_fe_sqr(&t1, &t1);
107+
secp256k1_fe_mul(&t, &t, &x22);
108+
109+
for (j=0; j<5; j++) {
110+
secp256k1_fe_sqr(&t, &t);
118111
}
119-
secp256k1_fe_mul(&t1, &t1, &x2);
120-
secp256k1_fe_sqr(&t1, &t1);
121-
secp256k1_fe_sqr(r, &t1);
112+
*r1 = t;
113+
}
114+
115+
static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {
116+
secp256k1_fe t, x, x2;
117+
118+
x = *a;
119+
120+
secp256k1_fe_common_exp(&t, &x2, &x);
121+
122+
secp256k1_fe_sqr(&t, &t);
123+
secp256k1_fe_mul(&t, &t, &x2);
124+
secp256k1_fe_sqr(&t, &t);
125+
secp256k1_fe_sqr(&t, &t);
126+
127+
*r = t;
122128

123129
/* Check that a square root was actually calculated */
124130

125-
secp256k1_fe_sqr(&t1, r);
126-
return secp256k1_fe_equal_var(&t1, a);
131+
secp256k1_fe_sqr(&t, &t);
132+
return secp256k1_fe_equal_var(&t, &x);
127133
}
128134

129-
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
130-
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
135+
static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a) {
136+
secp256k1_fe t, x, x2;
131137
int j;
132138

133-
/** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
134-
* { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
135-
* [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
136-
*/
139+
CHECK(rs != rr);
137140

138-
secp256k1_fe_sqr(&x2, a);
139-
secp256k1_fe_mul(&x2, &x2, a);
141+
x = *a;
140142

141-
secp256k1_fe_sqr(&x3, &x2);
142-
secp256k1_fe_mul(&x3, &x3, a);
143+
secp256k1_fe_common_exp(&t, &x2, &x);
143144

144-
x6 = x3;
145+
secp256k1_fe_mul(&t, &t, &x);
145146
for (j=0; j<3; j++) {
146-
secp256k1_fe_sqr(&x6, &x6);
147+
secp256k1_fe_sqr(&t, &t);
147148
}
148-
secp256k1_fe_mul(&x6, &x6, &x3);
149+
secp256k1_fe_mul(&t, &t, &x2);
149150

150-
x9 = x6;
151-
for (j=0; j<3; j++) {
152-
secp256k1_fe_sqr(&x9, &x9);
153-
}
154-
secp256k1_fe_mul(&x9, &x9, &x3);
151+
*rr = t;
155152

156-
x11 = x9;
157-
for (j=0; j<2; j++) {
158-
secp256k1_fe_sqr(&x11, &x11);
159-
}
160-
secp256k1_fe_mul(&x11, &x11, &x2);
153+
secp256k1_fe_mul(&t, &t, &x);
161154

162-
x22 = x11;
163-
for (j=0; j<11; j++) {
164-
secp256k1_fe_sqr(&x22, &x22);
165-
}
166-
secp256k1_fe_mul(&x22, &x22, &x11);
155+
*rs = t;
167156

168-
x44 = x22;
169-
for (j=0; j<22; j++) {
170-
secp256k1_fe_sqr(&x44, &x44);
171-
}
172-
secp256k1_fe_mul(&x44, &x44, &x22);
157+
/* Check that a square root was actually calculated */
173158

174-
x88 = x44;
175-
for (j=0; j<44; j++) {
176-
secp256k1_fe_sqr(&x88, &x88);
177-
}
178-
secp256k1_fe_mul(&x88, &x88, &x44);
159+
secp256k1_fe_sqr(&t, &t);
160+
return secp256k1_fe_equal_var(&t, &x);
161+
}
179162

180-
x176 = x88;
181-
for (j=0; j<88; j++) {
182-
secp256k1_fe_sqr(&x176, &x176);
183-
}
184-
secp256k1_fe_mul(&x176, &x176, &x88);
163+
static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b) {
185164

186-
x220 = x176;
187-
for (j=0; j<44; j++) {
188-
secp256k1_fe_sqr(&x220, &x220);
189-
}
190-
secp256k1_fe_mul(&x220, &x220, &x44);
165+
secp256k1_fe b2, ab2, ab4, sqrt, recip, t;
166+
int ret;
191167

192-
x223 = x220;
193-
for (j=0; j<3; j++) {
194-
secp256k1_fe_sqr(&x223, &x223);
195-
}
196-
secp256k1_fe_mul(&x223, &x223, &x3);
168+
CHECK(rr != ri);
197169

198-
/* The final result is then assembled using a sliding window over the blocks. */
170+
/* Zero inputs could possibly be handled with conditional moves, if necessary */
171+
CHECK(!secp256k1_fe_normalizes_to_zero(a) && !secp256k1_fe_normalizes_to_zero(b));
199172

200-
t1 = x223;
201-
for (j=0; j<23; j++) {
202-
secp256k1_fe_sqr(&t1, &t1);
203-
}
204-
secp256k1_fe_mul(&t1, &t1, &x22);
205-
for (j=0; j<5; j++) {
206-
secp256k1_fe_sqr(&t1, &t1);
207-
}
208-
secp256k1_fe_mul(&t1, &t1, a);
173+
/* Calculate the reciprocal sqrt of a.b^4 */
174+
175+
secp256k1_fe_sqr(&b2, b);
176+
secp256k1_fe_mul(&ab2, &b2, a);
177+
secp256k1_fe_mul(&ab4, &ab2, &b2);
178+
179+
ret = secp256k1_fe_rsqrt_var(&sqrt, &recip, &ab4);
180+
181+
/* Inverse */
182+
secp256k1_fe_sqr(&t, &recip);
183+
secp256k1_fe_mul(&t, &t, &ab2);
184+
secp256k1_fe_mul(&t, &t, b);
185+
secp256k1_fe_sqr(&t, &t);
186+
secp256k1_fe_mul(ri, b, &t);
187+
188+
/* Reciprocal */
189+
secp256k1_fe_mul(rr, &recip, &b2);
190+
191+
return ret;
192+
}
193+
194+
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
195+
secp256k1_fe t, x2;
196+
int j;
197+
198+
secp256k1_fe_common_exp(&t, &x2, a);
199+
200+
secp256k1_fe_mul(&t, &t, a);
209201
for (j=0; j<3; j++) {
210-
secp256k1_fe_sqr(&t1, &t1);
202+
secp256k1_fe_sqr(&t, &t);
211203
}
212-
secp256k1_fe_mul(&t1, &t1, &x2);
204+
secp256k1_fe_mul(&t, &t, &x2);
213205
for (j=0; j<2; j++) {
214-
secp256k1_fe_sqr(&t1, &t1);
206+
secp256k1_fe_sqr(&t, &t);
215207
}
216-
secp256k1_fe_mul(r, a, &t1);
208+
secp256k1_fe_mul(r, a, &t);
217209
}
218210

219211
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a) {

0 commit comments

Comments
 (0)