Skip to content

Commit a5198c5

Browse files
committed
Remove memory fences where we can do better
1 parent f423d2e commit a5198c5

File tree

2 files changed

+70
-51
lines changed

2 files changed

+70
-51
lines changed

src/bench.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,21 @@
1313
#include "sys/time.h"
1414

1515

16+
# if defined(__GNUC__)
17+
# define ALWAYS_INLINE SECP256K1_INLINE __attribute__((__always_inline__))
18+
# elif defined(_MSC_VER) && !defined(__clang__)
19+
# define ALWAYS_INLINE SECP256K1_INLINE __forceinline
20+
# elif defined(__CLANG__) && __has_attribute(__always_inline__)
21+
# define ALWAYS_INLINE SECP256K1_INLINE __attribute__((__always_inline__))
22+
# else
23+
# define ALWAYS_INLINE SECP256K1_INLINE
24+
# endif
25+
1626
/* A memory fence to prevent compiler optimizations
1727
It tells the optimizer that the assembly statement may do whatever it wants with *p, so the optimizer can't optimize *p out.
1828
The nice thing is that because the assembly is actually empty, it doesn't add any instructions.
1929
*Notice: This is best effort; nothing guarantees that it will always work.* */
20-
static void memory_fence(void *p) {
30+
ALWAYS_INLINE static void memory_fence(void *p) {
2131
__asm__ __volatile__("": : "g"(p) : "memory");
2232
}
2333

src/bench_internal.c

Lines changed: 59 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ typedef struct {
2727
int wnaf[256];
2828
} bench_inv;
2929

30-
3130
void bench_setup(void* arg) {
3231
bench_inv *data = (bench_inv*)arg;
3332

@@ -58,13 +57,13 @@ void bench_setup(void* arg) {
5857
}
5958

6059
void bench_scalar_add(void* arg) {
61-
int i;
60+
int i, j = 0;
6261
bench_inv *data = (bench_inv*)arg;
6362

6463
for (i = 0; i < 2000000; i++) {
65-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
66-
memory_fence(data);
64+
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
6765
}
66+
CHECK(j <= 2000000);
6867
}
6968

7069
void bench_scalar_negate(void* arg) {
@@ -73,8 +72,8 @@ void bench_scalar_negate(void* arg) {
7372

7473
for (i = 0; i < 2000000; i++) {
7574
secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
76-
memory_fence(data);
7775
}
76+
memory_fence(data);
7877
}
7978

8079
void bench_scalar_sqr(void* arg) {
@@ -83,8 +82,8 @@ void bench_scalar_sqr(void* arg) {
8382

8483
for (i = 0; i < 200000; i++) {
8584
secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
86-
memory_fence(data);
8785
}
86+
memory_fence(data);
8887
}
8988

9089
void bench_scalar_mul(void* arg) {
@@ -93,44 +92,45 @@ void bench_scalar_mul(void* arg) {
9392

9493
for (i = 0; i < 200000; i++) {
9594
secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
96-
memory_fence(data);
9795
}
96+
memory_fence(data);
9897
}
9998

10099
#ifdef USE_ENDOMORPHISM
101100
void bench_scalar_split(void* arg) {
102-
int i;
101+
int i, j = 0;
103102
bench_inv *data = (bench_inv*)arg;
104103

105104
for (i = 0; i < 20000; i++) {
106-
secp256k1_scalar l, r;
107-
secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x);
108-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
109-
memory_fence(data);
105+
secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x);
106+
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
110107
}
108+
CHECK(j <= 2000);
111109
}
112110
#endif
113111

114112
void bench_scalar_inverse(void* arg) {
115-
int i;
113+
int i, j = 0;
116114
bench_inv *data = (bench_inv*)arg;
117115

118116
for (i = 0; i < 2000; i++) {
119117
secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
120-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
121-
memory_fence(data);
118+
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
122119
}
120+
memory_fence(data);
121+
CHECK(j <= 2000);
123122
}
124123

125124
void bench_scalar_inverse_var(void* arg) {
126-
int i;
125+
int i, j = 0;
127126
bench_inv *data = (bench_inv*)arg;
128127

129128
for (i = 0; i < 2000; i++) {
130129
secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
131-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
132-
memory_fence(data);
130+
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
133131
}
132+
memory_fence(data);
133+
CHECK(j <= 2000);
134134
}
135135

136136
void bench_field_normalize(void* arg) {
@@ -139,8 +139,8 @@ void bench_field_normalize(void* arg) {
139139

140140
for (i = 0; i < 2000000; i++) {
141141
secp256k1_fe_normalize(&data->fe_x);
142-
memory_fence(data);
143142
}
143+
memory_fence(data);
144144
}
145145

146146
void bench_field_normalize_weak(void* arg) {
@@ -149,8 +149,8 @@ void bench_field_normalize_weak(void* arg) {
149149

150150
for (i = 0; i < 2000000; i++) {
151151
secp256k1_fe_normalize_weak(&data->fe_x);
152-
memory_fence(data);
153152
}
153+
memory_fence(data);
154154
}
155155

156156
void bench_field_mul(void* arg) {
@@ -159,8 +159,8 @@ void bench_field_mul(void* arg) {
159159

160160
for (i = 0; i < 200000; i++) {
161161
secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
162-
memory_fence(data);
163162
}
163+
memory_fence(data);
164164
}
165165

166166
void bench_field_sqr(void* arg) {
@@ -169,8 +169,8 @@ void bench_field_sqr(void* arg) {
169169

170170
for (i = 0; i < 200000; i++) {
171171
secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
172-
memory_fence(data);
173172
}
173+
memory_fence(data);
174174
}
175175

176176
void bench_field_inverse(void* arg) {
@@ -180,8 +180,8 @@ void bench_field_inverse(void* arg) {
180180
for (i = 0; i < 20000; i++) {
181181
secp256k1_fe_inv(&data->fe_x, &data->fe_x);
182182
secp256k1_fe_add(&data->fe_x, &data->fe_y);
183-
memory_fence(data);
184183
}
184+
memory_fence(data);
185185
}
186186

187187
void bench_field_inverse_var(void* arg) {
@@ -191,21 +191,22 @@ void bench_field_inverse_var(void* arg) {
191191
for (i = 0; i < 20000; i++) {
192192
secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
193193
secp256k1_fe_add(&data->fe_x, &data->fe_y);
194-
memory_fence(data);
195194
}
195+
memory_fence(data);
196196
}
197197

198198
void bench_field_sqrt(void* arg) {
199-
int i;
199+
int i, j=0;
200200
bench_inv *data = (bench_inv*)arg;
201201
secp256k1_fe t;
202202

203203
for (i = 0; i < 20000; i++) {
204204
t = data->fe_x;
205-
secp256k1_fe_sqrt(&data->fe_x, &t);
205+
j += secp256k1_fe_sqrt(&data->fe_x, &t);
206206
secp256k1_fe_add(&data->fe_x, &data->fe_y);
207-
memory_fence(data);
208207
}
208+
memory_fence(data);
209+
CHECK(j <= 20000);
209210
}
210211

211212
void bench_group_double_var(void* arg) {
@@ -214,8 +215,8 @@ void bench_group_double_var(void* arg) {
214215

215216
for (i = 0; i < 200000; i++) {
216217
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
217-
memory_fence(data);
218218
}
219+
memory_fence(data);
219220
}
220221

221222
void bench_group_add_var(void* arg) {
@@ -224,8 +225,8 @@ void bench_group_add_var(void* arg) {
224225

225226
for (i = 0; i < 200000; i++) {
226227
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
227-
memory_fence(data);
228228
}
229+
memory_fence(data);
229230
}
230231

231232
void bench_group_add_affine(void* arg) {
@@ -234,8 +235,8 @@ void bench_group_add_affine(void* arg) {
234235

235236
for (i = 0; i < 200000; i++) {
236237
secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
237-
memory_fence(data);
238238
}
239+
memory_fence(data);
239240
}
240241

241242
void bench_group_add_affine_var(void* arg) {
@@ -244,40 +245,42 @@ void bench_group_add_affine_var(void* arg) {
244245

245246
for (i = 0; i < 200000; i++) {
246247
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
247-
memory_fence(data);
248248
}
249+
memory_fence(data);
249250
}
250251

251252
void bench_group_jacobi_var(void* arg) {
252-
int i;
253+
int i, j = 0;
253254
bench_inv *data = (bench_inv*)arg;
254255

255256
for (i = 0; i < 20000; i++) {
256-
secp256k1_gej_has_quad_y_var(&data->gej_x);
257-
memory_fence(data);
257+
j += secp256k1_gej_has_quad_y_var(&data->gej_x);
258258
}
259+
CHECK(j == 20000);
259260
}
260261

261262
void bench_ecmult_wnaf(void* arg) {
262-
int i;
263+
int i, bits=0, overflow = 0;
263264
bench_inv *data = (bench_inv*)arg;
264265

265266
for (i = 0; i < 20000; i++) {
266-
secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
267-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
268-
memory_fence(data);
267+
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
268+
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
269269
}
270+
CHECK(overflow >= 0);
271+
CHECK(bits <= 256*20000);
270272
}
271273

272274
void bench_wnaf_const(void* arg) {
273-
int i;
275+
int i, bits = 0, overflow = 0;
274276
bench_inv *data = (bench_inv*)arg;
275277

276278
for (i = 0; i < 20000; i++) {
277-
secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
278-
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
279-
memory_fence(data);
279+
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
280+
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
280281
}
282+
CHECK(overflow >= 0);
283+
CHECK(bits <= 256*20000);
281284
}
282285

283286

@@ -290,8 +293,8 @@ void bench_sha256(void* arg) {
290293
secp256k1_sha256_initialize(&sha);
291294
secp256k1_sha256_write(&sha, data->data, 32);
292295
secp256k1_sha256_finalize(&sha, data->data);
293-
memory_fence(data);
294296
}
297+
memory_fence(data);
295298
}
296299

297300
void bench_hmac_sha256(void* arg) {
@@ -303,8 +306,8 @@ void bench_hmac_sha256(void* arg) {
303306
secp256k1_hmac_sha256_initialize(&hmac, data->data, 32);
304307
secp256k1_hmac_sha256_write(&hmac, data->data, 32);
305308
secp256k1_hmac_sha256_finalize(&hmac, data->data);
306-
memory_fence(data);
307309
}
310+
memory_fence(data);
308311
}
309312

310313
void bench_rfc6979_hmac_sha256(void* arg) {
@@ -315,29 +318,35 @@ void bench_rfc6979_hmac_sha256(void* arg) {
315318
for (i = 0; i < 20000; i++) {
316319
secp256k1_rfc6979_hmac_sha256_initialize(&rng, data->data, 64);
317320
secp256k1_rfc6979_hmac_sha256_generate(&rng, data->data, 32);
318-
memory_fence(data);
319321
}
322+
memory_fence(data);
320323
}
321324

322325
void bench_context_verify(void* arg) {
323326
int i;
327+
secp256k1_context* ctx;
324328
(void)arg;
325329
for (i = 0; i < 20; i++) {
326-
secp256k1_context_destroy(secp256k1_context_create(SECP256K1_CONTEXT_VERIFY));
330+
ctx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY);
331+
memory_fence(ctx);
332+
secp256k1_context_destroy(ctx);
327333
}
328334
}
329335

330336
void bench_context_sign(void* arg) {
331337
int i;
338+
secp256k1_context* ctx;
332339
(void)arg;
333340
for (i = 0; i < 200; i++) {
334-
secp256k1_context_destroy(secp256k1_context_create(SECP256K1_CONTEXT_SIGN));
341+
ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN);
342+
memory_fence(ctx);
343+
secp256k1_context_destroy(ctx);
335344
}
336345
}
337346

338347
#ifndef USE_NUM_NONE
339348
void bench_num_jacobi(void* arg) {
340-
int i, j;
349+
int i, j = 0;
341350
bench_inv *data = (bench_inv*)arg;
342351
secp256k1_num nx, norder;
343352

@@ -346,9 +355,9 @@ void bench_num_jacobi(void* arg) {
346355
secp256k1_scalar_get_num(&norder, &data->scalar_y);
347356

348357
for (i = 0; i < 200000; i++) {
349-
j = secp256k1_num_jacobi(&nx, &norder);
350-
memory_fence(&j);
358+
j += secp256k1_num_jacobi(&nx, &norder);
351359
}
360+
CHECK(j <= 200000);
352361
}
353362
#endif
354363

0 commit comments

Comments
 (0)