Skip to content

Commit 0584882

Browse files
committed
Merge bitcoin#27598: bench: Add SHA256 implementation specific benchmarks
ce6df7d bench: Add SHA256 implementation specific benchmarks (Hennadii Stepanov) 5f72417 Add ability to specify SHA256 implementation for benchmark purposes (Hennadii Stepanov) Pull request description: On the master branch, only the best available `SHA256` implementation is being benchmarked. This PR makes `bench_bitcoin` benchmark all `SHA256` implementations that are available on the system. For example: - on Linux: ``` $ ./src/bench/bench_bitcoin -filter=SHA.* Using the 'x86_shani(1way,2way)' SHA256 implementation | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1.00 | 1,002,545,462.93 | 0.4% | 0.01 | `SHA1` | 2.91 | 344,117,991.18 | 0.1% | 0.03 | `SHA256 using the 'standard' SHA256 implementation` | 2.21 | 453,081,794.40 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 2.21 | 453,396,506.58 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 0.53 | 1,870,520,687.49 | 0.1% | 0.01 | `SHA256 using the 'x86_shani(1way,2way)' SHA256 implementation` | 7.90 | 126,627,134.33 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation` | 3.94 | 253,850,206.07 | 0.0% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 1.40 | 716,247,553.38 | 0.4% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 1.26 | 792,706,270.13 | 0.9% | 0.01 | `SHA256D64_1024 using the 'x86_shani(1way,2way)' SHA256 implementation` | 6.75 | 148,172,097.64 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation` | 4.90 | 204,156,289.96 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 4.90 | 204,101,274.22 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 1.70 | 589,052,595.35 | 0.4% | 0.01 | `SHA256_32b using the 'x86_shani(1way,2way)' SHA256 
implementation` | 2.21 | 453,441,736.14 | 1.0% | 0.02 | `SHA3_256_1M` | 1.92 | 521,807,101.48 | 1.0% | 0.02 | `SHA512` ``` - on macOS (M1): ``` % ./src/bench/bench_bitcoin -filter=SHA.\* Using the 'arm_shani(1way,2way)' SHA256 implementation | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1.36 | 737,644,274.00 | 0.6% | 0.02 | `SHA1` | 3.08 | 324,556,777.15 | 0.2% | 0.03 | `SHA256 using the 'standard' SHA256 implementation` | 0.45 | 2,198,104,135.18 | 0.3% | 0.01 | `SHA256 using the 'arm_shani(1way,2way)' SHA256 implementation` | 8.84 | 113,131,299.18 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation` | 0.94 | 1,059,406,239.36 | 0.0% | 0.01 | `SHA256D64_1024 using the 'arm_shani(1way,2way)' SHA256 implementation` | 6.17 | 162,050,659.51 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation` | 1.15 | 866,637,155.98 | 0.0% | 0.01 | `SHA256_32b using the 'arm_shani(1way,2way)' SHA256 implementation` | 1.69 | 592,636,491.59 | 0.2% | 0.02 | `SHA3_256_1M` | 1.89 | 528,785,775.66 | 0.0% | 0.02 | `SHA512` ``` Found it useful, while working on bitcoin#24773. ACKs for top commit: martinus: ACK ce6df7d. I would have created a helper function in the test to avoid the code duplication for each test, but that's just me nitpicking. Here are results from my Ryzen 7950X, with `./src/bench/bench_bitcoin -filter="SHA256.*" -min-time=1000`: MarcoFalke: review ACK ce6df7d 🏵 sipa: ACK ce6df7d Tree-SHA512: e3de50e11b9a3a0d1e05583786041d4dc9afa2022e2115d75d6d1f63b11f62f6336f093001e53a631431d558c4dae29c596755c9e2d6aa78c382270116cc1f7f
2 parents db7b5df + ce6df7d commit 0584882

File tree

3 files changed

+163
-23
lines changed

3 files changed

+163
-23
lines changed

src/bench/crypto_hash.cpp

Lines changed: 121 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <crypto/siphash.h>
1414
#include <hash.h>
1515
#include <random.h>
16+
#include <tinyformat.h>
1617
#include <uint256.h>
1718

1819
/* Number of bytes to hash per iteration */
@@ -36,13 +37,48 @@ static void SHA1(benchmark::Bench& bench)
3637
});
3738
}
3839

39-
static void SHA256(benchmark::Bench& bench)
40+
// Benchmarks CSHA256 over a zero-filled buffer of BUFFER_SIZE bytes after
// calling SHA256AutoDetect with sha256_implementation::STANDARD (value 0, i.e.
// none of the optional USE_* flags requested).
static void SHA256_STANDARD(benchmark::Bench& bench)
4041
{
42+
// SHA256AutoDetect() re-selects the transform functions and returns the name
// of the chosen implementation; that name becomes part of the benchmark name.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD)));
4143
uint8_t hash[CSHA256::OUTPUT_SIZE];
4244
std::vector<uint8_t> in(BUFFER_SIZE,0);
4345
// batch(in.size()) + unit("byte") make the results per-byte figures.
bench.batch(in.size()).unit("byte").run([&] {
4446
CSHA256().Write(in.data(), in.size()).Finalize(hash);
4547
});
48+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
49+
}
50+
51+
// Benchmarks CSHA256 over a zero-filled buffer of BUFFER_SIZE bytes after
// calling SHA256AutoDetect with only sha256_implementation::USE_SSE4 requested.
static void SHA256_SSE4(benchmark::Bench& bench)
52+
{
53+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4)));
54+
uint8_t hash[CSHA256::OUTPUT_SIZE];
55+
std::vector<uint8_t> in(BUFFER_SIZE,0);
56+
// batch(in.size()) + unit("byte") make the results per-byte figures.
bench.batch(in.size()).unit("byte").run([&] {
57+
CSHA256().Write(in.data(), in.size()).Finalize(hash);
58+
});
59+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
60+
}
61+
62+
// Benchmarks CSHA256 over a zero-filled buffer of BUFFER_SIZE bytes after
// calling SHA256AutoDetect with USE_SSE4_AND_AVX2 (USE_SSE4 | USE_AVX2).
static void SHA256_AVX2(benchmark::Bench& bench)
63+
{
64+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)));
65+
uint8_t hash[CSHA256::OUTPUT_SIZE];
66+
std::vector<uint8_t> in(BUFFER_SIZE,0);
67+
// batch(in.size()) + unit("byte") make the results per-byte figures.
bench.batch(in.size()).unit("byte").run([&] {
68+
CSHA256().Write(in.data(), in.size()).Finalize(hash);
69+
});
70+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
71+
}
72+
73+
// Benchmarks CSHA256 over a zero-filled buffer of BUFFER_SIZE bytes after
// calling SHA256AutoDetect with USE_SSE4_AND_SHANI (USE_SSE4 | USE_SHANI).
static void SHA256_SHANI(benchmark::Bench& bench)
74+
{
75+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)));
76+
uint8_t hash[CSHA256::OUTPUT_SIZE];
77+
std::vector<uint8_t> in(BUFFER_SIZE,0);
78+
// batch(in.size()) + unit("byte") make the results per-byte figures.
bench.batch(in.size()).unit("byte").run([&] {
79+
CSHA256().Write(in.data(), in.size()).Finalize(hash);
80+
});
81+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
4682
}
4783

4884
static void SHA3_256_1M(benchmark::Bench& bench)
@@ -54,22 +90,92 @@ static void SHA3_256_1M(benchmark::Bench& bench)
5490
});
5591
}
5692

57-
static void SHA256_32b(benchmark::Bench& bench)
93+
// Benchmarks CSHA256 on a 32-byte input after calling SHA256AutoDetect with
// sha256_implementation::STANDARD (value 0, no optional USE_* flags requested).
static void SHA256_32b_STANDARD(benchmark::Bench& bench)
94+
{
95+
// SHA256AutoDetect() re-selects the transform functions and returns the name
// of the chosen implementation; that name becomes part of the benchmark name.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD)));
96+
std::vector<uint8_t> in(32,0);
97+
// Finalize() writes into the same buffer that Write() just consumed, so each
// iteration's input is the previous iteration's output.
bench.batch(in.size()).unit("byte").run([&] {
98+
CSHA256()
99+
.Write(in.data(), in.size())
100+
.Finalize(in.data());
101+
});
102+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
103+
}
104+
105+
// Benchmarks CSHA256 on a 32-byte input after calling SHA256AutoDetect with
// only sha256_implementation::USE_SSE4 requested.
static void SHA256_32b_SSE4(benchmark::Bench& bench)
106+
{
107+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4)));
108+
std::vector<uint8_t> in(32,0);
109+
// Finalize() writes into the same buffer that Write() just consumed, so each
// iteration's input is the previous iteration's output.
bench.batch(in.size()).unit("byte").run([&] {
110+
CSHA256()
111+
.Write(in.data(), in.size())
112+
.Finalize(in.data());
113+
});
114+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
115+
}
116+
117+
// Benchmarks CSHA256 on a 32-byte input after calling SHA256AutoDetect with
// USE_SSE4_AND_AVX2 (USE_SSE4 | USE_AVX2).
static void SHA256_32b_AVX2(benchmark::Bench& bench)
118+
{
119+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)));
120+
std::vector<uint8_t> in(32,0);
121+
// Finalize() writes into the same buffer that Write() just consumed, so each
// iteration's input is the previous iteration's output.
bench.batch(in.size()).unit("byte").run([&] {
122+
CSHA256()
123+
.Write(in.data(), in.size())
124+
.Finalize(in.data());
125+
});
126+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
127+
}
128+
129+
// Benchmarks CSHA256 on a 32-byte input after calling SHA256AutoDetect with
// USE_SSE4_AND_SHANI (USE_SSE4 | USE_SHANI).
static void SHA256_32b_SHANI(benchmark::Bench& bench)
58130
{
131+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)));
59132
std::vector<uint8_t> in(32,0);
60133
// Finalize() writes into the same buffer that Write() just consumed, so each
// iteration's input is the previous iteration's output.
bench.batch(in.size()).unit("byte").run([&] {
61134
CSHA256()
62135
.Write(in.data(), in.size())
63136
.Finalize(in.data());
64137
});
138+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
139+
}
140+
141+
// Benchmarks SHA256D64 with a block count of 1024 over a 64 * 1024 byte
// zero-initialised buffer, after calling SHA256AutoDetect with
// sha256_implementation::STANDARD (value 0, no optional USE_* flags requested).
static void SHA256D64_1024_STANDARD(benchmark::Bench& bench)
142+
{
143+
// SHA256AutoDetect() re-selects the transform functions and returns the name
// of the chosen implementation; that name becomes part of the benchmark name.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD)));
144+
std::vector<uint8_t> in(64 * 1024, 0);
145+
// Both pointer arguments are in.data(): the call runs in place on `in`.
bench.batch(in.size()).unit("byte").run([&] {
146+
SHA256D64(in.data(), in.data(), 1024);
147+
});
148+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
149+
}
150+
151+
// Benchmarks SHA256D64 with a block count of 1024 over a 64 * 1024 byte
// zero-initialised buffer, after calling SHA256AutoDetect with only
// sha256_implementation::USE_SSE4 requested.
static void SHA256D64_1024_SSE4(benchmark::Bench& bench)
152+
{
153+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4)));
154+
std::vector<uint8_t> in(64 * 1024, 0);
155+
// Both pointer arguments are in.data(): the call runs in place on `in`.
bench.batch(in.size()).unit("byte").run([&] {
156+
SHA256D64(in.data(), in.data(), 1024);
157+
});
158+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
159+
}
160+
161+
// Benchmarks SHA256D64 with a block count of 1024 over a 64 * 1024 byte
// zero-initialised buffer, after calling SHA256AutoDetect with
// USE_SSE4_AND_AVX2 (USE_SSE4 | USE_AVX2).
static void SHA256D64_1024_AVX2(benchmark::Bench& bench)
162+
{
163+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)));
164+
std::vector<uint8_t> in(64 * 1024, 0);
165+
// Both pointer arguments are in.data(): the call runs in place on `in`.
bench.batch(in.size()).unit("byte").run([&] {
166+
SHA256D64(in.data(), in.data(), 1024);
167+
});
168+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
65169
}
66170

67-
static void SHA256D64_1024(benchmark::Bench& bench)
171+
// Benchmarks SHA256D64 with a block count of 1024 over a 64 * 1024 byte
// zero-initialised buffer, after calling SHA256AutoDetect with
// USE_SSE4_AND_SHANI (USE_SSE4 | USE_SHANI).
static void SHA256D64_1024_SHANI(benchmark::Bench& bench)
68172
{
173+
// The implementation name in the benchmark title is whatever
// SHA256AutoDetect() returns for this request, not a hard-coded string.
bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)));
69174
std::vector<uint8_t> in(64 * 1024, 0);
70175
// Both pointer arguments are in.data(): the call runs in place on `in`.
bench.batch(in.size()).unit("byte").run([&] {
71176
SHA256D64(in.data(), in.data(), 1024);
72177
});
178+
// Run detection again with the default argument (USE_ALL) so the restricted
// selection made above does not persist after this benchmark.
SHA256AutoDetect();
73179
}
74180

75181
static void SHA512(benchmark::Bench& bench)
@@ -152,13 +258,22 @@ static void MuHashPrecompute(benchmark::Bench& bench)
152258

153259
BENCHMARK(BenchRIPEMD160, benchmark::PriorityLevel::HIGH);
154260
BENCHMARK(SHA1, benchmark::PriorityLevel::HIGH);
155-
BENCHMARK(SHA256, benchmark::PriorityLevel::HIGH);
261+
BENCHMARK(SHA256_STANDARD, benchmark::PriorityLevel::HIGH);
262+
BENCHMARK(SHA256_SSE4, benchmark::PriorityLevel::HIGH);
263+
BENCHMARK(SHA256_AVX2, benchmark::PriorityLevel::HIGH);
264+
BENCHMARK(SHA256_SHANI, benchmark::PriorityLevel::HIGH);
156265
BENCHMARK(SHA512, benchmark::PriorityLevel::HIGH);
157266
BENCHMARK(SHA3_256_1M, benchmark::PriorityLevel::HIGH);
158267

159-
BENCHMARK(SHA256_32b, benchmark::PriorityLevel::HIGH);
268+
BENCHMARK(SHA256_32b_STANDARD, benchmark::PriorityLevel::HIGH);
269+
BENCHMARK(SHA256_32b_SSE4, benchmark::PriorityLevel::HIGH);
270+
BENCHMARK(SHA256_32b_AVX2, benchmark::PriorityLevel::HIGH);
271+
BENCHMARK(SHA256_32b_SHANI, benchmark::PriorityLevel::HIGH);
160272
BENCHMARK(SipHash_32b, benchmark::PriorityLevel::HIGH);
161-
BENCHMARK(SHA256D64_1024, benchmark::PriorityLevel::HIGH);
273+
BENCHMARK(SHA256D64_1024_STANDARD, benchmark::PriorityLevel::HIGH);
274+
BENCHMARK(SHA256D64_1024_SSE4, benchmark::PriorityLevel::HIGH);
275+
BENCHMARK(SHA256D64_1024_AVX2, benchmark::PriorityLevel::HIGH);
276+
BENCHMARK(SHA256D64_1024_SHANI, benchmark::PriorityLevel::HIGH);
162277
BENCHMARK(FastRandom_32bit, benchmark::PriorityLevel::HIGH);
163278
BENCHMARK(FastRandom_1bit, benchmark::PriorityLevel::HIGH);
164279

src/crypto/sha256.cpp

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -579,9 +579,15 @@ bool AVXEnabled()
579579
} // namespace
580580

581581

582-
std::string SHA256AutoDetect()
582+
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation)
583583
{
584584
std::string ret = "standard";
585+
Transform = sha256::Transform;
586+
TransformD64 = sha256::TransformD64;
587+
TransformD64_2way = nullptr;
588+
TransformD64_4way = nullptr;
589+
TransformD64_8way = nullptr;
590+
585591
#if defined(USE_ASM) && defined(HAVE_GETCPUID)
586592
bool have_sse4 = false;
587593
bool have_xsave = false;
@@ -592,16 +598,22 @@ std::string SHA256AutoDetect()
592598

593599
uint32_t eax, ebx, ecx, edx;
594600
GetCPUID(1, 0, eax, ebx, ecx, edx);
595-
have_sse4 = (ecx >> 19) & 1;
601+
if (use_implementation & sha256_implementation::USE_SSE4) {
602+
have_sse4 = (ecx >> 19) & 1;
603+
}
596604
have_xsave = (ecx >> 27) & 1;
597605
have_avx = (ecx >> 28) & 1;
598606
if (have_xsave && have_avx) {
599607
enabled_avx = AVXEnabled();
600608
}
601609
if (have_sse4) {
602610
GetCPUID(7, 0, eax, ebx, ecx, edx);
603-
have_avx2 = (ebx >> 5) & 1;
604-
have_x86_shani = (ebx >> 29) & 1;
611+
if (use_implementation & sha256_implementation::USE_AVX2) {
612+
have_avx2 = (ebx >> 5) & 1;
613+
}
614+
if (use_implementation & sha256_implementation::USE_SHANI) {
615+
have_x86_shani = (ebx >> 29) & 1;
616+
}
605617
}
606618

607619
#if defined(ENABLE_X86_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
@@ -637,27 +649,28 @@ std::string SHA256AutoDetect()
637649

638650
#if defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
639651
bool have_arm_shani = false;
640-
652+
if (use_implementation & sha256_implementation::USE_SHANI) {
641653
#if defined(__linux__)
642654
#if defined(__arm__) // 32-bit
643-
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
644-
have_arm_shani = true;
645-
}
655+
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
656+
have_arm_shani = true;
657+
}
646658
#endif
647659
#if defined(__aarch64__) // 64-bit
648-
if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
649-
have_arm_shani = true;
650-
}
660+
if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
661+
have_arm_shani = true;
662+
}
651663
#endif
652664
#endif
653665

654666
#if defined(MAC_OSX)
655-
int val = 0;
656-
size_t len = sizeof(val);
657-
if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
658-
have_arm_shani = val != 0;
659-
}
667+
int val = 0;
668+
size_t len = sizeof(val);
669+
if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
670+
have_arm_shani = val != 0;
671+
}
660672
#endif
673+
}
661674

662675
if (have_arm_shani) {
663676
Transform = sha256_arm_shani::Transform;

src/crypto/sha256.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,22 @@ class CSHA256
2626
CSHA256& Reset();
2727
};
2828

29+
// Bit flags passed to SHA256AutoDetect() to limit which optional SHA256
// implementations detection is allowed to consider.
namespace sha256_implementation {
30+
enum UseImplementation : uint8_t {
31+
// No optional flag set.
STANDARD = 0,
32+
// Allows detection to read the SSE4 CPUID bit.
USE_SSE4 = 1 << 0,
33+
// Allows detection to read the AVX2 CPUID bit. In SHA256AutoDetect that check
// sits inside the `if (have_sse4)` branch, so on x86 it only takes effect when
// USE_SSE4 is set as well.
USE_AVX2 = 1 << 1,
34+
// Allows detection to read the x86 SHA-NI CPUID bit (also inside the
// `if (have_sse4)` branch) and to run the ARM SHA2 capability probe.
USE_SHANI = 1 << 2,
35+
// Convenience combinations of the single-bit flags above.
USE_SSE4_AND_AVX2 = USE_SSE4 | USE_AVX2,
36+
USE_SSE4_AND_SHANI = USE_SSE4 | USE_SHANI,
37+
// Every flag set; this is the default argument of SHA256AutoDetect().
USE_ALL = USE_SSE4 | USE_AVX2 | USE_SHANI,
38+
};
39+
}
40+
2941
/** Autodetect the best available SHA256 implementation.
3042
* Returns the name of the implementation.
*
* use_implementation is a bitmask of sha256_implementation::UseImplementation
* flags limiting which optional implementations detection may consider. It
* defaults to USE_ALL, so existing zero-argument calls keep compiling.
*
* Each call first resets the transform function pointers to the standard
* implementation, so the function can be called repeatedly to switch
* implementations (the benchmarks rely on this).
3143
*/
32-
std::string SHA256AutoDetect();
44+
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation = sha256_implementation::USE_ALL);
3345

3446
/** Compute multiple double-SHA256's of 64-byte blobs.
3547
* output: pointer to a blocks*32 byte output buffer

0 commit comments

Comments
 (0)