Merge bitcoin/bitcoin#27575: Introduce platform-agnostic ALWAYS_INLINE macro

fanquake · fanquake · commit b13830eff663 · 2023-05-09T14:45:52.000+01:00
3f19875 scripted-diff: Use platform-agnostic `ALWAYS_INLINE` macro (Hennadii Stepanov)
e16c22f Introduce platform-agnostic `ALWAYS_INLINE` macro (Hennadii Stepanov)

Pull request description:
  Split from bitcoin/bitcoin#24773 as requested in bitcoin/bitcoin#24773 (comment).

ACKs for top commit:
  theuni: utACK 3f19875
  fanquake: ACK 3f19875

Tree-SHA512: a19b713433bb4d3c5fff1ddb4d1413837823a400c1d46363a8181e7632b059846ba92264be1c867f35f532af90945ed20887103471b09c07623e0f3905b4098b
diff --git a/src/attributes.h b/src/attributes.h
@@ -16,4 +16,12 @@
 #  define LIFETIMEBOUND
 #endif
 
+#if defined(__GNUC__)
+#  define ALWAYS_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#  define ALWAYS_INLINE __forceinline
+#else
+#  error No known always_inline attribute for this platform.
+#endif
+
 #endif // BITCOIN_ATTRIBUTES_H
diff --git a/src/crypto/sha256_avx2.cpp b/src/crypto/sha256_avx2.cpp
@@ -7,6 +7,7 @@
 #include <stdint.h>
 #include <immintrin.h>
 
+#include <attributes.h>
 #include <crypto/common.h>
 
 namespace sha256d64_avx2 {
@@ -36,7 +37,7 @@ __m256i inline sigma0(__m256i x) { return Xor(Or(ShR(x, 7), ShL(x, 25)), Or(ShR(
 __m256i inline sigma1(__m256i x) { return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), ShR(x, 10)); }
 
 /** One round of SHA-256. */
-void inline __attribute__((always_inline)) Round(__m256i a, __m256i b, __m256i c, __m256i& d, __m256i e, __m256i f, __m256i g, __m256i& h, __m256i k)
+void ALWAYS_INLINE Round(__m256i a, __m256i b, __m256i c, __m256i& d, __m256i e, __m256i f, __m256i g, __m256i& h, __m256i k)
 {
     __m256i t1 = Add(h, Sigma1(e), Ch(e, f, g), k);
     __m256i t2 = Add(Sigma0(a), Maj(a, b, c));
diff --git a/src/crypto/sha256_sse41.cpp b/src/crypto/sha256_sse41.cpp
@@ -7,6 +7,7 @@
 #include <stdint.h>
 #include <immintrin.h>
 
+#include <attributes.h>
 #include <crypto/common.h>
 
 namespace sha256d64_sse41 {
@@ -36,7 +37,7 @@ __m128i inline sigma0(__m128i x) { return Xor(Or(ShR(x, 7), ShL(x, 25)), Or(ShR(
 __m128i inline sigma1(__m128i x) { return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), ShR(x, 10)); }
 
 /** One round of SHA-256. */
-void inline __attribute__((always_inline)) Round(__m128i a, __m128i b, __m128i c, __m128i& d, __m128i e, __m128i f, __m128i g, __m128i& h, __m128i k)
+void ALWAYS_INLINE Round(__m128i a, __m128i b, __m128i c, __m128i& d, __m128i e, __m128i f, __m128i g, __m128i& h, __m128i k)
 {
     __m128i t1 = Add(h, Sigma1(e), Ch(e, f, g), k);
     __m128i t2 = Add(Sigma0(a), Maj(a, b, c));
diff --git a/src/crypto/sha256_x86_shani.cpp b/src/crypto/sha256_x86_shani.cpp
@@ -11,64 +11,66 @@
 #include <stdint.h>
 #include <immintrin.h>
 
+#include <attributes.h>
+
 namespace {
 
 alignas(__m128i) const uint8_t MASK[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
 alignas(__m128i) const uint8_t INIT0[16] = {0x8c, 0x68, 0x05, 0x9b, 0x7f, 0x52, 0x0e, 0x51, 0x85, 0xae, 0x67, 0xbb, 0x67, 0xe6, 0x09, 0x6a};
 alignas(__m128i) const uint8_t INIT1[16] = {0x19, 0xcd, 0xe0, 0x5b, 0xab, 0xd9, 0x83, 0x1f, 0x3a, 0xf5, 0x4f, 0xa5, 0x72, 0xf3, 0x6e, 0x3c};
 
-void inline  __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
+void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
 {
     const __m128i msg = _mm_set_epi64x(k1, k0);
     state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
     state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));
 }
 
-void inline  __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0)
+void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0)
 {
     const __m128i msg = _mm_add_epi32(m, _mm_set_epi64x(k1, k0));
     state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
     state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));
 }
 
-void inline  __attribute__((always_inline)) ShiftMessageA(__m128i& m0, __m128i m1)
+void ALWAYS_INLINE ShiftMessageA(__m128i& m0, __m128i m1)
 {
     m0 = _mm_sha256msg1_epu32(m0, m1);
 }
 
-void inline  __attribute__((always_inline)) ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2)
+void ALWAYS_INLINE ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2)
 {
     m2 = _mm_sha256msg2_epu32(_mm_add_epi32(m2, _mm_alignr_epi8(m1, m0, 4)), m1);
 }
 
-void inline __attribute__((always_inline)) ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2)
+void ALWAYS_INLINE ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2)
 {
     ShiftMessageC(m0, m1, m2);
     ShiftMessageA(m0, m1);
 }
 
-void inline __attribute__((always_inline)) Shuffle(__m128i& s0, __m128i& s1)
+void ALWAYS_INLINE Shuffle(__m128i& s0, __m128i& s1)
 {
     const __m128i t1 = _mm_shuffle_epi32(s0, 0xB1);
     const __m128i t2 = _mm_shuffle_epi32(s1, 0x1B);
     s0 = _mm_alignr_epi8(t1, t2, 0x08);
     s1 = _mm_blend_epi16(t2, t1, 0xF0);
 }
 
-void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1)
+void ALWAYS_INLINE Unshuffle(__m128i& s0, __m128i& s1)
 {
     const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B);
     const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1);
     s0 = _mm_blend_epi16(t1, t2, 0xF0);
     s1 = _mm_alignr_epi8(t2, t1, 0x08);
 }
 
-__m128i inline  __attribute__((always_inline)) Load(const unsigned char* in)
+__m128i ALWAYS_INLINE Load(const unsigned char* in)
 {
     return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));
 }
 
-void inline  __attribute__((always_inline)) Save(unsigned char* out, __m128i s)
+void ALWAYS_INLINE Save(unsigned char* out, __m128i s)
 {
     _mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));
 }

Original file line number	Diff line number	Diff line change
`@@ -11,64 +11,66 @@`
`11`	`11`	`#include <stdint.h>`
`12`	`12`	`#include <immintrin.h>`
`13`	`13`
	`14`	`+#include <attributes.h>`
	`15`	`+`
`14`	`16`	`namespace {`
`15`	`17`
`16`	`18`	`alignas(__m128i) const uint8_t MASK[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};`
`17`	`19`	`alignas(__m128i) const uint8_t INIT0[16] = {0x8c, 0x68, 0x05, 0x9b, 0x7f, 0x52, 0x0e, 0x51, 0x85, 0xae, 0x67, 0xbb, 0x67, 0xe6, 0x09, 0x6a};`
`18`	`20`	`alignas(__m128i) const uint8_t INIT1[16] = {0x19, 0xcd, 0xe0, 0x5b, 0xab, 0xd9, 0x83, 0x1f, 0x3a, 0xf5, 0x4f, 0xa5, 0x72, 0xf3, 0x6e, 0x3c};`
`19`	`21`
`20`		`-void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)`
	`22`	`+void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)`
`21`	`23`	`{`
`22`	`24`	`const __m128i msg = _mm_set_epi64x(k1, k0);`
`23`	`25`	`state1 = _mm_sha256rnds2_epu32(state1, state0, msg);`
`24`	`26`	`state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));`
`25`	`27`	`}`
`26`	`28`
`27`		`-void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0)`
	`29`	`+void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0)`
`28`	`30`	`{`
`29`	`31`	`const __m128i msg = _mm_add_epi32(m, _mm_set_epi64x(k1, k0));`
`30`	`32`	`state1 = _mm_sha256rnds2_epu32(state1, state0, msg);`
`31`	`33`	`state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));`
`32`	`34`	`}`
`33`	`35`
`34`		`-void inline __attribute__((always_inline)) ShiftMessageA(__m128i& m0, __m128i m1)`
	`36`	`+void ALWAYS_INLINE ShiftMessageA(__m128i& m0, __m128i m1)`
`35`	`37`	`{`
`36`	`38`	`m0 = _mm_sha256msg1_epu32(m0, m1);`
`37`	`39`	`}`
`38`	`40`
`39`		`-void inline __attribute__((always_inline)) ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2)`
	`41`	`+void ALWAYS_INLINE ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2)`
`40`	`42`	`{`
`41`	`43`	`m2 = _mm_sha256msg2_epu32(_mm_add_epi32(m2, _mm_alignr_epi8(m1, m0, 4)), m1);`
`42`	`44`	`}`
`43`	`45`
`44`		`-void inline __attribute__((always_inline)) ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2)`
	`46`	`+void ALWAYS_INLINE ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2)`
`45`	`47`	`{`
`46`	`48`	`ShiftMessageC(m0, m1, m2);`
`47`	`49`	`ShiftMessageA(m0, m1);`
`48`	`50`	`}`
`49`	`51`
`50`		`-void inline __attribute__((always_inline)) Shuffle(__m128i& s0, __m128i& s1)`
	`52`	`+void ALWAYS_INLINE Shuffle(__m128i& s0, __m128i& s1)`
`51`	`53`	`{`
`52`	`54`	`const __m128i t1 = _mm_shuffle_epi32(s0, 0xB1);`
`53`	`55`	`const __m128i t2 = _mm_shuffle_epi32(s1, 0x1B);`
`54`	`56`	`s0 = _mm_alignr_epi8(t1, t2, 0x08);`
`55`	`57`	`s1 = _mm_blend_epi16(t2, t1, 0xF0);`
`56`	`58`	`}`
`57`	`59`
`58`		`-void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1)`
	`60`	`+void ALWAYS_INLINE Unshuffle(__m128i& s0, __m128i& s1)`
`59`	`61`	`{`
`60`	`62`	`const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B);`
`61`	`63`	`const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1);`
`62`	`64`	`s0 = _mm_blend_epi16(t1, t2, 0xF0);`
`63`	`65`	`s1 = _mm_alignr_epi8(t2, t1, 0x08);`
`64`	`66`	`}`
`65`	`67`
`66`		`-__m128i inline __attribute__((always_inline)) Load(const unsigned char* in)`
	`68`	`+__m128i ALWAYS_INLINE Load(const unsigned char* in)`
`67`	`69`	`{`
`68`	`70`	`return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));`
`69`	`71`	`}`
`70`	`72`
`71`		`-void inline __attribute__((always_inline)) Save(unsigned char* out, __m128i s)`
	`73`	`+void ALWAYS_INLINE Save(unsigned char* out, __m128i s)`
`72`	`74`	`{`
`73`	`75`	`_mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));`
`74`	`76`	`}`