|
11 | 11 | #include <stdint.h>
|
12 | 12 | #include <immintrin.h>
|
13 | 13 |
|
| 14 | +#include <attributes.h> |
| 15 | + |
14 | 16 | namespace {
|
15 | 17 |
|
/** Control vector for _mm_shuffle_epi8: reverses the byte order inside each 32-bit lane. */
alignas(__m128i) const uint8_t MASK[16] = {
    0x03, 0x02, 0x01, 0x00,
    0x07, 0x06, 0x05, 0x04,
    0x0b, 0x0a, 0x09, 0x08,
    0x0f, 0x0e, 0x0d, 0x0c,
};

/**
 * Read as little-endian 32-bit lanes (low to high) this is
 * 0x9b05688c, 0x510e527f, 0xbb67ae85, 0x6a09e667, i.e. the SHA-256 initial
 * hash words H5, H4, H1, H0 from FIPS 180-4.
 */
alignas(__m128i) const uint8_t INIT0[16] = {
    0x8c, 0x68, 0x05, 0x9b,
    0x7f, 0x52, 0x0e, 0x51,
    0x85, 0xae, 0x67, 0xbb,
    0x67, 0xe6, 0x09, 0x6a,
};

/**
 * Read as little-endian 32-bit lanes (low to high) this is
 * 0x5be0cd19, 0x1f83d9ab, 0xa54ff53a, 0x3c6ef372, i.e. the SHA-256 initial
 * hash words H7, H6, H3, H2 from FIPS 180-4.
 */
alignas(__m128i) const uint8_t INIT1[16] = {
    0x19, 0xcd, 0xe0, 0x5b,
    0xab, 0xd9, 0x83, 0x1f,
    0x3a, 0xf5, 0x4f, 0xa5,
    0x72, 0xf3, 0x6e, 0x3c,
};
|
19 | 21 |
|
20 |
| -void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0) |
| 22 | +void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0) |
21 | 23 | {
|
22 | 24 | const __m128i msg = _mm_set_epi64x(k1, k0);
|
23 | 25 | state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
|
24 | 26 | state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));
|
25 | 27 | }
|
26 | 28 |
|
27 |
| -void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0) |
| 29 | +void ALWAYS_INLINE QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0) |
28 | 30 | {
|
29 | 31 | const __m128i msg = _mm_add_epi32(m, _mm_set_epi64x(k1, k0));
|
30 | 32 | state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
|
31 | 33 | state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e));
|
32 | 34 | }
|
33 | 35 |
|
34 |
| -void inline __attribute__((always_inline)) ShiftMessageA(__m128i& m0, __m128i m1) |
| 36 | +void ALWAYS_INLINE ShiftMessageA(__m128i& m0, __m128i m1) |
35 | 37 | {
|
36 | 38 | m0 = _mm_sha256msg1_epu32(m0, m1);
|
37 | 39 | }
|
38 | 40 |
|
39 |
| -void inline __attribute__((always_inline)) ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2) |
| 41 | +void ALWAYS_INLINE ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2) |
40 | 42 | {
|
41 | 43 | m2 = _mm_sha256msg2_epu32(_mm_add_epi32(m2, _mm_alignr_epi8(m1, m0, 4)), m1);
|
42 | 44 | }
|
43 | 45 |
|
44 |
| -void inline __attribute__((always_inline)) ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2) |
| 46 | +void ALWAYS_INLINE ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2) |
45 | 47 | {
|
46 | 48 | ShiftMessageC(m0, m1, m2);
|
47 | 49 | ShiftMessageA(m0, m1);
|
48 | 50 | }
|
49 | 51 |
|
50 |
| -void inline __attribute__((always_inline)) Shuffle(__m128i& s0, __m128i& s1) |
| 52 | +void ALWAYS_INLINE Shuffle(__m128i& s0, __m128i& s1) |
51 | 53 | {
|
52 | 54 | const __m128i t1 = _mm_shuffle_epi32(s0, 0xB1);
|
53 | 55 | const __m128i t2 = _mm_shuffle_epi32(s1, 0x1B);
|
54 | 56 | s0 = _mm_alignr_epi8(t1, t2, 0x08);
|
55 | 57 | s1 = _mm_blend_epi16(t2, t1, 0xF0);
|
56 | 58 | }
|
57 | 59 |
|
58 |
| -void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1) |
| 60 | +void ALWAYS_INLINE Unshuffle(__m128i& s0, __m128i& s1) |
59 | 61 | {
|
60 | 62 | const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B);
|
61 | 63 | const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1);
|
62 | 64 | s0 = _mm_blend_epi16(t1, t2, 0xF0);
|
63 | 65 | s1 = _mm_alignr_epi8(t2, t1, 0x08);
|
64 | 66 | }
|
65 | 67 |
|
66 |
| -__m128i inline __attribute__((always_inline)) Load(const unsigned char* in) |
| 68 | +__m128i ALWAYS_INLINE Load(const unsigned char* in) |
67 | 69 | {
|
68 | 70 | return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));
|
69 | 71 | }
|
70 | 72 |
|
71 |
| -void inline __attribute__((always_inline)) Save(unsigned char* out, __m128i s) |
| 73 | +void ALWAYS_INLINE Save(unsigned char* out, __m128i s) |
72 | 74 | {
|
73 | 75 | _mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));
|
74 | 76 | }
|
|
0 commit comments