4
4
5
5
#include "blake3_impl.h"
6
6
7
+ #if defined(_MSC_VER )
8
+ #include <Windows.h>
9
+ #endif
10
+
7
11
#if defined(IS_X86 )
8
12
#if defined(_MSC_VER )
9
13
#include <intrin.h>
10
14
#elif defined(__GNUC__ )
11
15
#include <immintrin.h>
12
16
#else
13
- #error " Unimplemented!"
17
+ #undef IS_X86 /* Unimplemented! */
14
18
#endif
15
19
#endif
16
20
21
+ #if !defined(BLAKE3_ATOMICS )
22
+ #if defined(__has_include )
23
+ #if __has_include (< stdatomic .h > ) && !defined(_MSC_VER )
24
+ #define BLAKE3_ATOMICS 1
25
+ #else
26
+ #define BLAKE3_ATOMICS 0
27
+ #endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
28
+ #else
29
+ #define BLAKE3_ATOMICS 0
30
+ #endif /* defined(__has_include) */
31
+ #endif /* BLAKE3_ATOMICS */
32
+
33
+ #if BLAKE3_ATOMICS
34
+ #define ATOMIC_INT _Atomic int
35
+ #define ATOMIC_LOAD (x ) x
36
+ #define ATOMIC_STORE (x , y ) x = y
37
+ #elif defined(_MSC_VER )
38
+ #define ATOMIC_INT LONG
39
+ #define ATOMIC_LOAD (x ) InterlockedOr(&x, 0)
40
+ #define ATOMIC_STORE (x , y ) InterlockedExchange(&x, y)
41
+ #else
42
+ #define ATOMIC_INT int
43
+ #define ATOMIC_LOAD (x ) x
44
+ #define ATOMIC_STORE (x , y ) x = y
45
+ #endif
46
+
17
47
#define MAYBE_UNUSED (x ) (void)((x))
18
48
19
49
#if defined(IS_X86 )
@@ -59,7 +89,6 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
59
89
#endif
60
90
}
61
91
62
- #endif
63
92
64
93
enum cpu_feature {
65
94
SSE2 = 1 << 0 ,
@@ -76,7 +105,7 @@ enum cpu_feature {
76
105
#if !defined(BLAKE3_TESTING )
77
106
static /* Allow the variable to be controlled manually for testing */
78
107
#endif
79
- enum cpu_feature g_cpu_features = UNDEFINED ;
108
+ ATOMIC_INT g_cpu_features = UNDEFINED ;
80
109
81
110
LLVM_ATTRIBUTE_USED
82
111
#if !defined(BLAKE3_TESTING )
@@ -85,14 +114,16 @@ static
85
114
enum cpu_feature
86
115
get_cpu_features (void ) {
87
116
88
- if (g_cpu_features != UNDEFINED ) {
89
- return g_cpu_features ;
117
+ /* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
118
+ enum cpu_feature features = ATOMIC_LOAD (g_cpu_features );
119
+ if (features != UNDEFINED ) {
120
+ return features ;
90
121
} else {
91
122
#if defined(IS_X86 )
92
123
uint32_t regs [4 ] = {0 };
93
124
uint32_t * eax = & regs [0 ], * ebx = & regs [1 ], * ecx = & regs [2 ], * edx = & regs [3 ];
94
125
(void )edx ;
95
- enum cpu_feature features = 0 ;
126
+ features = 0 ;
96
127
cpuid (regs , 0 );
97
128
const int max_id = * eax ;
98
129
cpuid (regs , 1 );
@@ -102,7 +133,7 @@ static
102
133
if (* edx & (1UL << 26 ))
103
134
features |= SSE2 ;
104
135
#endif
105
- if (* ecx & (1UL << 0 ))
136
+ if (* ecx & (1UL << 9 ))
106
137
features |= SSSE3 ;
107
138
if (* ecx & (1UL << 19 ))
108
139
features |= SSE41 ;
@@ -125,14 +156,15 @@ static
125
156
}
126
157
}
127
158
}
128
- g_cpu_features = features ;
159
+ ATOMIC_STORE ( g_cpu_features , features ) ;
129
160
return features ;
130
161
#else
131
162
/* How to detect NEON? */
132
163
return 0 ;
133
164
#endif
134
165
}
135
166
}
167
+ #endif
136
168
137
169
void blake3_compress_in_place (uint32_t cv [8 ],
138
170
const uint8_t block [BLAKE3_BLOCK_LEN ],
0 commit comments