Skip to content

Commit 454f20d

Browse files
committed
[Support/BLAKE3] Make g_cpu_features thread safe
g_cpu_features can be updated multiple times by get_cpu_features(), which reports a thread sanitizer error when used with multiple lld threads. Ported the following commits from BLAKE3-team: BLAKE3-team/BLAKE3@12823b8 BLAKE3-team/BLAKE3@34d293e
1 parent ba1c486 commit 454f20d

File tree

1 file changed

+38
-38
lines changed

1 file changed

+38
-38
lines changed

llvm/lib/Support/BLAKE3/blake3_dispatch.c

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,44 +4,44 @@
44

55
#include "blake3_impl.h"
66

7+
#if defined(_MSC_VER)
8+
#include <Windows.h>
9+
#endif
10+
711
#if defined(IS_X86)
812
#if defined(_MSC_VER)
913
#include <intrin.h>
1014
#elif defined(__GNUC__)
1115
#include <immintrin.h>
1216
#else
13-
#error "Unimplemented!"
17+
#undef IS_X86 /* Unimplemented! */
1418
#endif
1519
#endif
1620

17-
/* Atomic access abstraction (since MSVC does not do C11 yet) */
18-
#if defined(_MSC_VER) && !defined(__clang__)
19-
#if !defined(IS_X86)
20-
#include <intrin.h>
21-
#endif
22-
#pragma warning(disable : 5105)
23-
#ifndef FORCEINLINE
24-
#define FORCEINLINE inline __forceinline
25-
#endif
26-
typedef volatile long atomic32_t;
27-
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
28-
return _InterlockedOr(src, 0);
29-
}
30-
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
31-
_InterlockedExchange(dst, val);
32-
}
21+
#if !defined(BLAKE3_ATOMICS)
22+
#if defined(__has_include)
23+
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
24+
#define BLAKE3_ATOMICS 1
3325
#else
34-
#include <stdatomic.h>
35-
#ifndef FORCEINLINE
36-
#define FORCEINLINE inline __attribute__((__always_inline__))
37-
#endif
38-
typedef volatile _Atomic(int32_t) atomic32_t;
39-
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
40-
return atomic_load_explicit(src, memory_order_relaxed);
41-
}
42-
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
43-
atomic_store_explicit(dst, val, memory_order_relaxed);
44-
}
26+
#define BLAKE3_ATOMICS 0
27+
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
28+
#else
29+
#define BLAKE3_ATOMICS 0
30+
#endif /* defined(__has_include) */
31+
#endif /* BLAKE3_ATOMICS */
32+
33+
#if BLAKE3_ATOMICS
34+
#define ATOMIC_INT _Atomic int
35+
#define ATOMIC_LOAD(x) x
36+
#define ATOMIC_STORE(x, y) x = y
37+
#elif defined(_MSC_VER)
38+
#define ATOMIC_INT LONG
39+
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
40+
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
41+
#else
42+
#define ATOMIC_INT int
43+
#define ATOMIC_LOAD(x) x
44+
#define ATOMIC_STORE(x, y) x = y
4545
#endif
4646

4747
#define MAYBE_UNUSED(x) (void)((x))
@@ -89,7 +89,6 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
8989
#endif
9090
}
9191

92-
#endif
9392

9493
enum cpu_feature {
9594
SSE2 = 1 << 0,
@@ -106,24 +105,25 @@ enum cpu_feature {
106105
#if !defined(BLAKE3_TESTING)
107106
static /* Allow the variable to be controlled manually for testing */
108107
#endif
109-
atomic32_t g_cpu_features = UNDEFINED;
108+
ATOMIC_INT g_cpu_features = UNDEFINED;
110109

111110
LLVM_ATTRIBUTE_USED
112111
#if !defined(BLAKE3_TESTING)
113112
static
114113
#endif
115114
enum cpu_feature
116115
get_cpu_features(void) {
117-
enum cpu_feature _cpu_features;
118-
_cpu_features = (enum cpu_feature)atomic_load32(&g_cpu_features);
119-
if (_cpu_features != UNDEFINED) {
120-
return _cpu_features;
116+
117+
/* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
118+
enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
119+
if (features != UNDEFINED) {
120+
return features;
121121
} else {
122122
#if defined(IS_X86)
123123
uint32_t regs[4] = {0};
124124
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
125125
(void)edx;
126-
enum cpu_feature features = 0;
126+
features = 0;
127127
cpuid(regs, 0);
128128
const int max_id = *eax;
129129
cpuid(regs, 1);
@@ -133,7 +133,7 @@ static
133133
if (*edx & (1UL << 26))
134134
features |= SSE2;
135135
#endif
136-
if (*ecx & (1UL << 0))
136+
if (*ecx & (1UL << 9))
137137
features |= SSSE3;
138138
if (*ecx & (1UL << 19))
139139
features |= SSE41;
@@ -156,15 +156,15 @@ static
156156
}
157157
}
158158
}
159-
atomic_store32(&g_cpu_features, (int32_t)features);
159+
ATOMIC_STORE(g_cpu_features, features);
160160
return features;
161161
#else
162162
/* How to detect NEON? */
163-
atomic_store32(&g_cpu_features, 0);
164163
return 0;
165164
#endif
166165
}
167166
}
167+
#endif
168168

169169
void blake3_compress_in_place(uint32_t cv[8],
170170
const uint8_t block[BLAKE3_BLOCK_LEN],

0 commit comments

Comments
 (0)