diff --git a/llvm/lib/Support/BLAKE3/blake3_dispatch.c b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
index e96e714225f41..41d013495cb5b 100644
--- a/llvm/lib/Support/BLAKE3/blake3_dispatch.c
+++ b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
@@ -4,16 +4,46 @@
 
 #include "blake3_impl.h"
 
+#if defined(_MSC_VER)
+#include <Windows.h>
+#endif
+
 #if defined(IS_X86)
 #if defined(_MSC_VER)
 #include <intrin.h>
 #elif defined(__GNUC__)
 #include <immintrin.h>
 #else
-#error "Unimplemented!"
+#undef IS_X86 /* Unimplemented! */
 #endif
 #endif
 
+#if !defined(BLAKE3_ATOMICS)
+#if defined(__has_include)
+#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
+#define BLAKE3_ATOMICS 1
+#else
+#define BLAKE3_ATOMICS 0
+#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
+#else
+#define BLAKE3_ATOMICS 0
+#endif /* defined(__has_include) */
+#endif /* BLAKE3_ATOMICS */
+
+#if BLAKE3_ATOMICS
+#define ATOMIC_INT _Atomic int
+#define ATOMIC_LOAD(x) x
+#define ATOMIC_STORE(x, y) x = y
+#elif defined(_MSC_VER)
+#define ATOMIC_INT LONG
+#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
+#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
+#else
+#define ATOMIC_INT int
+#define ATOMIC_LOAD(x) x
+#define ATOMIC_STORE(x, y) x = y
+#endif
+
 #define MAYBE_UNUSED(x) (void)((x))
 
 #if defined(IS_X86)
@@ -59,7 +89,6 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
 #endif
 }
 
-#endif
 
 enum cpu_feature {
   SSE2 = 1 << 0,
@@ -76,7 +105,7 @@ enum cpu_feature {
 #if !defined(BLAKE3_TESTING)
 static /* Allow the variable to be controlled manually for testing */
 #endif
-    enum cpu_feature g_cpu_features = UNDEFINED;
+    ATOMIC_INT g_cpu_features = UNDEFINED;
 
 LLVM_ATTRIBUTE_USED
 #if !defined(BLAKE3_TESTING)
@@ -85,14 +114,16 @@ static
     enum cpu_feature
     get_cpu_features(void) {
 
-  if (g_cpu_features != UNDEFINED) {
-    return g_cpu_features;
+  /* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
+  enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
+  if (features != UNDEFINED) {
+    return features;
   } else {
 #if defined(IS_X86)
     uint32_t regs[4] = {0};
     uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
     (void)edx;
-    enum cpu_feature features = 0;
+    features = 0;
     cpuid(regs, 0);
     const int max_id = *eax;
     cpuid(regs, 1);
@@ -102,7 +133,7 @@ static
     if (*edx & (1UL << 26))
       features |= SSE2;
 #endif
-    if (*ecx & (1UL << 0))
+    if (*ecx & (1UL << 9))
       features |= SSSE3;
     if (*ecx & (1UL << 19))
       features |= SSE41;
@@ -125,7 +156,7 @@ static
         }
       }
     }
-    g_cpu_features = features;
+    ATOMIC_STORE(g_cpu_features, features);
     return features;
 #else
     /* How to detect NEON? */
@@ -133,6 +164,7 @@ static
 #endif
   }
 }
+#endif
 
 void blake3_compress_in_place(uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],