|
14 | 14 | #endif
|
15 | 15 | #endif
|
16 | 16 |
|
/*
 * Minimal 32-bit atomic load/store abstraction.
 *
 * MSVC (when it is not clang in MSVC mode) is served by Interlocked
 * intrinsics, because it does not do C11 atomics yet; every other compiler
 * uses C11 <stdatomic.h>. Both paths provide the same three names:
 *
 *   atomic32_t                      - the 32-bit atomic object type
 *   atomic_load32(src)              - returns the current value of *src
 *   atomic_store32(dst, val)        - writes val to *dst
 */
#if defined(_MSC_VER) && !defined(__clang__)
/* NOTE(review): presumably <intrin.h> has already been included on the
 * IS_X86 path earlier in the file - confirm. */
#if !defined(IS_X86)
#include <intrin.h>
#endif
/* NOTE(review): this disables warning C5105 for the rest of the translation
 * unit (there is no matching push/pop) - confirm that is intended. */
#pragma warning(disable : 5105)
/* Keep any FORCEINLINE that was defined before this point. */
#ifndef FORCEINLINE
#define FORCEINLINE inline __forceinline
#endif
typedef volatile long atomic32_t;

/* Atomically read *src. ORing with 0 leaves the stored value unchanged;
 * the value returned by the intrinsic is used as the result of the load. */
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
  return (int32_t)_InterlockedOr(src, 0);
}

/* Atomically write val to *dst; the previous value is discarded. */
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
  (void)_InterlockedExchange(dst, (long)val);
}
#else
#include <stdatomic.h>
/* Keep any FORCEINLINE that was defined before this point. */
#ifndef FORCEINLINE
#if defined(__GNUC__) || defined(__clang__)
#define FORCEINLINE inline __attribute__((__always_inline__))
#else
/* __attribute__ is a GNU extension; other C11 compilers get plain inline. */
#define FORCEINLINE inline
#endif
#endif
typedef volatile _Atomic(int32_t) atomic32_t;

/* Atomically read *src with relaxed memory ordering. */
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
  return atomic_load_explicit(src, memory_order_relaxed);
}

/* Atomically write val to *dst with relaxed memory ordering. */
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
  atomic_store_explicit(dst, val, memory_order_relaxed);
}
#endif
| 46 | + |
17 | 47 | #define MAYBE_UNUSED(x) (void)((x))
|
18 | 48 |
|
19 | 49 | #if defined(IS_X86)
|
@@ -76,17 +106,18 @@ enum cpu_feature {
|
76 | 106 | #if !defined(BLAKE3_TESTING)
|
77 | 107 | static /* Allow the variable to be controlled manually for testing */
|
78 | 108 | #endif
|
79 |
| - enum cpu_feature g_cpu_features = UNDEFINED; |
| 109 | + atomic32_t g_cpu_features = UNDEFINED; |
80 | 110 |
|
81 | 111 | LLVM_ATTRIBUTE_USED
|
82 | 112 | #if !defined(BLAKE3_TESTING)
|
83 | 113 | static
|
84 | 114 | #endif
|
85 | 115 | enum cpu_feature
|
86 | 116 | get_cpu_features(void) {
|
87 |
| - |
88 |
| - if (g_cpu_features != UNDEFINED) { |
89 |
| - return g_cpu_features; |
| 117 | + enum cpu_feature _cpu_features; |
| 118 | + _cpu_features = (enum cpu_feature)atomic_load32(&g_cpu_features); |
| 119 | + if (_cpu_features != UNDEFINED) { |
| 120 | + return _cpu_features; |
90 | 121 | } else {
|
91 | 122 | #if defined(IS_X86)
|
92 | 123 | uint32_t regs[4] = {0};
|
@@ -125,10 +156,11 @@ static
|
125 | 156 | }
|
126 | 157 | }
|
127 | 158 | }
|
128 |
| - g_cpu_features = features; |
| 159 | + atomic_store32(&g_cpu_features, (int32_t)features); |
129 | 160 | return features;
|
130 | 161 | #else
|
131 | 162 | /* How to detect NEON? */
|
| 163 | + atomic_store32(&g_cpu_features, 0); |
132 | 164 | return 0;
|
133 | 165 | #endif
|
134 | 166 | }
|
|
0 commit comments