Skip to content

Commit 933d6be

Browse files
authored
[FMV][compiler-rt] Fix cpu features initialization. (llvm#95149)
To detect features we either use HWCAPs or directly extract system register bitfields and compare them with a value. In many cases equality comparisons give wrong results; for example, FEAT_SVE is not set if SVE2 is available (see issue llvm#93651). I am also making the access to __aarch64_cpu_features atomic. The corresponding PR for the ACLE specification is ARM-software/acle#322.
1 parent 5563d91 commit 933d6be

File tree

7 files changed

+68
-105
lines changed

7 files changed

+68
-105
lines changed

compiler-rt/lib/builtins/aarch64/sme-abi-vg.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "../cpu_model/aarch64.h"
66

77
struct FEATURES {
8-
long long features;
8+
unsigned long long features;
99
};
1010

1111
extern struct FEATURES __aarch64_cpu_features;
@@ -23,14 +23,18 @@ extern bool __aarch64_has_sme_and_tpidr2_el0;
2323
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
2424
#endif
2525
__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
26-
if (!__aarch64_cpu_features.features)
27-
__init_cpu_features();
26+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
27+
return;
28+
29+
__init_cpu_features();
2830
}
2931

3032
__attribute__((target("sve"))) long
3133
__arm_get_current_vg(void) __arm_streaming_compatible {
3234
struct SME_STATE State = __arm_sme_state();
33-
bool HasSVE = __aarch64_cpu_features.features & (1ULL << FEAT_SVE);
35+
unsigned long long features =
36+
__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
37+
bool HasSVE = features & (1ULL << FEAT_SVE);
3438

3539
if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
3640
return 0;

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
void __init_cpu_features_resolver(unsigned long hwcap,
22
const __ifunc_arg_t *arg) {
3-
if (__aarch64_cpu_features.features)
3+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
44
return;
55

66
// ifunc resolvers don't have hwcaps in arguments on Android API lower
@@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap,
1717

1818
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
1919
// CPU features already initialized.
20-
if (__aarch64_cpu_features.features)
20+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
2121
return;
2222

2323
// Don't set any CPU features,

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
void __init_cpu_features_resolver(unsigned long hwcap,
22
const __ifunc_arg_t *arg) {
3-
if (__aarch64_cpu_features.features)
3+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
44
return;
55

66
__init_cpu_features_constructor(hwcap, arg);
@@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
1010
unsigned long hwcap = 0;
1111
unsigned long hwcap2 = 0;
1212
// CPU features already initialized.
13-
if (__aarch64_cpu_features.features)
13+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
1414
return;
1515

1616
int res = 0;

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <zircon/syscalls.h>
33

44
void __init_cpu_features_resolver() {
5-
if (__aarch64_cpu_features.features)
5+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
66
return;
77

88
// This ensures the vDSO is a direct link-time dependency of anything that
@@ -13,8 +13,8 @@ void __init_cpu_features_resolver() {
1313
if (status != ZX_OK)
1414
return;
1515

16-
#define setCPUFeature(cpu_feature) \
17-
__aarch64_cpu_features.features |= 1ULL << cpu_feature
16+
unsigned long long feat = 0;
17+
#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature
1818

1919
if (features & ZX_ARM64_FEATURE_ISA_FP)
2020
setCPUFeature(FEAT_FP);
@@ -48,4 +48,6 @@ void __init_cpu_features_resolver() {
4848
setCPUFeature(FEAT_SVE);
4949

5050
setCPUFeature(FEAT_INIT);
51+
52+
__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
5153
}

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc

Lines changed: 43 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33
#define HAVE_SYS_AUXV_H
44
#endif
55

6-
7-
86
static void __init_cpu_features_constructor(unsigned long hwcap,
97
const __ifunc_arg_t *arg) {
10-
#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
8+
unsigned long long feat = 0;
9+
#define setCPUFeature(F) feat |= 1ULL << F
1110
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
1211
#define extractBits(val, start, number) \
1312
(val & ((1ULL << number) - 1ULL) << start) >> start
@@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
2019
setCPUFeature(FEAT_PMULL);
2120
if (hwcap & HWCAP_FLAGM)
2221
setCPUFeature(FEAT_FLAGM);
23-
if (hwcap2 & HWCAP2_FLAGM2) {
24-
setCPUFeature(FEAT_FLAGM);
22+
if (hwcap2 & HWCAP2_FLAGM2)
2523
setCPUFeature(FEAT_FLAGM2);
26-
}
27-
if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
24+
if (hwcap & HWCAP_SM4)
2825
setCPUFeature(FEAT_SM4);
2926
if (hwcap & HWCAP_ASIMDDP)
3027
setCPUFeature(FEAT_DOTPROD);
3128
if (hwcap & HWCAP_ASIMDFHM)
3229
setCPUFeature(FEAT_FP16FML);
33-
if (hwcap & HWCAP_FPHP) {
30+
if (hwcap & HWCAP_FPHP)
3431
setCPUFeature(FEAT_FP16);
35-
setCPUFeature(FEAT_FP);
36-
}
3732
if (hwcap & HWCAP_DIT)
3833
setCPUFeature(FEAT_DIT);
3934
if (hwcap & HWCAP_ASIMDRDM)
4035
setCPUFeature(FEAT_RDM);
41-
if (hwcap & HWCAP_ILRCPC)
42-
setCPUFeature(FEAT_RCPC2);
4336
if (hwcap & HWCAP_AES)
4437
setCPUFeature(FEAT_AES);
4538
if (hwcap & HWCAP_SHA1)
@@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
5245
setCPUFeature(FEAT_FCMA);
5346
if (hwcap & HWCAP_SB)
5447
setCPUFeature(FEAT_SB);
55-
if (hwcap & HWCAP_SSBS)
48+
if (hwcap & HWCAP_SSBS) {
49+
setCPUFeature(FEAT_SSBS);
5650
setCPUFeature(FEAT_SSBS2);
51+
}
5752
if (hwcap2 & HWCAP2_MTE) {
5853
setCPUFeature(FEAT_MEMTAG);
5954
setCPUFeature(FEAT_MEMTAG2);
6055
}
61-
if (hwcap2 & HWCAP2_MTE3) {
62-
setCPUFeature(FEAT_MEMTAG);
63-
setCPUFeature(FEAT_MEMTAG2);
56+
if (hwcap2 & HWCAP2_MTE3)
6457
setCPUFeature(FEAT_MEMTAG3);
65-
}
6658
if (hwcap2 & HWCAP2_SVEAES)
6759
setCPUFeature(FEAT_SVE_AES);
68-
if (hwcap2 & HWCAP2_SVEPMULL) {
69-
setCPUFeature(FEAT_SVE_AES);
60+
if (hwcap2 & HWCAP2_SVEPMULL)
7061
setCPUFeature(FEAT_SVE_PMULL128);
71-
}
7262
if (hwcap2 & HWCAP2_SVEBITPERM)
7363
setCPUFeature(FEAT_SVE_BITPERM);
7464
if (hwcap2 & HWCAP2_SVESHA3)
@@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
10595
setCPUFeature(FEAT_WFXT);
10696
if (hwcap2 & HWCAP2_SME)
10797
setCPUFeature(FEAT_SME);
98+
if (hwcap2 & HWCAP2_SME2)
99+
setCPUFeature(FEAT_SME2);
108100
if (hwcap2 & HWCAP2_SME_I16I64)
109101
setCPUFeature(FEAT_SME_I64);
110102
if (hwcap2 & HWCAP2_SME_F64F64)
@@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
113105
setCPUFeature(FEAT_MOPS);
114106
if (hwcap & HWCAP_CPUID) {
115107
unsigned long ftr;
116-
getCPUFeature(ID_AA64PFR1_EL1, ftr);
117-
// ID_AA64PFR1_EL1.MTE >= 0b0001
118-
if (extractBits(ftr, 8, 4) >= 0x1)
119-
setCPUFeature(FEAT_MEMTAG);
120-
// ID_AA64PFR1_EL1.SSBS == 0b0001
121-
if (extractBits(ftr, 4, 4) == 0x1)
122-
setCPUFeature(FEAT_SSBS);
123-
// ID_AA64PFR1_EL1.SME == 0b0010
124-
if (extractBits(ftr, 24, 4) == 0x2)
125-
setCPUFeature(FEAT_SME2);
126-
getCPUFeature(ID_AA64PFR0_EL1, ftr);
127-
// ID_AA64PFR0_EL1.FP != 0b1111
128-
if (extractBits(ftr, 16, 4) != 0xF) {
129-
setCPUFeature(FEAT_FP);
130-
// ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
131-
setCPUFeature(FEAT_SIMD);
132-
}
133-
// ID_AA64PFR0_EL1.SVE != 0b0000
134-
if (extractBits(ftr, 32, 4) != 0x0) {
135-
// get ID_AA64ZFR0_EL1, that name supported
136-
// if sve enabled only
137-
getCPUFeature(S3_0_C0_C4_4, ftr);
138-
// ID_AA64ZFR0_EL1.SVEver == 0b0000
139-
if (extractBits(ftr, 0, 4) == 0x0)
140-
setCPUFeature(FEAT_SVE);
141-
// ID_AA64ZFR0_EL1.SVEver == 0b0001
142-
if (extractBits(ftr, 0, 4) == 0x1)
143-
setCPUFeature(FEAT_SVE2);
144-
// ID_AA64ZFR0_EL1.BF16 != 0b0000
145-
if (extractBits(ftr, 20, 4) != 0x0)
146-
setCPUFeature(FEAT_SVE_BF16);
147-
}
148-
getCPUFeature(ID_AA64ISAR0_EL1, ftr);
149-
// ID_AA64ISAR0_EL1.SHA3 != 0b0000
150-
if (extractBits(ftr, 32, 4) != 0x0)
151-
setCPUFeature(FEAT_SHA3);
108+
152109
getCPUFeature(ID_AA64ISAR1_EL1, ftr);
153-
// ID_AA64ISAR1_EL1.DPB >= 0b0001
154-
if (extractBits(ftr, 0, 4) >= 0x1)
155-
setCPUFeature(FEAT_DPB);
156-
// ID_AA64ISAR1_EL1.LRCPC != 0b0000
157-
if (extractBits(ftr, 20, 4) != 0x0)
158-
setCPUFeature(FEAT_RCPC);
159-
// ID_AA64ISAR1_EL1.LRCPC == 0b0011
160-
if (extractBits(ftr, 20, 4) == 0x3)
161-
setCPUFeature(FEAT_RCPC3);
162-
// ID_AA64ISAR1_EL1.SPECRES == 0b0001
163-
if (extractBits(ftr, 40, 4) == 0x2)
110+
/* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
111+
if (extractBits(ftr, 40, 4) >= 0x1)
164112
setCPUFeature(FEAT_PREDRES);
165-
// ID_AA64ISAR1_EL1.BF16 != 0b0000
166-
if (extractBits(ftr, 44, 4) != 0x0)
167-
setCPUFeature(FEAT_BF16);
168-
// ID_AA64ISAR1_EL1.LS64 >= 0b0001
113+
/* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
169114
if (extractBits(ftr, 60, 4) >= 0x1)
170115
setCPUFeature(FEAT_LS64);
171-
// ID_AA64ISAR1_EL1.LS64 >= 0b0010
116+
/* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
172117
if (extractBits(ftr, 60, 4) >= 0x2)
173118
setCPUFeature(FEAT_LS64_V);
174-
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
119+
/* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
175120
if (extractBits(ftr, 60, 4) >= 0x3)
176121
setCPUFeature(FEAT_LS64_ACCDATA);
177-
} else {
178-
// Set some features in case of no CPUID support
179-
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
180-
setCPUFeature(FEAT_FP);
181-
// FP and AdvSIMD fields have the same value
182-
setCPUFeature(FEAT_SIMD);
183-
}
184-
if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
185-
setCPUFeature(FEAT_DPB);
186-
if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
187-
setCPUFeature(FEAT_RCPC);
188-
if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
189-
setCPUFeature(FEAT_BF16);
190-
if (hwcap2 & HWCAP2_SVEBF16)
191-
setCPUFeature(FEAT_SVE_BF16);
192-
if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
193-
setCPUFeature(FEAT_SVE2);
194-
if (hwcap & HWCAP_SHA3)
195-
setCPUFeature(FEAT_SHA3);
196122
}
123+
if (hwcap & HWCAP_FP) {
124+
setCPUFeature(FEAT_FP);
125+
// FP and AdvSIMD fields have the same value
126+
setCPUFeature(FEAT_SIMD);
127+
}
128+
if (hwcap & HWCAP_DCPOP)
129+
setCPUFeature(FEAT_DPB);
130+
if (hwcap & HWCAP_LRCPC)
131+
setCPUFeature(FEAT_RCPC);
132+
if (hwcap & HWCAP_ILRCPC)
133+
setCPUFeature(FEAT_RCPC2);
134+
if (hwcap2 & HWCAP2_LRCPC3)
135+
setCPUFeature(FEAT_RCPC3);
136+
if (hwcap2 & HWCAP2_BF16)
137+
setCPUFeature(FEAT_BF16);
138+
if (hwcap2 & HWCAP2_SVEBF16)
139+
setCPUFeature(FEAT_SVE_BF16);
140+
if (hwcap & HWCAP_SVE)
141+
setCPUFeature(FEAT_SVE);
142+
if (hwcap2 & HWCAP2_SVE2)
143+
setCPUFeature(FEAT_SVE2);
144+
if (hwcap & HWCAP_SHA3)
145+
setCPUFeature(FEAT_SHA3);
197146
setCPUFeature(FEAT_INIT);
147+
148+
__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
198149
}

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
void __init_cpu_features_resolver(unsigned long hwcap,
22
const __ifunc_arg_t *arg) {
3-
if (__aarch64_cpu_features.features)
3+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
44
return;
55
__init_cpu_features_constructor(hwcap, arg);
66
}
77

88
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
99
// CPU features already initialized.
10-
if (__aarch64_cpu_features.features)
10+
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
1111
return;
1212

1313
unsigned long hwcap = getauxval(AT_HWCAP);

compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,12 @@
178178
#ifndef HWCAP2_SVE_EBF16
179179
#define HWCAP2_SVE_EBF16 (1ULL << 33)
180180
#endif
181+
#ifndef HWCAP2_SME2
182+
#define HWCAP2_SME2 (1UL << 37)
183+
#endif
181184
#ifndef HWCAP2_MOPS
182185
#define HWCAP2_MOPS (1ULL << 43)
183186
#endif
187+
#ifndef HWCAP2_LRCPC3
188+
#define HWCAP2_LRCPC3 (1UL << 46)
189+
#endif

0 commit comments

Comments
 (0)