Skip to content

Commit 84e5451

Browse files
authored
[AArch64] Add support for -mcpu=gb10. (#146515)
This patch adds support for -mcpu=gb10 (NVIDIA GB10). This is a big.LITTLE cluster of Cortex-X925 and Cortex-A725 cores. The appropriate MIDR numbers are added to detect them in -mcpu=native. We did not add an -mcpu=cortex-x925.cortex-a725 option because GB10 does include the crypto instructions which we want on by default, and the current convention is to not enable such extensions for Arm Cortex cores in -mcpu where they are optional in the IP. Relevant GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-June/687005.html
1 parent e2aa878 commit 84e5451

File tree

6 files changed

+104
-4
lines changed

6 files changed

+104
-4
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// REQUIRES: aarch64-registered-target
2+
// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=gb10 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s
3+
4+
// CHECK: Extensions enabled for the given AArch64 target
5+
// CHECK-EMPTY:
6+
// CHECK-NEXT: Architecture Feature(s) Description
7+
// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support
8+
// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension
9+
// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support
10+
// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions
11+
// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension
12+
// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification
13+
// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets
14+
// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
15+
// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction
16+
// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
17+
// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence
18+
// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence
19+
// CHECK-NEXT: FEAT_DotProd Enable dot product support
20+
// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension
21+
// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension
22+
// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
23+
// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension
24+
// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions
25+
// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions
26+
// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing
27+
// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement
28+
// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int
29+
// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
30+
// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons
31+
// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register
32+
// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension
33+
// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions
34+
// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension
35+
// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension
36+
// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets
37+
// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
38+
// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules
39+
// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension
40+
// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
41+
// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement
42+
// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension
43+
// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants
44+
// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension
45+
// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension
46+
// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions
47+
// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
48+
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
49+
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
50+
// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
51+
// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
52+
// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
53+
// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension
54+
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
55+
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
56+
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
57+
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
58+
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
59+
// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions
60+
// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
61+
// CHECK-NEXT: FEAT_SVE_SHA3 Enable SVE SHA3 instructions
62+
// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions
63+
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
64+
// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
65+
// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
66+
// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState
67+
// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension
68+
// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction
69+
// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction

clang/test/Misc/target-invalid-cpu-note/aarch64.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
// CHECK-SAME: {{^}}, exynos-m5
7676
// CHECK-SAME: {{^}}, falkor
7777
// CHECK-SAME: {{^}}, fujitsu-monaka
78+
// CHECK-SAME: {{^}}, gb10
7879
// CHECK-SAME: {{^}}, generic
7980
// CHECK-SAME: {{^}}, grace
8081
// CHECK-SAME: {{^}}, kryo

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,7 @@ def ProcessorFeatures {
11171117
FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
11181118
FeatureSSBS, FeatureCCIDX,
11191119
FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
1120+
list<SubtargetFeature> GB10 = !listconcat(X925, [FeatureSVEAES, FeatureSVESHA3, FeatureSVE2SM4]);
11201121
list<SubtargetFeature> Grace = !listconcat(NeoverseV2, [FeatureSVE2SM4, FeatureSVEAES, FeatureSVESHA3]);
11211122

11221123
// ETE and TRBE are future architecture extensions. We temporarily enable them
@@ -1203,6 +1204,8 @@ def : ProcessorModel<"cortex-x4", NeoverseV2Model, ProcessorFeatures.X4,
12031204
[TuneX4]>;
12041205
def : ProcessorModel<"cortex-x925", NeoverseV2Model, ProcessorFeatures.X925,
12051206
[TuneX925]>;
1207+
def : ProcessorModel<"gb10", NeoverseV2Model, ProcessorFeatures.GB10,
1208+
[TuneX925]>;
12061209
def : ProcessorModel<"grace", NeoverseV2Model, ProcessorFeatures.Grace,
12071210
[TuneNeoverseV2]>;
12081211
def : ProcessorModel<"neoverse-e1", CortexA53Model,

llvm/lib/TargetParser/Host.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,25 +176,43 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
176176
SmallVector<StringRef, 32> Lines;
177177
ProcCpuinfoContent.split(Lines, '\n');
178178

179-
// Look for the CPU implementer line.
179+
// Look for the CPU implementer and hardware lines, and store the CPU part
180+
// numbers found.
180181
StringRef Implementer;
181182
StringRef Hardware;
182-
StringRef Part;
183+
SmallVector<StringRef, 32> Parts;
183184
for (StringRef Line : Lines) {
184185
if (Line.consume_front("CPU implementer"))
185186
Implementer = Line.ltrim("\t :");
186187
else if (Line.consume_front("Hardware"))
187188
Hardware = Line.ltrim("\t :");
188189
else if (Line.consume_front("CPU part"))
189-
Part = Line.ltrim("\t :");
190+
Parts.emplace_back(Line.ltrim("\t :"));
190191
}
191192

193+
// Last `Part' seen, in case we don't analyse all `Parts' parsed.
194+
StringRef Part = Parts.empty() ? StringRef() : Parts.back();
195+
196+
// Remove duplicate `Parts'.
197+
llvm::sort(Parts);
198+
Parts.erase(llvm::unique(Parts), Parts.end());
199+
200+
auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
201+
if (Parts.size() == 2)
202+
return (Parts[0] == Big && Parts[1] == Little) ||
203+
(Parts[1] == Big && Parts[0] == Little);
204+
return false;
205+
};
206+
192207
if (Implementer == "0x41") { // ARM Ltd.
193208
// MSM8992/8994 may give cpu part for the core that the kernel is running on,
194209
// which is undeterministic and wrong. Always return cortex-a53 for these SoC.
195210
if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
196211
return "cortex-a53";
197212

213+
// Detect big.LITTLE systems.
214+
if (MatchBigLittle(Parts, "0xd85", "0xd87"))
215+
return "gb10";
198216

199217
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
200218
// values correspond to the "Part number" in the CP15/c0 register. The

llvm/unittests/TargetParser/Host.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,14 @@ TEST(getLinuxHostCPUName, AArch64) {
122122
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
123123
"CPU part : 0xd48"),
124124
"cortex-x2");
125+
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
126+
"CPU part : 0xd85\n"
127+
"CPU part : 0xd87"),
128+
"gb10");
129+
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
130+
"CPU part : 0xd87\n"
131+
"CPU part : 0xd85"),
132+
"gb10");
125133
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
126134
"CPU part : 0xc00"),
127135
"falkor");

llvm/unittests/TargetParser/TargetParserTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,7 @@ INSTANTIATE_TEST_SUITE_P(
11641164
AArch64CPUTestParams("a64fx", "armv8.2-a"),
11651165
AArch64CPUTestParams("fujitsu-monaka", "armv9.3-a"),
11661166
AArch64CPUTestParams("carmel", "armv8.2-a"),
1167+
AArch64CPUTestParams("gb10", "armv9.2-a"),
11671168
AArch64CPUTestParams("grace", "armv9-a"),
11681169
AArch64CPUTestParams("olympus", "armv9.2-a"),
11691170
AArch64CPUTestParams("saphira", "armv8.4-a"),
@@ -1260,7 +1261,7 @@ INSTANTIATE_TEST_SUITE_P(
12601261
AArch64CPUAliasTestParams::PrintToStringParamName);
12611262

12621263
// Note: number of CPUs includes aliases.
1263-
static constexpr unsigned NumAArch64CPUArchs = 90;
1264+
static constexpr unsigned NumAArch64CPUArchs = 91;
12641265

12651266
TEST(TargetParserTest, testAArch64CPUArchList) {
12661267
SmallVector<StringRef, NumAArch64CPUArchs> List;

0 commit comments

Comments
 (0)