Skip to content

Commit 9fe272c

Browse files
authored
Merge pull request #37702 from JuliaLang/yyc/cpu
Collection of a few CPU detection improvements
2 parents d7b391d + e9ad329 commit 9fe272c

File tree

3 files changed

+71
-31
lines changed

3 files changed

+71
-31
lines changed

src/features_aarch64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ JL_FEATURE_DEF(bf16, 32 + 14, 110000) // HWCAP2_BF16, ARMv8.2-BF16. Required in
6060
// JL_FEATURE_DEF(dgh, 32 + 15, UINT32_MAX) // HWCAP2_DGH, ARMv8.0-DGH. Not implemented in LLVM yet
6161
JL_FEATURE_DEF(rand, 32 + 16, 0) // HWCAP2_RNG, ARMv8.5-RNG
6262
JL_FEATURE_DEF(bti, 32 + 17, 0) // HWCAP2_BTI
63+
JL_FEATURE_DEF(mte, 32 + 18, 0) // HWCAP2_MTE, ARMv8.5-MemTag (reserved as of kernel 5.9-rc1)
6364

6465
// custom bits to match llvm model
6566
JL_FEATURE_DEF(v8_1a, 32 * 2 + 0, 0)
@@ -74,7 +75,6 @@ JL_FEATURE_DEF(v8_6a, 32 * 2 + 5, 110000)
7475
// am: ID_AA64PFR0_EL1.AMU (0b1, 0b10)
7576
// specrestrict: ID_AA64PFR0_EL1.CSV2 (0b10)
7677
// predres: ID_AA64PFR0_EL1.CSV3 (0b1)
77-
// mte: ID_AA64PFR1_EL1.MTE (0b1, 0b10)
7878
// ecv: ID_AA64MMFR0_EL1.ECV (0b1, 0b10) (LLVM 11)
7979
// lor: ID_AA64MMFR1_EL1.LO (0b1)
8080
// perfmon: ID_AA64DFR0_EL1.PMUVer (0b1, 0b100, 0b101, 0b110)

src/processor_arm.cpp

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ enum class CPU : uint32_t {
8989
arm_cortex_x1,
9090
arm_neoverse_e1,
9191
arm_neoverse_n1,
92-
arm_zeus,
92+
arm_neoverse_v1,
93+
arm_neoverse_n2,
9394

9495
// Cavium
9596
// aarch64
@@ -277,7 +278,9 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
277278
// .DIT: dit
278279
// .BT: bti
279280

280-
// ID_AA64PFR1_EL1.SSBS: ssbs
281+
// ID_AA64PFR1_EL1
282+
// .SSBS: ssbs
283+
// .MTE: mte
281284

282285
// ID_AA64MMFR2_EL1.AT: uscat
283286

@@ -305,7 +308,9 @@ constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, full
305308
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
306309
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
307310
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
308-
constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
311+
constexpr auto arm_neoverse_v1 = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
312+
constexpr auto arm_neoverse_n2 = armv8_5a | get_feature_masks(sve, i8mm, bf16, fullfp16, sve2,
313+
sve2_bitperm, rand, mte);
309314
constexpr auto cavium_thunderx = armv8a_crc_crypto;
310315
constexpr auto cavium_thunderx88 = armv8a_crc_crypto;
311316
constexpr auto cavium_thunderx88p1 = armv8a_crc_crypto;
@@ -367,7 +372,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
367372
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
368373
{"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
369374
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
370-
{"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
375+
{"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
376+
{"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
371377
{"thunderx", CPU::cavium_thunderx, CPU::generic, 0, Feature::cavium_thunderx},
372378
{"thunderxt88", CPU::cavium_thunderx88, CPU::generic, 0, Feature::cavium_thunderx88},
373379
{"thunderxt88p1", CPU::cavium_thunderx88p1, CPU::cavium_thunderx88, UINT32_MAX,
@@ -558,6 +564,8 @@ constexpr auto arm_cortex_a77 = armv8_2a;
558564
constexpr auto arm_cortex_a78 = armv8_2a;
559565
constexpr auto arm_cortex_x1 = armv8_2a;
560566
constexpr auto arm_neoverse_n1 = armv8_2a;
567+
constexpr auto arm_neoverse_v1 = armv8_4a;
568+
constexpr auto arm_neoverse_n2 = armv8_5a;
561569
constexpr auto nvidia_denver1 = armv8a; // TODO? (crc, crypto)
562570
constexpr auto nvidia_denver2 = armv8a_crc_crypto;
563571
constexpr auto apm_xgene1 = armv8a;
@@ -640,6 +648,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
640648
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
641649
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
642650
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
651+
{"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
652+
{"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
643653
{"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
644654
{"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},
645655
{"xgene1", CPU::apm_xgene1, CPU::armv8_a, UINT32_MAX, Feature::apm_xgene1},
@@ -809,7 +819,7 @@ static std::set<CPUID> get_cpuinfo(void)
809819
static CPU get_cpu_name(CPUID cpuid)
810820
{
811821
switch (cpuid.implementer) {
812-
case 0x41: // ARM
822+
case 0x41: // 'A': ARM
813823
switch (cpuid.part) {
814824
case 0xb02: return CPU::arm_mpcore;
815825
case 0xb36: return CPU::arm_1136jf_s;
@@ -849,20 +859,22 @@ static CPU get_cpu_name(CPUID cpuid)
849859
case 0xd20: return CPU::arm_cortex_m23;
850860
case 0xd21: return CPU::arm_cortex_m33;
851861
// case 0xd22: return CPU::arm_cortex_m55;
852-
case 0xd40: return CPU::arm_zeus;
862+
case 0xd40: return CPU::arm_neoverse_v1;
853863
case 0xd41: return CPU::arm_cortex_a78;
854864
case 0xd43: return CPU::arm_cortex_a65ae;
855865
case 0xd44: return CPU::arm_cortex_x1;
866+
case 0xd49: return CPU::arm_neoverse_n2;
856867
case 0xd4a: return CPU::arm_neoverse_e1;
857868
default: return CPU::generic;
858869
}
859-
case 0x42: // Broadcom (Cavium)
870+
case 0x42: // 'B': Broadcom (Cavium)
860871
switch (cpuid.part) {
872+
// case 0x00f: return CPU::broadcom_brahma_b15;
861873
// case 0x100: return CPU::broadcom_brahma_b53;
862874
case 0x516: return CPU::cavium_thunderx2t99p1;
863875
default: return CPU::generic;
864876
}
865-
case 0x43: // Cavium
877+
case 0x43: // 'C': Cavium
866878
switch (cpuid.part) {
867879
case 0xa0: return CPU::cavium_thunderx;
868880
case 0xa1:
@@ -881,73 +893,87 @@ static CPU get_cpu_name(CPUID cpuid)
881893
case 0xb8: return CPU::marvell_thunderx3t110;
882894
default: return CPU::generic;
883895
}
884-
case 0x46: // Fujitsu
896+
case 0x46: // 'F': Fujitsu
885897
switch (cpuid.part) {
886898
case 0x1: return CPU::fujitsu_a64fx;
887899
default: return CPU::generic;
888900
}
889-
case 0x48: // HiSilicon
901+
case 0x48: // 'H': HiSilicon
890902
switch (cpuid.part) {
891903
case 0xd01: return CPU::hisilicon_tsv110;
904+
case 0xd40: return CPU::arm_cortex_a76; // Kirin 980
892905
default: return CPU::generic;
893906
}
894-
case 0x4e: // NVIDIA
907+
case 0x4e: // 'N': NVIDIA
895908
switch (cpuid.part) {
896909
case 0x000: return CPU::nvidia_denver1;
897910
case 0x003: return CPU::nvidia_denver2;
898911
case 0x004: return CPU::nvidia_carmel;
899912
default: return CPU::generic;
900913
}
901-
case 0x50: // AppliedMicro
914+
case 0x50: // 'P': AppliedMicro
902915
// x-gene 2
903916
// x-gene 3
904917
switch (cpuid.part) {
905918
case 0x000: return CPU::apm_xgene1;
906919
default: return CPU::generic;
907920
}
908-
case 0x51: // Qualcomm
921+
case 0x51: // 'Q': Qualcomm
909922
switch (cpuid.part) {
910923
case 0x00f:
911924
case 0x02d:
912925
return CPU::qualcomm_scorpion;
913926
case 0x04d:
914927
case 0x06f:
915928
return CPU::qualcomm_krait;
916-
case 0x201:
917-
case 0x205:
918-
case 0x211:
929+
case 0x201: // silver
930+
case 0x205: // gold
931+
case 0x211: // silver
919932
return CPU::qualcomm_kyro;
920-
case 0x800:
921-
case 0x801:
922-
case 0x802:
923-
case 0x803:
924-
case 0x804:
925-
case 0x805:
926-
return CPU::arm_cortex_a73; // second-generation Kryo
933+
// kryo 2xx
934+
case 0x800: // gold
935+
return CPU::arm_cortex_a73;
936+
case 0x801: // silver
937+
return CPU::arm_cortex_a53;
938+
// kryo 3xx
939+
case 0x802: // gold
940+
return CPU::arm_cortex_a75;
941+
case 0x803: // silver
942+
return CPU::arm_cortex_a55;
943+
// kryo 4xx
944+
case 0x804: // gold
945+
return CPU::arm_cortex_a76;
946+
case 0x805: // silver
947+
return CPU::arm_cortex_a55;
948+
// kryo 5xx seems to use the cortex-a77 part ID directly
927949
case 0xc00:
928950
return CPU::qualcomm_falkor;
929951
case 0xc01:
930952
return CPU::qualcomm_saphira;
931953
default: return CPU::generic;
932954
}
933-
case 0x53: // Samsung
934-
if (cpuid.part == 1)
955+
case 0x53: // 'S': Samsung
956+
if (cpuid.part == 1) {
957+
if (cpuid.variant == 4)
958+
return CPU::samsung_exynos_m2;
935959
return CPU::samsung_exynos_m1;
960+
}
936961
if (cpuid.variant != 1)
937962
return CPU::generic;
938963
switch (cpuid.part) {
939964
case 0x2: return CPU::samsung_exynos_m3;
940965
case 0x3: return CPU::samsung_exynos_m4;
966+
case 0x4: return CPU::samsung_exynos_m5;
941967
default: return CPU::generic;
942968
}
943-
case 0x56: // Marvell
969+
case 0x56: // 'V': Marvell
944970
switch (cpuid.part) {
945971
case 0x581:
946972
case 0x584:
947973
return CPU::marvell_pj4;
948974
default: return CPU::generic;
949975
}
950-
case 0x61: // Apple
976+
case 0x61: // 'a': Apple
951977
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
952978
switch (cpuid.part) {
953979
case 0x0: // Swift
@@ -978,12 +1004,12 @@ static CPU get_cpu_name(CPUID cpuid)
9781004
return CPU::apple_a13;
9791005
default: return CPU::generic;
9801006
}
981-
case 0x68: // Huaxintong Semiconductor
1007+
case 0x68: // 'h': Huaxintong Semiconductor
9821008
switch (cpuid.part) {
9831009
case 0x0: return CPU::hxt_phecda;
9841010
default: return CPU::generic;
9851011
}
986-
case 0x69: // Intel
1012+
case 0x69: // 'i': Intel
9871013
switch (cpuid.part) {
9881014
case 0x001: return CPU::intel_3735d;
9891015
default: return CPU::generic;
@@ -1241,6 +1267,8 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
12411267
CPU::arm_cortex_a75,
12421268
CPU::arm_cortex_a76,
12431269
CPU::arm_neoverse_n1,
1270+
CPU::arm_neoverse_n2,
1271+
CPU::arm_neoverse_v1,
12441272
CPU::nvidia_denver2,
12451273
CPU::nvidia_carmel,
12461274
CPU::samsung_exynos_m1,
@@ -1327,6 +1355,8 @@ static inline const char *normalize_cpu_name(llvm::StringRef name)
13271355
{
13281356
if (name == "ares")
13291357
return "neoverse-n1";
1358+
if (name == "zeus")
1359+
return "neoverse-v1";
13301360
if (name == "cyclone")
13311361
return "apple-a7";
13321362
if (name == "typhoon")
@@ -1747,7 +1777,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
17471777
# if JL_LLVM_VERSION > 100000
17481778
"+tme,"
17491779
# endif
1750-
"+am,+specrestrict,+predres,+mte,+lor,+perfmon,+spe,+tracev8.4",
1780+
"+am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4",
17511781
#else
17521782
"+dotprod",
17531783
#endif

src/processor_x86.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ enum class CPU : uint32_t {
7979
intel_corei7_icelake_client,
8080
intel_corei7_icelake_server,
8181
intel_corei7_tigerlake,
82+
intel_corei7_sapphirerapids,
8283
intel_knights_landing,
8384
intel_knights_mill,
8485

@@ -209,6 +210,9 @@ constexpr auto icelake = cannonlake | get_feature_masks(avx512bitalg, vaes, avx5
209210
constexpr auto icelake_server = icelake | get_feature_masks(pconfig, wbnoinvd);
210211
constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdiri,
211212
movdir64b, shstk);
213+
constexpr auto sapphirerapids = icelake_server |
214+
get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, serialize, cldemote, waitpkg,
215+
ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b);
212216

213217
constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
214218
constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
@@ -260,6 +264,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
260264
Feature::icelake_server},
261265
{"tigerlake", CPU::intel_corei7_tigerlake, CPU::intel_corei7_icelake_client, 100000,
262266
Feature::tigerlake},
267+
{"sapphirerapids", CPU::intel_corei7_sapphirerapids, CPU::intel_corei7_icelake_server, 120000,
268+
Feature::sapphirerapids},
263269

264270
{"athlon64", CPU::amd_athlon_64, CPU::generic, 0, Feature::generic},
265271
{"athlon-fx", CPU::amd_athlon_fx, CPU::generic, 0, Feature::generic},
@@ -419,6 +425,10 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
419425
case 0x8d:
420426
return CPU::intel_corei7_tigerlake;
421427

428+
// Sapphire Rapids
429+
case 0x8f:
430+
return CPU::intel_corei7_sapphirerapids;
431+
422432
case 0x1c: // Most 45 nm Intel Atom processors
423433
case 0x26: // 45 nm Atom Lincroft
424434
case 0x27: // 32 nm Atom Medfield

0 commit comments

Comments
 (0)