@@ -89,7 +89,8 @@ enum class CPU : uint32_t {
89
89
arm_cortex_x1,
90
90
arm_neoverse_e1,
91
91
arm_neoverse_n1,
92
- arm_zeus,
92
+ arm_neoverse_v1,
93
+ arm_neoverse_n2,
93
94
94
95
// Cavium
95
96
// aarch64
@@ -277,7 +278,9 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
277
278
// .DIT: dit
278
279
// .BT: bti
279
280
280
- // ID_AA64PFR1_EL1.SSBS: ssbs
281
+ // ID_AA64PFR1_EL1
282
+ // .SSBS: ssbs
283
+ // .MTE: mte
281
284
282
285
// ID_AA64MMFR2_EL1.AT: uscat
283
286
@@ -305,7 +308,9 @@ constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, full
// Per-CPU feature masks. Each constant is either a base architecture mask
// OR-ed with the extra features passed to get_feature_masks(...), or a plain
// alias of a base mask (the cavium_thunderx* entries below).
305
308
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
306
309
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
307
310
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
308
// Removed line: the old `arm_zeus` name is replaced by `arm_neoverse_v1`
// directly below, with an identical base mask and feature list.
- constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16 , fullfp16, ssbs, rand);
311
+ constexpr auto arm_neoverse_v1 = armv8_4a | get_feature_masks(sve, i8mm, bf16 , fullfp16, ssbs, rand);
312
// New entry: armv8_5a base plus sve, i8mm, bf16, fullfp16, sve2, sve2_bitperm,
// rand and mte. The argument list continues on the next added line.
// NOTE(review): unlike arm_neoverse_v1, ssbs is not listed explicitly here;
// presumably armv8_5a already carries it -- confirm against its definition.
+ constexpr auto arm_neoverse_n2 = armv8_5a | get_feature_masks(sve, i8mm, bf16 , fullfp16, sve2,
313
+ sve2_bitperm, rand, mte);
309
314
constexpr auto cavium_thunderx = armv8a_crc_crypto;
310
315
constexpr auto cavium_thunderx88 = armv8a_crc_crypto;
311
316
constexpr auto cavium_thunderx88p1 = armv8a_crc_crypto;
@@ -367,7 +372,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
367
372
{" cortex-x1" , CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000 , Feature::arm_cortex_x1},
368
373
{" neoverse-e1" , CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000 , Feature::arm_neoverse_e1},
369
374
{" neoverse-n1" , CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000 , Feature::arm_neoverse_n1},
370
- {" zeus" , CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
375
+ {" neoverse-v1" , CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
376
+ {" neoverse-n2" , CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
371
377
{" thunderx" , CPU::cavium_thunderx, CPU::generic, 0 , Feature::cavium_thunderx},
372
378
{" thunderxt88" , CPU::cavium_thunderx88, CPU::generic, 0 , Feature::cavium_thunderx88},
373
379
{" thunderxt88p1" , CPU::cavium_thunderx88p1, CPU::cavium_thunderx88, UINT32_MAX,
@@ -558,6 +564,8 @@ constexpr auto arm_cortex_a77 = armv8_2a;
// Second table of per-CPU masks touched by this patch. Every entry visible
// here is a plain alias of a base architecture mask, with no extra
// get_feature_masks(...) term.
// NOTE(review): these names are also defined earlier in the file (around
// line 305), so this table presumably lives in a different preprocessor
// branch -- confirm against the surrounding #if structure.
558
564
constexpr auto arm_cortex_a78 = armv8_2a;
559
565
constexpr auto arm_cortex_x1 = armv8_2a;
560
566
constexpr auto arm_neoverse_n1 = armv8_2a;
567
// New entries: Neoverse V1 aliases armv8_4a and Neoverse N2 aliases armv8_5a,
// the same base masks the earlier table uses for these two CPUs.
+ constexpr auto arm_neoverse_v1 = armv8_4a;
568
+ constexpr auto arm_neoverse_n2 = armv8_5a;
561
569
// The existing TODO below leaves open whether crc/crypto should be enabled
// for denver1; nvidia_denver2 on the next entry uses armv8a_crc_crypto.
constexpr auto nvidia_denver1 = armv8a; // TODO? (crc, crypto)
562
570
constexpr auto nvidia_denver2 = armv8a_crc_crypto;
563
571
constexpr auto apm_xgene1 = armv8a;
@@ -640,6 +648,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
640
648
{" cortex-a78" , CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000 , Feature::arm_cortex_a78},
641
649
{" cortex-x1" , CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000 , Feature::arm_cortex_x1},
642
650
{" neoverse-n1" , CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000 , Feature::arm_neoverse_n1},
651
+ {" neoverse-v1" , CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
652
+ {" neoverse-n2" , CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
643
653
{" denver1" , CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
644
654
{" denver2" , CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},
645
655
{" xgene1" , CPU::apm_xgene1, CPU::armv8_a, UINT32_MAX, Feature::apm_xgene1},
@@ -809,7 +819,7 @@ static std::set<CPUID> get_cpuinfo(void)
809
819
static CPU get_cpu_name (CPUID cpuid)
810
820
{
811
821
switch (cpuid.implementer ) {
812
- case 0x41 : // ARM
822
+ case 0x41 : // 'A': ARM
813
823
switch (cpuid.part ) {
814
824
case 0xb02 : return CPU::arm_mpcore;
815
825
case 0xb36 : return CPU::arm_1136jf_s;
@@ -849,20 +859,22 @@ static CPU get_cpu_name(CPUID cpuid)
849
859
case 0xd20 : return CPU::arm_cortex_m23;
850
860
case 0xd21 : return CPU::arm_cortex_m33;
851
861
// case 0xd22: return CPU::arm_cortex_m55;
852
- case 0xd40 : return CPU::arm_zeus ;
862
+ case 0xd40 : return CPU::arm_neoverse_v1 ;
853
863
case 0xd41 : return CPU::arm_cortex_a78;
854
864
case 0xd43 : return CPU::arm_cortex_a65ae;
855
865
case 0xd44 : return CPU::arm_cortex_x1;
866
+ case 0xd49 : return CPU::arm_neoverse_n2;
856
867
case 0xd4a : return CPU::arm_neoverse_e1;
857
868
default : return CPU::generic;
858
869
}
859
- case 0x42 : // Broadcom (Cavium)
870
+ case 0x42 : // 'B': Broadcom (Cavium)
860
871
switch (cpuid.part ) {
872
+ // case 0x00f: return CPU::broadcom_brahma_b15;
861
873
// case 0x100: return CPU::broadcom_brahma_b53;
862
874
case 0x516 : return CPU::cavium_thunderx2t99p1;
863
875
default : return CPU::generic;
864
876
}
865
- case 0x43 : // Cavium
877
+ case 0x43 : // 'C': Cavium
866
878
switch (cpuid.part ) {
867
879
case 0xa0 : return CPU::cavium_thunderx;
868
880
case 0xa1 :
@@ -881,73 +893,87 @@ static CPU get_cpu_name(CPUID cpuid)
881
893
case 0xb8 : return CPU::marvell_thunderx3t110;
882
894
default : return CPU::generic;
883
895
}
884
- case 0x46 : // Fujitsu
896
+ case 0x46 : // 'F': Fujitsu
885
897
switch (cpuid.part ) {
886
898
case 0x1 : return CPU::fujitsu_a64fx;
887
899
default : return CPU::generic;
888
900
}
889
- case 0x48 : // HiSilicon
901
+ case 0x48 : // 'H': HiSilicon
890
902
switch (cpuid.part ) {
891
903
case 0xd01 : return CPU::hisilicon_tsv110;
904
+ case 0xd40 : return CPU::arm_cortex_a76; // Kirin 980
892
905
default : return CPU::generic;
893
906
}
894
- case 0x4e : // NVIDIA
907
+ case 0x4e : // 'N': NVIDIA
895
908
switch (cpuid.part ) {
896
909
case 0x000 : return CPU::nvidia_denver1;
897
910
case 0x003 : return CPU::nvidia_denver2;
898
911
case 0x004 : return CPU::nvidia_carmel;
899
912
default : return CPU::generic;
900
913
}
901
- case 0x50 : // AppliedMicro
914
+ case 0x50 : // 'P': AppliedMicro
902
915
// x-gene 2
903
916
// x-gene 3
904
917
switch (cpuid.part ) {
905
918
case 0x000 : return CPU::apm_xgene1;
906
919
default : return CPU::generic;
907
920
}
908
- case 0x51 : // Qualcomm
921
+ case 0x51 : // 'Q': Qualcomm
909
922
switch (cpuid.part ) {
910
923
case 0x00f :
911
924
case 0x02d :
912
925
return CPU::qualcomm_scorpion;
913
926
case 0x04d :
914
927
case 0x06f :
915
928
return CPU::qualcomm_krait;
916
- case 0x201 :
917
- case 0x205 :
918
- case 0x211 :
929
+ case 0x201 : // silver
930
+ case 0x205 : // gold
931
+ case 0x211 : // silver
919
932
return CPU::qualcomm_kyro;
920
- case 0x800 :
921
- case 0x801 :
922
- case 0x802 :
923
- case 0x803 :
924
- case 0x804 :
925
- case 0x805 :
926
- return CPU::arm_cortex_a73; // second-generation Kryo
933
+ // kryo 2xx
934
+ case 0x800 : // gold
935
+ return CPU::arm_cortex_a73;
936
+ case 0x801 : // silver
937
+ return CPU::arm_cortex_a53;
938
+ // kryo 3xx
939
+ case 0x802 : // gold
940
+ return CPU::arm_cortex_a75;
941
+ case 0x803 : // silver
942
+ return CPU::arm_cortex_a55;
943
+ // kryo 4xx
944
+ case 0x804 : // gold
945
+ return CPU::arm_cortex_a76;
946
+ case 0x805 : // silver
947
+ return CPU::arm_cortex_a55;
948
+ // kryo 5xx seems to be using ID for cortex-a77 directly
927
949
case 0xc00 :
928
950
return CPU::qualcomm_falkor;
929
951
case 0xc01 :
930
952
return CPU::qualcomm_saphira;
931
953
default : return CPU::generic;
932
954
}
933
- case 0x53 : // Samsung
934
- if (cpuid.part == 1 )
955
+ case 0x53 : // 'S': Samsung
956
+ if (cpuid.part == 1 ) {
957
+ if (cpuid.variant == 4 )
958
+ return CPU::samsung_exynos_m2;
935
959
return CPU::samsung_exynos_m1;
960
+ }
936
961
if (cpuid.variant != 1 )
937
962
return CPU::generic;
938
963
switch (cpuid.part ) {
939
964
case 0x2 : return CPU::samsung_exynos_m3;
940
965
case 0x3 : return CPU::samsung_exynos_m4;
966
+ case 0x4 : return CPU::samsung_exynos_m5;
941
967
default : return CPU::generic;
942
968
}
943
- case 0x56 : // Marvell
969
+ case 0x56 : // 'V': Marvell
944
970
switch (cpuid.part ) {
945
971
case 0x581 :
946
972
case 0x584 :
947
973
return CPU::marvell_pj4;
948
974
default : return CPU::generic;
949
975
}
950
- case 0x61 : // Apple
976
+ case 0x61 : // 'a': Apple
951
977
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
952
978
switch (cpuid.part ) {
953
979
case 0x0 : // Swift
@@ -978,12 +1004,12 @@ static CPU get_cpu_name(CPUID cpuid)
978
1004
return CPU::apple_a13;
979
1005
default : return CPU::generic;
980
1006
}
981
- case 0x68 : // Huaxintong Semiconductor
1007
+ case 0x68 : // 'h': Huaxintong Semiconductor
982
1008
switch (cpuid.part ) {
983
1009
case 0x0 : return CPU::hxt_phecda;
984
1010
default : return CPU::generic;
985
1011
}
986
- case 0x69 : // Intel
1012
+ case 0x69 : // 'i': Intel
987
1013
switch (cpuid.part ) {
988
1014
case 0x001 : return CPU::intel_3735d;
989
1015
default : return CPU::generic;
@@ -1241,6 +1267,8 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
1241
1267
CPU::arm_cortex_a75,
1242
1268
CPU::arm_cortex_a76,
1243
1269
CPU::arm_neoverse_n1,
1270
+ CPU::arm_neoverse_n2,
1271
+ CPU::arm_neoverse_v1,
1244
1272
CPU::nvidia_denver2,
1245
1273
CPU::nvidia_carmel,
1246
1274
CPU::samsung_exynos_m1,
@@ -1327,6 +1355,8 @@ static inline const char *normalize_cpu_name(llvm::StringRef name)
1327
1355
{
1328
1356
if (name == " ares" )
1329
1357
return " neoverse-n1" ;
1358
+ if (name == " zeus" )
1359
+ return " neoverse-v1" ;
1330
1360
if (name == " cyclone" )
1331
1361
return " apple-a7" ;
1332
1362
if (name == " typhoon" )
@@ -1747,7 +1777,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
1747
1777
# if JL_LLVM_VERSION > 100000
1748
1778
" +tme,"
1749
1779
# endif
1750
- " +am,+specrestrict,+predres,+mte,+ lor,+perfmon,+spe,+tracev8.4" ,
1780
+ " +am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4" ,
1751
1781
#else
1752
1782
" +dotprod" ,
1753
1783
#endif
0 commit comments