Skip to content

Commit 8e44d85

Browse files
yuyichao authored and simeonschaub committed
A few processor detection/features tweaks (JuliaLang#36831)
* Missing feature from Apple A13
* Enable Cortex-A78 and Cortex-X1 on LLVM 11 (llvm/llvm-project@954db63, https://reviews.llvm.org/D83206)
* More relaxed Zen detection: treat all family 23 as Zen* and treat all model >= 0x30 as Zen2. GCC uses a similar fallback structure, albeit based on features. This should still generate **correct** code, since that is always controlled by the available features. It should be as good a scheduling model estimate as anything else.

Fix JuliaLang#36826
1 parent c3e7bfc commit 8e44d85

File tree

2 files changed

+13
-11
lines changed

2 files changed

+13
-11
lines changed

src/processor_arm.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -301,8 +301,8 @@ constexpr auto arm_cortex_a73 = armv8a_crc;
301301
constexpr auto arm_cortex_a75 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16);
302302
constexpr auto arm_cortex_a76 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
303303
constexpr auto arm_cortex_a77 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
304-
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
305-
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
304+
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
305+
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
306306
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
307307
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
308308
constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
@@ -336,7 +336,7 @@ constexpr auto apple_a7 = armv8a_crc_crypto;
336336
constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
337337
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
338338
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
339-
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fullfp16, sha3);
339+
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
340340
constexpr auto apple_s4 = apple_a12;
341341
constexpr auto apple_s5 = apple_a12;
342342

@@ -363,8 +363,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
363363
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
364364
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
365365
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
366-
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
367-
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
366+
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
367+
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
368368
{"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
369369
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
370370
{"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
@@ -637,8 +637,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
637637
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
638638
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
639639
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
640-
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
641-
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
640+
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
641+
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
642642
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
643643
{"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
644644
{"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},

src/processor_x86.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -539,11 +539,13 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
539539
case 22:
540540
return CPU::amd_btver2;
541541
case 23:
542-
if ((model >= 0x30 && model <= 0x3f) || model == 0x71)
542+
// Known models:
543+
// Zen: 1, 17
544+
// Zen+: 8, 24
545+
// Zen2: 96, 113
546+
if (model >= 0x30)
543547
return CPU::amd_znver2;
544-
if (model <= 0x0f)
545-
return CPU::amd_znver1;
546-
return CPU::amd_btver1;
548+
return CPU::amd_znver1;
547549
}
548550
}
549551

0 commit comments

Comments
 (0)