diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9397546c8fc5d..71bf7dd9f09a1 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4911,7 +4911,8 @@ class Sema final : public SemaBase { bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); /// Check Target Version attrs - bool checkTargetVersionAttr(SourceLocation Loc, Decl *D, StringRef Str); + bool checkTargetVersionAttr(SourceLocation Loc, Decl *D, StringRef Str, + SmallString<64> &); bool checkTargetClonesAttrString( SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal, Decl *D, bool &HasDefault, bool &HasCommas, bool &HasNotDefault, diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index b82c46966cf0b..e2ede08942183 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -1336,10 +1336,13 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr, }); llvm::SmallDenseSet UniqueFeats; - for (auto &Feat : Features) + for (auto &Feat : Features) { + if (!getTarget().doesFeatureAffectCodeGen(Feat)) + continue; if (auto Ext = llvm::AArch64::parseFMVExtension(Feat)) if (UniqueFeats.insert(Ext->Name).second) Out << 'M' << Ext->Name; + } } std::unique_ptr diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index eba29e609cb05..6503f687710fc 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3279,20 +3279,52 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { } bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D, - StringRef AttrStr) { + StringRef AttrStr, SmallString<64> &NewStr) { enum FirstParam { Unsupported }; enum SecondParam { None }; enum ThirdParam { Target, TargetClones, TargetVersion }; - llvm::SmallVector Features; - if (Context.getTargetInfo().getTriple().isRISCV()) { - llvm::SmallVector AttrStrs; - AttrStr.split(AttrStrs, ';'); - - bool HasArch = false; - bool HasPriority = false; - bool HasDefault = false; - bool DuplicateAttr = false; - for (auto &AttrStr : AttrStrs) { + + StringRef PriorityString[5] = {"priority5", "priority4", "priority3", + "priority2", "priority1"}; + + llvm::SmallVector AttrStrs; + AttrStr.split(AttrStrs, ';'); + + bool HasArch = false; + bool HasFeatures = false; + bool HasPriority = false; + bool HasDefault = false; + bool DuplicateAttr = false; + for (StringRef AttrStr : AttrStrs) { + AttrStr = AttrStr.trim(); + if (AttrStr.starts_with("default")) { + if (HasDefault) + DuplicateAttr = true; + HasDefault = true; + if (Context.getTargetInfo().getTriple().isAArch64()) + NewStr.append(AttrStr); + } else if (AttrStr.consume_front("priority=")) { + if (HasPriority) + DuplicateAttr = true; + HasPriority = true; + unsigned Digit; + if (AttrStr.getAsInteger(0, Digit)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + if (Context.getTargetInfo().getTriple().isAArch64()) { + if (Digit < 1 || Digit > 32) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + // Convert priority=[1-32] -> priority1 + ... + priority5 + for (int BitPos = 4; BitPos >= 0; --BitPos) { + if ((32 - Digit) & (1U << BitPos)) { + if (!NewStr.empty()) + NewStr.append("+"); + NewStr.append(PriorityString[BitPos]); + } + } + } + } else if (Context.getTargetInfo().getTriple().isRISCV()) { // Only support arch=+ext,... syntax. if (AttrStr.starts_with("arch=+")) { if (HasArch) @@ -3307,51 +3339,46 @@ bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D, })) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << AttrStr << TargetVersion; - } else if (AttrStr.starts_with("default")) { - if (HasDefault) - DuplicateAttr = true; - HasDefault = true; - } else if (AttrStr.consume_front("priority=")) { - if (HasPriority) - DuplicateAttr = true; - HasPriority = true; - unsigned Digit; - if (AttrStr.getAsInteger(0, Digit)) - return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << AttrStr << TargetVersion; } else { return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << AttrStr << TargetVersion; } + } else if (Context.getTargetInfo().getTriple().isAArch64()) { + llvm::SmallVector Features; + AttrStr.split(Features, "+"); + for (StringRef Feat : Features) { + Feat = Feat.trim(); + if (!Context.getTargetInfo().validateCpuSupports(Feat)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Feat << TargetVersion; + if (!NewStr.empty()) + NewStr.append("+"); + NewStr.append(Feat); + } + HasFeatures = !Features.empty(); } + } - if (((HasPriority || HasArch) && HasDefault) || DuplicateAttr || - (HasPriority && !HasArch)) - return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << AttrStr << TargetVersion; + if ((HasDefault && (HasPriority || HasArch || HasFeatures)) || + DuplicateAttr || (HasPriority && !HasArch && !HasFeatures)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + + if (Context.getTargetInfo().getTriple().isRISCV()) + NewStr = AttrStr; - return false; - } - AttrStr.split(Features, "+"); - for (auto &CurFeature : Features) { - CurFeature = CurFeature.trim(); - if (CurFeature == "default") - continue; - if (!Context.getTargetInfo().validateCpuSupports(CurFeature)) - return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature << TargetVersion; - } return false; } static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef Str; + SmallString<64> NewStr; SourceLocation LiteralLoc; if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) || - S.checkTargetVersionAttr(LiteralLoc, D, Str)) + S.checkTargetVersionAttr(LiteralLoc, D, Str, NewStr)) return; TargetVersionAttr *NewAttr = - ::new (S.Context) TargetVersionAttr(S.Context, AL, Str); + ::new (S.Context) TargetVersionAttr(S.Context, AL, NewStr); D->addAttr(NewAttr); } @@ -3368,7 +3395,7 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { bool Sema::checkTargetClonesAttrString( SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal, - Decl *D, bool &HasDefault, bool &HasCommas, bool &HasNotDefault, + Decl *D, bool &HasDefault, bool &HasCommas, bool &HasNonDefault, SmallVectorImpl> &StringsBuffer) { enum FirstParam { Unsupported, Duplicate, Unknown }; enum SecondParam { None, CPU, Tune }; @@ -3380,6 +3407,9 @@ bool Sema::checkTargetClonesAttrString( return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << "" << TargetClones; + StringRef PriorityString[5] = {"priority5", "priority4", "priority3", + "priority2", "priority1"}; + std::pair Parts = {{}, Str}; while (!Parts.second.empty()) { Parts = Parts.second.split(','); @@ -3394,90 +3424,116 @@ bool Sema::checkTargetClonesAttrString( return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << "" << TargetClones; - if (TInfo.getTriple().isAArch64()) { - // AArch64 target clones specific - if (Cur == "default") { + if (TInfo.getTriple().isX86()) { + if (Cur.starts_with("arch=")) { + if (!Context.getTargetInfo().isValidCPUName( + Cur.drop_front(sizeof("arch=") - 1))) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1) + << TargetClones; + } else if (Cur == "default") { DefaultIsDupe = HasDefault; HasDefault = true; - if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe) - Diag(CurLoc, diag::warn_target_clone_duplicate_options); - else - StringsBuffer.push_back(Cur); - } else { - std::pair CurParts = {{}, Cur}; - llvm::SmallVector CurFeatures; - while (!CurParts.second.empty()) { - CurParts = CurParts.second.split('+'); - StringRef CurFeature = CurParts.first.trim(); - if (!TInfo.validateCpuSupports(CurFeature)) { - Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature << TargetClones; - continue; - } - if (TInfo.doesFeatureAffectCodeGen(CurFeature)) - HasCodeGenImpact = true; - CurFeatures.push_back(CurFeature); - } - // Canonize TargetClones Attributes - llvm::sort(CurFeatures); - SmallString<64> Res; - for (auto &CurFeat : CurFeatures) { - if (!Res.empty()) - Res.append("+"); - Res.append(CurFeat); - } - if (llvm::is_contained(StringsBuffer, Res) || DefaultIsDupe) - Diag(CurLoc, diag::warn_target_clone_duplicate_options); - else if (!HasCodeGenImpact) - // Ignore features in target_clone attribute that don't impact - // code generation - Diag(CurLoc, diag::warn_target_clone_no_impact_options); - else if (!Res.empty()) { - StringsBuffer.push_back(Res); - HasNotDefault = true; - } + } else if (!Context.getTargetInfo().isValidFeatureName(Cur) || + Context.getTargetInfo().getFMVPriority(Cur) == 0) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Cur << TargetClones; + if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + // Note: Add even if there are duplicates, since it changes name mangling. + StringsBuffer.push_back(Cur); + } else { + // Other targets ( currently AArch64 and RISC-V ) + if (TInfo.getTriple().isRISCV()) { + // Suppress warn_target_clone_mixed_values + HasCommas = false; + + // Cur is split's parts of Str. RISC-V uses Str directly, + // so skip when encountered more than once. + if (!Str.starts_with(Cur)) + continue; } - } else if (TInfo.getTriple().isRISCV()) { - // Suppress warn_target_clone_mixed_values - HasCommas = false; - - // Cur is split's parts of Str. RISC-V uses Str directly, - // so skip when encountered more than once. - if (!Str.starts_with(Cur)) - continue; - + SmallString<64> NewStr; llvm::SmallVector AttrStrs; Str.split(AttrStrs, ";"); bool IsPriority = false; bool IsDefault = false; - for (auto &AttrStr : AttrStrs) { - // Only support arch=+ext,... syntax. - if (AttrStr.starts_with("arch=+")) { - ParsedTargetAttr TargetAttr = - Context.getTargetInfo().parseTargetAttr(AttrStr); - - if (TargetAttr.Features.empty() || - llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) { - return !RISCV().isValidFMVExtension(Ext); - })) - return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Str << TargetClones; - } else if (AttrStr.starts_with("default")) { + for (StringRef AttrStr : AttrStrs) { + AttrStr = AttrStr.trim(); + if (AttrStr == "default") { IsDefault = true; DefaultIsDupe = HasDefault; HasDefault = true; + if (TInfo.getTriple().isAArch64()) + NewStr.append(AttrStr); } else if (AttrStr.consume_front("priority=")) { IsPriority = true; unsigned Digit; if (AttrStr.getAsInteger(0, Digit)) return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << Str << TargetClones; - } else { - return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Str << TargetClones; + if (TInfo.getTriple().isAArch64()) { + if (Digit < 1 || Digit > 32) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Str << TargetClones; + // Convert priority=[1-32] -> priority1 + ... + priority5 + for (int BitPos = 4; BitPos >= 0; --BitPos) { + if ((32 - Digit) & (1U << BitPos)) { + if (!NewStr.empty()) + NewStr.append("+"); + NewStr.append(PriorityString[BitPos]); + } + } + } + } else if (TInfo.getTriple().isRISCV()) { + // Only support arch=+ext,... syntax. + if (AttrStr.starts_with("arch=+")) { + ParsedTargetAttr TargetAttr = + Context.getTargetInfo().parseTargetAttr(AttrStr); + + if (TargetAttr.Features.empty() || + llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) { + return !RISCV().isValidFMVExtension(Ext); + })) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Str << TargetClones; + } else { + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Str << TargetClones; + } + } else if (TInfo.getTriple().isAArch64()) { + llvm::SmallVector Features; + llvm::SmallVector ValidFeatures; + AttrStr.split(Features, "+"); + for (StringRef Feat : Features) { + Feat = Feat.trim(); + if (!TInfo.validateCpuSupports(Feat)) { + Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Feat << TargetClones; + continue; + } + if (TInfo.doesFeatureAffectCodeGen(Feat)) + HasCodeGenImpact = true; + ValidFeatures.push_back(Feat); + } + HasNonDefault = !ValidFeatures.empty(); + // Ignore features in target_clone attribute that don't impact + // code generation + if (!HasCodeGenImpact) + Diag(CurLoc, diag::warn_target_clone_no_impact_options); + + // Canonize TargetClones Attributes + llvm::sort(ValidFeatures); + for (StringRef Feat : ValidFeatures) { + if (!NewStr.empty()) + NewStr.append("+"); + NewStr.append(Feat); + } } } + if (TInfo.getTriple().isAArch64()) + Str = NewStr; if (IsPriority && IsDefault) return Diag(CurLoc, diag::warn_unsupported_target_attribute) @@ -3486,25 +3542,6 @@ bool Sema::checkTargetClonesAttrString( if (llvm::is_contained(StringsBuffer, Str) || DefaultIsDupe) Diag(CurLoc, diag::warn_target_clone_duplicate_options); StringsBuffer.push_back(Str); - } else { - // Other targets ( currently X86 ) - if (Cur.starts_with("arch=")) { - if (!Context.getTargetInfo().isValidCPUName( - Cur.drop_front(sizeof("arch=") - 1))) - return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1) - << TargetClones; - } else if (Cur == "default") { - DefaultIsDupe = HasDefault; - HasDefault = true; - } else if (!Context.getTargetInfo().isValidFeatureName(Cur) || - Context.getTargetInfo().getFMVPriority(Cur) == 0) - return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Cur << TargetClones; - if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe) - Diag(CurLoc, diag::warn_target_clone_duplicate_options); - // Note: Add even if there are duplicates, since it changes name mangling. - StringsBuffer.push_back(Cur); } } if (Str.rtrim().ends_with(",")) @@ -3530,7 +3567,7 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { SmallVector Strings; SmallVector, 2> StringsBuffer; - bool HasCommas = false, HasDefault = false, HasNotDefault = false; + bool HasCommas = false, HasDefault = false, HasNonDefault = false; for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { StringRef CurStr; @@ -3539,7 +3576,7 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { S.checkTargetClonesAttrString( LiteralLoc, CurStr, cast(AL.getArgAsExpr(I)->IgnoreParenCasts()), D, - HasDefault, HasCommas, HasNotDefault, StringsBuffer)) + HasDefault, HasCommas, HasNonDefault, StringsBuffer)) return; } for (auto &SmallStr : StringsBuffer) @@ -3565,7 +3602,7 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } // No multiversion if we have default version only. - if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasNotDefault) + if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasNonDefault) return; cast(D)->setIsMultiVersion(); diff --git a/clang/test/AST/attr-target-version.c b/clang/test/AST/attr-target-version.c index b537f5e685a31..adaef66193dee 100644 --- a/clang/test/AST/attr-target-version.c +++ b/clang/test/AST/attr-target-version.c @@ -2,7 +2,78 @@ int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; } int __attribute__((target_clones(" lse + fp + sha3 ", "default"))) fooc(void) { return 2; } -// CHECK: TargetVersionAttr -// CHECK: sve2-bitperm + sha2 + +int __attribute__((target_version("priority=1;aes"))) explicit_priority(void) { return 1; } +int __attribute__((target_version("priority=2;bf16"))) explicit_priority(void) { return 2; } +int __attribute__((target_version("priority=3;bti"))) explicit_priority(void) { return 3; } +int __attribute__((target_version("priority=4;crc"))) explicit_priority(void) { return 4; } +int __attribute__((target_version("priority=5;dit"))) explicit_priority(void) { return 5; } +int __attribute__((target_version("priority=6;dotprod"))) explicit_priority(void) { return 6; } +int __attribute__((target_version("priority=7;dpb"))) explicit_priority(void) { return 7; } +int __attribute__((target_version("priority=8;dpb2"))) explicit_priority(void) { return 8; } +int __attribute__((target_version("f32mm;priority=9"))) explicit_priority(void) { return 9; } +int __attribute__((target_version("f64mm;priority=10"))) explicit_priority(void) { return 10; } +int __attribute__((target_version("fcma;priority=11"))) explicit_priority(void) { return 11; } +int __attribute__((target_version("flagm;priority=12"))) explicit_priority(void) { return 12; } +int __attribute__((target_version("flagm2;priority=13"))) explicit_priority(void) { return 13; } +int __attribute__((target_version("fp;priority=14"))) explicit_priority(void) { return 14; } +int __attribute__((target_version("fp16;priority=15"))) explicit_priority(void) { return 15; } +int __attribute__((target_version("fp16fml;priority=16"))) explicit_priority(void) { return 16; } + +int __attribute__((target_clones( + "priority=17;frintts", + "priority=18;i8mm", + "priority=19;jscvt", + "priority=20;lse", + "priority=21;memtag", + "priority=22;mops", + "priority=23;rcpc", + "priority=24;rcpc2", + "rcpc3;priority=25", + "rdm;priority=26", + "rng;priority=27", + "sb;priority=28", + "sha2;priority=29", + "sha3;priority=30", + "simd;priority=31", + "sm4;priority=32", + "default"))) explicit_priority(void) { return 0; } + +// CHECK: TargetVersionAttr {{.*}} "sve2-bitperm+sha2" +// CHECK: TargetClonesAttr {{.*}} fp+lse+sha3 default + +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority3+priority4+priority5+aes" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority3+priority4+bf16" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority3+priority5+bti" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority3+crc" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority4+priority5+dit" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority4+dotprod" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+priority5+dpb" +// CHECK: TargetVersionAttr {{.*}} "priority1+priority2+dpb2" +// CHECK: TargetVersionAttr {{.*}} "f32mm+priority1+priority3+priority4+priority5" +// CHECK: TargetVersionAttr {{.*}} "f64mm+priority1+priority3+priority4" +// CHECK: TargetVersionAttr {{.*}} "fcma+priority1+priority3+priority5" +// CHECK: TargetVersionAttr {{.*}} "flagm+priority1+priority3" +// CHECK: TargetVersionAttr {{.*}} "flagm2+priority1+priority4+priority5" +// CHECK: TargetVersionAttr {{.*}} "fp+priority1+priority4" +// CHECK: TargetVersionAttr {{.*}} "fp16+priority1+priority5" +// CHECK: TargetVersionAttr {{.*}} "fp16fml+priority1" + // CHECK: TargetClonesAttr -// CHECK: fp+lse+sha3 default +// CHECK: priority2+priority3+priority4+priority5+frintts +// CHECK: priority2+priority3+priority4+i8mm +// CHECK: priority2+priority3+priority5+jscvt +// CHECK: priority2+priority3+lse +// CHECK: priority2+priority4+priority5+memtag +// CHECK: priority2+priority4+mops +// CHECK: priority2+priority5+rcpc +// CHECK: priority2+rcpc2 +// CHECK: rcpc3+priority3+priority4+priority5 +// CHECK: rdm+priority3+priority4 +// CHECK: rng+priority3+priority5 +// CHECK: sb+priority3 +// CHECK: sha2+priority4+priority5 +// CHECK: sha3+priority4 +// CHECK: simd+priority5 +// CHECK: sm4 +// CHECK: default diff --git a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c index e7e611e09542e..44a798a84a8a2 100644 --- a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c +++ b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_IMPLICIT_DEFAULT // RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_DEFAULT +// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_VERSION_PRIORITY +// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_CLONES_PRIORITY #if defined(CHECK_IMPLICIT_DEFAULT) @@ -21,4 +23,18 @@ __attribute__((target_version("default"))) int explicit_default_bad(void) { retu // expected-note@-2 {{previous definition is here}} __attribute__((target_clones("aes", "lse", "default"))) int explicit_default_bad(void) { return 1; } +#elif defined(CHECK_EXPLICIT_VERSION_PRIORITY) + +__attribute__((target_version("aes"))) int explicit_version_priority(void) { return 0; } +// expected-error@+2 {{definition with same mangled name 'explicit_version_priority._Maes' as another definition}} +// expected-note@-2 {{previous definition is here}} +__attribute__((target_version("priority1+aes"))) int explicit_version_priority(void) { return 1; } + +#elif defined(CHECK_EXPLICIT_CLONES_PRIORITY) + +__attribute__((target_version("aes+priority2"))) int explicit_clones_priority(void) { return 0; } +// expected-error@+2 {{definition with same mangled name 'explicit_clones_priority._Maes' as another definition}} +// expected-note@-2 {{previous definition is here}} +__attribute__((target_clones("priority1+aes", "lse"))) int explicit_clones_priority(void) { return 1; } + #endif diff --git a/clang/test/CodeGen/AArch64/fmv-explicit-priority.c b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c new file mode 100644 index 0000000000000..5e0b73daea028 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O3 -fno-inline -emit-llvm -o - %s | FileCheck %s + +__attribute__((target_version("priority=1;lse"))) int foo(void) { return 1; } +__attribute__((target_version("priority=2;sve2"))) int foo(void) { return 2; } +__attribute__((target_version("priority=3;sve"))) int foo(void) { return 3; } +__attribute__((target_version( "default"))) int foo(void) { return 0; } + +__attribute__((target_clones("priority=1;lse+sve2", "priority=2;lse", "priority=3;sve", "default"))) +int fmv_caller(void) { return foo(); } + + +__attribute__((target_version("aes"))) int bar(void) { return 1; } +__attribute__((target_version("priority=1;sm4"))) int bar(void) { return 2; } +__attribute__((target_version("default"))) int bar(void) { return 0; } + +__attribute__((target("aes"))) int regular_caller_aes() { return bar(); } +__attribute__((target("sm4"))) int regular_caller_sm4() { return bar(); } +//. +// CHECK: @__aarch64_cpu_features = external dso_local local_unnamed_addr global { i64 } +// CHECK: @foo = weak_odr ifunc i32 (), ptr @foo.resolver +// CHECK: @fmv_caller = weak_odr ifunc i32 (), ptr @fmv_caller.resolver +// CHECK: @bar = weak_odr ifunc i32 (), ptr @bar.resolver +//. +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@foo._Mlse +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@foo._Msve2 +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@foo._Msve +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@foo.default +// CHECK-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._MlseMsve2 +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Mlse +// CHECK-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: noinline nounwind vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Msve +// CHECK-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo() #[[ATTR12:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller.default +// CHECK-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo.default() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar._Maes +// CHECK-SAME: () #[[ATTR8:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar._Msm4 +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar.default +// CHECK-SAME: () #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK: Function Attrs: noinline nounwind +// CHECK-LABEL: define {{[^@]+}}@regular_caller_aes +// CHECK-SAME: () local_unnamed_addr #[[ATTR10:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar() #[[ATTR12]] +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@regular_caller_sm4 +// CHECK-SAME: () local_unnamed_addr #[[ATTR11:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar._Msm4() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define {{[^@]+}}@foo.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 128 +// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0 +// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE:%.*]], label [[COMMON_RET:%.*]] +// CHECK: common.ret: +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @foo._Mlse, [[RESOLVER_ENTRY:%.*]] ], [ @foo._Msve2, [[RESOLVER_ELSE]] ], [ [[FOO__MSVE_FOO_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 69793284352 +// CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 69793284352 +// CHECK-NEXT: br i1 [[TMP3]], label [[COMMON_RET]], label [[RESOLVER_ELSE2]] +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616 +// CHECK-NEXT: [[FOO__MSVE_FOO_DEFAULT]] = select i1 [[TMP5]], ptr @foo._Msve, ptr @foo.default +// CHECK-NEXT: br label [[COMMON_RET]] +// +// +// CHECK-LABEL: define {{[^@]+}}@fmv_caller.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284480 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284480 +// CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: common.ret: +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @fmv_caller._MlseMsve2, [[RESOLVER_ENTRY:%.*]] ], [ @fmv_caller._Mlse, [[RESOLVER_ELSE]] ], [ [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 128 +// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE2]], label [[COMMON_RET]] +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616 +// CHECK-NEXT: [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT]] = select i1 [[TMP5]], ptr @fmv_caller._Msve, ptr @fmv_caller.default +// CHECK-NEXT: br label [[COMMON_RET]] +// +// +// CHECK-LABEL: define {{[^@]+}}@bar.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 800 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 800 +// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 33536 +// CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 33536 +// CHECK-NEXT: [[BAR__MAES_BAR_DEFAULT:%.*]] = select i1 [[TMP4]], ptr @bar._Maes, ptr @bar.default +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], ptr @bar._Msm4, ptr [[BAR__MAES_BAR_DEFAULT]] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// +//. +// CHECK: attributes #[[ATTR0]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="lse,priority1,priority2,priority3,priority4,priority5" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } +// CHECK: attributes #[[ATTR1]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="priority1,priority2,priority3,priority4,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve,+sve2" } +// CHECK: attributes #[[ATTR2]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="priority1,priority2,priority3,priority5,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR3]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR4]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="lse,priority1,priority2,priority3,priority4,priority5,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+lse,+sve,+sve2" } +// CHECK: attributes #[[ATTR5]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="lse,priority1,priority2,priority3,priority4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } +// CHECK: attributes #[[ATTR6]] = { noinline nounwind vscale_range(1,16) "fmv-features"="priority1,priority2,priority3,priority5,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR7]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR8]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="aes" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" } +// CHECK: attributes #[[ATTR9]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="priority1,priority2,priority3,priority4,priority5,sm4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" } +// CHECK: attributes #[[ATTR10]] = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" } +// CHECK: attributes #[[ATTR11]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" } +// CHECK: attributes #[[ATTR12]] = { nounwind } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 90d92e0fcf55c..c8e9a67b627f6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1916,9 +1916,13 @@ class TargetTransformInfo { LLVM_ABI bool hasArmWideBranch(bool Thumb) const; /// Returns a bitmask constructed from the target-features or fmv-features - /// metadata of a function. + /// metadata of a function corresponding to its Arch Extensions. LLVM_ABI uint64_t getFeatureMask(const Function &F) const; + /// Returns a bitmask constructed from the target-features or fmv-features + /// metadata of a function corresponding to its FMV priority. + LLVM_ABI uint64_t getPriorityMask(const Function &F) const; + /// Returns true if this is an instance of a function with multiple versions. LLVM_ABI bool isMultiversionedFunction(const Function &F) const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index c22928c9bcd94..628c92ddebd21 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1126,6 +1126,7 @@ class TargetTransformInfoImplBase { virtual bool hasArmWideBranch(bool) const { return false; } virtual uint64_t getFeatureMask(const Function &F) const { return 0; } + virtual uint64_t getPriorityMask(const Function &F) const { return 0; } virtual bool isMultiversionedFunction(const Function &F) const { return false; diff --git a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc index f2bad28ada93e..cde0d16e0b32e 100644 --- a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc +++ b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc @@ -59,7 +59,12 @@ enum FeatPriorities { PRIOR_SME_I64, PRIOR_SME2, PRIOR_MOPS, - PRIOR_CSSC + PRIOR_CSSC, + PRIOR_5, + PRIOR_4, + PRIOR_3, + PRIOR_2, + PRIOR_1 }; #endif diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 3ebd9d487ba04..fb4002e277616 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1426,6 +1426,10 @@ uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const { return TTIImpl->getFeatureMask(F); } +uint64_t TargetTransformInfo::getPriorityMask(const Function &F) const { + return TTIImpl->getPriorityMask(F); +} + bool TargetTransformInfo::isMultiversionedFunction(const Function &F) const { return TTIImpl->isMultiversionedFunction(F); } diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index b0f76ec6a6480..efcb6f552d788 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -83,3 +83,8 @@ def : FMVExtension<"sve2-sha3", "SVE_SHA3">; def : FMVExtension<"sve2-sm4", "SVE_SM4">; def : FMVExtension<"wfxt", "WFXT">; def : FMVExtension<"cssc", "CSSC">; +let FeatureBit = "FEAT_MAX" in def : FMVExtension<"priority1", "1">; +let FeatureBit = "FEAT_MAX" in def : FMVExtension<"priority2", "2">; +let FeatureBit = "FEAT_MAX" in def : FMVExtension<"priority3", "3">; +let FeatureBit = "FEAT_MAX" in def : FMVExtension<"priority4", "4">; +let FeatureBit = "FEAT_MAX" in def : FMVExtension<"priority5", "5">; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 3387dee8aa4c8..f920efc0386ea 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -249,12 +249,23 @@ static bool hasPossibleIncompatibleOps(const Function *F) { return false; } -uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const { +static void extractAttrFeatures(const Function &F, const AArch64TTIImpl *TTI, + SmallVectorImpl &Features) { StringRef AttributeStr = - isMultiversionedFunction(F) ? "fmv-features" : "target-features"; + TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features"; StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString(); - SmallVector Features; FeatureStr.split(Features, ","); +} + +uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const { + SmallVector Features; + extractAttrFeatures(F, this, Features); + return AArch64::getCpuSupportsMask(Features); +} + +uint64_t AArch64TTIImpl::getPriorityMask(const Function &F) const { + SmallVector Features; + extractAttrFeatures(F, this, Features); return AArch64::getFMVPriority(Features); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 9ada70bd7086a..c653e06785eee 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -92,6 +92,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase { unsigned DefaultCallPenalty) const override; uint64_t getFeatureMask(const Function &F) const override; + uint64_t getPriorityMask(const Function &F) const override; bool isMultiversionedFunction(const Function &F) const override; diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index 4a2523440f0f0..37d82abf34db8 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -55,21 +55,30 @@ std::optional lookupFMVByID(AArch64::ArchExtKind ExtID) { return {}; } +std::optional getFMVInfoFrom(StringRef Feature) { + std::optional FMV = AArch64::parseFMVExtension(Feature); + if (!FMV && Feature.starts_with('+')) + if (std::optional Ext = + AArch64::targetFeatureToExtension(Feature)) + FMV = lookupFMVByID(Ext->ID); + return FMV; +} + uint64_t AArch64::getFMVPriority(ArrayRef Features) { // Transitively enable the Arch Extensions which correspond to each feature. ExtensionSet FeatureBits; + uint64_t PriorityMask = 0; for (const StringRef Feature : Features) { - std::optional FMV = parseFMVExtension(Feature); - if (!FMV && Feature.starts_with('+')) { - if (std::optional Info = targetFeatureToExtension(Feature)) - FMV = lookupFMVByID(Info->ID); + if (std::optional FMV = getFMVInfoFrom(Feature)) { + // FMV feature without a corresponding Arch Extension may affect priority + if (FMV->ID) + FeatureBits.enable(*FMV->ID); + else + PriorityMask |= (1ULL << FMV->PriorityBit); } - if (FMV && FMV->ID) - FeatureBits.enable(*FMV->ID); } // Construct a bitmask for all the transitively enabled Arch Extensions. - uint64_t PriorityMask = 0; for (const FMVInfo &Info : getFMVInfo()) if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) PriorityMask |= (1ULL << Info.PriorityBit); @@ -81,9 +90,9 @@ uint64_t AArch64::getCpuSupportsMask(ArrayRef Features) { // Transitively enable the Arch Extensions which correspond to each feature. ExtensionSet FeatureBits; for (const StringRef Feature : Features) - if (std::optional Info = parseFMVExtension(Feature)) - if (Info->ID) - FeatureBits.enable(*Info->ID); + if (std::optional FMV = getFMVInfoFrom(Feature)) + if (FMV->ID) + FeatureBits.enable(*FMV->ID); // Construct a bitmask for all the transitively enabled Arch Extensions. uint64_t FeaturesMask = 0; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 7f5a2a982982d..9c467dc012a5a 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2693,8 +2693,10 @@ static bool OptimizeNonTrivialIFuncs( Module &M, function_ref GetTTI) { bool Changed = false; - // Cache containing the mask constructed from a function's target features. + // Cache containing the feature mask constructed from a function's metadata. DenseMap FeatureMask; + // Cache containing the priority mask constructed from a function's metadata. + DenseMap PriorityMask; for (GlobalIFunc &IF : M.ifuncs()) { if (IF.isInterposable()) @@ -2724,16 +2726,19 @@ static bool OptimizeNonTrivialIFuncs( LLVM_DEBUG(dbgs() << "Statically resolving calls to function " << Resolver->getName() << "\n"); - // Cache the feature mask for each callee. + // Cache the masks for each callee. for (Function *Callee : Callees) { - auto [It, Inserted] = FeatureMask.try_emplace(Callee); - if (Inserted) - It->second = TTI.getFeatureMask(*Callee); + auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Callee); + if (FeatInserted) + FeatIt->second = TTI.getFeatureMask(*Callee); + auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Callee); + if (PriorInserted) + PriorIt->second = TTI.getPriorityMask(*Callee); } // Sort the callee versions in decreasing priority order. sort(Callees, [&](auto *LHS, auto *RHS) { - return FeatureMask[LHS] > FeatureMask[RHS]; + return PriorityMask[LHS] > PriorityMask[RHS]; }); // Find the callsites and cache the feature mask for each caller. @@ -2746,6 +2751,9 @@ static bool OptimizeNonTrivialIFuncs( auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller); if (FeatInserted) FeatIt->second = TTI.getFeatureMask(*Caller); + auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Caller); + if (PriorInserted) + PriorIt->second = TTI.getPriorityMask(*Caller); auto [CallIt, CallInserted] = CallSites.try_emplace(Caller); if (CallInserted) Callers.push_back(Caller); @@ -2756,7 +2764,7 @@ static bool OptimizeNonTrivialIFuncs( // Sort the caller versions in decreasing priority order. sort(Callers, [&](auto *LHS, auto *RHS) { - return FeatureMask[LHS] > FeatureMask[RHS]; + return PriorityMask[LHS] > PriorityMask[RHS]; }); auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; };