Skip to content

Commit 22576e2

Browse files
authored
[Clang][AArch64] Add pessimistic vscale_range for sve/sme (llvm#137624)
The "target-features" function attribute is not currently considered when adding vscale_range to a function. When +sve/+sme are pushed onto functions with "#pragma attribute push(+sve/+sme)", the function potentially misses out on optimizations that rely on vscale_range being present.
1 parent aacebae commit 22576e2

File tree

10 files changed

+54
-21
lines changed

10 files changed

+54
-21
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,8 +1036,8 @@ class TargetInfo : public TransferrableTargetInfo,
10361036

10371037
/// Returns target-specific min and max values VScale_Range.
10381038
virtual std::optional<std::pair<unsigned, unsigned>>
1039-
getVScaleRange(const LangOptions &LangOpts,
1040-
bool IsArmStreamingFunction) const {
1039+
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
1040+
llvm::StringMap<bool> *FeatureMap = nullptr) const {
10411041
return std::nullopt;
10421042
}
10431043
/// The __builtin_clz* and __builtin_ctz* built-in

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -794,12 +794,17 @@ AArch64TargetInfo::getTargetBuiltins() const {
794794

795795
std::optional<std::pair<unsigned, unsigned>>
796796
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
797-
bool IsArmStreamingFunction) const {
797+
bool IsArmStreamingFunction,
798+
llvm::StringMap<bool> *FeatureMap) const {
798799
if (LangOpts.VScaleMin || LangOpts.VScaleMax)
799800
return std::pair<unsigned, unsigned>(
800801
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
801802

802-
if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
803+
if (hasFeature("sve") || (FeatureMap && (FeatureMap->lookup("sve"))))
804+
return std::pair<unsigned, unsigned>(1, 16);
805+
806+
if (IsArmStreamingFunction &&
807+
(hasFeature("sme") || (FeatureMap && (FeatureMap->lookup("sme")))))
803808
return std::pair<unsigned, unsigned>(1, 16);
804809

805810
return std::nullopt;

clang/lib/Basic/Targets/AArch64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
187187
llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
188188

189189
std::optional<std::pair<unsigned, unsigned>>
190-
getVScaleRange(const LangOptions &LangOpts,
191-
bool IsArmStreamingFunction) const override;
190+
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
191+
llvm::StringMap<bool> *FeatureMap = nullptr) const override;
192192
bool doesFeatureAffectCodeGen(StringRef Name) const override;
193193
bool validateCpuSupports(StringRef FeatureStr) const override;
194194
bool hasFeature(StringRef Feature) const override;

clang/lib/Basic/Targets/RISCV.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,8 @@ bool RISCVTargetInfo::initFeatureMap(
332332

333333
std::optional<std::pair<unsigned, unsigned>>
334334
RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
335-
bool IsArmStreamingFunction) const {
335+
bool IsArmStreamingFunction,
336+
llvm::StringMap<bool> *FeatureMap) const {
336337
// RISCV::RVVBitsPerBlock is 64.
337338
unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
338339

clang/lib/Basic/Targets/RISCV.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ class RISCVTargetInfo : public TargetInfo {
9999
const std::vector<std::string> &FeaturesVec) const override;
100100

101101
std::optional<std::pair<unsigned, unsigned>>
102-
getVScaleRange(const LangOptions &LangOpts,
103-
bool IsArmStreamingFunction) const override;
102+
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
103+
llvm::StringMap<bool> *FeatureMap = nullptr) const override;
104104

105105
bool hasFeature(StringRef Feature) const override;
106106

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,9 +1115,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
11151115
Fn->removeFnAttr("zero-call-used-regs");
11161116

11171117
// Add vscale_range attribute if appropriate.
1118+
llvm::StringMap<bool> FeatureMap;
1119+
bool IsArmStreaming = false;
1120+
if (FD) {
1121+
getContext().getFunctionFeatureMap(FeatureMap, FD);
1122+
IsArmStreaming = IsArmStreamingFunction(FD, true);
1123+
}
11181124
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
1119-
getContext().getTargetInfo().getVScaleRange(
1120-
getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false);
1125+
getContext().getTargetInfo().getVScaleRange(getLangOpts(), IsArmStreaming,
1126+
&FeatureMap);
11211127
if (VScaleRange) {
11221128
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
11231129
getLLVMContext(), VScaleRange->first, VScaleRange->second));

clang/test/CodeGen/AArch64/cpu-supports-target.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ int test_versions() {
220220
//.
221221
// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
222222
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
223-
// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
223+
// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
224224
//.
225225
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
226226
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}

clang/test/CodeGen/AArch64/targetattr.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -199,26 +199,44 @@ __attribute__((target("cpu=apple-m4")))
199199
//
200200
void applem4() {}
201201

202+
__attribute__((target("+sme")))
203+
// CHECK-LABEL: define {{[^@]+}}@plussmestreaming
204+
// CHECK-SAME: () #[[ATTR19:[0-9]+]] {
205+
// CHECK-NEXT: entry:
206+
// CHECK-NEXT: ret void
207+
//
208+
void plussmestreaming(void) __arm_streaming {}
209+
210+
__attribute__((target("+sme")))
211+
// CHECK-LABEL: define {{[^@]+}}@plussmelocallystreaming
212+
// CHECK-SAME: () #[[ATTR20:[0-9]+]] {
213+
// CHECK-NEXT: entry:
214+
// CHECK-NEXT: ret void
215+
//
216+
__arm_locally_streaming void plussmelocallystreaming(void) {}
217+
202218
//.
203219
// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
204-
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
205-
// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
206-
// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
207-
// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
220+
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
221+
// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
222+
// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
223+
// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
208224
// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="cortex-a710" }
209225
// CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" }
210226
// CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" }
211227
// CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" }
212-
// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" }
213-
// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
228+
// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" }
229+
// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
214230
// CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" }
215-
// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
231+
// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
216232
// CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16" }
217-
// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
218-
// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
233+
// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
234+
// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone vscale_range(1,16) "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
219235
// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
220236
// CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
221237
// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
238+
// CHECK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
239+
// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
222240
//.
223241
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
224242
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}

clang/test/CodeGen/X86/avx512-error.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@ __m512d zmm_error(__m512d a) {
2929
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
3030
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
3131
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
32+
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
3233
#endif

clang/test/CodeGen/target-avx-abi-diag.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,6 @@ __attribute__((target("avx512f"))) void call_avx512_ok2(void) {
9898
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
9999
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
100100
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
101+
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
102+
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
101103
#endif

0 commit comments

Comments
 (0)