Skip to content

Commit d8d4c18

Browse files
[AArch64][SME] Disable inlining of callees with new ZT0 state (#121338)
Inlining must be disabled for new-ZT0 callees, because the callee is required to save ZT0 and toggle PSTATE.ZA on entry.
1 parent 648e256 commit d8d4c18

File tree

6 files changed

+61 -13 lines changed

6 files changed

+61 -13 lines changed

clang/include/clang/Basic/DiagnosticFrontendKinds.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,8 @@ def warn_function_always_inline_attribute_mismatch : Warning<
291291
"inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>;
292292
def err_function_always_inline_new_za : Error<
293293
"always_inline function %0 has new za state">;
294+
def err_function_always_inline_new_zt0
295+
: Error<"always_inline function %0 has new zt0 state">;
294296

295297
def warn_avx_calling_convention
296298
: Warning<"AVX vector %select{return|argument}0 of type %1 without '%2' "

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1169,8 +1169,9 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
11691169
enum class ArmSMEInlinability : uint8_t {
11701170
Ok = 0,
11711171
ErrorCalleeRequiresNewZA = 1 << 0,
1172-
WarnIncompatibleStreamingModes = 1 << 1,
1173-
ErrorIncompatibleStreamingModes = 1 << 2,
1172+
ErrorCalleeRequiresNewZT0 = 1 << 1,
1173+
WarnIncompatibleStreamingModes = 1 << 2,
1174+
ErrorIncompatibleStreamingModes = 1 << 3,
11741175

11751176
IncompatibleStreamingModes =
11761177
WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
@@ -1198,9 +1199,12 @@ static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
11981199
else
11991200
Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
12001201
}
1201-
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
1202+
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
12021203
if (NewAttr->isNewZA())
12031204
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
1205+
if (NewAttr->isNewZT0())
1206+
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
1207+
}
12041208

12051209
return Inlinability;
12061210
}
@@ -1227,6 +1231,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
12271231
ArmSMEInlinability::ErrorCalleeRequiresNewZA)
12281232
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
12291233
<< Callee->getDeclName();
1234+
1235+
if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
1236+
ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
1237+
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
1238+
<< Callee->getDeclName();
12301239
}
12311240

12321241
// If the target does not have floating-point registers, but we are using a

clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_FLATTEN -o - | FileCheck %s
2-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
33

44
// REQUIRES: aarch64-registered-target
55

@@ -20,6 +20,7 @@ void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
2020
void fn_streaming(void) __arm_streaming { was_inlined(); }
2121
__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
2222
__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
23+
__arm_new("zt0") void fn_streaming_new_zt0(void) __arm_streaming { was_inlined(); }
2324

2425
FN_ATTR
2526
void caller(void) {
@@ -28,6 +29,7 @@ void caller(void) {
2829
STMT_ATTR fn_streaming();
2930
STMT_ATTR fn_locally_streaming();
3031
STMT_ATTR fn_streaming_new_za();
32+
STMT_ATTR fn_streaming_new_zt0();
3133
}
3234
// CHECK-LABEL: void @caller()
3335
// CHECK-NEXT: entry:
@@ -36,13 +38,15 @@ void caller(void) {
3638
// CHECK-NEXT: call void @fn_streaming
3739
// CHECK-NEXT: call void @fn_locally_streaming
3840
// CHECK-NEXT: call void @fn_streaming_new_za
41+
// CHECK-NEXT: call void @fn_streaming_new_zt0
3942

4043
FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
4144
STMT_ATTR fn();
4245
STMT_ATTR fn_streaming_compatible();
4346
STMT_ATTR fn_streaming();
4447
STMT_ATTR fn_locally_streaming();
4548
STMT_ATTR fn_streaming_new_za();
49+
STMT_ATTR fn_streaming_new_zt0();
4650
}
4751
// CHECK-LABEL: void @caller_streaming_compatible()
4852
// CHECK-NEXT: entry:
@@ -51,13 +55,15 @@ FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
5155
// CHECK-NEXT: call void @fn_streaming
5256
// CHECK-NEXT: call void @fn_locally_streaming
5357
// CHECK-NEXT: call void @fn_streaming_new_za
58+
// CHECK-NEXT: call void @fn_streaming_new_zt0
5459

5560
FN_ATTR void caller_streaming(void) __arm_streaming {
5661
STMT_ATTR fn();
5762
STMT_ATTR fn_streaming_compatible();
5863
STMT_ATTR fn_streaming();
5964
STMT_ATTR fn_locally_streaming();
6065
STMT_ATTR fn_streaming_new_za();
66+
STMT_ATTR fn_streaming_new_zt0();
6167
}
6268
// CHECK-LABEL: void @caller_streaming()
6369
// CHECK-NEXT: entry:
@@ -66,6 +72,7 @@ FN_ATTR void caller_streaming(void) __arm_streaming {
6672
// CHECK-NEXT: call void @was_inlined
6773
// CHECK-NEXT: call void @was_inlined
6874
// CHECK-NEXT: call void @fn_streaming_new_za
75+
// CHECK-NEXT: call void @fn_streaming_new_zt0
6976

7077
FN_ATTR __arm_locally_streaming
7178
void caller_locally_streaming(void) {
@@ -74,6 +81,7 @@ void caller_locally_streaming(void) {
7481
STMT_ATTR fn_streaming();
7582
STMT_ATTR fn_locally_streaming();
7683
STMT_ATTR fn_streaming_new_za();
84+
STMT_ATTR fn_streaming_new_zt0();
7785
}
7886
// CHECK-LABEL: void @caller_locally_streaming()
7987
// CHECK-NEXT: entry:
@@ -82,3 +90,4 @@ void caller_locally_streaming(void) {
8290
// CHECK-NEXT: call void @was_inlined
8391
// CHECK-NEXT: call void @was_inlined
8492
// CHECK-NEXT: call void @fn_streaming_new_za
93+
// CHECK-NEXT: call void @fn_streaming_new_zt0

clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_NONE %s
2-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_COMPATIBLE %s
3-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_STREAMING %s
4-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_LOCALLY %s
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_NONE %s
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_COMPATIBLE %s
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_STREAMING %s
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_LOCALLY %s
55

66
// REQUIRES: aarch64-registered-target
77

@@ -10,13 +10,17 @@ __ai void inlined_fn(void) {}
1010
__ai void inlined_fn_streaming_compatible(void) __arm_streaming_compatible {}
1111
__ai void inlined_fn_streaming(void) __arm_streaming {}
1212
__ai __arm_locally_streaming void inlined_fn_local(void) {}
13+
__ai __arm_new("za") void inlined_fn_za(void) {}
14+
__ai __arm_new("zt0") void inlined_fn_zt0(void) {}
1315

1416
#ifdef TEST_NONE
1517
void caller(void) {
1618
inlined_fn();
1719
inlined_fn_streaming_compatible();
1820
inlined_fn_streaming(); // expected-error {{always_inline function 'inlined_fn_streaming' and its caller 'caller' have mismatching streaming attributes}}
1921
inlined_fn_local(); // expected-error {{always_inline function 'inlined_fn_local' and its caller 'caller' have mismatching streaming attributes}}
22+
inlined_fn_za(); // expected-error {{always_inline function 'inlined_fn_za' has new za state}}
23+
inlined_fn_zt0(); // expected-error {{always_inline function 'inlined_fn_zt0' has new zt0 state}}
2024
}
2125
#endif
2226

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
259259
CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
260260
}
261261

262-
if (CalleeAttrs.isNewZA())
262+
if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0())
263263
return false;
264264

265265
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||

llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -391,9 +391,33 @@ define void @nonzt0_callee() {
391391
ret void
392392
}
393393

394+
define void @new_zt0_callee() "aarch64_new_zt0" {
395+
; CHECK-LABEL: define void @new_zt0_callee
396+
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
397+
; CHECK-NEXT: call void asm sideeffect "
398+
; CHECK-NEXT: call void @inlined_body()
399+
; CHECK-NEXT: ret void
400+
;
401+
call void asm sideeffect "; inlineasm", ""()
402+
call void @inlined_body()
403+
ret void
404+
}
405+
406+
define void @nonzt0_caller_new_zt0_callee_dont_inline() {
407+
; CHECK-LABEL: define void @nonzt0_caller_new_zt0_callee_dont_inline
408+
; CHECK-SAME: () #[[ATTR0]] {
409+
; CHECK-NEXT: entry:
410+
; CHECK-NEXT: call void @new_zt0_callee()
411+
; CHECK-NEXT: ret void
412+
;
413+
entry:
414+
call void @new_zt0_callee()
415+
ret void
416+
}
417+
394418
define void @shared_zt0_caller_nonzt0_callee_dont_inline() "aarch64_inout_zt0" {
395419
; CHECK-LABEL: define void @shared_zt0_caller_nonzt0_callee_dont_inline
396-
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
420+
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
397421
; CHECK-NEXT: call void @nonzt0_callee()
398422
; CHECK-NEXT: ret void
399423
;
@@ -403,7 +427,7 @@ define void @shared_zt0_caller_nonzt0_callee_dont_inline() "aarch64_inout_zt0" {
403427

404428
define void @shared_zt0_callee() "aarch64_inout_zt0" {
405429
; CHECK-LABEL: define void @shared_zt0_callee
406-
; CHECK-SAME: () #[[ATTR4]] {
430+
; CHECK-SAME: () #[[ATTR5]] {
407431
; CHECK-NEXT: call void asm sideeffect "
408432
; CHECK-NEXT: call void @inlined_body()
409433
; CHECK-NEXT: ret void
@@ -415,7 +439,7 @@ define void @shared_zt0_callee() "aarch64_inout_zt0" {
415439

416440
define void @shared_zt0_caller_shared_zt0_callee_inline() "aarch64_inout_zt0" {
417441
; CHECK-LABEL: define void @shared_zt0_caller_shared_zt0_callee_inline
418-
; CHECK-SAME: () #[[ATTR4]] {
442+
; CHECK-SAME: () #[[ATTR5]] {
419443
; CHECK-NEXT: call void asm sideeffect "
420444
; CHECK-NEXT: call void @inlined_body()
421445
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)