Skip to content

Commit 7ee2c72

Browse files
[AArch64] Mark aarch64_set_fpmr as IntrWriteMem (#146353)
llvm.aarch64.set.fpmr only writes to inaccessible memory. The original patch tagged the intrinsic with IntrInaccessibleMemOnly but omitted IntrWriteMem, so it was conservatively treated as both reading and writing that memory. This commit adds IntrWriteMem alongside the existing IntrInaccessibleMemOnly so the optimiser can treat the intrinsic as write-only, i.e. memory(inaccessiblemem: write).
1 parent d84df61 commit 7ee2c72

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Check memory attribute for FP8 function
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +ssve-fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
6+
7+
#include <arm_neon.h>
8+
#include <arm_sme.h>
9+
10+
11+
// SIMD
12+
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) {
13+
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
14+
}
15+
16+
// SVE
17+
svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) __arm_streaming {
18+
return svcvtlt2_f16_mf8_fpm(zn, fpm);
19+
}
20+
21+
// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR3:#.*]]
22+
// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR4:#.*]]
23+
24+
25+
// SME
26+
svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming {
27+
return svmlalltt_lane_f32_mf8_fpm(zda, zn, zm, 7, fpm);
28+
}
29+
30+
// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR4]]
31+
32+
// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR4]]
33+
34+
// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}}
35+
// CHECK: attributes [[ATTR2:#.*]] = {{{.*}}}
36+
// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
37+
// CHECK: attributes [[ATTR4]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: read) }

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
761761
class RNDR_Intrinsic
762762
: DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
763763
class FPMR_Set_Intrinsic
764-
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>;
764+
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
765765
}
766766

767767
// FP environment registers.

0 commit comments

Comments
 (0)