Skip to content

Commit 9fcea2e

Browse files
committed
[ARM] Add neon vector support for roundeven
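As per #142559, this marks froundeven as legal for the NEON vector types (v2f32, v4f32, v4f16 and v8f16), removes the target-specific arm.neon.vrintn intrinsic, and auto-upgrades existing uses of it to the generic llvm.roundeven intrinsic.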
1 parent 258c048 commit 9fcea2e

File tree

10 files changed

+39
-98
lines changed

10 files changed

+39
-98
lines changed

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -845,8 +845,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
845845
NEONMAP0(vrndiq_v),
846846
NEONMAP1(vrndm_v, floor, Add1ArgType),
847847
NEONMAP1(vrndmq_v, floor, Add1ArgType),
848-
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
849-
NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
848+
NEONMAP1(vrndn_v, roundeven, Add1ArgType),
849+
NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
850850
NEONMAP1(vrndp_v, ceil, Add1ArgType),
851851
NEONMAP1(vrndpq_v, ceil, Add1ArgType),
852852
NEONMAP1(vrndq_v, trunc, Add1ArgType),
@@ -3132,7 +3132,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
31323132
case NEON::BI__builtin_neon_vrndns_f32: {
31333133
Value *Arg = EmitScalarExpr(E->getArg(0));
31343134
llvm::Type *Tys[] = {Arg->getType()};
3135-
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
3135+
Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
31363136
return Builder.CreateCall(F, {Arg}, "vrndn"); }
31373137

31383138
case NEON::BI__builtin_neon_vset_lane_i8:

clang/test/CodeGen/arm-neon-directed-rounding.c

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
116116
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
117117
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
118118
// CHECK-A32-NEXT: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
119-
// CHECK-A32-NEXT: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]])
119+
// CHECK-A32-NEXT: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> [[VRNDN_V_I]])
120120
// CHECK-A32-NEXT: [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8>
121121
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x i32>
122122
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -141,7 +141,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
141141
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
142142
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
143143
// CHECK-A32-NEXT: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
144-
// CHECK-A32-NEXT: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]])
144+
// CHECK-A32-NEXT: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VRNDNQ_V_I]])
145145
// CHECK-A32-NEXT: [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8>
146146
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x i32>
147147
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
@@ -310,24 +310,18 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
310310
return vrndq_f32(a);
311311
}
312312

313-
// CHECK-A32-LABEL: define dso_local float @test_vrndns_f32(
314-
// CHECK-A32-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
315-
// CHECK-A32-NEXT: [[ENTRY:.*:]]
316-
// CHECK-A32-NEXT: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float [[A]])
317-
// CHECK-A32-NEXT: ret float [[VRNDN_I]]
318-
//
319-
// CHECK-A64-LABEL: define dso_local float @test_vrndns_f32(
320-
// CHECK-A64-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
321-
// CHECK-A64-NEXT: [[ENTRY:.*:]]
322-
// CHECK-A64-NEXT: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
323-
// CHECK-A64-NEXT: ret float [[VRNDN_I]]
313+
// CHECK-LABEL: define dso_local float @test_vrndns_f32(
314+
// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
315+
// CHECK-NEXT: [[ENTRY:.*:]]
316+
// CHECK-NEXT: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
317+
// CHECK-NEXT: ret float [[VRNDN_I]]
324318
//
325319
float32_t test_vrndns_f32(float32_t a) {
326320
return vrndns_f32(a);
327321
}
328322

329323
// CHECK-LABEL: define dso_local <2 x float> @test_vrndi_f32(
330-
// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
324+
// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
331325
// CHECK-NEXT: [[ENTRY:.*:]]
332326
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
333327
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>

clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
618618
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
619619
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
620620
// CHECK-NEXT: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
621-
// CHECK-NEXT: [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> [[VRNDN_V_I]])
621+
// CHECK-NEXT: [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> [[VRNDN_V_I]])
622622
// CHECK-NEXT: [[VRNDN_V2_I:%.*]] = bitcast <4 x half> [[VRNDN_V1_I]] to <8 x i8>
623623
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <4 x i16>
624624
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -634,7 +634,7 @@ float16x4_t test_vrndn_f16(float16x4_t a) {
634634
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
635635
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
636636
// CHECK-NEXT: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
637-
// CHECK-NEXT: [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> [[VRNDNQ_V_I]])
637+
// CHECK-NEXT: [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[VRNDNQ_V_I]])
638638
// CHECK-NEXT: [[VRNDNQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDNQ_V1_I]] to <16 x i8>
639639
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <8 x i16>
640640
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,6 @@ class Neon_3Arg_Long_Intrinsic
451451
LLVMTruncatedType<0>],
452452
[IntrNoMem]>;
453453

454-
class Neon_1FloatArg_Intrinsic
455-
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
456-
457454
class Neon_CvtFxToFP_Intrinsic
458455
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
459456
[IntrNoMem]>;
@@ -677,9 +674,6 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
677674
def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
678675
def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
679676

680-
// Vector and Scalar Rounding.
681-
def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
682-
683677
// De-interleaving vector loads from N-element structures.
684678
// Source operands are the address and alignment.
685679
def int_arm_neon_vld1 : DefaultAttrsIntrinsic<

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
720720
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
721721
.StartsWith("vqsubu.", Intrinsic::usub_sat)
722722
.StartsWith("vrinta.", Intrinsic::round)
723+
.StartsWith("vrintn.", Intrinsic::roundeven)
723724
.StartsWith("vrintm.", Intrinsic::floor)
724725
.StartsWith("vrintp.", Intrinsic::ceil)
725726
.StartsWith("vrintx.", Intrinsic::rint)

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
15481548
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
15491549
setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
15501550
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1551+
setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
1552+
setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
15511553
setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
15521554
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
15531555
setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
@@ -1571,6 +1573,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
15711573
setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
15721574
setOperationAction(ISD::FROUND, MVT::v4f16, Legal);
15731575
setOperationAction(ISD::FROUND, MVT::v8f16, Legal);
1576+
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);
1577+
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);
15741578
setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);
15751579
setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);
15761580
setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7312,7 +7312,7 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
73127312
}
73137313
}
73147314

7315-
defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7315+
defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>;
73167316
defm VRINTXN : VRINT_FPI<"x", 0b001, frint>;
73177317
defm VRINTAN : VRINT_FPI<"a", 0b010, fround>;
73187318
defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>;

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,13 +1135,8 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
11351135
Requires<[HasFPARMv8,HasDPVFP]>;
11361136
}
11371137

1138-
// Match either froundeven or int_arm_neon_vrintn
1139-
def vrintn_or_froundeven : PatFrags<(ops node:$src),
1140-
[(int_arm_neon_vrintn node:$src),
1141-
(froundeven node:$src)]>;
1142-
11431138
defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
1144-
defm VRINTN : vrint_inst_anpm<"n", 0b01, vrintn_or_froundeven>;
1139+
defm VRINTN : vrint_inst_anpm<"n", 0b01, froundeven>;
11451140
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
11461141
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
11471142

llvm/test/CodeGen/ARM/vrint.ll

Lines changed: 4 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,21 +1084,7 @@ define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
10841084
;
10851085
; CHECK-FP16-LABEL: frintn_4h:
10861086
; CHECK-FP16: @ %bb.0:
1087-
; CHECK-FP16-NEXT: vmovx.f16 s2, s0
1088-
; CHECK-FP16-NEXT: vrintn.f16 s2, s2
1089-
; CHECK-FP16-NEXT: vmov r0, s2
1090-
; CHECK-FP16-NEXT: vrintn.f16 s2, s0
1091-
; CHECK-FP16-NEXT: vmov r1, s2
1092-
; CHECK-FP16-NEXT: vrintn.f16 s2, s1
1093-
; CHECK-FP16-NEXT: vmovx.f16 s0, s1
1094-
; CHECK-FP16-NEXT: vrintn.f16 s0, s0
1095-
; CHECK-FP16-NEXT: vmov.16 d16[0], r1
1096-
; CHECK-FP16-NEXT: vmov.16 d16[1], r0
1097-
; CHECK-FP16-NEXT: vmov r0, s2
1098-
; CHECK-FP16-NEXT: vmov.16 d16[2], r0
1099-
; CHECK-FP16-NEXT: vmov r0, s0
1100-
; CHECK-FP16-NEXT: vmov.16 d16[3], r0
1101-
; CHECK-FP16-NEXT: vorr d0, d16, d16
1087+
; CHECK-FP16-NEXT: vrintn.f16 d0, d0
11021088
; CHECK-FP16-NEXT: bx lr
11031089
%tmp3 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
11041090
ret <4 x half> %tmp3
@@ -1248,35 +1234,7 @@ define <8 x half> @frintn_8h(<8 x half> %A) nounwind {
12481234
;
12491235
; CHECK-FP16-LABEL: frintn_8h:
12501236
; CHECK-FP16: @ %bb.0:
1251-
; CHECK-FP16-NEXT: vmovx.f16 s4, s2
1252-
; CHECK-FP16-NEXT: vrintn.f16 s4, s4
1253-
; CHECK-FP16-NEXT: vmov r0, s4
1254-
; CHECK-FP16-NEXT: vrintn.f16 s4, s2
1255-
; CHECK-FP16-NEXT: vmov r1, s4
1256-
; CHECK-FP16-NEXT: vrintn.f16 s4, s3
1257-
; CHECK-FP16-NEXT: vmov.16 d17[0], r1
1258-
; CHECK-FP16-NEXT: vmov.16 d17[1], r0
1259-
; CHECK-FP16-NEXT: vmov r0, s4
1260-
; CHECK-FP16-NEXT: vmovx.f16 s4, s3
1261-
; CHECK-FP16-NEXT: vrintn.f16 s4, s4
1262-
; CHECK-FP16-NEXT: vmov.16 d17[2], r0
1263-
; CHECK-FP16-NEXT: vmov r0, s4
1264-
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
1265-
; CHECK-FP16-NEXT: vrintn.f16 s4, s4
1266-
; CHECK-FP16-NEXT: vmov.16 d17[3], r0
1267-
; CHECK-FP16-NEXT: vmov r0, s4
1268-
; CHECK-FP16-NEXT: vrintn.f16 s4, s0
1269-
; CHECK-FP16-NEXT: vmovx.f16 s0, s1
1270-
; CHECK-FP16-NEXT: vmov r1, s4
1271-
; CHECK-FP16-NEXT: vrintn.f16 s4, s1
1272-
; CHECK-FP16-NEXT: vrintn.f16 s0, s0
1273-
; CHECK-FP16-NEXT: vmov.16 d16[0], r1
1274-
; CHECK-FP16-NEXT: vmov.16 d16[1], r0
1275-
; CHECK-FP16-NEXT: vmov r0, s4
1276-
; CHECK-FP16-NEXT: vmov.16 d16[2], r0
1277-
; CHECK-FP16-NEXT: vmov r0, s0
1278-
; CHECK-FP16-NEXT: vmov.16 d16[3], r0
1279-
; CHECK-FP16-NEXT: vorr q0, q8, q8
1237+
; CHECK-FP16-NEXT: vrintn.f16 q0, q0
12801238
; CHECK-FP16-NEXT: bx lr
12811239
%tmp3 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
12821240
ret <8 x half> %tmp3
@@ -1302,9 +1260,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
13021260
;
13031261
; CHECK-LABEL: frintn_2s:
13041262
; CHECK: @ %bb.0:
1305-
; CHECK-NEXT: vrintn.f32 s3, s1
1306-
; CHECK-NEXT: vrintn.f32 s2, s0
1307-
; CHECK-NEXT: vmov.f64 d0, d1
1263+
; CHECK-NEXT: vrintn.f32 d0, d0
13081264
; CHECK-NEXT: bx lr
13091265
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
13101266
ret <2 x float> %tmp3
@@ -1336,11 +1292,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
13361292
;
13371293
; CHECK-LABEL: frintn_4s:
13381294
; CHECK: @ %bb.0:
1339-
; CHECK-NEXT: vrintn.f32 s7, s3
1340-
; CHECK-NEXT: vrintn.f32 s6, s2
1341-
; CHECK-NEXT: vrintn.f32 s5, s1
1342-
; CHECK-NEXT: vrintn.f32 s4, s0
1343-
; CHECK-NEXT: vorr q0, q1, q1
1295+
; CHECK-NEXT: vrintn.f32 q0, q0
13441296
; CHECK-NEXT: bx lr
13451297
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
13461298
ret <4 x float> %tmp3

llvm/test/CodeGen/ARM/vrintn.ll

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=armv8 -mattr=+neon %s -o - | FileCheck %s
23

34
declare float @llvm.arm.neon.vrintn.f32(float) nounwind readnone
45
declare <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float>) nounwind readnone
56
declare <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float>) nounwind readnone
67

7-
; CHECK-LABEL: vrintn_f32:
8-
; CHECK: vrintn.f32
98
define float @vrintn_f32(ptr %A) nounwind {
9+
; CHECK-LABEL: vrintn_f32:
10+
; CHECK: @ %bb.0:
11+
; CHECK-NEXT: vldr s0, [r0]
12+
; CHECK-NEXT: vrintn.f32 s0, s0
13+
; CHECK-NEXT: vmov r0, s0
14+
; CHECK-NEXT: bx lr
1015
%tmp1 = load float, ptr %A
1116
%tmp2 = call float @llvm.arm.neon.vrintn.f32(float %tmp1)
1217
ret float %tmp2
@@ -74,10 +79,9 @@ define <4 x half> @roundeven_4h(<4 x half> %A) nounwind {
7479
define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
7580
; CHECK-LABEL: roundeven_2s:
7681
; CHECK: @ %bb.0:
77-
; CHECK-NEXT: vmov d0, r0, r1
78-
; CHECK-NEXT: vrintn.f32 s3, s1
79-
; CHECK-NEXT: vrintn.f32 s2, s0
80-
; CHECK-NEXT: vmov r0, r1, d1
82+
; CHECK-NEXT: vmov d16, r0, r1
83+
; CHECK-NEXT: vrintn.f32 d16, d16
84+
; CHECK-NEXT: vmov r0, r1, d16
8185
; CHECK-NEXT: bx lr
8286
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
8387
ret <2 x float> %tmp3
@@ -86,14 +90,11 @@ define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
8690
define <4 x float> @roundeven_4s(<4 x float> %A) nounwind {
8791
; CHECK-LABEL: roundeven_4s:
8892
; CHECK: @ %bb.0:
89-
; CHECK-NEXT: vmov d1, r2, r3
90-
; CHECK-NEXT: vmov d0, r0, r1
91-
; CHECK-NEXT: vrintn.f32 s7, s3
92-
; CHECK-NEXT: vrintn.f32 s6, s2
93-
; CHECK-NEXT: vrintn.f32 s5, s1
94-
; CHECK-NEXT: vrintn.f32 s4, s0
95-
; CHECK-NEXT: vmov r2, r3, d3
96-
; CHECK-NEXT: vmov r0, r1, d2
93+
; CHECK-NEXT: vmov d17, r2, r3
94+
; CHECK-NEXT: vmov d16, r0, r1
95+
; CHECK-NEXT: vrintn.f32 q8, q8
96+
; CHECK-NEXT: vmov r0, r1, d16
97+
; CHECK-NEXT: vmov r2, r3, d17
9798
; CHECK-NEXT: bx lr
9899
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
99100
ret <4 x float> %tmp3

0 commit comments

Comments
 (0)