Skip to content

Commit 825ad86

Browse files
authored
[DAG] Fold nested add(add(reduce(a), b), add(reduce(c), d)) (#115150)
This patch reassociates `add(add(vecreduce(a), b), add(vecreduce(c), d))` into `add(vecreduce(add(a, c)), add(b, d))`, to combine the reductions into a single node. This comes up after unrolling vectorized loops. There is another small change that moves the reassociateReduction call in the fadd combine out of the AllowNewConst block, as no new constants will be created and it should be OK to perform the combine later, after legalization.
1 parent db72f6c commit 825ad86

File tree

3 files changed

+149
-201
lines changed

3 files changed

+149
-201
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,40 @@ SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
13461346
DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
13471347
N0.getOperand(0), N1.getOperand(0)));
13481348
}
1349+
1350+
// Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1351+
// op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1352+
// single node.
1353+
SDValue A, B, C, D, RedA, RedB;
1354+
if (sd_match(N0, m_OneUse(m_c_BinOp(
1355+
Opc,
1356+
m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1357+
m_Value(RedA)),
1358+
m_Value(B)))) &&
1359+
sd_match(N1, m_OneUse(m_c_BinOp(
1360+
Opc,
1361+
m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1362+
m_Value(RedB)),
1363+
m_Value(D)))) &&
1364+
!sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1365+
!sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1366+
A.getValueType() == C.getValueType() &&
1367+
hasOperation(Opc, A.getValueType()) &&
1368+
TLI.shouldReassociateReduction(RedOpc, VT)) {
1369+
if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1370+
(!N0->getFlags().hasAllowReassociation() ||
1371+
!N1->getFlags().hasAllowReassociation() ||
1372+
!RedA->getFlags().hasAllowReassociation() ||
1373+
!RedB->getFlags().hasAllowReassociation()))
1374+
return SDValue();
1375+
SelectionDAG::FlagInserter FlagsInserter(
1376+
DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1377+
RedB->getFlags());
1378+
SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1379+
SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1380+
SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1381+
return DAG.getNode(Opc, DL, VT, Red, Op2);
1382+
}
13491383
return SDValue();
13501384
}
13511385

@@ -17555,12 +17589,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
1755517589
DAG.getConstantFP(4.0, DL, VT));
1755617590
}
1755717591
}
17592+
} // enable-unsafe-fp-math && AllowNewConst
1755817593

17594+
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17595+
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
1755917596
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
1756017597
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
1756117598
VT, N0, N1, Flags))
1756217599
return SD;
17563-
} // enable-unsafe-fp-math
17600+
}
1756417601

1756517602
// FADD -> FMA combines:
1756617603
if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {

llvm/test/CodeGen/AArch64/double_reduct.ll

Lines changed: 52 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -833,13 +833,11 @@ define i32 @smax_i32_same(<4 x i32> %a, <4 x i32> %b) {
833833
define float @nested_fadd_f32(<4 x float> %a, <4 x float> %b, float %c, float %d) {
834834
; CHECK-SD-LABEL: nested_fadd_f32:
835835
; CHECK-SD: // %bb.0:
836-
; CHECK-SD-NEXT: faddp v1.4s, v1.4s, v1.4s
836+
; CHECK-SD-NEXT: fadd v0.4s, v0.4s, v1.4s
837+
; CHECK-SD-NEXT: fadd s2, s2, s3
837838
; CHECK-SD-NEXT: faddp v0.4s, v0.4s, v0.4s
838-
; CHECK-SD-NEXT: faddp s1, v1.2s
839839
; CHECK-SD-NEXT: faddp s0, v0.2s
840-
; CHECK-SD-NEXT: fadd s1, s1, s3
841840
; CHECK-SD-NEXT: fadd s0, s0, s2
842-
; CHECK-SD-NEXT: fadd s0, s0, s1
843841
; CHECK-SD-NEXT: ret
844842
;
845843
; CHECK-GI-LABEL: nested_fadd_f32:
@@ -905,15 +903,12 @@ define float @nested_fadd_f32_slow(<4 x float> %a, <4 x float> %b, float %c, flo
905903
define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d) {
906904
; CHECK-SD-LABEL: nested_mul_f32:
907905
; CHECK-SD: // %bb.0:
908-
; CHECK-SD-NEXT: ext v4.16b, v1.16b, v1.16b, #8
909-
; CHECK-SD-NEXT: ext v5.16b, v0.16b, v0.16b, #8
910-
; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v4.2s
911-
; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v5.2s
912-
; CHECK-SD-NEXT: fmul s1, s1, v1.s[1]
906+
; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
907+
; CHECK-SD-NEXT: fmul s2, s2, s3
908+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
909+
; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
913910
; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
914-
; CHECK-SD-NEXT: fmul s1, s1, s3
915911
; CHECK-SD-NEXT: fmul s0, s0, s2
916-
; CHECK-SD-NEXT: fmul s0, s0, s1
917912
; CHECK-SD-NEXT: ret
918913
;
919914
; CHECK-GI-LABEL: nested_mul_f32:
@@ -941,12 +936,10 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d)
941936
define i32 @nested_add_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
942937
; CHECK-SD-LABEL: nested_add_i32:
943938
; CHECK-SD: // %bb.0:
944-
; CHECK-SD-NEXT: addv s1, v1.4s
939+
; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
940+
; CHECK-SD-NEXT: add w8, w0, w1
945941
; CHECK-SD-NEXT: addv s0, v0.4s
946-
; CHECK-SD-NEXT: fmov w8, s1
947942
; CHECK-SD-NEXT: fmov w9, s0
948-
; CHECK-SD-NEXT: add w9, w9, w0
949-
; CHECK-SD-NEXT: add w8, w8, w1
950943
; CHECK-SD-NEXT: add w0, w9, w8
951944
; CHECK-SD-NEXT: ret
952945
;
@@ -971,12 +964,10 @@ define i32 @nested_add_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
971964
define i32 @nested_add_c1_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
972965
; CHECK-SD-LABEL: nested_add_c1_i32:
973966
; CHECK-SD: // %bb.0:
974-
; CHECK-SD-NEXT: addv s1, v1.4s
967+
; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
968+
; CHECK-SD-NEXT: add w8, w0, w1
975969
; CHECK-SD-NEXT: addv s0, v0.4s
976-
; CHECK-SD-NEXT: fmov w8, s1
977970
; CHECK-SD-NEXT: fmov w9, s0
978-
; CHECK-SD-NEXT: add w9, w0, w9
979-
; CHECK-SD-NEXT: add w8, w8, w1
980971
; CHECK-SD-NEXT: add w0, w9, w8
981972
; CHECK-SD-NEXT: ret
982973
;
@@ -1001,12 +992,10 @@ define i32 @nested_add_c1_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1001992
define i32 @nested_add_c2_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1002993
; CHECK-SD-LABEL: nested_add_c2_i32:
1003994
; CHECK-SD: // %bb.0:
1004-
; CHECK-SD-NEXT: addv s1, v1.4s
995+
; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
996+
; CHECK-SD-NEXT: add w8, w0, w1
1005997
; CHECK-SD-NEXT: addv s0, v0.4s
1006-
; CHECK-SD-NEXT: fmov w8, s1
1007998
; CHECK-SD-NEXT: fmov w9, s0
1008-
; CHECK-SD-NEXT: add w9, w9, w0
1009-
; CHECK-SD-NEXT: add w8, w1, w8
1010999
; CHECK-SD-NEXT: add w0, w9, w8
10111000
; CHECK-SD-NEXT: ret
10121001
;
@@ -1065,19 +1054,14 @@ define i32 @nested_add_manyreduct_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
10651054
define i32 @nested_mul_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
10661055
; CHECK-SD-LABEL: nested_mul_i32:
10671056
; CHECK-SD: // %bb.0:
1068-
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1069-
; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1070-
; CHECK-SD-NEXT: mul v0.2s, v0.2s, v3.2s
1071-
; CHECK-SD-NEXT: mul v1.2s, v1.2s, v2.2s
1072-
; CHECK-SD-NEXT: mov w8, v0.s[1]
1057+
; CHECK-SD-NEXT: mul v0.4s, v0.4s, v1.4s
1058+
; CHECK-SD-NEXT: mul w8, w0, w1
1059+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1060+
; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
1061+
; CHECK-SD-NEXT: mov w9, v0.s[1]
10731062
; CHECK-SD-NEXT: fmov w10, s0
1074-
; CHECK-SD-NEXT: mov w9, v1.s[1]
1075-
; CHECK-SD-NEXT: mul w8, w10, w8
1076-
; CHECK-SD-NEXT: fmov w10, s1
10771063
; CHECK-SD-NEXT: mul w9, w10, w9
1078-
; CHECK-SD-NEXT: mul w8, w8, w0
1079-
; CHECK-SD-NEXT: mul w9, w9, w1
1080-
; CHECK-SD-NEXT: mul w0, w8, w9
1064+
; CHECK-SD-NEXT: mul w0, w9, w8
10811065
; CHECK-SD-NEXT: ret
10821066
;
10831067
; CHECK-GI-LABEL: nested_mul_i32:
@@ -1107,19 +1091,14 @@ define i32 @nested_mul_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11071091
define i32 @nested_and_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11081092
; CHECK-SD-LABEL: nested_and_i32:
11091093
; CHECK-SD: // %bb.0:
1110-
; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1111-
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1112-
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
1113-
; CHECK-SD-NEXT: and v0.8b, v0.8b, v3.8b
1114-
; CHECK-SD-NEXT: fmov x8, d1
1094+
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
1095+
; CHECK-SD-NEXT: and w8, w0, w1
1096+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1097+
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
11151098
; CHECK-SD-NEXT: fmov x9, d0
11161099
; CHECK-SD-NEXT: lsr x10, x9, #32
1117-
; CHECK-SD-NEXT: lsr x11, x8, #32
1118-
; CHECK-SD-NEXT: and w9, w9, w0
1119-
; CHECK-SD-NEXT: and w8, w8, w1
1120-
; CHECK-SD-NEXT: and w9, w9, w10
1121-
; CHECK-SD-NEXT: and w8, w8, w11
1122-
; CHECK-SD-NEXT: and w0, w9, w8
1100+
; CHECK-SD-NEXT: and w8, w9, w8
1101+
; CHECK-SD-NEXT: and w0, w8, w10
11231102
; CHECK-SD-NEXT: ret
11241103
;
11251104
; CHECK-GI-LABEL: nested_and_i32:
@@ -1149,19 +1128,14 @@ define i32 @nested_and_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11491128
define i32 @nested_or_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11501129
; CHECK-SD-LABEL: nested_or_i32:
11511130
; CHECK-SD: // %bb.0:
1152-
; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1153-
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1154-
; CHECK-SD-NEXT: orr v1.8b, v1.8b, v2.8b
1155-
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v3.8b
1156-
; CHECK-SD-NEXT: fmov x8, d1
1131+
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
1132+
; CHECK-SD-NEXT: orr w8, w0, w1
1133+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1134+
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
11571135
; CHECK-SD-NEXT: fmov x9, d0
11581136
; CHECK-SD-NEXT: lsr x10, x9, #32
1159-
; CHECK-SD-NEXT: lsr x11, x8, #32
1160-
; CHECK-SD-NEXT: orr w9, w9, w0
1161-
; CHECK-SD-NEXT: orr w8, w8, w1
1162-
; CHECK-SD-NEXT: orr w9, w9, w10
1163-
; CHECK-SD-NEXT: orr w8, w8, w11
1164-
; CHECK-SD-NEXT: orr w0, w9, w8
1137+
; CHECK-SD-NEXT: orr w8, w9, w8
1138+
; CHECK-SD-NEXT: orr w0, w8, w10
11651139
; CHECK-SD-NEXT: ret
11661140
;
11671141
; CHECK-GI-LABEL: nested_or_i32:
@@ -1191,19 +1165,14 @@ define i32 @nested_or_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11911165
define i32 @nested_xor_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11921166
; CHECK-SD-LABEL: nested_xor_i32:
11931167
; CHECK-SD: // %bb.0:
1194-
; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1195-
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1196-
; CHECK-SD-NEXT: eor v1.8b, v1.8b, v2.8b
1197-
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v3.8b
1198-
; CHECK-SD-NEXT: fmov x8, d1
1168+
; CHECK-SD-NEXT: eor v0.16b, v0.16b, v1.16b
1169+
; CHECK-SD-NEXT: eor w8, w0, w1
1170+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1171+
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
11991172
; CHECK-SD-NEXT: fmov x9, d0
12001173
; CHECK-SD-NEXT: lsr x10, x9, #32
1201-
; CHECK-SD-NEXT: lsr x11, x8, #32
1202-
; CHECK-SD-NEXT: eor w9, w9, w0
1203-
; CHECK-SD-NEXT: eor w8, w8, w1
1204-
; CHECK-SD-NEXT: eor w9, w9, w10
1205-
; CHECK-SD-NEXT: eor w8, w8, w11
1206-
; CHECK-SD-NEXT: eor w0, w9, w8
1174+
; CHECK-SD-NEXT: eor w8, w9, w8
1175+
; CHECK-SD-NEXT: eor w0, w8, w10
12071176
; CHECK-SD-NEXT: ret
12081177
;
12091178
; CHECK-GI-LABEL: nested_xor_i32:
@@ -1233,14 +1202,11 @@ define i32 @nested_xor_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12331202
define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12341203
; CHECK-SD-LABEL: nested_smin_i32:
12351204
; CHECK-SD: // %bb.0:
1205+
; CHECK-SD-NEXT: smin v0.4s, v0.4s, v1.4s
1206+
; CHECK-SD-NEXT: cmp w0, w1
1207+
; CHECK-SD-NEXT: csel w8, w0, w1, lt
12361208
; CHECK-SD-NEXT: sminv s0, v0.4s
1237-
; CHECK-SD-NEXT: sminv s1, v1.4s
12381209
; CHECK-SD-NEXT: fmov w9, s0
1239-
; CHECK-SD-NEXT: fmov w8, s1
1240-
; CHECK-SD-NEXT: cmp w9, w0
1241-
; CHECK-SD-NEXT: csel w9, w9, w0, lt
1242-
; CHECK-SD-NEXT: cmp w8, w1
1243-
; CHECK-SD-NEXT: csel w8, w8, w1, lt
12441210
; CHECK-SD-NEXT: cmp w9, w8
12451211
; CHECK-SD-NEXT: csel w0, w9, w8, lt
12461212
; CHECK-SD-NEXT: ret
@@ -1269,14 +1235,11 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12691235
define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12701236
; CHECK-SD-LABEL: nested_smax_i32:
12711237
; CHECK-SD: // %bb.0:
1238+
; CHECK-SD-NEXT: smax v0.4s, v0.4s, v1.4s
1239+
; CHECK-SD-NEXT: cmp w0, w1
1240+
; CHECK-SD-NEXT: csel w8, w0, w1, gt
12721241
; CHECK-SD-NEXT: smaxv s0, v0.4s
1273-
; CHECK-SD-NEXT: smaxv s1, v1.4s
12741242
; CHECK-SD-NEXT: fmov w9, s0
1275-
; CHECK-SD-NEXT: fmov w8, s1
1276-
; CHECK-SD-NEXT: cmp w9, w0
1277-
; CHECK-SD-NEXT: csel w9, w9, w0, gt
1278-
; CHECK-SD-NEXT: cmp w8, w1
1279-
; CHECK-SD-NEXT: csel w8, w8, w1, gt
12801243
; CHECK-SD-NEXT: cmp w9, w8
12811244
; CHECK-SD-NEXT: csel w0, w9, w8, gt
12821245
; CHECK-SD-NEXT: ret
@@ -1305,14 +1268,11 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13051268
define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13061269
; CHECK-SD-LABEL: nested_umin_i32:
13071270
; CHECK-SD: // %bb.0:
1271+
; CHECK-SD-NEXT: umin v0.4s, v0.4s, v1.4s
1272+
; CHECK-SD-NEXT: cmp w0, w1
1273+
; CHECK-SD-NEXT: csel w8, w0, w1, lo
13081274
; CHECK-SD-NEXT: uminv s0, v0.4s
1309-
; CHECK-SD-NEXT: uminv s1, v1.4s
13101275
; CHECK-SD-NEXT: fmov w9, s0
1311-
; CHECK-SD-NEXT: fmov w8, s1
1312-
; CHECK-SD-NEXT: cmp w9, w0
1313-
; CHECK-SD-NEXT: csel w9, w9, w0, lo
1314-
; CHECK-SD-NEXT: cmp w8, w1
1315-
; CHECK-SD-NEXT: csel w8, w8, w1, lo
13161276
; CHECK-SD-NEXT: cmp w9, w8
13171277
; CHECK-SD-NEXT: csel w0, w9, w8, lo
13181278
; CHECK-SD-NEXT: ret
@@ -1341,14 +1301,11 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13411301
define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13421302
; CHECK-SD-LABEL: nested_umax_i32:
13431303
; CHECK-SD: // %bb.0:
1304+
; CHECK-SD-NEXT: umax v0.4s, v0.4s, v1.4s
1305+
; CHECK-SD-NEXT: cmp w0, w1
1306+
; CHECK-SD-NEXT: csel w8, w0, w1, hi
13441307
; CHECK-SD-NEXT: umaxv s0, v0.4s
1345-
; CHECK-SD-NEXT: umaxv s1, v1.4s
13461308
; CHECK-SD-NEXT: fmov w9, s0
1347-
; CHECK-SD-NEXT: fmov w8, s1
1348-
; CHECK-SD-NEXT: cmp w9, w0
1349-
; CHECK-SD-NEXT: csel w9, w9, w0, hi
1350-
; CHECK-SD-NEXT: cmp w8, w1
1351-
; CHECK-SD-NEXT: csel w8, w8, w1, hi
13521309
; CHECK-SD-NEXT: cmp w9, w8
13531310
; CHECK-SD-NEXT: csel w0, w9, w8, hi
13541311
; CHECK-SD-NEXT: ret
@@ -1377,11 +1334,10 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13771334
define float @nested_fmin_float(<4 x float> %a, <4 x float> %b, float %c, float %d) {
13781335
; CHECK-SD-LABEL: nested_fmin_float:
13791336
; CHECK-SD: // %bb.0:
1380-
; CHECK-SD-NEXT: fminnmv s1, v1.4s
1337+
; CHECK-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s
1338+
; CHECK-SD-NEXT: fminnm s2, s2, s3
13811339
; CHECK-SD-NEXT: fminnmv s0, v0.4s
1382-
; CHECK-SD-NEXT: fminnm s1, s1, s3
13831340
; CHECK-SD-NEXT: fminnm s0, s0, s2
1384-
; CHECK-SD-NEXT: fminnm s0, s0, s1
13851341
; CHECK-SD-NEXT: ret
13861342
;
13871343
; CHECK-GI-LABEL: nested_fmin_float:
@@ -1403,11 +1359,10 @@ define float @nested_fmin_float(<4 x float> %a, <4 x float> %b, float %c, float
14031359
define float @nested_fmax_float(<4 x float> %a, <4 x float> %b, float %c, float %d) {
14041360
; CHECK-SD-LABEL: nested_fmax_float:
14051361
; CHECK-SD: // %bb.0:
1406-
; CHECK-SD-NEXT: fmaxnmv s1, v1.4s
1362+
; CHECK-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
1363+
; CHECK-SD-NEXT: fmaxnm s2, s2, s3
14071364
; CHECK-SD-NEXT: fmaxnmv s0, v0.4s
1408-
; CHECK-SD-NEXT: fmaxnm s1, s1, s3
14091365
; CHECK-SD-NEXT: fmaxnm s0, s0, s2
1410-
; CHECK-SD-NEXT: fmaxnm s0, s0, s1
14111366
; CHECK-SD-NEXT: ret
14121367
;
14131368
; CHECK-GI-LABEL: nested_fmax_float:

0 commit comments

Comments
 (0)