Skip to content

Commit 0d0182a

Browse files
committed
Fixups and an extra test case.
1 parent f633662 commit 0d0182a

File tree

2 files changed

+39
-8
lines changed

2 files changed

+39
-8
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6391,7 +6391,7 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
63916391

63926392
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
63936393
// reciprocal.
6394-
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
6394+
// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
63956395
bool CombinerHelper::matchRepeatedFPDivisor(
63966396
MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
63976397
assert(MI.getOpcode() == TargetOpcode::G_FDIV);
@@ -6413,19 +6413,18 @@ bool CombinerHelper::matchRepeatedFPDivisor(
64136413
// Exit early if the target does not want this transform or if there can't
64146414
// possibly be enough uses of the divisor to make the transform worthwhile.
64156415
unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6416-
64176416
if (!MinUses)
64186417
return false;
64196418

6420-
// Find all FDIV users of the same divisor. Use a set because duplicates may
6421-
// be present in the user list. For the moment we limit all instructions to a
6422-
// single BB and use the first Instr in MatchInfo as the dominating position.
6419+
// Find all FDIV users of the same divisor. For the moment we limit all
6420+
// instructions to a single BB and use the first Instr in MatchInfo as the
6421+
// dominating position.
64236422
MatchInfo.push_back(&MI);
64246423
for (auto &U : MRI.use_nodbg_instructions(Y)) {
64256424
if (&U == &MI || U.getParent() != MI.getParent())
64266425
continue;
64276426
if (U.getOpcode() == TargetOpcode::G_FDIV &&
6428-
U.getOperand(2).getReg() == Y) {
6427+
U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
64296428
// This division is eligible for optimization only if global unsafe math
64306429
// is enabled or if this division allows reciprocal formation.
64316430
if (UnsafeMath || U.getFlag(MachineInstr::MIFlag::FmArcp)) {
@@ -6448,13 +6447,14 @@ void CombinerHelper::applyRepeatedFPDivisor(
64486447
Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
64496448
LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
64506449
auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6451-
MatchInfo[0]->getOperand(2).getReg());
6450+
MatchInfo[0]->getOperand(2).getReg(),
6451+
MatchInfo[0]->getFlags());
64526452

64536453
// Replace all found divs with fmul instructions.
64546454
for (MachineInstr *MI : MatchInfo) {
64556455
Builder.setInsertPt(*MI->getParent(), MI);
64566456
Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6457-
Div->getOperand(0).getReg());
6457+
Div->getOperand(0).getReg(), MI->getFlags());
64586458
MI->eraseFromParent();
64596459
}
64606460
}

llvm/test/CodeGen/AArch64/fdiv-combine.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,36 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
103103
ret void
104104
}
105105

106+
define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
107+
; CHECK-SD-LABEL: four_fdiv_multi_float:
108+
; CHECK-SD: // %bb.0:
109+
; CHECK-SD-NEXT: fmov s4, #1.00000000
110+
; CHECK-SD-NEXT: fdiv s5, s4, s0
111+
; CHECK-SD-NEXT: fmul s4, s1, s5
112+
; CHECK-SD-NEXT: fmul s1, s2, s5
113+
; CHECK-SD-NEXT: fmul s2, s3, s5
114+
; CHECK-SD-NEXT: fmul s3, s0, s5
115+
; CHECK-SD-NEXT: fmov s0, s4
116+
; CHECK-SD-NEXT: b foo_4f
117+
;
118+
; CHECK-GI-LABEL: four_fdiv_multi_float:
119+
; CHECK-GI: // %bb.0:
120+
; CHECK-GI-NEXT: fmov s4, #1.00000000
121+
; CHECK-GI-NEXT: fdiv s5, s4, s0
122+
; CHECK-GI-NEXT: fdiv s4, s0, s0
123+
; CHECK-GI-NEXT: fmul s0, s1, s5
124+
; CHECK-GI-NEXT: fmul s1, s2, s5
125+
; CHECK-GI-NEXT: fmul s2, s3, s5
126+
; CHECK-GI-NEXT: fmov s3, s4
127+
; CHECK-GI-NEXT: b foo_4f
128+
%div = fdiv float %a, %D
129+
%div1 = fdiv float %b, %D
130+
%div2 = fdiv float %c, %D
131+
%div3 = fdiv float %D, %D
132+
tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
133+
ret void
134+
}
135+
106136
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
107137
; CHECK-LABEL: splat_three_fdiv_4xfloat:
108138
; CHECK: // %bb.0:
@@ -215,6 +245,7 @@ entry:
215245
}
216246

217247
declare void @foo_3f(float, float, float)
248+
declare void @foo_4f(float, float, float, float)
218249
declare void @foo_3d(double, double, double)
219250
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
220251
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)

0 commit comments

Comments
 (0)