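Fixups (skip FDIV users whose dividend is the shared divisor, propagate instruction flags to the new reciprocal FDIV and FMULs, tidy comments) and an extra test case.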

davemgreen · davemgreen · commit 0d0182a02aba · 2025-06-17T08:09:51.000+01:00
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6391,7 +6391,7 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
 
 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
 // reciprocal.
-// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
 bool CombinerHelper::matchRepeatedFPDivisor(
     MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
   assert(MI.getOpcode() == TargetOpcode::G_FDIV);
@@ -6413,19 +6413,18 @@ bool CombinerHelper::matchRepeatedFPDivisor(
   // Exit early if the target does not want this transform or if there can't
   // possibly be enough uses of the divisor to make the transform worthwhile.
   unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
-
   if (!MinUses)
     return false;
 
-  // Find all FDIV users of the same divisor. Use a set because duplicates may
-  // be present in the user list. For the moment we limit all instructions to a
-  // single BB and use the first Instr in MatchInfo as the dominating position.
+  // Find all FDIV users of the same divisor. For the moment we limit all
+  // instructions to a single BB and use the first Instr in MatchInfo as the
+  // dominating position.
   MatchInfo.push_back(&MI);
   for (auto &U : MRI.use_nodbg_instructions(Y)) {
     if (&U == &MI || U.getParent() != MI.getParent())
       continue;
     if (U.getOpcode() == TargetOpcode::G_FDIV &&
-        U.getOperand(2).getReg() == Y) {
+        U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
       // This division is eligible for optimization only if global unsafe math
       // is enabled or if this division allows reciprocal formation.
       if (UnsafeMath || U.getFlag(MachineInstr::MIFlag::FmArcp)) {
@@ -6448,13 +6447,14 @@ void CombinerHelper::applyRepeatedFPDivisor(
   Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
   LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
   auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
-                               MatchInfo[0]->getOperand(2).getReg());
+                               MatchInfo[0]->getOperand(2).getReg(),
+                               MatchInfo[0]->getFlags());
 
   // Replace all found div's with fmul instructions.
   for (MachineInstr *MI : MatchInfo) {
     Builder.setInsertPt(*MI->getParent(), MI);
     Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
-                      Div->getOperand(0).getReg());
+                      Div->getOperand(0).getReg(), MI->getFlags());
     MI->eraseFromParent();
   }
 }
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -103,6 +103,36 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
   ret void
 }
 
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov s4, #1.00000000
+; CHECK-SD-NEXT:    fdiv s5, s4, s0
+; CHECK-SD-NEXT:    fmul s4, s1, s5
+; CHECK-SD-NEXT:    fmul s1, s2, s5
+; CHECK-SD-NEXT:    fmul s2, s3, s5
+; CHECK-SD-NEXT:    fmul s3, s0, s5
+; CHECK-SD-NEXT:    fmov s0, s4
+; CHECK-SD-NEXT:    b foo_4f
+;
+; CHECK-GI-LABEL: four_fdiv_multi_float:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s4, #1.00000000
+; CHECK-GI-NEXT:    fdiv s5, s4, s0
+; CHECK-GI-NEXT:    fdiv s4, s0, s0
+; CHECK-GI-NEXT:    fmul s0, s1, s5
+; CHECK-GI-NEXT:    fmul s1, s2, s5
+; CHECK-GI-NEXT:    fmul s2, s3, s5
+; CHECK-GI-NEXT:    fmov s3, s4
+; CHECK-GI-NEXT:    b foo_4f
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  %div2 = fdiv float %c, %D
+  %div3 = fdiv float %D, %D ; dividend is the divisor itself: the GI expectations above keep this as an fdiv, the SD ones rewrite it to an fmul by the reciprocal
+  tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+  ret void
+}
+
 define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; CHECK-LABEL: splat_three_fdiv_4xfloat:
 ; CHECK:       // %bb.0:
@@ -215,6 +245,7 @@ entry:
 }
 
 declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
 declare void @foo_3d(double, double, double)
 declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)