!fixup: fix formatting and address review comments.

ElvisWang123 · ElvisWang123 · commit 36e10329f6c3 · 2025-03-24T22:54:11.000-07:00
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9419,6 +9419,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
          "entry block must be set to a VPRegionBlock having a non-empty entry "
          "VPBasicBlock");
 
+  for (ElementCount VF : Range)
+    Plan->addVF(VF);
+  Plan->setName("Initial VPlan");
+
   // Update wide induction increments to use the same step as the corresponding
   // wide induction. This enables detecting induction increments directly in
   // VPlan and removes redundant splats.
@@ -9465,9 +9469,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
                              CostCtx, Range);
   }
 
+  // Update the VFs after convertToAbstractRecipes. The VFs cannot be set only
+  // here because `handleUncountableEarlyExit` checks the VFs of the plan, so
+  // they are set before it and reset here.
+  // TODO: Use a better method that only sets the VFs of the plan once.
+  Plan->clearVF();
   for (ElementCount VF : Range)
     Plan->addVF(VF);
-  Plan->setName("Initial VPlan");
 
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2291,7 +2291,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
   }
 
   /// For VPExtendedReductionRecipe.
-  /// Note that IsNonNeg flag and the debug location are for extend instruction.
+  /// Note that the IsNonNeg flag and the debug location are from the extend.
   VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
                     ArrayRef<VPValue *> Operands, VPValue *CondOp,
                     bool IsOrdered, NonNegFlagsTy NonNeg, DebugLoc DL)
@@ -2302,7 +2302,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
   }
 
   /// For VPMulAccumulateReductionRecipe.
-  /// Note that the NUW/NSW and DL are for mul instruction.
+  /// Note that the NUW/NSW flags and the debug location are from the mul.
   VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
                     ArrayRef<VPValue *> Operands, VPValue *CondOp,
                     bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
@@ -2320,9 +2320,9 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
                           ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
                           IsOrdered, DL) {}
 
-  VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp,
-                    VPValue *VecOp, VPValue *CondOp, bool IsOrdered,
-                    DebugLoc DL = {})
+  VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
+                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
+                    bool IsOrdered, DebugLoc DL = {})
       : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
                           ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
                           IsOrdered, DL) {}
@@ -2434,16 +2434,17 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
   VPExtendedReductionRecipe(VPExtendedReductionRecipe *ExtRed)
       : VPReductionRecipe(
             VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind(),
-            {ExtRed->getChainOp(), ExtRed->getVecOp()},
-            ExtRed->getCondOp(), ExtRed->isOrdered(), NonNegFlagsTy(ExtRed->isNonNeg()), ExtRed->getDebugLoc()),
+            {ExtRed->getChainOp(), ExtRed->getVecOp()}, ExtRed->getCondOp(),
+            ExtRed->isOrdered(), NonNegFlagsTy(ExtRed->isNonNeg()),
+            ExtRed->getDebugLoc()),
         ExtOp(ExtRed->getExtOpcode()), ResultTy(ExtRed->getResultType()) {}
 
 public:
   VPExtendedReductionRecipe(VPReductionRecipe *R, VPWidenCastRecipe *Ext)
-      : VPReductionRecipe(
-            VPDef::VPExtendedReductionSC, R->getRecurrenceKind(),
-            {R->getChainOp(), Ext->getOperand(0)}, R->getCondOp(),
-            R->isOrdered(), NonNegFlagsTy(Ext->isNonNeg()), Ext->getDebugLoc()),
+      : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind(),
+                          {R->getChainOp(), Ext->getOperand(0)}, R->getCondOp(),
+                          R->isOrdered(), NonNegFlagsTy(Ext->isNonNeg()),
+                          Ext->getDebugLoc()),
         ExtOp(Ext->getOpcode()), ResultTy(Ext->getResultType()) {}
 
   ~VPExtendedReductionRecipe() override = default;
@@ -2472,9 +2473,7 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
 #endif
 
   /// The scalar type after extended.
-  Type *getResultType() const {
-    return ResultTy;
-  }
+  Type *getResultType() const { return ResultTy; }
 
   /// Is the extend ZExt?
   bool isZExt() const { return getExtOpcode() == Instruction::ZExt; }
@@ -2500,11 +2499,13 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
   /// For cloning VPMulAccumulateReductionRecipe.
   VPMulAccumulateReductionRecipe(VPMulAccumulateReductionRecipe *MulAcc)
       : VPReductionRecipe(
-            VPDef::VPMulAccumulateReductionSC,
-            MulAcc->getRecurrenceKind(),
+            VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind(),
             {MulAcc->getChainOp(), MulAcc->getVecOp0(), MulAcc->getVecOp1()},
-            MulAcc->getCondOp(), MulAcc->isOrdered(), WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()), MulAcc->getDebugLoc()),
-        ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()), ResultTy(MulAcc->getResultType()) {}
+            MulAcc->getCondOp(), MulAcc->isOrdered(),
+            WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()),
+            MulAcc->getDebugLoc()),
+        ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()),
+        ResultTy(MulAcc->getResultType()) {}
 
 public:
   VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
@@ -2516,8 +2517,10 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
             R->getCondOp(), R->isOrdered(),
             WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
             R->getDebugLoc()),
-        ExtOp(Ext0->getOpcode()), IsNonNeg(Ext0->isNonNeg()), ResultTy(ResultTy) {
-    assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind())== Instruction::Add &&
+        ExtOp(Ext0->getOpcode()), IsNonNeg(Ext0->isNonNeg()),
+        ResultTy(ResultTy) {
+    assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
+               Instruction::Add &&
            "The reduction instruction in MulAccumulateteReductionRecipe must "
            "be Add");
   }
@@ -2530,7 +2533,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
             WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
             R->getDebugLoc()),
         ExtOp(Instruction::CastOps::CastOpsEnd) {
-    assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) == Instruction::Add &&
+    assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
+               Instruction::Add &&
            "The reduction instruction in MulAccumulateReductionRecipe must be "
            "Add");
   }
@@ -2562,7 +2566,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
 #endif
 
   Type *getResultType() const {
-    assert(isExtended() && "Only support getResultType when this recipe contains implicit extend.");
+    assert(isExtended() && "Only support getResultType when this recipe "
+                           "contains implicit extend.");
     return ResultTy;
   }
 
@@ -3792,6 +3797,8 @@ class VPlan {
     VFs.insert(VF);
   }
 
+  void clearVF() { VFs.clear(); }
+
   bool hasVF(ElementCount VF) const { return VFs.count(VF); }
   bool hasScalableVF() const {
     return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2396,6 +2396,8 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
   auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
   unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
   FastMathFlags FMFs = getFastMathFlags();
+  std::optional<FastMathFlags> OptionalFMF =
+      ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
 
   // TODO: Support any-of reductions.
   assert(
@@ -2410,12 +2412,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
     return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
   }
 
-  if (ElementTy->isFloatingPointTy())
-    return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
-                                              Ctx.CostKind);
-  // Cannot get correct cost when quering TTI with FMFs not contains `reassoc`
-  // for non-FP reductions.
-  return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, std::nullopt,
+  return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
                                             Ctx.CostKind);
 }
 
@@ -2426,7 +2423,6 @@ VPExtendedReductionRecipe::computeCost(ElementCount VF,
   Type *RedTy = Ctx.Types.inferScalarType(this);
   auto *SrcVecTy =
       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
-
   assert(RedTy->isIntegerTy() &&
          "ExtendedReduction only support integer type currently.");
   return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy,
@@ -2439,7 +2435,6 @@ VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
   Type *RedTy = Ctx.Types.inferScalarType(this);
   auto *SrcVecTy =
       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
-
   return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy,
                                         Ctx.CostKind);
 }
@@ -2494,7 +2489,10 @@ void VPExtendedReductionRecipe::print(raw_ostream &O, const Twine &Indent,
   O << " = ";
   getChainOp()->printAsOperand(O, SlotTracker);
   O << " +";
-  O << " reduce." << Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(getRecurrenceKind())) << " (";
+  O << " reduce."
+    << Instruction::getOpcodeName(
+           RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
+    << " (";
   getVecOp()->printAsOperand(O, SlotTracker);
   O << " extended to " << *getResultType();
   if (isConditional()) {
@@ -2511,7 +2509,10 @@ void VPMulAccumulateReductionRecipe::print(raw_ostream &O, const Twine &Indent,
   O << " = ";
   getChainOp()->printAsOperand(O, SlotTracker);
   O << " + ";
-  O << "reduce." << Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(getRecurrenceKind())) << " (";
+  O << "reduce."
+    << Instruction::getOpcodeName(
+           RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
+    << " (";
   O << "mul";
   printFlags(O);
   if (isExtended())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -17,6 +17,7 @@
 #include "VPlanAnalysis.h"
 #include "VPlanCFG.h"
 #include "VPlanDominatorTree.h"
+#include "VPlanHelpers.h"
 #include "VPlanPatternMatch.h"
 #include "VPlanUtils.h"
 #include "VPlanVerifier.h"
@@ -2384,7 +2385,8 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
         IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
                                        Instruction::CastOps::ZExt,
                                    Mul, RecipeA, RecipeB, nullptr))
-      return new VPMulAccumulateReductionRecipe(Red, Mul, RecipeA, RecipeB, RecipeA->getResultType());
+      return new VPMulAccumulateReductionRecipe(Red, Mul, RecipeA, RecipeB,
+                                                RecipeA->getResultType());
     // Matched reduce.add(mul)
     if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
       return new VPMulAccumulateReductionRecipe(Red, Mul);
@@ -2405,7 +2407,8 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
         IsMulAccValidAndClampRange(Ext0->getOpcode() ==
                                        Instruction::CastOps::ZExt,
                                    Mul, Ext0, Ext1, Ext))
-      return new VPMulAccumulateReductionRecipe(Red, Mul, Ext0, Ext1, Ext->getResultType());
+      return new VPMulAccumulateReductionRecipe(Red, Mul, Ext0, Ext1,
+                                                Ext->getResultType());
   }
   return nullptr;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -14,7 +14,6 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
 
 #include "VPlan.h"
-#include "VPlanHelpers.h"
 #include "VPlanVerifier.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Support/CommandLine.h"
@@ -29,6 +28,8 @@ class PredicatedScalarEvolution;
 class TargetLibraryInfo;
 class VPBuilder;
 class VPRecipeBuilder;
+class VPCostContext;
+class VFRange;
 
 extern cl::opt<bool> VerifyEachVPlan;