Skip to content

Commit 6b3d2b6

Browse files
authored
[VPlan] Add VPExpressionRecipe, replacing extended reduction recipes. (#144281)
This patch adds a new recipe to combine multiple recipes into an 'expression' recipe, which should be considered as a single entity for cost-modeling and transforms. The recipe needs to be 'decomposed', i.e. replaced by its individual recipes, before execution. This subsumes VPExtendedReductionRecipe and VPMulAccumulateReductionRecipe and should make it easier to extend to more types of bundled patterns, e.g. extends folded into loads or various arithmetic instructions, if supported by the target. It avoids re-creating the original recipes when converting to concrete recipes and removes the need to record various information. The current version of the patch still retains the original printing matching VPExtendedReductionRecipe and VPMulAccumulateReductionRecipe, but this specialized print could be replaced with printing the bundled recipes directly. PR: #144281
1 parent 7da8ed8 commit 6b3d2b6

File tree

7 files changed

+348
-404
lines changed

7 files changed

+348
-404
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 119 additions & 214 deletions
Original file line numberDiff line numberDiff line change
@@ -528,11 +528,10 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
528528
case VPRecipeBase::VPDerivedIVSC:
529529
case VPRecipeBase::VPEVLBasedIVPHISC:
530530
case VPRecipeBase::VPExpandSCEVSC:
531+
case VPRecipeBase::VPExpressionSC:
531532
case VPRecipeBase::VPInstructionSC:
532533
case VPRecipeBase::VPReductionEVLSC:
533534
case VPRecipeBase::VPReductionSC:
534-
case VPRecipeBase::VPMulAccumulateReductionSC:
535-
case VPRecipeBase::VPExtendedReductionSC:
536535
case VPRecipeBase::VPReplicateSC:
537536
case VPRecipeBase::VPScalarIVStepsSC:
538537
case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852851
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
853852
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
854853
R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
855-
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC ||
856-
R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC ||
857-
R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC;
854+
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
858855
}
859856

860857
static inline bool classof(const VPUser *U) {
@@ -2440,28 +2437,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24402437
setUnderlyingValue(I);
24412438
}
24422439

2443-
/// For VPExtendedReductionRecipe.
2444-
/// Note that the debug location is from the extend.
2445-
VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
2446-
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2447-
bool IsOrdered, DebugLoc DL)
2448-
: VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2449-
IsOrdered(IsOrdered), IsConditional(CondOp) {
2450-
if (CondOp)
2451-
addOperand(CondOp);
2452-
}
2453-
2454-
/// For VPMulAccumulateReductionRecipe.
2455-
/// Note that the NUW/NSW flags and the debug location are from the Mul.
2456-
VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
2457-
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2458-
bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2459-
: VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2460-
IsOrdered(IsOrdered), IsConditional(CondOp) {
2461-
if (CondOp)
2462-
addOperand(CondOp);
2463-
}
2464-
24652440
public:
24662441
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24672442
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2487,9 +2462,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24872462

24882463
static inline bool classof(const VPRecipeBase *R) {
24892464
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2490-
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2491-
R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC ||
2492-
R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC;
2465+
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
24932466
}
24942467

24952468
static inline bool classof(const VPUser *U) {
@@ -2628,190 +2601,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26282601
}
26292602
};
26302603

2631-
/// A recipe to represent inloop extended reduction operations, performing a
2632-
/// reduction on a extended vector operand into a scalar value, and adding the
2633-
/// result to a chain. This recipe is abstract and needs to be lowered to
2634-
/// concrete recipes before codegen. The operands are {ChainOp, VecOp,
2635-
/// [Condition]}.
2636-
class VPExtendedReductionRecipe : public VPReductionRecipe {
2637-
/// Opcode of the extend for VecOp.
2638-
Instruction::CastOps ExtOp;
2639-
2640-
/// The scalar type after extending.
2641-
Type *ResultTy;
2642-
2643-
/// For cloning VPExtendedReductionRecipe.
2644-
VPExtendedReductionRecipe(VPExtendedReductionRecipe *ExtRed)
2645-
: VPReductionRecipe(
2646-
VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind(),
2647-
{ExtRed->getChainOp(), ExtRed->getVecOp()}, ExtRed->getCondOp(),
2648-
ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2649-
ExtOp(ExtRed->getExtOpcode()), ResultTy(ExtRed->getResultType()) {
2650-
transferFlags(*ExtRed);
2651-
setUnderlyingValue(ExtRed->getUnderlyingValue());
2652-
}
2653-
2654-
public:
2655-
VPExtendedReductionRecipe(VPReductionRecipe *R, VPWidenCastRecipe *Ext)
2656-
: VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind(),
2657-
{R->getChainOp(), Ext->getOperand(0)}, R->getCondOp(),
2658-
R->isOrdered(), Ext->getDebugLoc()),
2659-
ExtOp(Ext->getOpcode()), ResultTy(Ext->getResultType()) {
2660-
assert((ExtOp == Instruction::CastOps::ZExt ||
2661-
ExtOp == Instruction::CastOps::SExt) &&
2662-
"VPExtendedReductionRecipe only supports zext and sext.");
2663-
2664-
transferFlags(*Ext);
2665-
setUnderlyingValue(R->getUnderlyingValue());
2666-
}
2667-
2668-
~VPExtendedReductionRecipe() override = default;
2669-
2670-
VPExtendedReductionRecipe *clone() override {
2671-
return new VPExtendedReductionRecipe(this);
2672-
}
2673-
2674-
VP_CLASSOF_IMPL(VPDef::VPExtendedReductionSC);
2675-
2676-
void execute(VPTransformState &State) override {
2677-
llvm_unreachable("VPExtendedReductionRecipe should be transform to "
2678-
"VPExtendedRecipe + VPReductionRecipe before execution.");
2679-
};
2680-
2681-
/// Return the cost of VPExtendedReductionRecipe.
2682-
InstructionCost computeCost(ElementCount VF,
2683-
VPCostContext &Ctx) const override;
2684-
2685-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2686-
/// Print the recipe.
2687-
void print(raw_ostream &O, const Twine &Indent,
2688-
VPSlotTracker &SlotTracker) const override;
2689-
#endif
2690-
2691-
/// The scalar type after extending.
2692-
Type *getResultType() const { return ResultTy; }
2693-
2694-
/// Is the extend ZExt?
2695-
bool isZExt() const { return getExtOpcode() == Instruction::ZExt; }
2696-
2697-
/// Get the opcode of the extend for VecOp.
2698-
Instruction::CastOps getExtOpcode() const { return ExtOp; }
2699-
};
2700-
2701-
/// A recipe to represent inloop MulAccumulateReduction operations, multiplying
2702-
/// the vector operands (which may be extended), performing a reduction.add on
2703-
/// the result, and adding the scalar result to a chain. This recipe is abstract
2704-
/// and needs to be lowered to concrete recipes before codegen. The operands are
2705-
/// {ChainOp, VecOp1, VecOp2, [Condition]}.
2706-
class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2707-
/// Opcode of the extend for VecOp1 and VecOp2.
2708-
Instruction::CastOps ExtOp;
2709-
2710-
/// Non-neg flag of the extend recipe.
2711-
bool IsNonNeg = false;
2712-
2713-
/// The scalar type after extending.
2714-
Type *ResultTy = nullptr;
2715-
2716-
/// For cloning VPMulAccumulateReductionRecipe.
2717-
VPMulAccumulateReductionRecipe(VPMulAccumulateReductionRecipe *MulAcc)
2718-
: VPReductionRecipe(
2719-
VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind(),
2720-
{MulAcc->getChainOp(), MulAcc->getVecOp0(), MulAcc->getVecOp1()},
2721-
MulAcc->getCondOp(), MulAcc->isOrdered(),
2722-
WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()),
2723-
MulAcc->getDebugLoc()),
2724-
ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()),
2725-
ResultTy(MulAcc->getResultType()) {
2726-
transferFlags(*MulAcc);
2727-
setUnderlyingValue(MulAcc->getUnderlyingValue());
2728-
}
2729-
2730-
public:
2731-
VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
2732-
VPWidenCastRecipe *Ext0,
2733-
VPWidenCastRecipe *Ext1, Type *ResultTy)
2734-
: VPReductionRecipe(
2735-
VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(),
2736-
{R->getChainOp(), Ext0->getOperand(0), Ext1->getOperand(0)},
2737-
R->getCondOp(), R->isOrdered(),
2738-
WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
2739-
R->getDebugLoc()),
2740-
ExtOp(Ext0->getOpcode()), ResultTy(ResultTy) {
2741-
assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
2742-
Instruction::Add &&
2743-
"The reduction instruction in MulAccumulateteReductionRecipe must "
2744-
"be Add");
2745-
assert((ExtOp == Instruction::CastOps::ZExt ||
2746-
ExtOp == Instruction::CastOps::SExt) &&
2747-
"VPMulAccumulateReductionRecipe only supports zext and sext.");
2748-
setUnderlyingValue(R->getUnderlyingValue());
2749-
// Only set the non-negative flag if the original recipe contains.
2750-
if (Ext0->hasNonNegFlag())
2751-
IsNonNeg = Ext0->isNonNeg();
2752-
}
2753-
2754-
VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
2755-
Type *ResultTy)
2756-
: VPReductionRecipe(
2757-
VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(),
2758-
{R->getChainOp(), Mul->getOperand(0), Mul->getOperand(1)},
2759-
R->getCondOp(), R->isOrdered(),
2760-
WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
2761-
R->getDebugLoc()),
2762-
ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
2763-
assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
2764-
Instruction::Add &&
2765-
"The reduction instruction in MulAccumulateReductionRecipe must be "
2766-
"Add");
2767-
setUnderlyingValue(R->getUnderlyingValue());
2768-
}
2769-
2770-
~VPMulAccumulateReductionRecipe() override = default;
2771-
2772-
VPMulAccumulateReductionRecipe *clone() override {
2773-
return new VPMulAccumulateReductionRecipe(this);
2774-
}
2775-
2776-
VP_CLASSOF_IMPL(VPDef::VPMulAccumulateReductionSC);
2777-
2778-
void execute(VPTransformState &State) override {
2779-
llvm_unreachable("VPMulAccumulateReductionRecipe should transform to "
2780-
"VPWidenCastRecipe + "
2781-
"VPWidenRecipe + VPReductionRecipe before execution");
2782-
}
2783-
2784-
/// Return the cost of VPMulAccumulateReductionRecipe.
2785-
InstructionCost computeCost(ElementCount VF,
2786-
VPCostContext &Ctx) const override;
2787-
2788-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2789-
/// Print the recipe.
2790-
void print(raw_ostream &O, const Twine &Indent,
2791-
VPSlotTracker &SlotTracker) const override;
2792-
#endif
2793-
2794-
Type *getResultType() const { return ResultTy; }
2795-
2796-
/// The first vector value to be extended and reduced.
2797-
VPValue *getVecOp0() const { return getOperand(1); }
2798-
2799-
/// The second vector value to be extended and reduced.
2800-
VPValue *getVecOp1() const { return getOperand(2); }
2801-
2802-
/// Return true if this recipe contains extended operands.
2803-
bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2804-
2805-
/// Return the opcode of the extends for the operands.
2806-
Instruction::CastOps getExtOpcode() const { return ExtOp; }
2807-
2808-
/// Return if the operands are zero-extended.
2809-
bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; }
2810-
2811-
/// Return true if the operand extends have the non-negative flag.
2812-
bool isNonNeg() const { return IsNonNeg; }
2813-
};
2814-
28152604
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
28162605
/// copies of the original scalar type, one per lane, instead of producing a
28172606
/// single copy of widened type for all lanes. If the instruction is known to be
@@ -2930,6 +2719,122 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
29302719
}
29312720
};
29322721

2722+
/// A recipe to combine multiple recipes into a single 'expression' recipe,
2723+
/// which should be considered a single entity for cost-modeling and transforms.
2724+
/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2725+
/// expression recipes, before execute. The individual expression recipes are
2726+
/// completely disconnected from the def-use graph of other recipes not part of
2727+
/// the expression. Def-use edges between pairs of expression recipes remain
2728+
/// intact, whereas every edge between an expression recipe and a recipe outside
2729+
/// the expression is elevated to connect the non-expression recipe with the
2730+
/// VPExpressionRecipe itself.
2731+
class VPExpressionRecipe : public VPSingleDefRecipe {
2732+
/// Recipes included in this VPExpressionRecipe.
2733+
SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
2734+
2735+
/// Temporary VPValues used for external operands of the expression, i.e.
2736+
/// operands not defined by recipes in the expression.
2737+
SmallVector<VPValue *> LiveInPlaceholders;
2738+
2739+
enum class ExpressionTypes {
2740+
/// Represents an inloop extended reduction operation, performing a
2741+
/// reduction on an extended vector operand into a scalar value, and adding
2742+
/// the result to a chain.
2743+
ExtendedReduction,
2744+
/// Represent an inloop multiply-accumulate reduction, multiplying the
2745+
/// extended vector operands, performing a reduction.add on the result, and
2746+
/// adding the scalar result to a chain.
2747+
ExtMulAccReduction,
2748+
/// Represent an inloop multiply-accumulate reduction, multiplying the
2749+
/// vector operands, performing a reduction.add on the result, and adding
2750+
/// the scalar result to a chain.
2751+
MulAccReduction,
2752+
};
2753+
2754+
/// Type of the expression.
2755+
ExpressionTypes ExpressionType;
2756+
2757+
/// Construct a new VPExpressionRecipe by internalizing recipes in \p
2758+
/// ExpressionRecipes. External operands (i.e. not defined by another recipe
2759+
/// in the expression) are replaced by temporary VPValues and the original
2760+
/// operands are transferred to the VPExpressionRecipe itself. Clone recipes
2761+
/// as needed (excluding last) to ensure they are only used by other recipes
2762+
/// in the expression.
2763+
VPExpressionRecipe(ExpressionTypes ExpressionType,
2764+
ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
2765+
2766+
public:
2767+
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2768+
: VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
2769+
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
2770+
: VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
2771+
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2772+
VPWidenRecipe *Mul, VPReductionRecipe *Red)
2773+
: VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
2774+
{Ext0, Ext1, Mul, Red}) {}
2775+
2776+
~VPExpressionRecipe() override {
2777+
for (auto *R : reverse(ExpressionRecipes))
2778+
delete R;
2779+
for (VPValue *T : LiveInPlaceholders)
2780+
delete T;
2781+
}
2782+
2783+
VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
2784+
2785+
VPExpressionRecipe *clone() override {
2786+
assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
2787+
SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
2788+
for (auto *R : ExpressionRecipes)
2789+
NewExpressiondRecipes.push_back(R->clone());
2790+
for (auto *New : NewExpressiondRecipes) {
2791+
for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
2792+
New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
2793+
// Update placeholder operands in the cloned recipe to use the external
2794+
// operands, to be internalized when the cloned expression is constructed.
2795+
for (const auto &[Placeholder, OutsideOp] :
2796+
zip(LiveInPlaceholders, operands()))
2797+
New->replaceUsesOfWith(Placeholder, OutsideOp);
2798+
}
2799+
return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
2800+
}
2801+
2802+
/// Return the VPValue to use to infer the result type of the recipe.
2803+
VPValue *getOperandOfResultType() const {
2804+
unsigned OpIdx =
2805+
cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
2806+
: 1;
2807+
return getOperand(getNumOperands() - OpIdx);
2808+
}
2809+
2810+
/// Insert the recipes of the expression back into the VPlan, directly before
2811+
/// the current recipe. Leaves the expression recipe empty, which must be
2812+
/// removed before codegen.
2813+
void decompose();
2814+
2815+
/// Method for generating code, must not be called as this recipe is abstract.
2816+
void execute(VPTransformState &State) override {
2817+
llvm_unreachable("recipe must be removed before execute");
2818+
}
2819+
2820+
InstructionCost computeCost(ElementCount VF,
2821+
VPCostContext &Ctx) const override;
2822+
2823+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824+
/// Print the recipe.
2825+
void print(raw_ostream &O, const Twine &Indent,
2826+
VPSlotTracker &SlotTracker) const override;
2827+
#endif
2828+
2829+
/// Returns true if this expression contains recipes that may read from or
2830+
/// write to memory.
2831+
bool mayReadOrWriteMemory() const;
2832+
2833+
/// Returns true if this expression contains recipes that may have side
2834+
/// effects.
2835+
bool mayHaveSideEffects() const;
2836+
};
2837+
29332838
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
29342839
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
29352840
/// order to merge values that are set under such a branch and feed their uses.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
297297
// TODO: Use info from interleave group.
298298
return V->getUnderlyingValue()->getType();
299299
})
300-
.Case<VPExtendedReductionRecipe, VPMulAccumulateReductionRecipe>(
301-
[](const auto *R) { return R->getResultType(); })
302300
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
303301
return R->getSCEV()->getType();
304302
})
305303
.Case<VPReductionRecipe>([this](const auto *R) {
306304
return inferScalarType(R->getChainOp());
305+
})
306+
.Case<VPExpressionRecipe>([this](const auto *R) {
307+
return inferScalarType(R->getOperandOfResultType());
307308
});
308309

309310
assert(ResultTy && "could not infer type for the given VPValue");

0 commit comments

Comments
 (0)