Skip to content

Commit 736357e

Browse files
committed
VPBundleRecipe
This patch adds a new recipe to combine multiple recipes into a 'bundle' recipe, which should be considered as a single entity for cost-modeling and transforms. The recipe needs to be 'unbundled', i.e. replaced by its individual recipes, before execution. This subsumes VPExtendedReductionRecipe and VPMulAccumulateReductionRecipe and should make it easier to extend to more types of bundled patterns, e.g. extends folded into loads or various arithmetic instructions, if supported by the target. It avoids re-creating the original recipes when converting to concrete recipes, and removes the need to record various pieces of information about them. The current version of the patch still retains the original printing matching VPExtendedReductionRecipe and VPMulAccumulateReductionRecipe, but this specialized printing could be replaced with printing the bundled recipes directly. Currently the unbundle implementation is a bit more complicated than necessary, as we need to fold the extends across ops to match the current behavior, but there is quite possibly a better place to do so.
1 parent 49c6235 commit 736357e

File tree

7 files changed

+303
-418
lines changed

7 files changed

+303
-418
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 120 additions & 215 deletions
Original file line numberDiff line numberDiff line change
@@ -525,14 +525,13 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
525525

526526
static inline bool classof(const VPRecipeBase *R) {
527527
switch (R->getVPDefID()) {
528+
case VPRecipeBase::VPBundleSC:
528529
case VPRecipeBase::VPDerivedIVSC:
529530
case VPRecipeBase::VPEVLBasedIVPHISC:
530531
case VPRecipeBase::VPExpandSCEVSC:
531532
case VPRecipeBase::VPInstructionSC:
532533
case VPRecipeBase::VPReductionEVLSC:
533534
case VPRecipeBase::VPReductionSC:
534-
case VPRecipeBase::VPMulAccumulateReductionSC:
535-
case VPRecipeBase::VPExtendedReductionSC:
536535
case VPRecipeBase::VPReplicateSC:
537536
case VPRecipeBase::VPScalarIVStepsSC:
538537
case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852851
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
853852
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
854853
R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
855-
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC ||
856-
R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC ||
857-
R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC;
854+
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
858855
}
859856

860857
static inline bool classof(const VPUser *U) {
@@ -2431,29 +2428,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24312428
}
24322429
setUnderlyingValue(I);
24332430
}
2434-
2435-
/// For VPExtendedReductionRecipe.
2436-
/// Note that the debug location is from the extend.
2437-
VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
2438-
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2439-
bool IsOrdered, DebugLoc DL)
2440-
: VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2441-
IsOrdered(IsOrdered), IsConditional(CondOp) {
2442-
if (CondOp)
2443-
addOperand(CondOp);
2444-
}
2445-
2446-
/// For VPMulAccumulateReductionRecipe.
2447-
/// Note that the NUW/NSW flags and the debug location are from the Mul.
2448-
VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind,
2449-
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2450-
bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2451-
: VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2452-
IsOrdered(IsOrdered), IsConditional(CondOp) {
2453-
if (CondOp)
2454-
addOperand(CondOp);
2455-
}
2456-
24572431
public:
24582432
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24592433
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2479,9 +2453,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24792453

24802454
static inline bool classof(const VPRecipeBase *R) {
24812455
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2482-
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2483-
R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC ||
2484-
R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC;
2456+
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
24852457
}
24862458

24872459
static inline bool classof(const VPUser *U) {
@@ -2620,190 +2592,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26202592
}
26212593
};
26222594

2623-
/// A recipe to represent inloop extended reduction operations, performing a
2624-
/// reduction on a extended vector operand into a scalar value, and adding the
2625-
/// result to a chain. This recipe is abstract and needs to be lowered to
2626-
/// concrete recipes before codegen. The operands are {ChainOp, VecOp,
2627-
/// [Condition]}.
2628-
class VPExtendedReductionRecipe : public VPReductionRecipe {
2629-
/// Opcode of the extend for VecOp.
2630-
Instruction::CastOps ExtOp;
2631-
2632-
/// The scalar type after extending.
2633-
Type *ResultTy;
2634-
2635-
/// For cloning VPExtendedReductionRecipe.
2636-
VPExtendedReductionRecipe(VPExtendedReductionRecipe *ExtRed)
2637-
: VPReductionRecipe(
2638-
VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind(),
2639-
{ExtRed->getChainOp(), ExtRed->getVecOp()}, ExtRed->getCondOp(),
2640-
ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2641-
ExtOp(ExtRed->getExtOpcode()), ResultTy(ExtRed->getResultType()) {
2642-
transferFlags(*ExtRed);
2643-
setUnderlyingValue(ExtRed->getUnderlyingValue());
2644-
}
2645-
2646-
public:
2647-
VPExtendedReductionRecipe(VPReductionRecipe *R, VPWidenCastRecipe *Ext)
2648-
: VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind(),
2649-
{R->getChainOp(), Ext->getOperand(0)}, R->getCondOp(),
2650-
R->isOrdered(), Ext->getDebugLoc()),
2651-
ExtOp(Ext->getOpcode()), ResultTy(Ext->getResultType()) {
2652-
assert((ExtOp == Instruction::CastOps::ZExt ||
2653-
ExtOp == Instruction::CastOps::SExt) &&
2654-
"VPExtendedReductionRecipe only supports zext and sext.");
2655-
2656-
transferFlags(*Ext);
2657-
setUnderlyingValue(R->getUnderlyingValue());
2658-
}
2659-
2660-
~VPExtendedReductionRecipe() override = default;
2661-
2662-
VPExtendedReductionRecipe *clone() override {
2663-
return new VPExtendedReductionRecipe(this);
2664-
}
2665-
2666-
VP_CLASSOF_IMPL(VPDef::VPExtendedReductionSC);
2667-
2668-
void execute(VPTransformState &State) override {
2669-
llvm_unreachable("VPExtendedReductionRecipe should be transform to "
2670-
"VPExtendedRecipe + VPReductionRecipe before execution.");
2671-
};
2672-
2673-
/// Return the cost of VPExtendedReductionRecipe.
2674-
InstructionCost computeCost(ElementCount VF,
2675-
VPCostContext &Ctx) const override;
2676-
2677-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2678-
/// Print the recipe.
2679-
void print(raw_ostream &O, const Twine &Indent,
2680-
VPSlotTracker &SlotTracker) const override;
2681-
#endif
2682-
2683-
/// The scalar type after extending.
2684-
Type *getResultType() const { return ResultTy; }
2685-
2686-
/// Is the extend ZExt?
2687-
bool isZExt() const { return getExtOpcode() == Instruction::ZExt; }
2688-
2689-
/// Get the opcode of the extend for VecOp.
2690-
Instruction::CastOps getExtOpcode() const { return ExtOp; }
2691-
};
2692-
2693-
/// A recipe to represent inloop MulAccumulateReduction operations, multiplying
2694-
/// the vector operands (which may be extended), performing a reduction.add on
2695-
/// the result, and adding the scalar result to a chain. This recipe is abstract
2696-
/// and needs to be lowered to concrete recipes before codegen. The operands are
2697-
/// {ChainOp, VecOp1, VecOp2, [Condition]}.
2698-
class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2699-
/// Opcode of the extend for VecOp1 and VecOp2.
2700-
Instruction::CastOps ExtOp;
2701-
2702-
/// Non-neg flag of the extend recipe.
2703-
bool IsNonNeg = false;
2704-
2705-
/// The scalar type after extending.
2706-
Type *ResultTy = nullptr;
2707-
2708-
/// For cloning VPMulAccumulateReductionRecipe.
2709-
VPMulAccumulateReductionRecipe(VPMulAccumulateReductionRecipe *MulAcc)
2710-
: VPReductionRecipe(
2711-
VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind(),
2712-
{MulAcc->getChainOp(), MulAcc->getVecOp0(), MulAcc->getVecOp1()},
2713-
MulAcc->getCondOp(), MulAcc->isOrdered(),
2714-
WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()),
2715-
MulAcc->getDebugLoc()),
2716-
ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()),
2717-
ResultTy(MulAcc->getResultType()) {
2718-
transferFlags(*MulAcc);
2719-
setUnderlyingValue(MulAcc->getUnderlyingValue());
2720-
}
2721-
2722-
public:
2723-
VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
2724-
VPWidenCastRecipe *Ext0,
2725-
VPWidenCastRecipe *Ext1, Type *ResultTy)
2726-
: VPReductionRecipe(
2727-
VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(),
2728-
{R->getChainOp(), Ext0->getOperand(0), Ext1->getOperand(0)},
2729-
R->getCondOp(), R->isOrdered(),
2730-
WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
2731-
R->getDebugLoc()),
2732-
ExtOp(Ext0->getOpcode()), ResultTy(ResultTy) {
2733-
assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
2734-
Instruction::Add &&
2735-
"The reduction instruction in MulAccumulateteReductionRecipe must "
2736-
"be Add");
2737-
assert((ExtOp == Instruction::CastOps::ZExt ||
2738-
ExtOp == Instruction::CastOps::SExt) &&
2739-
"VPMulAccumulateReductionRecipe only supports zext and sext.");
2740-
setUnderlyingValue(R->getUnderlyingValue());
2741-
// Only set the non-negative flag if the original recipe contains.
2742-
if (Ext0->hasNonNegFlag())
2743-
IsNonNeg = Ext0->isNonNeg();
2744-
}
2745-
2746-
VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
2747-
Type *ResultTy)
2748-
: VPReductionRecipe(
2749-
VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(),
2750-
{R->getChainOp(), Mul->getOperand(0), Mul->getOperand(1)},
2751-
R->getCondOp(), R->isOrdered(),
2752-
WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
2753-
R->getDebugLoc()),
2754-
ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
2755-
assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
2756-
Instruction::Add &&
2757-
"The reduction instruction in MulAccumulateReductionRecipe must be "
2758-
"Add");
2759-
setUnderlyingValue(R->getUnderlyingValue());
2760-
}
2761-
2762-
~VPMulAccumulateReductionRecipe() override = default;
2763-
2764-
VPMulAccumulateReductionRecipe *clone() override {
2765-
return new VPMulAccumulateReductionRecipe(this);
2766-
}
2767-
2768-
VP_CLASSOF_IMPL(VPDef::VPMulAccumulateReductionSC);
2769-
2770-
void execute(VPTransformState &State) override {
2771-
llvm_unreachable("VPMulAccumulateReductionRecipe should transform to "
2772-
"VPWidenCastRecipe + "
2773-
"VPWidenRecipe + VPReductionRecipe before execution");
2774-
}
2775-
2776-
/// Return the cost of VPMulAccumulateReductionRecipe.
2777-
InstructionCost computeCost(ElementCount VF,
2778-
VPCostContext &Ctx) const override;
2779-
2780-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2781-
/// Print the recipe.
2782-
void print(raw_ostream &O, const Twine &Indent,
2783-
VPSlotTracker &SlotTracker) const override;
2784-
#endif
2785-
2786-
Type *getResultType() const { return ResultTy; }
2787-
2788-
/// The first vector value to be extended and reduced.
2789-
VPValue *getVecOp0() const { return getOperand(1); }
2790-
2791-
/// The second vector value to be extended and reduced.
2792-
VPValue *getVecOp1() const { return getOperand(2); }
2793-
2794-
/// Return true if this recipe contains extended operands.
2795-
bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2796-
2797-
/// Return the opcode of the extends for the operands.
2798-
Instruction::CastOps getExtOpcode() const { return ExtOp; }
2799-
2800-
/// Return if the operands are zero-extended.
2801-
bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; }
2802-
2803-
/// Return true if the operand extends have the non-negative flag.
2804-
bool isNonNeg() const { return IsNonNeg; }
2805-
};
2806-
28072595
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
28082596
/// copies of the original scalar type, one per lane, instead of producing a
28092597
/// single copy of widened type for all lanes. If the instruction is known to be
@@ -2922,6 +2710,123 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
29222710
}
29232711
};
29242712

2713+
/// A recipe to combine multiple recipes into a 'bundle' recipe, which should be
2714+
/// considered as a single entity for cost-modeling and transforms. The recipe
2715+
/// needs to be 'unbundled', i.e. replaced by its individual recipes, before
2716+
/// execution. The bundle owns its bundled recipes and the temporary operand
/// values it creates: both are deleted by the destructor.
2717+
class VPBundleRecipe : public VPSingleDefRecipe {
2718+
/// The kinds of patterns a bundle can represent; selected by the public
/// constructors below.
enum class BundleTypes {
2719+
ExtendedReduction,
2720+
MulAccumulateReduction,
2721+
};
2722+
2723+
/// Recipes bundled together in this VPBundleRecipe. The last entry is the one
/// returned by getResultOp().
2724+
SmallVector<VPSingleDefRecipe *> BundledOps;
2725+
2726+
/// Temporary VPValues used for external operands of the bundle, i.e. operands
2727+
/// not defined by recipes in the bundle. One value is created per external
/// operand slot, in the same order the operands are added to this recipe.
2728+
SmallVector<VPValue *> TmpValues;
2729+
2730+
/// Type of the bundle.
2731+
BundleTypes BundleType;
2732+
2733+
/// Build a bundle of kind \p BundleType from the recipes in \p ToBundle.
/// Bundled recipes are detached from their parent block (or cloned if they
/// also have users outside the bundle), and every operand not defined inside
/// the bundle is re-routed through a fresh temporary VPValue while the
/// original operand becomes an operand of the bundle itself.
VPBundleRecipe(BundleTypes BundleType, ArrayRef<VPSingleDefRecipe *> ToBundle)
2734+
: VPSingleDefRecipe(VPDef::VPBundleSC, {}, {}), BundledOps(ToBundle),
2735+
BundleType(BundleType) {
2736+
// Collect the recipes to bundle, so users inside the bundle can be told
// apart from users outside of it.
2737+
SmallPtrSet<VPUser *, 4> BundledUsers;
2738+
for (auto *R : ToBundle)
2739+
BundledUsers.insert(R);
2740+
2741+
// Recipes in the bundle, except the last one, must only be used inside the
2742+
// bundle. If there are other external users, clone the recipes for the
// bundle.
2743+
for (const auto &[Idx, R] : enumerate(drop_end(ToBundle))) {
2744+
if (all_of(R->users(), [&BundledUsers](VPUser *U) {
2745+
return BundledUsers.contains(U);
2746+
})) {
2747+
// Only used inside the bundle: detach it from its block if it is in one.
if (R->getParent())
2748+
R->removeFromParent();
2749+
continue;
2750+
}
2751+
// There are users external to the bundle. Clone the recipe for use in the
2752+
// bundle and update all its in-bundle users. The original recipe R is left
// where it is for its external users.
2753+
this->BundledOps[Idx] = R->clone();
2754+
BundledUsers.insert(this->BundledOps[Idx]);
2755+
R->replaceUsesWithIf(this->BundledOps[Idx],
2756+
[&BundledUsers](VPUser &U, unsigned) {
2757+
return BundledUsers.contains(&U);
2758+
});
2759+
}
2760+
// The result recipe is always detached. NOTE(review): unlike the loop above,
// there is no getParent() check here, so the last recipe is assumed to be
// in a block -- confirm.
BundledOps.back()->removeFromParent();
2761+
2762+
// Internalize all external operands to the bundled operations. To do so,
2763+
// create new temporary VPValues for all operands not defined by a recipe in
2764+
// the bundle. The original operands are added as operands of the
2765+
// VPBundleRecipe.
2766+
for (auto *R : this->BundledOps) {
2767+
for (const auto &[Idx, Op] : enumerate(R->operands())) {
2768+
auto *Def = Op->getDefiningRecipe();
2769+
// Operands defined by a bundled recipe stay as they are.
if (Def && BundledUsers.contains(Def))
2770+
continue;
2771+
addOperand(Op);
2772+
TmpValues.push_back(new VPValue());
2773+
R->setOperand(Idx, TmpValues.back());
2774+
}
2775+
}
2776+
}
2777+
2778+
public:
2779+
/// Bundle an extend feeding a reduction (ExtendedReduction).
VPBundleRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2780+
: VPBundleRecipe(BundleTypes::ExtendedReduction, {Ext, Red}) {}
2781+
/// Bundle a multiply feeding a reduction (MulAccumulateReduction).
VPBundleRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
2782+
: VPBundleRecipe(BundleTypes::MulAccumulateReduction, {Mul, Red}) {}
2783+
/// Bundle two extends, a multiply and a reduction (MulAccumulateReduction).
VPBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2784+
VPWidenRecipe *Mul, VPReductionRecipe *Red)
2785+
: VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2786+
{Ext0, Ext1, Mul, Red}) {}
2787+
/// Bundle two extends, a multiply, a further extend and a reduction
/// (MulAccumulateReduction).
VPBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2788+
VPWidenRecipe *Mul, VPWidenCastRecipe *Ext2,
2789+
VPReductionRecipe *Red)
2790+
: VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2791+
{Ext0, Ext1, Mul, Ext2, Red}) {}
2792+
2793+
/// Delete the bundled recipes (last to first) and the temporary operand
/// values owned by the bundle.
~VPBundleRecipe() override {
2794+
// Seen ensures each recipe is deleted only once even if it occurs several
// times in BundledOps (presumably when the same recipe is passed for more
// than one constructor argument -- confirm).
SmallPtrSet<VPRecipeBase *, 4> Seen;
2795+
for (auto *R : reverse(BundledOps))
2796+
if (Seen.insert(R).second)
2797+
delete R;
2798+
for (VPValue *T : TmpValues)
2799+
delete T;
2800+
}
2801+
2802+
VP_CLASSOF_IMPL(VPDef::VPBundleSC)
2803+
2804+
/// Create a new bundle of the same type from this bundle's recipes.
/// NOTE(review): this hands the *same* recipe pointers in BundledOps to the
/// constructor rather than clones of them, so both bundles would refer to
/// (and, in their destructors, delete) the same recipes, and the constructor
/// would re-internalize their already-temporary operands. Looks like the
/// bundled recipes need to be cloned first -- confirm.
VPBundleRecipe *clone() override {
2805+
return new VPBundleRecipe(BundleType, BundledOps);
2806+
}
2807+
2808+
/// Return the VPSingleDefRecipe producing the final result of the bundled
2809+
/// recipe.
2810+
VPSingleDefRecipe *getResultOp() const { return BundledOps.back(); }
2811+
2812+
/// Unbundle this recipe, i.e. replace it by its individual recipes. Defined
/// out of line.
void unbundle();
2813+
2814+
/// Bundles cannot be executed directly: the recipe must have been unbundled
2815+
/// (and thereby removed) before execution, so reaching this is a bug.
2816+
void execute(VPTransformState &State) override {
2817+
llvm_unreachable("recipe must be removed before execute");
2818+
}
2819+
2820+
/// Return the cost of the bundle for \p VF. Defined out of line.
InstructionCost computeCost(ElementCount VF,
2821+
VPCostContext &Ctx) const override;
2822+
2823+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824+
/// Print the recipe.
2825+
void print(raw_ostream &O, const Twine &Indent,
2826+
VPSlotTracker &SlotTracker) const override;
2827+
#endif
2828+
};
2829+
29252830
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
29262831
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
29272832
/// order to merge values that are set under such a branch and feed their uses.

0 commit comments

Comments
 (0)