Skip to content

Commit ac1a2dc

Browse files
committed
!fixup deep-clone whole bundle
1 parent 741ba42 commit ac1a2dc

File tree

3 files changed

+108
-76
lines changed

3 files changed

+108
-76
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 33 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2697,15 +2697,18 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
26972697
/// A recipe to combine multiple recipes into a 'bundle' recipe, which should be
26982698
/// considered as single entity for cost-modeling and transforms. The recipe
26992699
/// needs to be 'unbundled', i.e. replaced by its individual recipes before
2700-
/// execute.
2700+
/// execute. The bundled recipes are completely disconnected from the def-use graph
2701+
/// outside the bundled recipes. Operands not defined by recipes in the bundle
2702+
/// are added as operands of the VPBundleRecipe and the users of the result
2703+
/// recipe must be updated to use the VPBundleRecipe.
27012704
class VPBundleRecipe : public VPSingleDefRecipe {
27022705
enum class BundleTypes {
27032706
ExtendedReduction,
27042707
MulAccumulateReduction,
27052708
};
27062709

27072710
/// Recipes bundled together in this VPBundleRecipe.
2708-
SmallVector<VPSingleDefRecipe *> BundledOps;
2711+
SmallVector<VPSingleDefRecipe *> BundledRecipes;
27092712

27102713
/// Temporary VPValues used for external operands of the bundle, i.e. operands
27112714
/// not defined by recipes in the bundle.
@@ -2714,69 +2717,39 @@ class VPBundleRecipe : public VPSingleDefRecipe {
27142717
/// Type of the bundle.
27152718
BundleTypes BundleType;
27162719

2717-
VPBundleRecipe(BundleTypes BundleType, ArrayRef<VPSingleDefRecipe *> ToBundle)
2718-
: VPSingleDefRecipe(VPDef::VPBundleSC, {}, {}), BundledOps(ToBundle),
2720+
VPBundleRecipe(BundleTypes BundleType, ArrayRef<VPSingleDefRecipe *> ToBundle,
2721+
ArrayRef<VPValue *> Operands)
2722+
: VPSingleDefRecipe(VPDef::VPBundleSC, {}, {}), BundledRecipes(ToBundle),
27192723
BundleType(BundleType) {
2720-
// Bundle up the operand recipes.
2721-
SmallPtrSet<VPUser *, 4> BundledUsers;
2722-
for (auto *R : BundledOps)
2723-
BundledUsers.insert(R);
2724-
2725-
// Recipes in the bundle, except the last one, must only be used inside the
2726-
// bundle. If there other external users, clone the recipes for the bundle.
2727-
for (const auto &[Idx, R] : enumerate(drop_end(ToBundle))) {
2728-
if (all_of(R->users(), [&BundledUsers](VPUser *U) {
2729-
return BundledUsers.contains(U);
2730-
})) {
2731-
if (R->getParent())
2732-
R->removeFromParent();
2733-
continue;
2734-
}
2735-
// The users external to the bundle. Clone the recipe for use in the
2736-
// bundle and update all its in-bundle users.
2737-
VPSingleDefRecipe *Copy = R->clone();
2738-
BundledOps[Idx] = Copy;
2739-
BundledUsers.insert(Copy);
2740-
R->replaceUsesWithIf(Copy, [&BundledUsers](VPUser &U, unsigned) {
2741-
return BundledUsers.contains(&U);
2742-
});
2743-
}
2744-
BundledOps.back()->removeFromParent();
2745-
2746-
// Internalize all external operands to the bundled operations. To do so,
2747-
// create new temporary VPValues for all operands not defined by recipe in
2748-
// the bundle. The original operands are added as operands of the
2749-
// VPBundleRecipe.
2750-
for (auto *R : BundledOps) {
2751-
for (const auto &[Idx, Op] : enumerate(R->operands())) {
2752-
auto *Def = Op->getDefiningRecipe();
2753-
if (Def && BundledUsers.contains(Def))
2754-
continue;
2755-
addOperand(Op);
2756-
TmpValues.push_back(new VPValue());
2757-
R->setOperand(Idx, TmpValues.back());
2758-
}
2759-
}
2724+
bundle(Operands);
27602725
}
27612726

2727+
/// Internalize recipes in BundledRecipes. External operands (i.e. not defined
2728+
/// by another recipe in the bundle) are replaced by temporary VPValues and
2729+
/// the original operands are transferred to the VPBundleRecipe itself. Clone
2730+
/// recipes as needed to ensure they are only used by other recipes in the
2731+
/// bundle. If \p Operands is not empty, use it as operands for the new
2732+
/// VPBundleRecipe (used when cloning the recipe).
2733+
void bundle(ArrayRef<VPValue *> Operands);
2734+
27622735
public:
27632736
VPBundleRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2764-
: VPBundleRecipe(BundleTypes::ExtendedReduction, {Ext, Red}) {}
2737+
: VPBundleRecipe(BundleTypes::ExtendedReduction, {Ext, Red}, {}) {}
27652738
VPBundleRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
2766-
: VPBundleRecipe(BundleTypes::MulAccumulateReduction, {Mul, Red}) {}
2739+
: VPBundleRecipe(BundleTypes::MulAccumulateReduction, {Mul, Red}, {}) {}
27672740
VPBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
27682741
VPWidenRecipe *Mul, VPReductionRecipe *Red)
27692742
: VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2770-
{Ext0, Ext1, Mul, Red}) {}
2743+
{Ext0, Ext1, Mul, Red}, {}) {}
27712744
VPBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
27722745
VPWidenRecipe *Mul, VPWidenCastRecipe *Ext2,
27732746
VPReductionRecipe *Red)
27742747
: VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2775-
{Ext0, Ext1, Mul, Ext2, Red}) {}
2748+
{Ext0, Ext1, Mul, Ext2, Red}, {}) {}
27762749

27772750
~VPBundleRecipe() override {
27782751
SmallPtrSet<VPRecipeBase *, 4> Seen;
2779-
for (auto *R : reverse(BundledOps))
2752+
for (auto *R : reverse(BundledRecipes))
27802753
if (Seen.insert(R).second)
27812754
delete R;
27822755
for (VPValue *T : TmpValues)
@@ -2786,13 +2759,21 @@ class VPBundleRecipe : public VPSingleDefRecipe {
27862759
VP_CLASSOF_IMPL(VPDef::VPBundleSC)
27872760

27882761
VPBundleRecipe *clone() override {
2789-
assert(!BundledOps.empty() && "empty bundles should be removed");
2790-
return new VPBundleRecipe(BundleType, BundledOps);
2762+
assert(!BundledRecipes.empty() && "empty bundles should be removed");
2763+
SmallVector<VPSingleDefRecipe *> NewBundledRecipes;
2764+
for (auto *R : BundledRecipes)
2765+
NewBundledRecipes.push_back(R->clone());
2766+
for (auto *New : NewBundledRecipes) {
2767+
for (const auto &[Idx, Old] : enumerate(BundledRecipes)) {
2768+
New->replaceUsesOfWith(Old, NewBundledRecipes[Idx]);
2769+
}
2770+
}
2771+
return new VPBundleRecipe(BundleType, NewBundledRecipes, operands());
27912772
}
27922773

27932774
/// Return the VPSingleDefRecipe producing the final result of the bundled
27942775
/// recipe.
2795-
VPSingleDefRecipe *getResultOp() const { return BundledOps.back(); }
2776+
VPSingleDefRecipe *getResultRecipe() const { return BundledRecipes.back(); }
27962777

27972778
/// Insert the bundled recipes back into the VPlan, directly before the
27982779
/// current recipe. Leaves the bundle recipe empty and the recipe must be

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,9 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
269269
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
270270
.Case<VPBundleRecipe>([this](const auto *R) {
271271
unsigned RdxOpIdxOffset =
272-
cast<VPReductionRecipe>(R->getResultOp())->isConditional() ? 2
273-
: 1;
272+
cast<VPReductionRecipe>(R->getResultRecipe())->isConditional()
273+
? 2
274+
: 1;
274275
return inferScalarType(
275276
R->getOperand(R->getNumOperands() - RdxOpIdxOffset));
276277
})

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2440,24 +2440,74 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
24402440
Ctx.CostKind);
24412441
}
24422442

2443+
void VPBundleRecipe::bundle(ArrayRef<VPValue *> Operands) {
2444+
assert(!BundledRecipes.empty() && "Nothing to bundle?");
2445+
2446+
// Bundle up the operand recipes.
2447+
SmallPtrSet<VPUser *, 4> BundledUsers;
2448+
for (auto *R : BundledRecipes)
2449+
BundledUsers.insert(R);
2450+
2451+
// Recipes in the bundle, except the last one, must only be used inside the
2452+
// bundle. If there are other external users, clone the recipes for the bundle.
2453+
for (unsigned Idx = 0; Idx != BundledRecipes.size() - 1; ++Idx) {
2454+
VPSingleDefRecipe *R = BundledRecipes[Idx];
2455+
if (all_of(R->users(), [&BundledUsers](VPUser *U) {
2456+
return BundledUsers.contains(U);
2457+
})) {
2458+
if (R->getParent())
2459+
R->removeFromParent();
2460+
continue;
2461+
}
2462+
// There are users external to the bundle. Clone the recipe for use in the
2463+
// bundle and update all its in-bundle users.
2464+
VPSingleDefRecipe *Copy = R->clone();
2465+
BundledRecipes[Idx] = Copy;
2466+
BundledUsers.insert(Copy);
2467+
R->replaceUsesWithIf(Copy, [&BundledUsers](VPUser &U, unsigned) {
2468+
return BundledUsers.contains(&U);
2469+
});
2470+
}
2471+
if (BundledRecipes.back()->getParent())
2472+
BundledRecipes.back()->removeFromParent();
2473+
2474+
// Internalize all external operands to the bundled operations. To do so,
2475+
// create new temporary VPValues for all operands not defined by recipes in
2476+
// the bundle. The original operands are added as operands of the
2477+
// VPBundleRecipe.
2478+
for (auto *R : BundledRecipes) {
2479+
for (const auto &[Idx, Op] : enumerate(R->operands())) {
2480+
auto *Def = Op->getDefiningRecipe();
2481+
if (Def && BundledUsers.contains(Def))
2482+
continue;
2483+
if (Operands.empty())
2484+
addOperand(Op);
2485+
else
2486+
addOperand(Operands[TmpValues.size()]);
2487+
TmpValues.push_back(new VPValue());
2488+
R->setOperand(Idx, TmpValues.back());
2489+
}
2490+
}
2491+
}
2492+
24432493
void VPBundleRecipe::unbundle() {
2444-
for (auto *Op : BundledOps)
2445-
if (!Op->getParent())
2446-
Op->insertBefore(this);
2494+
for (auto *R : BundledRecipes)
2495+
if (!R->getParent())
2496+
R->insertBefore(this);
24472497

24482498
for (const auto &[Idx, Op] : enumerate(operands()))
24492499
TmpValues[Idx]->replaceAllUsesWith(Op);
24502500

2451-
replaceAllUsesWith(getResultOp());
2501+
replaceAllUsesWith(getResultRecipe());
24522502

24532503
if (BundleType == BundleTypes::MulAccumulateReduction &&
2454-
BundledOps.size() == 5) {
2504+
BundledRecipes.size() == 5) {
24552505
// Note that we will drop the extend after mul which transforms
24562506
// reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
24572507
// TODO: This transform should be done separately from bundling/unbundling.
2458-
auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps[0]);
2459-
auto *Ext1 = cast<VPWidenCastRecipe>(BundledOps[1]);
2460-
auto *Ext2 = cast<VPWidenCastRecipe>(BundledOps[3]);
2508+
auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes[0]);
2509+
auto *Ext1 = cast<VPWidenCastRecipe>(BundledRecipes[1]);
2510+
auto *Ext2 = cast<VPWidenCastRecipe>(BundledRecipes[3]);
24612511
auto *Op0 =
24622512
new VPWidenCastRecipe(Ext0->getOpcode(), Ext0->getOperand(0),
24632513
Ext2->getResultType(), *Ext0, getDebugLoc());
@@ -2469,8 +2519,8 @@ void VPBundleRecipe::unbundle() {
24692519
Ext2->getResultType(), *Ext1, getDebugLoc());
24702520
Op1->insertBefore(Ext1);
24712521
}
2472-
auto *Mul = cast<VPWidenRecipe>(BundledOps[2]);
2473-
auto *Red = cast<VPReductionRecipe>(BundledOps[4]);
2522+
auto *Mul = cast<VPWidenRecipe>(BundledRecipes[2]);
2523+
auto *Red = cast<VPReductionRecipe>(BundledRecipes[4]);
24742524
Mul->setOperand(0, Op0);
24752525
Mul->setOperand(1, Op1);
24762526
Red->setOperand(1, Mul);
@@ -2479,7 +2529,7 @@ void VPBundleRecipe::unbundle() {
24792529
if (Ext0 != Ext1)
24802530
Ext1->eraseFromParent();
24812531
}
2482-
BundledOps.clear();
2532+
BundledRecipes.clear();
24832533
}
24842534

24852535
InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
@@ -2492,17 +2542,17 @@ InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
24922542
switch (BundleType) {
24932543
case BundleTypes::ExtendedReduction: {
24942544
unsigned Opcode = RecurrenceDescriptor::getOpcode(
2495-
cast<VPReductionRecipe>(BundledOps[1])->getRecurrenceKind());
2545+
cast<VPReductionRecipe>(BundledRecipes[1])->getRecurrenceKind());
24962546
return Ctx.TTI.getExtendedReductionCost(
24972547
Opcode,
2498-
cast<VPWidenCastRecipe>(BundledOps.front())->getOpcode() ==
2548+
cast<VPWidenCastRecipe>(BundledRecipes.front())->getOpcode() ==
24992549
Instruction::ZExt,
25002550
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
25012551
}
25022552
case BundleTypes::MulAccumulateReduction:
25032553
return Ctx.TTI.getMulAccReductionCost(
2504-
BundledOps.size() > 2
2505-
? cast<VPWidenCastRecipe>(BundledOps.front())->getOpcode() ==
2554+
BundledRecipes.size() > 2
2555+
? cast<VPWidenCastRecipe>(BundledRecipes.front())->getOpcode() ==
25062556
Instruction::ZExt
25072557
: false,
25082558
RedTy, SrcVecTy, Ctx.CostKind);
@@ -2516,7 +2566,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25162566
O << Indent << "BUNDLE ";
25172567
printAsOperand(O, SlotTracker);
25182568
O << " = ";
2519-
auto *Red = cast<VPReductionRecipe>(BundledOps.back());
2569+
auto *Red = cast<VPReductionRecipe>(BundledRecipes.back());
25202570
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
25212571

25222572
switch (BundleType) {
@@ -2527,7 +2577,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25272577
getOperand(0)->printAsOperand(O, SlotTracker);
25282578
Red->printFlags(O);
25292579

2530-
auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps[0]);
2580+
auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes[0]);
25312581
O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
25322582
<< *Ext0->getResultType();
25332583
if (Red->isConditional()) {
@@ -2545,16 +2595,16 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25452595
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
25462596
<< " (";
25472597
O << "mul";
2548-
auto *Mul = cast<VPWidenRecipe>(BundledOps.size() == 2 ? BundledOps[0]
2549-
: BundledOps[2]);
2598+
auto *Mul = cast<VPWidenRecipe>(
2599+
BundledRecipes.size() == 2 ? BundledRecipes[0] : BundledRecipes[2]);
25502600
Mul->printFlags(O);
2551-
bool IsExtended = BundledOps.size() > 2;
2601+
bool IsExtended = BundledRecipes.size() > 2;
25522602
if (IsExtended)
25532603
O << "(";
25542604
getOperand(0)->printAsOperand(O, SlotTracker);
25552605
if (IsExtended) {
25562606
auto *Ext0 = cast<VPWidenCastRecipe>(
2557-
BundledOps.size() == 5 ? BundledOps[3] : BundledOps[0]);
2607+
BundledRecipes.size() == 5 ? BundledRecipes[3] : BundledRecipes[0]);
25582608
O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
25592609
<< *Ext0->getResultType() << "), (";
25602610
} else {
@@ -2563,7 +2613,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25632613
getOperand(1)->printAsOperand(O, SlotTracker);
25642614
if (IsExtended) {
25652615
auto *Ext1 = cast<VPWidenCastRecipe>(
2566-
BundledOps.size() == 5 ? BundledOps[3] : BundledOps[1]);
2616+
BundledRecipes.size() == 5 ? BundledRecipes[3] : BundledRecipes[1]);
25672617
O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
25682618
<< *Ext1->getResultType() << ")";
25692619
}

0 commit comments

Comments
 (0)