Skip to content

Commit a8edb86

Browse files
committed
[LV] NFC: Make VPPartialReductionRecipe a VPReductionRecipe
1 parent 56dcd90 commit a8edb86

File tree

4 files changed

+62
-58
lines changed

4 files changed

+62
-58
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8879,17 +8879,18 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
88798879
ReductionOpcode = Instruction::Add;
88808880
}
88818881

8882+
VPValue *Cond = nullptr;
88828883
if (CM.blockNeedsPredicationForAnyReason(Reduction->getParent())) {
88838884
assert((ReductionOpcode == Instruction::Add ||
88848885
ReductionOpcode == Instruction::Sub) &&
88858886
"Expected an ADD or SUB operation for predicated partial "
88868887
"reductions (because the neutral element in the mask is zero)!");
8887-
VPValue *Mask = getBlockInMask(Reduction->getParent());
8888+
Cond = getBlockInMask(Reduction->getParent());
88888889
VPValue *Zero =
88898890
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
8890-
BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
8891+
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
88918892
}
8892-
return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
8893+
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
88938894
Reduction);
88948895
}
88958896

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,55 +2056,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
20562056
}
20572057
};
20582058

2059-
/// A recipe for forming partial reductions. In the loop, an accumulator and
2060-
/// vector operand are added together and passed to the next iteration as the
2061-
/// next accumulator. After the loop body, the accumulator is reduced to a
2062-
/// scalar value.
2063-
class VPPartialReductionRecipe : public VPSingleDefRecipe {
2064-
unsigned Opcode;
2065-
2066-
public:
2067-
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
2068-
VPValue *Op1)
2069-
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
2070-
ReductionInst) {}
2071-
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2072-
Instruction *ReductionInst = nullptr)
2073-
: VPSingleDefRecipe(VPDef::VPPartialReductionSC,
2074-
ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
2075-
Opcode(Opcode) {
2076-
[[maybe_unused]] auto *AccumulatorRecipe =
2077-
getOperand(1)->getDefiningRecipe();
2078-
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2079-
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2080-
"Unexpected operand order for partial reduction recipe");
2081-
}
2082-
~VPPartialReductionRecipe() override = default;
2083-
2084-
VPPartialReductionRecipe *clone() override {
2085-
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2086-
getUnderlyingInstr());
2087-
}
2088-
2089-
VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2090-
2091-
/// Generate the reduction in the loop.
2092-
void execute(VPTransformState &State) override;
2093-
2094-
/// Return the cost of this VPPartialReductionRecipe.
2095-
InstructionCost computeCost(ElementCount VF,
2096-
VPCostContext &Ctx) const override;
2097-
2098-
/// Get the binary op's opcode.
2099-
unsigned getOpcode() const { return Opcode; }
2100-
2101-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2102-
/// Print the recipe.
2103-
void print(raw_ostream &O, const Twine &Indent,
2104-
VPSlotTracker &SlotTracker) const override;
2105-
#endif
2106-
};
2107-
21082059
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
21092060
/// instructions.
21102061
class VPBlendRecipe : public VPSingleDefRecipe {
@@ -2376,6 +2327,58 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
23762327
}
23772328
};
23782329

2330+
/// A recipe for forming partial reductions. In the loop, an accumulator and
2331+
/// vector operand are added together and passed to the next iteration as the
2332+
/// next accumulator. After the loop body, the accumulator is reduced to a
2333+
/// scalar value.
2334+
class VPPartialReductionRecipe : public VPReductionRecipe {
2335+
unsigned Opcode;
2336+
2337+
public:
2338+
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
2339+
VPValue *Op1, VPValue *Cond)
2340+
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2341+
ReductionInst) {}
2342+
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2343+
VPValue *Cond,
2344+
Instruction *ReductionInst = nullptr)
2345+
: VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2346+
FastMathFlags(), ReductionInst,
2347+
ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2348+
Opcode(Opcode) {
2349+
[[maybe_unused]] auto *AccumulatorRecipe =
2350+
getChainOp()->getDefiningRecipe();
2351+
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2352+
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2353+
"Unexpected operand order for partial reduction recipe");
2354+
}
2355+
~VPPartialReductionRecipe() override = default;
2356+
2357+
VPPartialReductionRecipe *clone() override {
2358+
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2359+
getCondOp(),
2360+
getUnderlyingInstr());
2361+
}
2362+
2363+
VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2364+
2365+
/// Generate the reduction in the loop.
2366+
void execute(VPTransformState &State) override;
2367+
2368+
/// Return the cost of this VPPartialReductionRecipe.
2369+
InstructionCost computeCost(ElementCount VF,
2370+
VPCostContext &Ctx) const override;
2371+
2372+
/// Get the binary op's opcode.
2373+
unsigned getOpcode() const { return Opcode; }
2374+
2375+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2376+
/// Print the recipe.
2377+
void print(raw_ostream &O, const Twine &Indent,
2378+
VPSlotTracker &SlotTracker) const override;
2379+
#endif
2380+
};
2381+
23792382
/// A recipe to represent inloop reduction operations with vector-predication
23802383
/// intrinsics, performing a reduction on a vector operand with the explicit
23812384
/// vector length (EVL) into a scalar value, and adding the result to a chain.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,12 +287,12 @@ InstructionCost
287287
VPPartialReductionRecipe::computeCost(ElementCount VF,
288288
VPCostContext &Ctx) const {
289289
std::optional<unsigned> Opcode = std::nullopt;
290-
VPValue *BinOp = getOperand(0);
290+
VPValue *BinOp = getOperand(1);
291291

292292
// If the partial reduction is predicated, a select will be operand 1 rather
293293
// than the binary op
294294
using namespace llvm::VPlanPatternMatch;
295-
if (match(getOperand(0), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
295+
if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
296296
BinOp = BinOp->getDefiningRecipe()->getOperand(1);
297297

298298
// If BinOp is a negation, use the side effect of match to assign the actual
@@ -338,8 +338,8 @@ void VPPartialReductionRecipe::execute(VPTransformState &State) {
338338
assert(getOpcode() == Instruction::Add &&
339339
"Unhandled partial reduction opcode");
340340

341-
Value *BinOpVal = State.get(getOperand(0));
342-
Value *PhiVal = State.get(getOperand(1));
341+
Value *BinOpVal = State.get(getOperand(1));
342+
Value *PhiVal = State.get(getOperand(0));
343343
assert(PhiVal && BinOpVal && "Phi and Mul must be set");
344344

345345
Type *RetTy = PhiVal->getType();

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
3232
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
3333
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
3434
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
35-
; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<%mul>, ir<[[ACC]]>
35+
; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<[[ACC]]>, ir<%mul>
3636
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
3737
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
3838
; CHECK-NEXT: No successors
@@ -98,7 +98,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
9898
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
9999
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
100100
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
101-
; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%mul>, ir<%accum>
101+
; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
102102
; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
103103
; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
104104
; CHECK-NEXT: No successors

0 commit comments

Comments
 (0)