Skip to content

Commit 1939d44

Browse files
committed
Update after merging main.
* Use Ctx.CostKind.
* ToVectorTy -> toVectorTy.
* Add bc.resume.val into scalar.ph.
1 parent 602a5e4 commit 1939d44

File tree

6 files changed

+27
-19
lines changed

6 files changed

+27
-19
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9364,7 +9364,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93649364
// TODO: Enable following transform when the EVL-version of extended-reduction
93659365
// and mulacc-reduction are implemented.
93669366
if (!CM.foldTailWithEVL()) {
9367-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM);
9367+
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
9368+
CM.CostKind);
93689369
VPlanTransforms::convertToAbstractRecipes(*Plan, CostCtx, Range);
93699370
}
93709371

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2752,7 +2752,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
27522752
bool IsOrdered, DebugLoc DL = {})
27532753
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
27542754
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2755-
IsOrdered, DL) {}
2755+
IsOrdered) {}
27562756

27572757
VPReductionRecipe(const RecurrenceDescriptor &R, VPValue *ChainOp,
27582758
VPValue *VecOp, VPValue *CondOp, bool IsOrdered,
@@ -2765,8 +2765,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
27652765

27662766
VPReductionRecipe *clone() override {
27672767
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2768-
getVecOp(), getCondOp(), IsOrdered,
2769-
getDebugLoc());
2768+
getVecOp(), getCondOp(), IsOrdered);
27702769
}
27712770

27722771
static inline bool classof(const VPRecipeBase *R) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2282,12 +2282,12 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
22822282
// and the binOp cost in the getReductionCost().
22832283
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
22842284
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
2285-
return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy,
2286-
RdxDesc.getFastMathFlags(), CostKind);
2285+
return Ctx.TTI.getMinMaxReductionCost(
2286+
Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
22872287
}
22882288

22892289
return Ctx.TTI.getArithmeticReductionCost(
2290-
Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2290+
Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
22912291
}
22922292

22932293
InstructionCost
@@ -2297,22 +2297,22 @@ VPExtendedReductionRecipe::computeCost(ElementCount VF,
22972297
unsigned Opcode = RdxDesc.getOpcode();
22982298
Type *RedTy = Ctx.Types.inferScalarType(this);
22992299
auto *SrcVecTy =
2300-
cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
2301-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2300+
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
23022301

23032302
return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy,
2304-
RdxDesc.getFastMathFlags(), CostKind);
2303+
RdxDesc.getFastMathFlags(),
2304+
Ctx.CostKind);
23052305
}
23062306

23072307
InstructionCost
23082308
VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
23092309
VPCostContext &Ctx) const {
23102310
Type *RedTy = Ctx.Types.inferScalarType(this);
23112311
auto *SrcVecTy =
2312-
cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
2313-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2312+
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
23142313

2315-
return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, CostKind);
2314+
return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy,
2315+
Ctx.CostKind);
23162316
}
23172317

23182318
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2235,7 +2235,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
22352235
Type *SrcTy) -> bool {
22362236
return LoopVectorizationPlanner::getDecisionAndClampRange(
22372237
[&](ElementCount VF) {
2238-
auto *SrcVecTy = cast<VectorType>(ToVectorTy(SrcTy, VF));
2238+
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
22392239
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
22402240
InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
22412241
Opcode, isZExt, RedTy, SrcVecTy, RdxDesc.getFastMathFlags(),
@@ -2285,7 +2285,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
22852285
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
22862286
Type *SrcTy =
22872287
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
2288-
auto *SrcVecTy = cast<VectorType>(ToVectorTy(SrcTy, VF));
2288+
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
22892289
InstructionCost MulAccCost =
22902290
Ctx.TTI.getMulAccReductionCost(isZExt, RedTy, SrcVecTy, CostKind);
22912291
InstructionCost MulCost = Mul->computeCost(VF, Ctx);

llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1692,8 +1692,8 @@ define i64 @test_std_q31(ptr %x, i32 %n) #0 {
16921692
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
16931693
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
16941694
; CHECK: scalar.ph:
1695-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
16961695
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1696+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
16971697
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
16981698
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
16991699
; CHECK: for.cond.cleanup:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1208,6 +1208,11 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
12081208
; CHECK-NEXT: Live-in ir<%n> = original trip-count
12091209
; CHECK-EMPTY:
12101210
; CHECK-NEXT: ir-bb<for.body.preheader>:
1211+
; CHECK-NEXT: Successor(s): vector.ph
1212+
; CHECK-EMPTY:
1213+
; CHECK-NEXT: vector.ph:
1214+
; CHECK-NEXT: Successor(s): vector loop
1215+
; CHECK-EMPTY:
12111216
; CHECK-NEXT: <x1> vector loop: {
12121217
; CHECK-NEXT: vector.body:
12131218
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
@@ -1231,11 +1236,12 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
12311236
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
12321237
; CHECK-EMPTY:
12331238
; CHECK-NEXT: scalar.ph:
1239+
; CHECK-NEXT: EMIT vp<%bc.resume.val> = resume-phi vp<%1>, ir<0>
12341240
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%7>, ir<0>
12351241
; CHECK-NEXT: Successor(s): ir-bb<for.body>
12361242
; CHECK-EMPTY:
12371243
; CHECK-NEXT: ir-bb<for.body>:
1238-
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1244+
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.resume.val> from scalar.ph)
12391245
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
12401246
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
12411247
; CHECK-NEXT: IR %load0 = load i32, ptr %arrayidx, align 4
@@ -1309,11 +1315,12 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i
13091315
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
13101316
; CHECK-EMPTY:
13111317
; CHECK-NEXT: scalar.ph:
1318+
; CHECK-NEXT: EMIT vp<%bc.resume.val> = resume-phi vp<%1>, ir<0>
13121319
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%8>, ir<0>
13131320
; CHECK-NEXT: Successor(s): ir-bb<for.body>
13141321
; CHECK-EMPTY:
13151322
; CHECK-NEXT: ir-bb<for.body>:
1316-
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1323+
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.resume.val> from scalar.ph)
13171324
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
13181325
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.010
13191326
; CHECK-NEXT: IR %load0 = load i64, ptr %arrayidx, align 4
@@ -1391,11 +1398,12 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado
13911398
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
13921399
; CHECK-EMPTY:
13931400
; CHECK-NEXT: scalar.ph:
1401+
; CHECK-NEXT: EMIT vp<%bc.resume.val> = resume-phi vp<%1>, ir<0>
13941402
; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<%8>, ir<0>
13951403
; CHECK-NEXT: Successor(s): ir-bb<for.body>
13961404
; CHECK-EMPTY:
13971405
; CHECK-NEXT: ir-bb<for.body>:
1398-
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1406+
; CHECK-NEXT: IR %i.010 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.resume.val> from scalar.ph)
13991407
; CHECK-NEXT: IR %r.09 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
14001408
; CHECK-NEXT: IR %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.010
14011409
; CHECK-NEXT: IR %load0 = load i16, ptr %arrayidx, align 4

0 commit comments

Comments
 (0)