
Commit 3857530

[LV] Use getFixedValue instead of getKnownMinValue when appropriate
There are many places in VPlan and LoopVectorize where we use getKnownMinValue to discover the number of elements in a vector. Where we expect the vector to have a fixed length, I have used the stronger getFixedValue call. I believe this is clearer and adds extra protection in the form of an assert in getFixedValue that the vector is not scalable.

While looking at VPFirstOrderRecurrencePHIRecipe::computeCost I also took the liberty of simplifying the code.

In theory I believe this patch should be NFC, but I'm reluctant to add that to the title in case we're just missing tests. I built and ran the LLVM test suite when targeting neoverse-v1 and it seemed ok.
1 parent b979311 commit 3857530
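
As background (not part of the commit message): getKnownMinValue returns the minimum element count for both fixed and scalable vectors, whereas getFixedValue asserts that the vector is not scalable before returning the count. A minimal sketch of the distinction, using only the ElementCount API from llvm/Support/TypeSize.h:

#include "llvm/Support/TypeSize.h"

// Illustration only; none of this code is from the patch.
void elementCountExample() {
  llvm::ElementCount FixedVF = llvm::ElementCount::getFixed(4);
  (void)FixedVF.getKnownMinValue(); // 4
  (void)FixedVF.getFixedValue();    // 4; fine, the count is fixed

  llvm::ElementCount ScalableVF = llvm::ElementCount::getScalable(4);
  (void)ScalableVF.getKnownMinValue(); // 4, but the real count is vscale * 4
  // ScalableVF.getFixedValue() would trip an assertion because the count is
  // not fixed, which is the extra protection this patch wants in fixed-VF paths.
}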

File tree

3 files changed: +30, -29 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 15 deletions
@@ -3107,12 +3107,13 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
     // that we will create. This cost is likely to be zero. The phi node
     // cost, if any, should be scaled by the block probability because it
     // models a copy at the end of each predicated block.
-    ScalarizationCost += VF.getKnownMinValue() *
-      TTI.getCFInstrCost(Instruction::PHI, CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
 
     // The cost of the non-predicated instruction.
-    ScalarizationCost += VF.getKnownMinValue() *
-      TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() *
+        TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
 
     // The cost of insertelement and extractelement instructions needed for
     // scalarization.
@@ -4280,7 +4281,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       return NumLegalParts <= VF.getKnownMinValue();
     }
     // Two or more elements that share a register - are vectorized.
-    return NumLegalParts < VF.getKnownMinValue();
+    return NumLegalParts < VF.getFixedValue();
   };
 
   // If no def nor is a store, e.g., branches, continue - no value to check.
@@ -4565,8 +4566,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
       assert(!isa<SCEVCouldNotCompute>(TC) &&
             "Trip count SCEV must be computable");
       RemainingIterations = SE.getURemExpr(
-          TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
-      MaxTripCount = MainLoopVF.getKnownMinValue() * IC - 1;
+          TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
+      MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
       if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
                               SE.getConstant(TCType, MaxTripCount))) {
         MaxTripCount =
@@ -4577,7 +4578,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
       }
       if (SE.isKnownPredicate(
               CmpInst::ICMP_UGT,
-              SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
+              SE.getConstant(TCType, NextVF.Width.getFixedValue()),
              RemainingIterations))
         continue;
     }
@@ -5248,14 +5249,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Get the cost of the scalar memory instruction and address computation.
   InstructionCost Cost =
-      VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+      VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
   const Align Alignment = getLoadStoreAlignment(I);
-  Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
-                                                      ValTy->getScalarType(),
-                                                      Alignment, AS, CostKind);
+  Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
+                                                   ValTy->getScalarType(),
+                                                   Alignment, AS, CostKind);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5271,7 +5272,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
     auto *VecI1Ty =
         VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
     Cost += TTI.getScalarizationOverhead(
-        VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+        VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/false, /*Extract=*/true, CostKind);
     Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
 
@@ -5317,7 +5318,6 @@ InstructionCost
 LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                 ElementCount VF) {
   assert(Legal->isUniformMemOp(*I, VF));
-
   Type *ValTy = getLoadStoreType(I);
   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
   const Align Alignment = getLoadStoreAlignment(I);
@@ -5332,6 +5332,10 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   StoreInst *SI = cast<StoreInst>(I);
 
   bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+  // TODO: We have tests that request the cost of extracting element
+  // VF.getKnownMinValue() - 1 from a scalable vector. This is actually
+  // meaningless, given what we actually want is the last lane and is likely
+  // to be more expensive.
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -5614,7 +5618,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
 
   for (Type *VectorTy : getContainedTypes(RetTy)) {
     Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/true,
         /*Extract=*/false, CostKind);
   }
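
The TODO added in getUniformMemOpCost above points at a real wrinkle: for a scalable vector the last lane is vscale * getKnownMinValue() - 1, which is only known at run time, so costing an extract of lane getKnownMinValue() - 1 does not model the lane the code actually wants. A hedged sketch of that distinction (the helper below is hypothetical, not part of the patch):

#include "llvm/Support/TypeSize.h"
#include <optional>

// Returns the last-lane index only when it is a compile-time constant, i.e.
// for fixed-width VFs; for scalable VFs the index is a runtime value.
std::optional<unsigned> lastLaneIndex(llvm::ElementCount VF) {
  if (VF.isScalable())
    return std::nullopt; // last lane is vscale * getKnownMinValue() - 1
  return VF.getFixedValue() - 1;
}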

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 4 deletions
@@ -331,7 +331,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 
   bool IsSingleScalar = vputils::isSingleScalar(Def);
 
-  VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
+  VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
   // Check if there is a scalar value for the selected lane.
   if (!hasScalarValue(Def, LastLane)) {
     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
@@ -368,7 +368,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
     Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
     set(Def, Undef);
-    for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+    for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
       packScalarIntoVectorizedValue(Def, Lane);
     VectorValue = get(Def);
   }
@@ -789,8 +789,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       Entry);
   State->Lane = VPLane(0);
-  for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
-       ++Lane) {
+  for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
     State->Lane = VPLane(Lane, VPLane::Kind::First);
     // Visit the VPBlocks connected to \p this, starting from it.
     for (VPBlockBase *Block : RPOT) {
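
A side note on the per-lane loops changed above (sketch only, not from the patch): iterating lanes at compile time is only possible when the VF is fixed, which is exactly the invariant getFixedValue() now asserts. The hypothetical helper below captures the pattern:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/TypeSize.h"

// Runs Fn once per lane; only meaningful for a fixed VF, since a scalable VF
// has no compile-time lane count to enumerate (getFixedValue asserts this).
void forEachLane(llvm::ElementCount VF, llvm::function_ref<void(unsigned)> Fn) {
  for (unsigned Lane = 0, End = VF.getFixedValue(); Lane != End; ++Lane)
    Fn(Lane);
}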

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 10 deletions
@@ -872,7 +872,7 @@ void VPInstruction::execute(VPTransformState &State) {
                                    isVectorToScalar() || isSingleScalar());
   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
   if (GeneratesPerAllLanes) {
-    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
+    for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
          Lane != NumLanes; ++Lane) {
       Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
       assert(GeneratedValue && "generatePerLane must produce a value");
@@ -2792,8 +2792,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }
 
   // Generate scalar instances for all VF lanes.
-  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-  const unsigned EndLane = State.VF.getKnownMinValue();
+  const unsigned EndLane = State.VF.getFixedValue();
   for (unsigned Lane = 0; Lane < EndLane; ++Lane)
     scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
@@ -2846,7 +2845,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                UI->getOpcode(), ResultTy, CostKind,
                {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
                Op2Info, Operands, UI, &Ctx.TLI) *
-           (isSingleScalar() ? 1 : VF.getKnownMinValue());
+           (isSingleScalar() ? 1 : VF.getFixedValue());
   }
   }
 
@@ -3399,7 +3398,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     Value *ResBlockInMask = State.get(BlockInMask);
     Value *ShuffledMask = State.Builder.CreateShuffleVector(
         ResBlockInMask,
-        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
+        createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
         "interleaved.mask");
     return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                    ShuffledMask, MaskForGaps)
@@ -3411,8 +3410,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   if (isa<LoadInst>(Instr)) {
     Value *MaskForGaps = nullptr;
     if (NeedsMaskForGaps) {
-      MaskForGaps = createBitMaskForGaps(State.Builder,
-                                         State.VF.getKnownMinValue(), *Group);
+      MaskForGaps =
+          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
       assert(MaskForGaps && "Mask for Gaps is required but it is null");
     }
 
@@ -3475,13 +3474,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
         continue;
 
       auto StrideMask =
-          createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
+          createStrideMask(I, InterleaveFactor, State.VF.getFixedValue());
       Value *StridedVec =
          State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
 
      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
-       assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
@@ -3817,7 +3815,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
-  if (VF.isScalable() && VF.getKnownMinValue() == 1)
+  if (VF == ElementCount::getScalable(1))
     return InstructionCost::getInvalid();
 
   return 0;
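
For the VPFirstOrderRecurrencePHIRecipe::computeCost change above, the new comparison is equivalent to the old two-part check because ElementCount equality compares both the scalable flag and the minimum element count. A small sketch (not from the patch) of the equivalence:

#include "llvm/Support/TypeSize.h"

// True exactly when VF is <vscale x 1 x ...>, i.e. the same condition as the
// old check VF.isScalable() && VF.getKnownMinValue() == 1.
bool isScalableVFOfOne(llvm::ElementCount VF) {
  return VF == llvm::ElementCount::getScalable(1);
}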
