Skip to content

Commit 9861ca0

Browse files
committed
Revert "[COST]Improve cost model for shuffles in SLP."
This reverts commit 29a470e to fix a crash reported in https://reviews.llvm.org/D100486#3479989.
1 parent d03bc24 commit 9861ca0

File tree

8 files changed

+151
-213
lines changed

8 files changed

+151
-213
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
413413
void processShuffleMasks(
414414
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
415415
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
416-
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
416+
function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
417417
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
418418

419419
/// Compute a map of integer instructions to their minimum legal type

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
499499
void llvm::processShuffleMasks(
500500
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
501501
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
502-
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
502+
function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
503503
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
504504
SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
505505
// Try to perform better estimation of the permutation.
@@ -543,7 +543,7 @@ void llvm::processShuffleMasks(
543543
auto *It =
544544
find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
545545
unsigned SrcReg = std::distance(Dest.begin(), It);
546-
SingleInputAction(*It, SrcReg, I);
546+
SingleInputAction(*It, SrcReg);
547547
break;
548548
}
549549
default: {

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2523,7 +2523,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
25232523
/*NumOfUsedRegs=*/1,
25242524
[&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
25252525
[&Output, &DAG = DAG, NewVT, &DL, &Inputs,
2526-
&BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
2526+
&BuildVector](ArrayRef<int> Mask, unsigned Idx) {
25272527
if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
25282528
Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
25292529
else

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,57 +1224,6 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
12241224
auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
12251225
LegalVT.getVectorNumElements());
12261226

1227-
if (!Mask.empty() && NumOfDests.isValid()) {
1228-
// Try to perform better estimation of the permutation.
1229-
// 1. Split the source/destination vectors into real registers.
1230-
// 2. Do the mask analysis to identify which real registers are
1231-
// permuted. If more than 1 source registers are used for the
1232-
// destination register building, the cost for this destination register
1233-
// is (Number_of_source_register - 1) * Cost_PermuteTwoSrc. If only one
1234-
// source register is used, build mask and calculate the cost as a cost
1235-
// of PermuteSingleSrc.
1236-
// Also, for the single register permute we try to identify if the
1237-
// destination register is just a copy of the source register or the
1238-
// copy of the previous destination register (the cost is
1239-
// TTI::TCC_Basic). If the source register is just reused, the cost for
1240-
// this operation is 0.
1241-
unsigned E = *NumOfDests.getValue();
1242-
unsigned PrevSrcReg = 0;
1243-
ArrayRef<int> PrevRegMask;
1244-
InstructionCost Cost = 0;
1245-
processShuffleMasks(
1246-
Mask, NumOfSrcs, E, E, []() {},
1247-
[this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
1248-
&Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
1249-
if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
1250-
// Check if the previous register can be just copied to the next
1251-
// one.
1252-
if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
1253-
PrevRegMask != RegMask)
1254-
Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy,
1255-
RegMask, 0, nullptr);
1256-
else
1257-
// Just a copy of previous destination register.
1258-
Cost += TTI::TCC_Basic;
1259-
return;
1260-
}
1261-
if (SrcReg != DestReg &&
1262-
any_of(RegMask, [](int I) { return I != UndefMaskElem; })) {
1263-
// Just a copy of the source register.
1264-
Cost += TTI::TCC_Basic;
1265-
}
1266-
PrevSrcReg = SrcReg;
1267-
PrevRegMask = RegMask;
1268-
},
1269-
[this, SingleOpTy, &Cost](ArrayRef<int> RegMask,
1270-
unsigned /*Unused*/,
1271-
unsigned /*Unused*/) {
1272-
Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
1273-
0, nullptr);
1274-
});
1275-
return Cost;
1276-
}
1277-
12781227
InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
12791228
return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
12801229
None, 0, nullptr);

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5059,7 +5059,6 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
50595059
// Process extracts in blocks of EltsPerVector to check if the source vector
50605060
// operand can be re-used directly. If not, add the cost of creating a shuffle
50615061
// to extract the values into a vector register.
5062-
SmallVector<int> RegMask(EltsPerVector, UndefMaskElem);
50635062
for (auto *V : VL) {
50645063
++Idx;
50655064

@@ -5069,7 +5068,6 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
50695068

50705069
// Reached the start of a new vector register.
50715070
if (Idx % EltsPerVector == 0) {
5072-
RegMask.assign(EltsPerVector, UndefMaskElem);
50735071
AllConsecutive = true;
50745072
continue;
50755073
}
@@ -5081,7 +5079,6 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
50815079
unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
50825080
AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
50835081
CurrentIdx % EltsPerVector == Idx % EltsPerVector;
5084-
RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
50855082
}
50865083

50875084
if (AllConsecutive)
@@ -5096,7 +5093,7 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
50965093
// cost to extract a vector with EltsPerVector elements.
50975094
Cost += TTI.getShuffleCost(
50985095
TargetTransformInfo::SK_PermuteSingleSrc,
5099-
FixedVectorType::get(VecTy->getElementType(), EltsPerVector), RegMask);
5096+
FixedVectorType::get(VecTy->getElementType(), EltsPerVector));
51005097
}
51015098
return Cost;
51025099
}
@@ -5883,21 +5880,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
58835880
TTI::CastContextHint::None, CostKind);
58845881
}
58855882

5886-
if (E->ReuseShuffleIndices.empty()) {
5887-
CommonCost =
5888-
TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
5889-
} else {
5890-
SmallVector<int> Mask;
5891-
buildShuffleEntryMask(
5892-
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
5893-
[E](Instruction *I) {
5894-
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
5895-
return I->getOpcode() == E->getAltOpcode();
5896-
},
5897-
Mask);
5898-
CommonCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
5899-
FinalVecTy, Mask);
5900-
}
5883+
SmallVector<int> Mask;
5884+
buildShuffleEntryMask(
5885+
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
5886+
[E](Instruction *I) {
5887+
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
5888+
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp());
5889+
},
5890+
Mask);
5891+
CommonCost =
5892+
TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
59015893
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
59025894
return CommonCost + VecCost - ScalarCost;
59035895
}
@@ -6286,8 +6278,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
62866278
Cost += SpillCost + ExtractCost;
62876279
if (FirstUsers.size() == 1) {
62886280
int Limit = ShuffleMask.front().size() * 2;
6289-
if (!all_of(ShuffleMask.front(),
6290-
[Limit](int Idx) { return Idx < Limit; }) ||
6281+
if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) &&
62916282
!ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
62926283
InstructionCost C = TTI->getShuffleCost(
62936284
TTI::SK_PermuteSingleSrc,
@@ -6336,8 +6327,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
63366327
<< "SLP: Current total cost = " << Cost << "\n");
63376328
Cost -= InsertCost;
63386329
for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
6339-
if (ShuffleMask[I].empty())
6340-
continue;
63416330
// Other elements - permutation of 2 vectors (the initial one and the
63426331
// next Ith incoming vector).
63436332
unsigned VF = ShuffleMask[I].size();

0 commit comments

Comments
 (0)