Skip to content

Commit 99f31ac

Browse files
committed
[SLP]Further improvement of the cost model for scalars used in buildvectors.
Further improvement of the cost model for the scalars used in buildvector sequences. The main functionality is outlined into a separate function. The cost is calculated in the following way: 1. If the Base vector is not an undef vector, resize the very first mask to have a common VF and perform the action for 2 input vectors (including the non-undef Base). Other shuffle masks are combined with the result of the first stage and processed as a shuffle of 2 elements. 2. If the Base is an undef vector and there is only 1 shuffle mask, perform the action only for 1 vector with the given mask, if it is not the identity mask. 3. If > 2 masks are used, perform a series of shuffle actions for 2 vectors, combining the masks properly between the steps. The original implementation misses the very first analysis for the Base vector, so the cost might be too optimistic in some cases. But it improves the cost for the insertelements which are part of the current SLP graph. Part of D107966. Differential Revision: https://reviews.llvm.org/D115750
1 parent e592690 commit 99f31ac

File tree

4 files changed

+237
-122
lines changed

4 files changed

+237
-122
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 204 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -6290,6 +6290,122 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
62906290
return false;
62916291
}
62926292

6293+
/// Checks whether the \p IE1 instruction is followed by the \p IE2 instruction
6294+
/// in the buildvector sequence.
///
/// Both chains are walked in lock-step through operand 0 of each
/// insertelement. Returns true if the walk that started at \p IE2 reaches
/// \p IE1 (this includes the case \p IE1 == \p IE2), and false if the walk that
/// started at \p IE1 reaches \p IE2. If both walks run out of insertelement
/// instructions without meeting, the function hits llvm_unreachable.
6295+
static bool isFirstInsertElement(const InsertElementInst *IE1,
6296+
const InsertElementInst *IE2) {
6297+
const auto *I1 = IE1;
6298+
const auto *I2 = IE2;
6299+
do {
// The chain starting at IE2 reached IE1, so IE1 comes first.
6300+
if (I2 == IE1)
6301+
return true;
// The chain starting at IE1 reached IE2, so IE2 comes first.
6302+
if (I1 == IE2)
6303+
return false;
// Step each cursor to its vector operand; dyn_cast yields null once the
// operand is no longer an insertelement, which stops that walk.
6304+
if (I1)
6305+
I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
6306+
if (I2)
6307+
I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
6308+
} while (I1 || I2);
6309+
llvm_unreachable("Two different buildvectors not expected.");
6310+
}
6311+
6312+
/// Analyzes the provided shuffle masks and performs the requested actions on
6313+
/// the vectors with the given shuffle masks. It tries to do it in
6314+
/// several steps.
6315+
/// 1. If the Base vector is not an undef vector, the very first mask is resized
6316+
/// to have a common VF and the action is performed for 2 input vectors
6317+
/// (including the non-undef Base). Other shuffle masks are combined with the
6318+
/// result of the first stage and processed as a shuffle of 2 elements.
6319+
/// 2. If the Base is an undef vector and there is only 1 shuffle mask, the
6320+
/// action is performed only for 1 vector with the given mask, if it is not the
6321+
/// identity mask.
6322+
/// 3. If > 2 masks are used, the remaining shuffle actions are performed for 2
6323+
/// vectors, combining the masks properly between the steps.
///
/// \param ShuffleMask Non-empty list of (vector, mask) pairs to process.
/// \param Base Vector value tested with isUndefVector to select the strategy.
/// \param GetVF Returns the vector factor of an input; used only to compare
/// the sizes of the first two inputs when \p Base is undef.
/// \param ResizeAction Called with an input and its mask; returns a vector and
/// a flag. When the flag is set, the lanes taken from that vector are addressed
/// by their own position in the combined mask instead of by the original mask
/// value (in the single-input case the returned vector is used directly).
/// \param Action Called with the combined mask and 1 or 2 input vectors;
/// its result is fed as the first input of the next step.
/// \returns The result of the last performed \p Action, or the vector returned
/// by \p ResizeAction in the single-input case when its flag is set.
6324+
template <typename T>
6325+
static T *performExtractsShuffleAction(
6326+
MutableArrayRef<std::pair<T *, SmallVector<int>>> ShuffleMask, Value *Base,
6327+
function_ref<unsigned(T *)> GetVF,
6328+
function_ref<std::pair<T *, bool>(T *, ArrayRef<int>)> ResizeAction,
6329+
function_ref<T *(ArrayRef<int>, ArrayRef<T *>)> Action) {
6330+
assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
// Working copy of the first mask; it accumulates the combined mask across
// all steps below.
6331+
SmallVector<int> Mask(ShuffleMask.begin()->second);
6332+
auto VMIt = std::next(ShuffleMask.begin());
6333+
T *Prev = nullptr;
6334+
bool IsBaseNotUndef = !isUndefVector(Base);
6335+
if (IsBaseNotUndef) {
6336+
// Base is not undef, need to combine it with the next subvectors.
6337+
std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
// Undefined lanes keep their own position (first operand); defined lanes are
// offset by VF so that they select from the second operand.
6338+
for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
6339+
if (Mask[Idx] == UndefMaskElem)
6340+
Mask[Idx] = Idx;
6341+
else
6342+
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
6343+
}
// NOTE(review): nullptr is passed as the first operand here - presumably it
// stands for the non-undef Base; confirm against the Action callbacks.
6344+
Prev = Action(Mask, {nullptr, Res.first});
6345+
} else if (ShuffleMask.size() == 1) {
6346+
// Base is undef and only 1 vector is shuffled - perform the action only for
6347+
// single vector, if the mask is not the identity mask.
6348+
std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
6349+
if (Res.second)
6350+
// Identity mask is found.
6351+
Prev = Res.first;
6352+
else
6353+
Prev = Action(Mask, {ShuffleMask.begin()->first});
6354+
} else {
6355+
// Base is undef and at least 2 input vectors shuffled - perform 2 vectors
6356+
// shuffles step by step, combining shuffle between the steps.
6357+
unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
6358+
unsigned Vec2VF = GetVF(VMIt->first);
6359+
if (Vec1VF == Vec2VF) {
6360+
// No need to resize the input vectors since they are of the same size, we
6361+
// can shuffle them directly.
6362+
ArrayRef<int> SecMask = VMIt->second;
6363+
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
6364+
if (SecMask[I] != UndefMaskElem) {
6365+
assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
6366+
Mask[I] = SecMask[I] + Vec1VF;
6367+
}
6368+
}
6369+
Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
6370+
} else {
6371+
// Vectors of different sizes - resize and reshuffle.
6372+
std::pair<T *, bool> Res1 =
6373+
ResizeAction(ShuffleMask.begin()->first, Mask);
6374+
std::pair<T *, bool> Res2 = ResizeAction(VMIt->first, VMIt->second);
6375+
ArrayRef<int> SecMask = VMIt->second;
6376+
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
6377+
if (Mask[I] != UndefMaskElem) {
6378+
assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
6379+
if (Res1.second)
6380+
Mask[I] = I;
6381+
} else if (SecMask[I] != UndefMaskElem) {
6382+
assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
6383+
Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
6384+
}
6385+
}
6386+
Prev = Action(Mask, {Res1.first, Res2.first});
6387+
}
// The second input has been consumed; continue with the third one, if any.
6388+
VMIt = std::next(VMIt);
6389+
}
6390+
// Perform requested actions for the remaining masks/vectors.
6391+
for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
6392+
// Shuffle other input vectors, if any.
6393+
std::pair<T *, bool> Res = ResizeAction(VMIt->first, VMIt->second);
6394+
ArrayRef<int> SecMask = VMIt->second;
// Lanes defined by the new mask select from the second operand (offset by
// VF); lanes already defined by previous steps keep their own position in
// the first operand (Prev).
6395+
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
6396+
if (SecMask[I] != UndefMaskElem) {
6397+
assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
6398+
"Multiple uses of scalars.");
6399+
Mask[I] = (Res.second ? I : SecMask[I]) + VF;
6400+
} else if (Mask[I] != UndefMaskElem) {
6401+
Mask[I] = I;
6402+
}
6403+
}
6404+
Prev = Action(Mask, {Prev, Res.first});
6405+
}
6406+
return Prev;
6407+
}
6408+
62936409
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
62946410
InstructionCost Cost = 0;
62956411
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -6310,9 +6426,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
63106426

63116427
SmallPtrSet<Value *, 16> ExtractCostCalculated;
63126428
InstructionCost ExtractCost = 0;
6313-
SmallVector<unsigned> VF;
6314-
SmallVector<SmallVector<int>> ShuffleMask;
6315-
SmallVector<Value *> FirstUsers;
6429+
SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
6430+
SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
63166431
SmallVector<APInt> DemandedElts;
63176432
for (ExternalUser &EU : ExternalUses) {
63186433
// We only add extract cost once for the same scalar.
@@ -6341,14 +6456,16 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
63416456
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
63426457
Optional<unsigned> InsertIdx = getInsertIndex(VU);
63436458
if (InsertIdx) {
6344-
auto *It = find_if(FirstUsers, [VU](Value *V) {
6345-
return areTwoInsertFromSameBuildVector(VU,
6346-
cast<InsertElementInst>(V));
6347-
});
6459+
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
6460+
auto *It =
6461+
find_if(FirstUsers,
6462+
[VU](const std::pair<Value *, const TreeEntry *> &Pair) {
6463+
return areTwoInsertFromSameBuildVector(
6464+
VU, cast<InsertElementInst>(Pair.first));
6465+
});
63486466
int VecId = -1;
63496467
if (It == FirstUsers.end()) {
6350-
VF.push_back(FTy->getNumElements());
6351-
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
6468+
(void)ShuffleMasks.emplace_back();
63526469
// Find the insertvector, vectorized in tree, if any.
63536470
Value *Base = VU;
63546471
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
@@ -6357,21 +6474,31 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
63576474
VU = IEBase;
63586475
do {
63596476
int Idx = E->findLaneForValue(Base);
6360-
ShuffleMask.back()[Idx] = Idx;
6477+
SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
6478+
if (Mask.empty())
6479+
Mask.assign(FTy->getNumElements(), UndefMaskElem);
6480+
Mask[Idx] = Idx;
63616481
Base = cast<InsertElementInst>(Base)->getOperand(0);
63626482
} while (E == getTreeEntry(Base));
63636483
break;
63646484
}
63656485
Base = cast<InsertElementInst>(Base)->getOperand(0);
63666486
}
6367-
FirstUsers.push_back(VU);
6368-
DemandedElts.push_back(APInt::getZero(VF.back()));
6487+
FirstUsers.emplace_back(VU, ScalarTE);
6488+
DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
63696489
VecId = FirstUsers.size() - 1;
63706490
} else {
6491+
if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
6492+
It->first = VU;
63716493
VecId = std::distance(FirstUsers.begin(), It);
63726494
}
63736495
int InIdx = *InsertIdx;
6374-
ShuffleMask[VecId][InIdx] = EU.Lane;
6496+
SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
6497+
if (Mask.empty())
6498+
Mask.assign(FTy->getNumElements(), UndefMaskElem);
6499+
assert(Mask[InIdx] == UndefMaskElem &&
6500+
"InsertElementInstruction used already.");
6501+
Mask[InIdx] = EU.Lane;
63756502
DemandedElts[VecId].setBit(InIdx);
63766503
continue;
63776504
}
@@ -6398,89 +6525,75 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
63986525

63996526
InstructionCost SpillCost = getSpillCost();
64006527
Cost += SpillCost + ExtractCost;
6401-
if (FirstUsers.size() == 1) {
6402-
int Limit = ShuffleMask.front().size() * 2;
6403-
if (!all_of(ShuffleMask.front(),
6404-
[Limit](int Idx) { return Idx < Limit; }) ||
6405-
!ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
6406-
InstructionCost C = TTI->getShuffleCost(
6528+
auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask) {
6529+
InstructionCost C = 0;
6530+
unsigned VF = Mask.size();
6531+
unsigned VecVF = TE->getVectorFactor();
6532+
if (VF != VecVF &&
6533+
(any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
6534+
(all_of(Mask,
6535+
[VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
6536+
!ShuffleVectorInst::isIdentityMask(Mask)))) {
6537+
SmallVector<int> OrigMask(VecVF, UndefMaskElem);
6538+
std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
6539+
OrigMask.begin());
6540+
C = TTI->getShuffleCost(
64076541
TTI::SK_PermuteSingleSrc,
6408-
cast<FixedVectorType>(FirstUsers.front()->getType()),
6409-
ShuffleMask.front());
6410-
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
6411-
<< " for final shuffle of insertelement external users "
6412-
<< *VectorizableTree.front()->Scalars.front() << ".\n"
6413-
<< "SLP: Current total cost = " << Cost << "\n");
6542+
FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask);
6543+
LLVM_DEBUG(
6544+
dbgs() << "SLP: Adding cost " << C
6545+
<< " for final shuffle of insertelement external users.\n";
6546+
TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
64146547
Cost += C;
6548+
return std::make_pair(TE, true);
64156549
}
6550+
return std::make_pair(TE, false);
6551+
};
6552+
// Calculate the cost of the reshuffled vectors, if any.
6553+
for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
6554+
Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
6555+
unsigned VF = ShuffleMasks[I].begin()->second.size();
6556+
auto *FTy = FixedVectorType::get(
6557+
cast<VectorType>(FirstUsers[I].first->getType())->getElementType(), VF);
6558+
auto Vector = ShuffleMasks[I].takeVector();
6559+
auto &&EstimateShufflesCost = [this, FTy,
6560+
&Cost](ArrayRef<int> Mask,
6561+
ArrayRef<const TreeEntry *> TEs) {
6562+
assert((TEs.size() == 1 || TEs.size() == 2) &&
6563+
"Expected exactly 1 or 2 tree entries.");
6564+
if (TEs.size() == 1) {
6565+
int Limit = 2 * Mask.size();
6566+
if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) ||
6567+
!ShuffleVectorInst::isIdentityMask(Mask)) {
6568+
InstructionCost C =
6569+
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
6570+
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
6571+
<< " for final shuffle of insertelement "
6572+
"external users.\n";
6573+
TEs.front()->dump();
6574+
dbgs() << "SLP: Current total cost = " << Cost << "\n");
6575+
Cost += C;
6576+
}
6577+
} else {
6578+
InstructionCost C =
6579+
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
6580+
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
6581+
<< " for final shuffle of vector node and external "
6582+
"insertelement users.\n";
6583+
if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump();
6584+
dbgs() << "SLP: Current total cost = " << Cost << "\n");
6585+
Cost += C;
6586+
}
6587+
return TEs.back();
6588+
};
6589+
(void)performExtractsShuffleAction<const TreeEntry>(
6590+
makeMutableArrayRef(Vector.data(), Vector.size()), Base,
6591+
[](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
6592+
EstimateShufflesCost);
64166593
InstructionCost InsertCost = TTI->getScalarizationOverhead(
6417-
cast<FixedVectorType>(FirstUsers.front()->getType()),
6418-
DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
6419-
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
6420-
<< " for insertelements gather.\n"
6421-
<< "SLP: Current total cost = " << Cost << "\n");
6422-
Cost -= InsertCost;
6423-
} else if (FirstUsers.size() >= 2) {
6424-
unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
6425-
// Combined masks of the first 2 vectors.
6426-
SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
6427-
copy(ShuffleMask.front(), CombinedMask.begin());
6428-
APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
6429-
auto *VecTy = FixedVectorType::get(
6430-
cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
6431-
MaxVF);
6432-
for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
6433-
if (ShuffleMask[1][I] != UndefMaskElem) {
6434-
CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
6435-
CombinedDemandedElts.setBit(I);
6436-
}
6437-
}
6438-
InstructionCost C =
6439-
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
6440-
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
6441-
<< " for final shuffle of vector node and external "
6442-
"insertelement users "
6443-
<< *VectorizableTree.front()->Scalars.front() << ".\n"
6444-
<< "SLP: Current total cost = " << Cost << "\n");
6445-
Cost += C;
6446-
InstructionCost InsertCost = TTI->getScalarizationOverhead(
6447-
VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
6448-
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
6449-
<< " for insertelements gather.\n"
6450-
<< "SLP: Current total cost = " << Cost << "\n");
6594+
cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
6595+
/*Insert*/ true, /*Extract*/ false);
64516596
Cost -= InsertCost;
6452-
for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
6453-
if (ShuffleMask[I].empty())
6454-
continue;
6455-
// Other elements - permutation of 2 vectors (the initial one and the
6456-
// next Ith incoming vector).
6457-
unsigned VF = ShuffleMask[I].size();
6458-
for (unsigned Idx = 0; Idx < VF; ++Idx) {
6459-
int Mask = ShuffleMask[I][Idx];
6460-
if (Mask != UndefMaskElem)
6461-
CombinedMask[Idx] = MaxVF + Mask;
6462-
else if (CombinedMask[Idx] != UndefMaskElem)
6463-
CombinedMask[Idx] = Idx;
6464-
}
6465-
for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
6466-
if (CombinedMask[Idx] != UndefMaskElem)
6467-
CombinedMask[Idx] = Idx;
6468-
InstructionCost C =
6469-
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
6470-
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
6471-
<< " for final shuffle of vector node and external "
6472-
"insertelement users "
6473-
<< *VectorizableTree.front()->Scalars.front() << ".\n"
6474-
<< "SLP: Current total cost = " << Cost << "\n");
6475-
Cost += C;
6476-
InstructionCost InsertCost = TTI->getScalarizationOverhead(
6477-
cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
6478-
/*Insert*/ true, /*Extract*/ false);
6479-
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
6480-
<< " for insertelements gather.\n"
6481-
<< "SLP: Current total cost = " << Cost << "\n");
6482-
Cost -= InsertCost;
6483-
}
64846597
}
64856598

64866599
#ifndef NDEBUG

llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,27 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
1111
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
1212
; CHECK-NEXT: br label [[BB1:%.*]]
1313
; CHECK: bb1:
14-
; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
1514
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
1615
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
17-
; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
16+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
17+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
18+
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
19+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
1820
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1921
; CHECK: for.body:
2022
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
21-
; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
22-
; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
23-
; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
23+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
24+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
25+
; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
2426
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
2527
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
2628
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
27-
; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
29+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
30+
; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
2831
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
2932
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
3033
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
31-
; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
32-
; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
34+
; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
3335
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
3436
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
3537
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]

0 commit comments

Comments
 (0)