Skip to content

Commit a999a1b

Browse files
[SLP]Remove emission of vector_insert/vector_extract intrinsics
Replaced by the regular shuffles. Fixes #145512. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: #148007
1 parent ed85487 commit a999a1b

File tree

94 files changed

+799
-757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+799
-757
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5809,48 +5809,40 @@ static InstructionCost getExtractWithExtendCost(
58095809
return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index, CostKind);
58105810
}
58115811

5812-
/// Correctly creates insert_subvector, checking that the index is multiple of
5813-
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
5812+
/// Creates subvector insert. Generates shuffle using \p Generator or
58145813
/// using default shuffle.
58155814
static Value *createInsertVector(
58165815
IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index,
58175816
function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) {
5817+
if (isa<PoisonValue>(Vec) && isa<PoisonValue>(V))
5818+
return Vec;
58185819
const unsigned SubVecVF = getNumElements(V->getType());
5819-
if (Index % SubVecVF == 0) {
5820-
Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index);
5821-
} else {
5822-
// Create shuffle, insertvector requires that index is multiple of
5823-
// the subvector length.
5824-
const unsigned VecVF = getNumElements(Vec->getType());
5825-
SmallVector<int> Mask(VecVF, PoisonMaskElem);
5826-
std::iota(Mask.begin(), Mask.end(), 0);
5827-
for (unsigned I : seq<unsigned>(SubVecVF))
5828-
Mask[I + Index] = I + VecVF;
5829-
if (Generator) {
5830-
Vec = Generator(Vec, V, Mask);
5831-
} else {
5832-
// 1. Resize V to the size of Vec.
5833-
SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
5834-
std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
5835-
V = Builder.CreateShuffleVector(V, ResizeMask);
5836-
Vec = Builder.CreateShuffleVector(Vec, V, Mask);
5837-
}
5820+
// Always create a shuffle; insertvector would require the index to be a multiple of
5821+
// the subvector length.
5822+
const unsigned VecVF = getNumElements(Vec->getType());
5823+
SmallVector<int> Mask(VecVF, PoisonMaskElem);
5824+
if (isa<PoisonValue>(Vec)) {
5825+
auto *Begin = std::next(Mask.begin(), Index);
5826+
std::iota(Begin, std::next(Begin, SubVecVF), 0);
5827+
Vec = Builder.CreateShuffleVector(V, Mask);
5828+
return Vec;
58385829
}
5839-
return Vec;
5830+
std::iota(Mask.begin(), Mask.end(), 0);
5831+
std::iota(std::next(Mask.begin(), Index),
5832+
std::next(Mask.begin(), Index + SubVecVF), VecVF);
5833+
if (Generator)
5834+
return Generator(Vec, V, Mask);
5835+
// 1. Resize V to the size of Vec.
5836+
SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
5837+
std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
5838+
V = Builder.CreateShuffleVector(V, ResizeMask);
5839+
// 2. Insert V into Vec.
5840+
return Builder.CreateShuffleVector(Vec, V, Mask);
58405841
}
58415842

5842-
/// Correctly creates extract_subvector, checking that the index is multiple of
5843-
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
5844-
/// using default shuffle.
5843+
/// Generates subvector extract using default shuffle.
58455844
static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec,
58465845
unsigned SubVecVF, unsigned Index) {
5847-
if (Index % SubVecVF == 0) {
5848-
VectorType *SubVecTy =
5849-
getWidenedType(Vec->getType()->getScalarType(), SubVecVF);
5850-
return Builder.CreateExtractVector(SubVecTy, Vec, Index);
5851-
}
5852-
// Create shuffle, extract_subvector requires that index is multiple of
5853-
// the subvector length.
58545846
SmallVector<int> Mask(SubVecVF, PoisonMaskElem);
58555847
std::iota(Mask.begin(), Mask.end(), Index);
58565848
return Builder.CreateShuffleVector(Vec, Mask);
@@ -16275,8 +16267,8 @@ Value *BoUpSLP::gather(
1627516267
assert(SLPReVec && "FixedVectorType is not expected.");
1627616268
Vec =
1627716269
createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy));
16278-
auto *II = dyn_cast<IntrinsicInst>(Vec);
16279-
if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
16270+
auto *II = dyn_cast<Instruction>(Vec);
16271+
if (!II)
1628016272
return Vec;
1628116273
InsElt = II;
1628216274
} else {
@@ -16296,6 +16288,28 @@ Value *BoUpSLP::gather(
1629616288
if (auto *SI = dyn_cast<Instruction>(Scalar))
1629716289
UserOp = SI;
1629816290
} else {
16291+
if (V->getType()->isVectorTy()) {
16292+
if (auto *SV = dyn_cast<ShuffleVectorInst>(InsElt);
16293+
SV && SV->getOperand(0) != V && SV->getOperand(1) != V) {
16294+
// Find shufflevector, caused by resize.
16295+
auto FindOperand = [](Value *Vec, Value *V) -> Instruction * {
16296+
if (auto *SV = dyn_cast<ShuffleVectorInst>(Vec)) {
16297+
if (SV->getOperand(0) == V)
16298+
return SV;
16299+
if (SV->getOperand(1) == V)
16300+
return SV;
16301+
}
16302+
return nullptr;
16303+
};
16304+
InsElt = nullptr;
16305+
if (Instruction *User = FindOperand(SV->getOperand(0), V))
16306+
InsElt = User;
16307+
else if (Instruction *User = FindOperand(SV->getOperand(1), V))
16308+
InsElt = User;
16309+
assert(InsElt &&
16310+
"Failed to find shufflevector, caused by resize.");
16311+
}
16312+
}
1629916313
UserOp = InsElt;
1630016314
}
1630116315
if (UserOp) {
@@ -16864,10 +16878,18 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1686416878
V, SimplifyQuery(*R.DL));
1686516879
}));
1686616880
unsigned InsertionIndex = Idx * getNumElements(ScalarTy);
16881+
// Use the scalar element type of ScalarTy to correctly handle shuffles
16882+
// for revectorization. The revectorization mode operates on
16883+
// vectors, but here we need to operate on the scalars, because the
16884+
// masks were already transformed for the vector elements and we don't
16885+
// need to do this transformation again.
16886+
Type *OrigScalarTy = ScalarTy;
16887+
ScalarTy = ScalarTy->getScalarType();
1686716888
Vec = createInsertVector(
1686816889
Builder, Vec, V, InsertionIndex,
1686916890
std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2,
1687016891
_3));
16892+
ScalarTy = OrigScalarTy;
1687116893
if (!CommonMask.empty()) {
1687216894
std::iota(std::next(CommonMask.begin(), Idx),
1687316895
std::next(CommonMask.begin(), Idx + E->getVectorFactor()),

llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
567567
;
568568
; SSE4-LABEL: @buildvector_mul_subadd_ps256(
569569
; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
570-
; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
571-
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
572-
; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
570+
; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
573571
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
574-
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
575-
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
576-
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
572+
; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]]
573+
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
574+
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
577575
; SSE4-NEXT: ret <8 x float> [[TMP6]]
578576
;
579577
; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256(
580578
; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
581-
; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
582-
; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
583-
; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
579+
; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
584580
; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
585-
; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
581+
; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]]
582+
; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
586583
; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
587584
; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
588585
; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]]
@@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
677674
;
678675
; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512(
679676
; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
680-
; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
681-
; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
682-
; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
677+
; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
683678
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
684-
; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
685-
; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
686-
; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
679+
; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]]
680+
; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
681+
; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
687682
; AVX_FMA-NEXT: ret <16 x float> [[TMP7]]
688683
;
689684
; AVX512-LABEL: @buildvector_mul_subadd_ps512(
@@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
880875
;
881876
; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512(
882877
; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
883-
; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
884-
; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
885-
; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
878+
; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
886879
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
887-
; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
888-
; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
889-
; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
880+
; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]]
881+
; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
882+
; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
890883
; AVX_FMA-NEXT: ret <8 x double> [[TMP7]]
891884
;
892885
; AVX512-LABEL: @buildvector_mul_subadd_pd512(

llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ define void @foo(ptr %0) {
1212
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
1313
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
1414
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer
15-
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0)
16-
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4)
17-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
15+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
16+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
17+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
1819
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer
1920
; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer
2021
; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])

llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i32 @test(ptr %c) {
1111
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
1212
; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
1313
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
14-
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0)
14+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
1515
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
1616
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
1717
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1

llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,8 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
164164
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
165165
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2
166166
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3
167-
; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0)
167+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
168+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
168169
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
169170
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]]
170171
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1

0 commit comments

Comments
 (0)