Skip to content

Commit 60ff83c

Browse files
anton-afanasyev authored and memfrob committed
[SLP] Fix order of insertelement/insertvalue seed operands
Summary: This patch takes the index operands of `insertelement`/`insertvalue` into account when generating seed elements in `findBuildAggregate()`. Previously, this function kept the original order of the `insert` instructions. This patch also optimizes `findBuildAggregate()` by avoiding redundant temporary vector allocations and repeated reversals. Fixes llvm.org/pr44067 Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83779
1 parent 3d6e6a8 commit 60ff83c

File tree

5 files changed

+148
-75
lines changed

5 files changed

+148
-75
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 113 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7084,61 +7084,136 @@ class HorizontalReduction {
70847084

70857085
} // end anonymous namespace
70867086

7087+
/// Compute the number of leaf slots of the aggregate produced by
/// \p InsertInst, which must be an insertelement or insertvalue instruction.
///
/// For an insertelement this is the element count of its fixed-width result
/// vector. For an insertvalue the result type is walked from the outside in:
/// struct and array levels multiply the running count by their element count,
/// a fixed-width vector multiplies by its lane count and ends the walk, and
/// any other single-value type ends the walk unchanged.
///
/// \returns None if the size cannot be determined: the insertelement result
/// is not a fixed-width vector, a struct level is empty or not homogeneous,
/// or a type that is neither an aggregate nor a single-value type is reached.
static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
    // Scalable vectors have no compile-time element count. Use dyn_cast so
    // they are rejected instead of tripping the assertion inside cast<>.
    if (auto *VT = dyn_cast<FixedVectorType>(IE->getType()))
      return VT->getNumElements();
    return None;
  }

  unsigned AggregateSize = 1;
  auto *IV = cast<InsertValueInst>(InsertInst);
  Type *CurrentType = IV->getType();
  do {
    if (auto *ST = dyn_cast<StructType>(CurrentType)) {
      // An empty struct has no element 0 to descend into.
      if (ST->getNumElements() == 0)
        return None;
      for (auto *Elt : ST->elements())
        if (Elt != ST->getElementType(0)) // check homogeneity
          return None;
      AggregateSize *= ST->getNumElements();
      CurrentType = ST->getElementType(0);
    } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
      AggregateSize *= AT->getNumElements();
      CurrentType = AT->getElementType();
    } else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
      AggregateSize *= VT->getNumElements();
      return AggregateSize;
    } else if (isa<VectorType>(CurrentType)) {
      // A vector that is not fixed-width: its lane count is unknown here.
      return None;
    } else if (CurrentType->isSingleValueType()) {
      return AggregateSize;
    } else {
      return None;
    }
  } while (true);
}
7114+
7115+
/// Compute the flattened slot index addressed by \p InsertInst, which must be
/// an insertelement or insertvalue instruction.
///
/// \p OperandOffset is the slot index already accumulated for the enclosing
/// aggregate levels (0 for the outermost instruction). At each level the
/// running index is scaled by that level's element count and the level's own
/// index is added.
///
/// \returns None if the index cannot be determined: the insertelement result
/// is not a fixed-width vector, its index operand is not a ConstantInt or is
/// not smaller than the vector's element count, or an insertvalue index steps
/// into a type that is neither a struct nor an array.
static Optional<unsigned> getOperandIndex(Instruction *InsertInst,
                                          unsigned OperandOffset) {
  unsigned OperandIndex = OperandOffset;
  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
    auto *VT = dyn_cast<FixedVectorType>(IE->getType());
    auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    if (!VT || !CI)
      return None;
    // Reject indices outside the vector. The comparison is done on the APInt
    // so that index constants wider than 64 bits do not assert in
    // getZExtValue(), and so that callers never receive a slot number beyond
    // the element count of this vector level.
    if (CI->getValue().uge(VT->getNumElements()))
      return None;
    OperandIndex *= VT->getNumElements();
    OperandIndex += CI->getZExtValue();
    return OperandIndex;
  }

  auto *IV = cast<InsertValueInst>(InsertInst);
  Type *CurrentType = IV->getType();
  for (unsigned int Index : IV->indices()) {
    if (auto *ST = dyn_cast<StructType>(CurrentType)) {
      OperandIndex *= ST->getNumElements();
      CurrentType = ST->getElementType(Index);
    } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
      OperandIndex *= AT->getNumElements();
      CurrentType = AT->getElementType();
    } else {
      return None;
    }
    OperandIndex += Index;
  }
  return OperandIndex;
}
7144+
7145+
/// Walk a chain of insertelement/insertvalue instructions backwards, starting
/// at \p LastInsertInst, and record each inserted scalar in the slot given by
/// getOperandIndex().
///
/// When the inserted operand is itself an insertelement/insertvalue, the
/// function recurses into it, passing the current slot index as the new
/// \p OperandOffset. Otherwise the inserted value is stored in
/// \p BuildVectorOpds and the inserting instruction in \p InsertElts, both at
/// the computed slot. Both vectors are indexed directly and must already be
/// large enough.
///
/// \returns true if the chain ends at an undef aggregate operand; false if a
/// slot index cannot be computed, a nested chain fails, or the aggregate
/// operand is not a single-use insertelement/insertvalue instruction.
static bool findBuildAggregate_rec(Instruction *LastInsertInst,
                                   TargetTransformInfo *TTI,
                                   SmallVectorImpl<Value *> &BuildVectorOpds,
                                   SmallVectorImpl<Value *> &InsertElts,
                                   unsigned OperandOffset) {
  Instruction *Cur = LastInsertInst;
  while (true) {
    Optional<unsigned> Slot = getOperandIndex(Cur, OperandOffset);
    if (!Slot)
      return false;

    Value *Inserted = Cur->getOperand(1);
    const bool IsNestedInsert =
        isa<InsertElementInst>(Inserted) || isa<InsertValueInst>(Inserted);
    if (IsNestedInsert) {
      // The inserted value is a sub-aggregate built by its own insert chain.
      if (!findBuildAggregate_rec(cast<Instruction>(Inserted), TTI,
                                  BuildVectorOpds, InsertElts, *Slot))
        return false;
    } else {
      BuildVectorOpds[*Slot] = Inserted;
      InsertElts[*Slot] = Cur;
    }

    // Reaching an undef base means the whole chain has been consumed.
    Value *Base = Cur->getOperand(0);
    if (isa<UndefValue>(Base))
      return true;

    // Otherwise continue only through a single-use insert instruction.
    auto *Prev = dyn_cast<Instruction>(Base);
    if (!Prev)
      return false;
    if (!isa<InsertValueInst>(Prev) && !isa<InsertElementInst>(Prev))
      return false;
    if (!Prev->hasOneUse())
      return false;
    Cur = Prev;
  }
}
7174+
70877175
/// Recognize construction of vectors like
70887176
/// %ra = insertelement <4 x float> undef, float %s0, i32 0
70897177
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
70907178
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
70917179
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
70927180
/// starting from the last insertelement or insertvalue instruction.
70937181
///
7094-
/// Also recognize aggregates like {<2 x float>, <2 x float>},
7182+
/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
70957183
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
70967184
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
70977185
///
70987186
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
70997187
///
71007188
/// \return true if it matches.
7101-
static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI,
7189+
static bool findBuildAggregate(Instruction *LastInsertInst,
7190+
TargetTransformInfo *TTI,
71027191
SmallVectorImpl<Value *> &BuildVectorOpds,
71037192
SmallVectorImpl<Value *> &InsertElts) {
7193+
71047194
assert((isa<InsertElementInst>(LastInsertInst) ||
71057195
isa<InsertValueInst>(LastInsertInst)) &&
71067196
"Expected insertelement or insertvalue instruction!");
7107-
do {
7108-
Value *InsertedOperand;
7109-
auto *IE = dyn_cast<InsertElementInst>(LastInsertInst);
7110-
if (IE) {
7111-
InsertedOperand = IE->getOperand(1);
7112-
LastInsertInst = IE->getOperand(0);
7113-
} else {
7114-
auto *IV = cast<InsertValueInst>(LastInsertInst);
7115-
InsertedOperand = IV->getInsertedValueOperand();
7116-
LastInsertInst = IV->getAggregateOperand();
7117-
}
7118-
if (isa<InsertElementInst>(InsertedOperand) ||
7119-
isa<InsertValueInst>(InsertedOperand)) {
7120-
SmallVector<Value *, 8> TmpBuildVectorOpds;
7121-
SmallVector<Value *, 8> TmpInsertElts;
7122-
if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds,
7123-
TmpInsertElts))
7124-
return false;
7125-
BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(),
7126-
TmpBuildVectorOpds.rend());
7127-
InsertElts.append(TmpInsertElts.rbegin(), TmpInsertElts.rend());
7128-
} else {
7129-
BuildVectorOpds.push_back(InsertedOperand);
7130-
InsertElts.push_back(IE);
7131-
}
7132-
if (isa<UndefValue>(LastInsertInst))
7133-
break;
7134-
if ((!isa<InsertValueInst>(LastInsertInst) &&
7135-
!isa<InsertElementInst>(LastInsertInst)) ||
7136-
!LastInsertInst->hasOneUse())
7137-
return false;
7138-
} while (true);
7139-
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
7140-
std::reverse(InsertElts.begin(), InsertElts.end());
7141-
return true;
7197+
7198+
assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
7199+
"Expected empty result vectors!");
7200+
7201+
Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
7202+
if (!AggregateSize)
7203+
return false;
7204+
BuildVectorOpds.resize(*AggregateSize);
7205+
InsertElts.resize(*AggregateSize);
7206+
7207+
if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
7208+
0)) {
7209+
llvm::erase_if(BuildVectorOpds,
7210+
[](const Value *V) { return V == nullptr; });
7211+
llvm::erase_if(InsertElts, [](const Value *V) { return V == nullptr; });
7212+
if (BuildVectorOpds.size() >= 2)
7213+
return true;
7214+
}
7215+
7216+
return false;
71427217
}
71437218

71447219
static bool PhiTypeSorterFunc(Value *V, Value *V2) {
@@ -7308,8 +7383,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
73087383

73097384
SmallVector<Value *, 16> BuildVectorOpds;
73107385
SmallVector<Value *, 16> BuildVectorInsts;
7311-
if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts) ||
7312-
BuildVectorOpds.size() < 2)
7386+
if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
73137387
return false;
73147388

73157389
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
@@ -7324,7 +7398,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
73247398
SmallVector<Value *, 16> BuildVectorInsts;
73257399
SmallVector<Value *, 16> BuildVectorOpds;
73267400
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
7327-
BuildVectorOpds.size() < 2 ||
73287401
(llvm::all_of(BuildVectorOpds,
73297402
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
73307403
isShuffle(BuildVectorOpds)))

llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,10 @@ define <4 x float> @hadd_reverse_v4f32(<4 x float> %a, <4 x float> %b) #0 {
3737

3838
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) #0 {
3939
; CHECK-LABEL: @reverse_hadd_v4f32(
40-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
41-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
40+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
41+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
4242
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
43-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
44-
; CHECK-NEXT: ret <4 x float> [[TMP4]]
43+
; CHECK-NEXT: ret <4 x float> [[TMP3]]
4544
;
4645
%vecext = extractelement <4 x float> %a, i32 0
4746
%vecext1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,16 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
126126
; doesn't matter
127127
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
128128
; ANY-LABEL: @simple_select_insert_out_of_order(
129-
; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
130-
; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
131-
; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
129+
; ANY-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
130+
; ANY-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
131+
; ANY-NEXT: [[REORDER_SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
132+
; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer
133+
; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[REORDER_SHUFFLE1]], <4 x float> [[REORDER_SHUFFLE2]]
134+
; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
132135
; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
133136
; ANY-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
134137
; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
135-
; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
138+
; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
136139
; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
137140
; ANY-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
138141
; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
@@ -447,19 +450,19 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
447450
; Make sure we handle multiple trees that feed one build vector correctly.
448451
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
449452
; ANY-LABEL: @multi_tree(
450-
; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
451-
; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[X:%.*]], i32 1
452-
; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Y:%.*]], i32 2
453-
; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[Z:%.*]], i32 3
454-
; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
453+
; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0
454+
; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
455+
; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
456+
; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
457+
; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
455458
; ANY-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
456-
; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
459+
; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
457460
; ANY-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3
458-
; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
461+
; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
459462
; ANY-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
460-
; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
463+
; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
461464
; ANY-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP9]], i32 1
462-
; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
465+
; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
463466
; ANY-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP10]], i32 0
464467
; ANY-NEXT: ret <4 x double> [[I4]]
465468
;

llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,13 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
147147
define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
148148
; CHECK-LABEL: @NonHomogeneousStruct(
149149
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
150-
; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]]
150+
; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]], align 4
151151
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
152-
; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]]
152+
; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]], align 4
153153
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
154-
; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]]
154+
; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]], align 4
155155
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
156-
; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]]
156+
; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]], align 4
157157
; CHECK-NEXT: [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01
158158
; CHECK-NEXT: [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01
159159
; CHECK-NEXT: [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01

llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,10 @@ define <2 x float> @foo({{float, float}}* %A) {
88
; CHECK-NEXT: entry:
99
; CHECK-NEXT: [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
1010
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
11-
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
12-
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[REORDER_SHUFFLE]], <float 2.000000e+00, float 2.000000e+00>
13-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
11+
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
12+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
1413
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 1
15-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
14+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
1615
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x float> [[INS1]], float [[TMP4]], i32 0
1716
; CHECK-NEXT: ret <2 x float> [[INS0]]
1817
;
@@ -44,23 +43,22 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
4443
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7
4544
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
4645
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
47-
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 1, i32 0, i32 2, i32 3>
48-
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[REORDER_SHUFFLE]], <i16 5, i16 6, i16 8, i16 7, i16 2, i16 1, i16 3, i16 4>
49-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
46+
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
47+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
5048
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 1
51-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
49+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
5250
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCT1TY]] %StructIn0, i16 [[TMP5]], 0
53-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
51+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
5452
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP6]], 0
55-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
53+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
5654
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCT1TY]] %StructIn2, i16 [[TMP7]], 1
57-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
55+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
5856
; CHECK-NEXT: [[STRUCTIN4:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP8]], 0
59-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
57+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
6058
; CHECK-NEXT: [[STRUCTIN5:%.*]] = insertvalue [[STRUCT1TY]] %StructIn4, i16 [[TMP9]], 1
61-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
59+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
6260
; CHECK-NEXT: [[STRUCTIN6:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP10]], 1
63-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
61+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
6462
; CHECK-NEXT: [[STRUCTIN7:%.*]] = insertvalue [[STRUCT1TY]] %StructIn6, i16 [[TMP11]], 0
6563
; CHECK-NEXT: [[STRUCT2IN0:%.*]] = insertvalue [[STRUCT2TY:%.*]] undef, [[STRUCT1TY]] %StructIn1, 0
6664
; CHECK-NEXT: [[STRUCT2IN1:%.*]] = insertvalue [[STRUCT2TY]] %Struct2In0, [[STRUCT1TY]] %StructIn3, 1

0 commit comments

Comments
 (0)