Skip to content

Commit 7df373e

Browse files
committed
!fixup just fold shuffle/ext
1 parent 7030a9f commit 7df373e

File tree

2 files changed

+96
-65
lines changed

2 files changed

+96
-65
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 31 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ class VectorCombine {
128128
bool foldShuffleOfShuffles(Instruction &I);
129129
bool foldShuffleOfIntrinsics(Instruction &I);
130130
bool foldShuffleToIdentity(Instruction &I);
131-
bool foldShuffleExtExtracts(Instruction &I);
131+
bool foldShuffleExt(Instruction &I);
132132
bool foldShuffleFromReductions(Instruction &I);
133133
bool foldCastFromReductions(Instruction &I);
134134
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -2792,21 +2792,17 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
27922792
return true;
27932793
}
27942794

2795-
bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
2795+
bool VectorCombine::foldShuffleExt(Instruction &I) {
27962796
// Try to fold vector zero- and sign-extends split across multiple operations
2797-
// into a single extend, removing redundant inserts and shuffles.
2798-
2799-
// Check if we have an extended shuffle that selects the first vector, which
2800-
// itself is another extend fed by a load.
2801-
Instruction *L;
2802-
if (!match(
2803-
&I,
2804-
m_OneUse(m_Shuffle(
2805-
m_OneUse(m_ZExtOrSExt(m_OneUse(m_BitCast(m_OneUse(m_InsertElt(
2806-
m_Value(), m_OneUse(m_Instruction(L)), m_SpecificInt(0))))))),
2807-
m_Value()))) ||
2808-
!cast<ShuffleVectorInst>(&I)->isIdentityWithExtract() ||
2809-
!isa<LoadInst>(L))
2797+
// into a single extend.
2798+
2799+
// Check if we have ZEXT/SEXT (SHUFFLE (ZEXT/SEXT %src), _, identity-mask),
2800+
// with an identity mask extracting the first sub-vector.
2801+
Value *Src;
2802+
ArrayRef<int> Mask;
2803+
if (!match(&I, m_OneUse(m_Shuffle(m_OneUse(m_ZExtOrSExt(m_Value(Src))),
2804+
m_Value(), m_Mask(Mask)))) ||
2805+
!cast<ShuffleVectorInst>(&I)->isIdentityWithExtract())
28102806
return false;
28112807
auto *InnerExt = cast<Instruction>(I.getOperand(0));
28122808
auto *OuterExt = cast<Instruction>(*I.user_begin());
@@ -2819,27 +2815,34 @@ bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
28192815
if (isa<SExtInst>(InnerExt) && !isa<SExtInst>(OuterExt))
28202816
return false;
28212817

2822-
// Don't try to convert the load if it has an odd size.
2823-
if (!DL->typeSizeEqualsStoreSize(L->getType()))
2824-
return false;
28252818
auto *DstTy = cast<FixedVectorType>(OuterExt->getType());
28262819
auto *SrcTy =
28272820
FixedVectorType::get(InnerExt->getOperand(0)->getType()->getScalarType(),
28282821
DstTy->getNumElements());
2829-
if (DL->getTypeStoreSize(SrcTy) != DL->getTypeStoreSize(L->getType()))
2830-
return false;
28312822

2832-
// Convert to a vector load feeding a single wide extend.
2833-
Builder.SetInsertPoint(*L->getInsertionPointAfterDef());
2834-
auto *NewLoad = cast<LoadInst>(
2835-
Builder.CreateLoad(SrcTy, L->getOperand(0), L->getName() + ".vec"));
2823+
// Don't perform the fold if the cost of the new extend is worse than the cost
2824+
// of the 2 original extends.
2825+
InstructionCost OriginalCost =
2826+
TTI.getCastInstrCost(InnerExt->getOpcode(), SrcTy, InnerExt->getType(),
2827+
TTI::CastContextHint::None) +
2828+
TTI.getCastInstrCost(InnerExt->getOpcode(), SrcTy, InnerExt->getType(),
2829+
TTI::CastContextHint::None);
2830+
InstructionCost NewCost = TTI.getCastInstrCost(
2831+
InnerExt->getOpcode(), SrcTy, DstTy, TTI::CastContextHint::None);
2832+
if (NewCost > OriginalCost)
2833+
return false;
2834+
2835+
// Convert to a shuffle of the input feeding a single wide extend.
2836+
Builder.SetInsertPoint(*OuterExt->getInsertionPointAfterDef());
2837+
auto *NewIns =
2838+
Builder.CreateShuffleVector(Src, PoisonValue::get(Src->getType()), Mask);
28362839
auto *NewExt =
28372840
isa<ZExtInst>(InnerExt)
2838-
? Builder.CreateZExt(NewLoad, DstTy, "vec.ext", InnerExt->hasNonNeg())
2839-
: Builder.CreateSExt(NewLoad, DstTy, "vec.ext");
2841+
? Builder.CreateZExt(NewIns, DstTy, "vec.ext", InnerExt->hasNonNeg())
2842+
: Builder.CreateSExt(NewIns, DstTy, "vec.ext");
28402843
OuterExt->replaceAllUsesWith(NewExt);
28412844
replaceValue(*OuterExt, *NewExt);
2842-
Worklist.pushValue(NewLoad);
2845+
Worklist.pushValue(NewExt);
28432846
return true;
28442847
}
28452848

@@ -3617,7 +3620,7 @@ bool VectorCombine::run() {
36173620
break;
36183621
case Instruction::ShuffleVector:
36193622
MadeChange |= widenSubvectorLoad(I);
3620-
MadeChange |= foldShuffleExtExtracts(I);
3623+
MadeChange |= foldShuffleExt(I);
36213624
break;
36223625
default:
36233626
break;

llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll

Lines changed: 65 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,11 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
1111
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
1212
; CHECK-SAME: ptr [[DI:%.*]]) {
1313
; CHECK-NEXT: [[ENTRY:.*:]]
14-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
15-
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
14+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
15+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
16+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
17+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
1619
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
1720
;
1821
entry:
@@ -29,8 +32,11 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) {
2932
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(
3033
; CHECK-SAME: ptr [[DI:%.*]]) {
3134
; CHECK-NEXT: [[ENTRY:.*:]]
32-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
33-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
35+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
36+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
37+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
38+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
39+
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[TMP0]] to <4 x i32>
3440
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
3541
;
3642
entry:
@@ -47,8 +53,11 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) {
4753
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(
4854
; CHECK-SAME: ptr [[DI:%.*]]) {
4955
; CHECK-NEXT: [[ENTRY:.*:]]
50-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
51-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
56+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
57+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
58+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
59+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
60+
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[TMP0]] to <4 x i32>
5261
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
5362
;
5463
entry:
@@ -65,8 +74,11 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) {
6574
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(
6675
; CHECK-SAME: ptr [[DI:%.*]]) {
6776
; CHECK-NEXT: [[ENTRY:.*:]]
68-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
69-
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
77+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
78+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
79+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
80+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
7082
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
7183
;
7284
entry:
@@ -83,8 +95,11 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) {
8395
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(
8496
; CHECK-SAME: ptr [[DI:%.*]]) {
8597
; CHECK-NEXT: [[ENTRY:.*:]]
86-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
87-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
98+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
99+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
100+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
101+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
102+
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i8> [[TMP0]] to <4 x i32>
88103
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
89104
;
90105
entry:
@@ -101,9 +116,12 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) {
101116
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(
102117
; CHECK-SAME: ptr [[DI:%.*]]) {
103118
; CHECK-NEXT: [[ENTRY:.*:]]
104-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
105-
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
119+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
106120
; CHECK-NEXT: call void @use.i32(i32 0)
121+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
122+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
123+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
124+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
107125
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
108126
;
109127
entry:
@@ -146,9 +164,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_load_other_users(ptr %di) {
146164
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
147165
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
148166
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
149-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
150-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
151-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
167+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
152169
; CHECK-NEXT: call void @use.i32(i32 [[L]])
153170
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
154171
;
@@ -170,9 +187,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_ins_other_users(ptr %di) {
170187
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
171188
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
172189
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
173-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
174-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
175-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
190+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
191+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
176192
; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[VEC_INS]])
177193
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
178194
;
@@ -194,9 +210,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_bc_other_users(ptr %di) {
194210
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
195211
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
196212
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
197-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
198-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
199-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
213+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
214+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
200215
; CHECK-NEXT: call void @use.v8i8(<8 x i8> [[VEC_BC]])
201216
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
202217
;
@@ -263,8 +278,11 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) {
263278
; CHECK-LABEL: define <8 x i32> @load_i64_zext_to_v8i32(
264279
; CHECK-SAME: ptr [[DI:%.*]]) {
265280
; CHECK-NEXT: [[ENTRY:.*:]]
266-
; CHECK-NEXT: [[L_VEC:%.*]] = load <8 x i8>, ptr [[DI]], align 8
267-
; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext <8 x i8> [[L_VEC]] to <8 x i32>
281+
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
282+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
283+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
284+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
285+
; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32>
268286
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
269287
;
270288
entry:
@@ -281,8 +299,11 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) {
281299
; CHECK-LABEL: define <3 x i32> @load_i24_zext_to_v3i32(
282300
; CHECK-SAME: ptr [[DI:%.*]]) {
283301
; CHECK-NEXT: [[ENTRY:.*:]]
284-
; CHECK-NEXT: [[L_VEC:%.*]] = load <3 x i8>, ptr [[DI]], align 4
285-
; CHECK-NEXT: [[EXT_2:%.*]] = zext <3 x i8> [[L_VEC]] to <3 x i32>
302+
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
303+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
304+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
305+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
306+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <3 x i8> [[TMP0]] to <3 x i32>
286307
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
287308
;
288309
entry:
@@ -302,9 +323,8 @@ define <4 x i32> @load_i32_insert_idx_1_sext(ptr %di) {
302323
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
303324
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
304325
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
305-
; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
306-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
307-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
326+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
327+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
308328
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
309329
;
310330
entry:
@@ -387,8 +407,11 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) {
387407
; CHECK-LABEL: define <4 x i32> @load_i32_sext_to_v4i32(
388408
; CHECK-SAME: ptr [[DI:%.*]]) {
389409
; CHECK-NEXT: [[ENTRY:.*:]]
390-
; CHECK-NEXT: [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
391-
; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i8> [[L_VEC]] to <4 x i32>
410+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
411+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
412+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
413+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
414+
; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i8> [[TMP0]] to <4 x i32>
392415
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
393416
;
394417
entry:
@@ -405,8 +428,11 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) {
405428
; CHECK-LABEL: define <8 x i32> @load_i64_sext_to_v8i32(
406429
; CHECK-SAME: ptr [[DI:%.*]]) {
407430
; CHECK-NEXT: [[ENTRY:.*:]]
408-
; CHECK-NEXT: [[L_VEC:%.*]] = load <8 x i8>, ptr [[DI]], align 8
409-
; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i8> [[L_VEC]] to <8 x i32>
431+
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
432+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
433+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
434+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
435+
; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i32>
410436
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
411437
;
412438
entry:
@@ -423,8 +449,11 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) {
423449
; CHECK-LABEL: define <3 x i32> @load_i24_sext_to_v3i32(
424450
; CHECK-SAME: ptr [[DI:%.*]]) {
425451
; CHECK-NEXT: [[ENTRY:.*:]]
426-
; CHECK-NEXT: [[L_VEC:%.*]] = load <3 x i8>, ptr [[DI]], align 4
427-
; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i8> [[L_VEC]] to <3 x i32>
452+
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
453+
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
454+
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
455+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
456+
; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i8> [[TMP0]] to <3 x i32>
428457
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
429458
;
430459
entry:
@@ -444,9 +473,8 @@ define <4 x i32> @load_i32_insert_idx_1(ptr %di) {
444473
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
445474
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
446475
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
447-
; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
448-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
449-
; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
476+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
477+
; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i8> [[TMP0]] to <4 x i32>
450478
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
451479
;
452480
entry:

0 commit comments

Comments
 (0)