Skip to content

Commit 1d24f8a

Browse files
committed
[InstCombine] Move extends across identity shuffles.
Add a new fold to InstCombine that moves SExt/ZExt across identity shuffles, applying the cast after the shuffle. This sinks extends and can enable further folding of both the shuffles (and related instructions) and the extends. If a backend prefers to perform the casts first, the extends can be hoisted again later, for example in VectorCombine. A larger example is included in the load_i32_zext_to_v4i32 test. The wider extend is easier to compute an accurate cost for, and targets (like AArch64) can lower a single wider extend more efficiently than multiple separate extends. This is a generalization of a VectorCombine version (#141109), as suggested by @preames.
1 parent 8e104d6 commit 1d24f8a

File tree

4 files changed

+28
-19
lines changed

4 files changed

+28
-19
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2571,17 +2571,16 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
25712571
/// Canonicalize casts after shuffle.
25722572
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
25732573
InstCombiner::BuilderTy &Builder) {
2574-
// Do we have 2 matching cast operands?
25752574
auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2576-
auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2577-
if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2578-
Cast0->getSrcTy() != Cast1->getSrcTy())
2575+
if (!Cast0)
25792576
return nullptr;
25802577

25812578
// TODO: Allow other opcodes? That would require easing the type restrictions
25822579
// below here.
25832580
CastInst::CastOps CastOpcode = Cast0->getOpcode();
25842581
switch (CastOpcode) {
2582+
case Instruction::SExt:
2583+
case Instruction::ZExt:
25852584
case Instruction::FPToSI:
25862585
case Instruction::FPToUI:
25872586
case Instruction::SIToFP:
@@ -2591,15 +2590,30 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
25912590
return nullptr;
25922591
}
25932592

2593+
VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
25942594
VectorType *ShufTy = Shuf.getType();
25952595
VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2596-
VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
25972596

25982597
// TODO: Allow length-increasing shuffles?
25992598
if (ShufTy->getElementCount().getKnownMinValue() >
26002599
ShufOpTy->getElementCount().getKnownMinValue())
26012600
return nullptr;
26022601

2602+
// shuffle (cast X), Y, identity-with-extract-mask -->
2603+
// cast (shuffle X, poison, identity-with-extract-mask).
2604+
if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract()) {
2605+
auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2606+
PoisonValue::get(CastSrcTy),
2607+
Shuf.getShuffleMask());
2608+
return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2609+
}
2610+
2611+
auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2612+
// Do we have 2 matching cast operands?
2613+
if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2614+
Cast0->getSrcTy() != Cast1->getSrcTy())
2615+
return nullptr;
2616+
26032617
// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
26042618
assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
26052619
"Expected fixed vector operands for casts and binary shuffle");

llvm/test/Transforms/InstCombine/X86/blend_x86.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,9 @@ define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x f
287287
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
288288
; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
289289
; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
290+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
290291
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
291292
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
292-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293293
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
294294
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
295295
; CHECK-NEXT: ret <4 x float> [[RES]]

llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(<8 x i8> %x) {
55
; CHECK-LABEL: define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(
66
; CHECK-SAME: <8 x i8> [[X:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
8-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16>
9-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9+
; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
1010
; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
1111
;
1212
entry:
@@ -19,8 +19,8 @@ define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(<4 x i16> %x)
1919
; CHECK-LABEL: define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(
2020
; CHECK-SAME: <4 x i16> [[X:%.*]]) {
2121
; CHECK-NEXT: [[ENTRY:.*:]]
22-
; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i16> [[X]] to <4 x i32>
23-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[E_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
22+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
23+
; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <3 x i16> [[TMP0]] to <3 x i32>
2424
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
2525
;
2626
entry:
@@ -102,12 +102,8 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
102102
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
103103
; CHECK-SAME: ptr [[DI:%.*]]) {
104104
; CHECK-NEXT: [[ENTRY:.*:]]
105-
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
106-
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
107-
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
108-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
109-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
110-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
105+
; CHECK-NEXT: [[L1:%.*]] = load <4 x i8>, ptr [[DI]], align 4
106+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L1]] to <4 x i32>
111107
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
112108
;
113109
entry:

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,8 @@ define <2 x i64> @x86_pblendvb_v32i8_v16i8_undefs(<4 x i64> %a, <4 x i64> %b, <4
496496
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
497497
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
498498
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
499-
; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
500-
; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
501-
; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]])
499+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
500+
; CHECK-NEXT: [[SEL_LO:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[B_LO]], <16 x i8> [[A_LO]]
502501
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[SEL_LO]] to <2 x i64>
503502
; CHECK-NEXT: ret <2 x i64> [[RES]]
504503
;

0 commit comments

Comments
 (0)