Skip to content

Commit 615cca4

Browse files
committed
[InstCombine] Move extends across identity shuffles.
Add a new fold to InstCombine that moves SExt/ZExt across identity shuffles, applying the cast after the shuffle. This sinks extends and can enable further folding of both the shuffles (and related instructions) and the extends. If backends prefer to split the operation up and do the casts first, the extends can be hoisted again later, for example in VectorCombine. A larger example is included in the load_i32_zext_to_v4i32 test. The wider extend is easier to compute an accurate cost for, and targets (like AArch64) can lower a single wider extend more efficiently than multiple separate extends. This is a generalization of a VectorCombine version (#141109), as suggested by @preames.
1 parent 8c76e57 commit 615cca4

File tree

4 files changed

+28
-19
lines changed

4 files changed

+28
-19
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2559,17 +2559,16 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
25592559
/// Canonicalize casts after shuffle.
25602560
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
25612561
InstCombiner::BuilderTy &Builder) {
2562-
// Do we have 2 matching cast operands?
25632562
auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2564-
auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2565-
if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2566-
Cast0->getSrcTy() != Cast1->getSrcTy())
2563+
if (!Cast0)
25672564
return nullptr;
25682565

25692566
// TODO: Allow other opcodes? That would require easing the type restrictions
25702567
// below here.
25712568
CastInst::CastOps CastOpcode = Cast0->getOpcode();
25722569
switch (CastOpcode) {
2570+
case Instruction::SExt:
2571+
case Instruction::ZExt:
25732572
case Instruction::FPToSI:
25742573
case Instruction::FPToUI:
25752574
case Instruction::SIToFP:
@@ -2579,15 +2578,30 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
25792578
return nullptr;
25802579
}
25812580

2581+
VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
25822582
VectorType *ShufTy = Shuf.getType();
25832583
VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2584-
VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
25852584

25862585
// TODO: Allow length-increasing shuffles?
25872586
if (ShufTy->getElementCount().getKnownMinValue() >
25882587
ShufOpTy->getElementCount().getKnownMinValue())
25892588
return nullptr;
25902589

2590+
// shuffle (cast X), Y, identity-with-extract-mask -->
2591+
// cast (shuffle X, Y, identity-with-extract-mask).
2592+
if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract()) {
2593+
auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2594+
PoisonValue::get(CastSrcTy),
2595+
Shuf.getShuffleMask());
2596+
return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2597+
}
2598+
2599+
auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2600+
// Do we have 2 matching cast operands?
2601+
if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2602+
Cast0->getSrcTy() != Cast1->getSrcTy())
2603+
return nullptr;
2604+
25912605
// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
25922606
assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
25932607
"Expected fixed vector operands for casts and binary shuffle");

llvm/test/Transforms/InstCombine/X86/blend_x86.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,9 @@ define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x f
287287
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
288288
; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
289289
; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
290+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
290291
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
291292
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
292-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293293
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
294294
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
295295
; CHECK-NEXT: ret <4 x float> [[RES]]

llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(<8 x i8> %x) {
55
; CHECK-LABEL: define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(
66
; CHECK-SAME: <8 x i8> [[X:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
8-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16>
9-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9+
; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
1010
; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
1111
;
1212
entry:
@@ -19,8 +19,8 @@ define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(<4 x i16> %x)
1919
; CHECK-LABEL: define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(
2020
; CHECK-SAME: <4 x i16> [[X:%.*]]) {
2121
; CHECK-NEXT: [[ENTRY:.*:]]
22-
; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i16> [[X]] to <4 x i32>
23-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[E_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
22+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
23+
; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <3 x i16> [[TMP0]] to <3 x i32>
2424
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
2525
;
2626
entry:
@@ -89,12 +89,8 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
8989
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
9090
; CHECK-SAME: ptr [[DI:%.*]]) {
9191
; CHECK-NEXT: [[ENTRY:.*:]]
92-
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
93-
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
94-
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
95-
; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
96-
; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
97-
; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
92+
; CHECK-NEXT: [[L1:%.*]] = load <4 x i8>, ptr [[DI]], align 4
93+
; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L1]] to <4 x i32>
9894
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
9995
;
10096
entry:

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,8 @@ define <2 x i64> @x86_pblendvb_v32i8_v16i8_undefs(<4 x i64> %a, <4 x i64> %b, <4
496496
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
497497
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
498498
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
499-
; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
500-
; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
501-
; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]])
499+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
500+
; CHECK-NEXT: [[SEL_LO:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[B_LO]], <16 x i8> [[A_LO]]
502501
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[SEL_LO]] to <2 x i64>
503502
; CHECK-NEXT: ret <2 x i64> [[RES]]
504503
;

0 commit comments

Comments
 (0)