diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 1a8d4215b5b71..00b877b8a07ef 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2571,17 +2571,16 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, /// Canonicalize casts after shuffle. static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder) { - // Do we have 2 matching cast operands? auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0)); - auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1)); - if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() || - Cast0->getSrcTy() != Cast1->getSrcTy()) + if (!Cast0) return nullptr; // TODO: Allow other opcodes? That would require easing the type restrictions // below here. CastInst::CastOps CastOpcode = Cast0->getOpcode(); switch (CastOpcode) { + case Instruction::SExt: + case Instruction::ZExt: case Instruction::FPToSI: case Instruction::FPToUI: case Instruction::SIToFP: @@ -2591,15 +2590,31 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, return nullptr; } + VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy()); VectorType *ShufTy = Shuf.getType(); VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType()); - VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy()); // TODO: Allow length-increasing shuffles? if (ShufTy->getElementCount().getKnownMinValue() > ShufOpTy->getElementCount().getKnownMinValue()) return nullptr; + // shuffle (cast X), Poison, identity-with-extract-mask --> + // cast (shuffle X, Poison, identity-with-extract-mask).
+ if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() && + Shuf.isIdentityWithExtract()) { + auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0), + PoisonValue::get(CastSrcTy), + Shuf.getShuffleMask()); + return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType()); + } + + auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1)); + // Do we have 2 matching cast operands? + if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() || + Cast0->getSrcTy() != Cast1->getSrcTy()) + return nullptr; + // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)? assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) && "Expected fixed vector operands for casts and binary shuffle"); diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll index aa49f493c9fa1..5c3b0beefbb66 100644 --- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll +++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll @@ -287,9 +287,9 @@ define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x f ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32> ; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[RES]] diff --git a/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll index dddd63d101e61..fc19b11fa8694 --- 
a/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll +++ b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll @@ -5,8 +5,8 @@ define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(<8 x i8> %x) { ; CHECK-LABEL: define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16( ; CHECK-SAME: <8 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]] ; entry: @@ -19,8 +19,8 @@ define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(<4 x i16> %x) ; CHECK-LABEL: define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32( ; CHECK-SAME: <4 x i16> [[X:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i16> [[X]] to <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[E_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> +; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <3 x i16> [[TMP0]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] ; entry: @@ -102,12 +102,8 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) { ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32( ; CHECK-SAME: ptr [[DI:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 -; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 -; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[L1:%.*]] = 
load <4 x i8>, ptr [[DI]], align 4 +; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll index bbf893f6127b0..dbce77698eb07 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll @@ -496,9 +496,8 @@ define <2 x i64> @x86_pblendvb_v32i8_v16i8_undefs(<4 x i64> %a, <4 x i64> %b, <4 ; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> +; CHECK-NEXT: [[SEL_LO:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[B_LO]], <16 x i8> [[A_LO]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[SEL_LO]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ;