diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e7bae17dd2ceb..fcf22b0266349 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7047,6 +7047,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan, if (isa(&R)) return true; + // VPBlendRecipes are converted to selects and may have been simplified. + using namespace VPlanPatternMatch; + if (match(&R, m_VPInstruction( + m_VPValue(), m_VPValue(), m_VPValue())) && + isa_and_nonnull(R.getVPSingleValue()->getUnderlyingValue())) + return true; + /// If a VPlan transform folded a recipe to one producing a single-scalar, /// but the original instruction wasn't uniform-after-vectorization in the /// legacy cost model, the legacy cost overestimates the actual cost. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 356af4a0e74e4..ad80a2ae99b68 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2299,10 +2299,6 @@ class VPBlendRecipe : public VPSingleDefRecipe { /// Generate the phi/select nodes. void execute(VPTransformState &State) override; - /// Return the cost of this VPWidenMemoryRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ccb7512051d77..d3f4810d6efec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -928,6 +928,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ctx.CostKind); } + case Instruction::Select: { + if (!getUnderlyingValue()) + return 0; + // Handle cases where only the first lane is used the same way as the legacy + // cost model. + if (vputils::onlyFirstLaneUsed(this)) + return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); + Type *ResTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); + return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResTy, CmpTy, + CmpInst::BAD_ICMP_PREDICATE, + Ctx.CostKind); + } case VPInstruction::AnyOf: { auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); return Ctx.TTI.getArithmeticReductionCost( @@ -2402,53 +2415,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPBlendRecipe::execute(VPTransformState &State) { - assert(isNormalized() && "Expected blend to be normalized!"); - // We know that all PHIs in non-header blocks are converted into - // selects, so we don't have to worry about the insertion order and we - // can just use the builder. - // At this point we generate the predication tree. There may be - // duplications since this is a simple recursive scan, but future - // optimizations will clean it up. - - unsigned NumIncoming = getNumIncomingValues(); - - // Generate a sequence of selects of the form: - // SELECT(Mask3, In3, - // SELECT(Mask2, In2, - // SELECT(Mask1, In1, - // In0))) - // Note that Mask0 is never used: lanes for which no path reaches this phi and - // are essentially undef are taken from In0. - bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - Value *Result = nullptr; - for (unsigned In = 0; In < NumIncoming; ++In) { - // We might have single edge PHIs (blocks) - use an identity - // 'select' for the first PHI operand. - Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed); - if (In == 0) - Result = In0; // Initialize with the first incoming value. - else { - // Select between the current value and the previous incoming edge - // based on the incoming mask. - Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed); - Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi"); - } - } - State.set(this, Result, OnlyFirstLaneUsed); -} - -InstructionCost VPBlendRecipe::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - // Handle cases where only the first lane is used the same way as the legacy - // cost model. - if (vputils::onlyFirstLaneUsed(this)) - return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); - - Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); - Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); - return (getNumIncomingValues() - 1) * - Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy, - CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind); + llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends"); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 90137b72c83fb..e90af1a95f173 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1082,6 +1082,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return Def->replaceAllUsesWith(X); + // select !c, x, y -> select c, y, x + VPValue *C; + if (match(Def, m_Select(m_Not(m_VPValue(C)), m_VPValue(X), m_VPValue(Y)))) { + Def->setOperand(0, C); + Def->setOperand(1, Y); + Def->setOperand(2, X); + return; + } + if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); @@ -1288,38 +1297,17 @@ static void simplifyBlends(VPlan &Plan) { } } - SmallVector OperandsWithMask; - OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex)); - + VPBuilder Builder(&R); + VPValue *Select = Blend->getIncomingValue(StartIndex); for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) { if (I == StartIndex) continue; - OperandsWithMask.push_back(Blend->getIncomingValue(I)); - OperandsWithMask.push_back(Blend->getMask(I)); - } - - auto *NewBlend = new VPBlendRecipe( - cast(Blend->getUnderlyingValue()), OperandsWithMask); - NewBlend->insertBefore(&R); - - VPValue *DeadMask = Blend->getMask(StartIndex); - Blend->replaceAllUsesWith(NewBlend); - Blend->eraseFromParent(); - recursivelyDeleteDeadRecipes(DeadMask); - - /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask. - VPValue *NewMask; - if (NewBlend->getNumOperands() == 3 && - match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) { - VPValue *Inc0 = NewBlend->getOperand(0); - VPValue *Inc1 = NewBlend->getOperand(1); - VPValue *OldMask = NewBlend->getOperand(2); - NewBlend->setOperand(0, Inc1); - NewBlend->setOperand(1, Inc0); - NewBlend->setOperand(2, NewMask); - if (OldMask->getNumUsers() == 0) - cast(OldMask)->eraseFromParent(); + Select = + Builder.createSelect(Blend->getMask(I), Blend->getIncomingValue(I), + Select, R.getDebugLoc(), "predphi"); + Select->setUnderlyingValue(Blend->getUnderlyingValue()); } + Blend->replaceAllUsesWith(Select); } } } @@ -2150,18 +2138,20 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, return new VPReductionEVLRecipe(*Red, EVL, NewMask); }) .Case([&](VPInstruction *VPI) -> VPRecipeBase * { - VPValue *LHS, *RHS; + VPValue *LHS, *RHS, *Mask = &AllOneMask; // Transform select with a header mask condition - // select(header_mask, LHS, RHS) + // select(mask, LHS, RHS) // into vector predication merge. - // vp.merge(all-true, LHS, RHS, EVL) - if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS), - m_VPValue(RHS)))) + // vp.merge(opt_mask, LHS, RHS, EVL) + // TODO: Make m_LogicalAnd commutative? + if (!match(VPI, + m_Select(m_CombineOr(m_Specific(HeaderMask), + m_LogicalAnd(m_Specific(HeaderMask), + m_VPValue(Mask))), + m_VPValue(LHS), m_VPValue(RHS)))) return nullptr; - // Use all true as the condition because this transformation is - // limited to selects whose condition is a header mask. return new VPWidenIntrinsicRecipe( - Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL}, + Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL}, TypeInfo.inferScalarType(LHS), VPI->getDebugLoc()); }) .Default([&](VPRecipeBase *R) { return nullptr; }); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 078f98f54525b..c507c36f0dba9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -22,8 +22,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i64 0 ; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFNONE-NEXT: [[TMP2:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]]) -; TFNONE-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x double> [[TMP2]], zeroinitializer -; TFNONE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) +; TFNONE-NEXT: [[TMP3:%.*]] = fcmp ule <2 x double> [[TMP2]], zeroinitializer +; TFNONE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; TFNONE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 ; TFNONE-NEXT: store double [[TMP14]], ptr [[P:%.*]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 6029095bbe7b1..b9b54aeca7f00 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -935,8 +935,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[TMP7]], i64 0 ; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFNONE-NEXT: [[TMP8:%.*]] = call @exp_masked_scalable( [[BROADCAST_SPLAT]], splat (i1 true)) -; TFNONE-NEXT: [[TMP9:%.*]] = fcmp ogt [[TMP8]], zeroinitializer -; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], zeroinitializer, splat (double 1.000000e+00) +; TFNONE-NEXT: [[TMP9:%.*]] = fcmp ule [[TMP8]], zeroinitializer +; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], splat (double 1.000000e+00), zeroinitializer ; TFNONE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() ; TFNONE-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2 ; TFNONE-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index f8b83ff41f512..d5e07a615057d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -597,8 +597,8 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 2 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], splat (i16 99), [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_LOAD]], splat (i16 99) ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index db780c3c12c7e..0451d30179a0a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -677,9 +677,9 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP10:%.*]] = udiv [[WIDE_LOAD]], splat (i64 27) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -720,12 +720,12 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD1]], splat (i64 42) ; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) ; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) -; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[WIDE_LOAD]], <4 x i64> [[TMP6]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[WIDE_LOAD1]], <4 x i64> [[TMP7]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP3]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -797,9 +797,9 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP10:%.*]] = sdiv [[WIDE_LOAD]], splat (i64 27) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -840,12 +840,12 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD1]], splat (i64 42) ; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) ; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) -; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[WIDE_LOAD]], <4 x i64> [[TMP6]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[WIDE_LOAD1]], <4 x i64> [[TMP7]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP3]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 2926011857ae9..d9cbe47f54884 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -262,12 +262,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] ; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-OUTLOOP: vector.body: ; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -275,19 +272,12 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true) -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = add zeroinitializer, [[TMP13]] -; IF-EVL-OUTLOOP-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP14]] -; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sle [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add [[VEC_PHI]], [[VP_OP_LOAD]] -; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select [[TMP15]], [[TMP18]], zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select [[TMP21]], [[VEC_PHI]], [[TMP19]] +; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = call @llvm.vp.merge.nxv4i32( [[TMP18]], [[VEC_PHI]], [[TMP19]], i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[PREDPHI1]], [[VEC_PHI]], i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] @@ -398,9 +388,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load , ptr [[TMP21]], align 4 -; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt [[WIDE_MASKED_LOAD]], splat (i32 3) +; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp sle [[WIDE_MASKED_LOAD]], splat (i32 3) ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP18]], [[TMP16]], [[VEC_PHI]] +; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP12]], [[VEC_PHI]], [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -790,15 +780,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] ; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i64 1) -; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul [[TMP12]], splat (i32 1) ; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP13]] @@ -806,7 +790,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP: vector.body: ; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[IV]] @@ -814,23 +797,16 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = mul i32 1, [[TMP14]] ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP15]], i64 0 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64 -; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP16]] -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP14]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sle [[VP_OP_LOAD]], [[VEC_IND2]] ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = add [[VEC_PHI]], [[VP_OP_LOAD]] -; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = select [[TMP18]], [[TMP21]], zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[PREDPHI:%.*]] = select [[TMP23]], [[VEC_PHI]], [[TMP22]] +; IF-EVL-OUTLOOP-NEXT: [[PREDPHI:%.*]] = call @llvm.vp.merge.nxv4i32( [[TMP21]], [[VEC_PHI]], [[TMP22]], i32 [[TMP14]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP24]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[PREDPHI]], [[VEC_PHI]], i32 [[TMP14]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT6]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND2]], [[BROADCAST_SPLAT4]] ; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -956,9 +932,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-OUTLOOP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load , ptr [[TMP27]], align 4 -; NO-VP-OUTLOOP-NEXT: [[TMP28:%.*]] = icmp sgt [[WIDE_MASKED_LOAD]], [[VEC_IND]] +; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp sle [[WIDE_MASKED_LOAD]], [[VEC_IND]] ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP28]], [[TMP22]], [[VEC_PHI]] +; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP13]], [[VEC_PHI]], [[TMP22]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index f4cd48de60243..7e66fcf8568d3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -34,10 +34,10 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4) -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 @@ -47,10 +47,10 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI10:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI11:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD8]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[WIDE_LOAD9]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI10:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD7]] +; CHECK-NEXT: [[PREDPHI11:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD8]] +; CHECK-NEXT: [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD9]] ; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI10]] ; CHECK-NEXT: [[TMP18]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI11]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index ed1c67c082134..5ad5cf14aea10 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -11,54 +11,9 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0 -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]] -; CHECK: [[PRED_UREM_IF]]: -; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]] -; CHECK: [[PRED_UREM_CONTINUE]]: -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]] -; CHECK: [[PRED_UREM_IF1]]: -; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]] -; CHECK: [[PRED_UREM_CONTINUE2]]: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]] -; CHECK: [[PRED_UREM_IF3]]: -; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]] -; CHECK: [[PRED_UREM_CONTINUE4]]: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]] -; CHECK: [[PRED_UREM_IF5]]: -; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] -; CHECK: [[PRED_UREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) -; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) -; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]] -; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8 -; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]] @@ -71,7 +26,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -103,9 +58,3 @@ exit: } declare i64 @llvm.smax.i64(i64, i64) -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll index 8a7f4a386fda1..57109a0996a66 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll @@ -159,10 +159,10 @@ define i32 @reduction_func(ptr nocapture %A, i32 %n) nounwind uwtable readonly s ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 30) +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 31) ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VEC_PHI]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 8a326c9d0c083..7c6bc6aa6ca3a 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -567,7 +567,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i8> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]] @@ -577,7 +577,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP10]], i32 1 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP12]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP13]] = add <2 x i16> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index bd9647188911a..1eb6b134621da 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -23,8 +23,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> splat (i32 9) -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] ; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index d5a206ff21da0..9bc231108fa96 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -21,8 +21,8 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> splat (i16 1), <2 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> [[WIDE_LOAD]], <2 x i16> splat (i16 1) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI]], ptr [[TMP7]], align 2 @@ -107,9 +107,8 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> [[WIDE_LOAD]], <2 x i16> splat (i16 1) -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[PREDPHI]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2 @@ -189,8 +188,8 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP0]], <2 x i16> [[VEC_IND1]], <2 x i16> [[VEC_IND3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 @@ -297,9 +296,8 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> splat (i16 1) -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[PREDPHI]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 732214aa1449e..94ee454e971e8 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -187,7 +187,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Successor(s): if.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: if.then.0: -; CHECK-NEXT: BLEND ir<%d> = ir<0> vp<[[PRED]]>/ir<%cmp> +; CHECK-NEXT: EMIT ir<%d> = select ir<%cmp>, vp<[[PRED]]>, ir<0> ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d> @@ -376,7 +376,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: if.then.0: ; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp2> ; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = logical-and vp<[[NOT1]]>, vp<[[NOT2]]> -; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]> ir<%psd>/vp<[[SEL2]]> +; CHECK-NEXT: EMIT ir<%ysd.0> = select vp<[[SEL2]]>, ir<%psd>, vp<[[PHI]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> @@ -757,7 +757,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: if.then.1: ; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<[[PHI1]]>, vp<[[PHI2]]> -; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value> ir<%fadd>/ir<%ifcond> +; CHECK-NEXT: EMIT ir<%st.value> = select ir<%ifcond>, ir<%fadd>, ir<%ld.value> ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 4a4bda254bf88..a330afc45abab 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -366,7 +366,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.0: -; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -465,7 +465,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.0: -; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.1> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -571,7 +571,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.0: -; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: