From 26ed857d733edf81999c1546acb66b04755a18cb Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Fri, 4 Jul 2025 11:03:26 +0100
Subject: [PATCH 1/4] [VPlan] Iterate over header phis to determine FORs that
 need EVL fixup. NFCI

This is a follow-up to
https://github.com/llvm/llvm-project/pull/146672#discussion_r2183176231

We can avoid iterating over every recipe to pick out the splices that
need to be fixed up, given that for now all splices must use a
VPFirstOrderRecurrencePHIRecipe.

An assertion was added since this doesn't hold for unrolled loops:

vector.body:
  EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
  FIRST-ORDER-RECURRENCE-PHI ir<%10> = phi ir<%pre_load>, ir<%11>.1
  CLONE ir<%indvars.iv.next> = add nuw nsw vp<%index>, ir<1>
  CLONE ir<%arrayidx32> = getelementptr inbounds ir<%a>, ir<%indvars.iv.next>
  vp<%3> = vector-pointer ir<%arrayidx32>
  vp<%4> = vector-pointer ir<%arrayidx32>, ir<1>
  WIDEN ir<%11> = load vp<%3>
  WIDEN ir<%11>.1 = load vp<%4>
  EMIT vp<%5> = first-order splice ir<%10>, ir<%11>
  EMIT vp<%6> = first-order splice ir<%11>, ir<%11>.1 <-- doesn't use phi

Sometimes there are also splices in loops without a FOR phi at all:

vector.body:
  EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
  CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
  vp<%3> = vector-pointer ir<%gep.a>
  vp<%4> = vector-pointer ir<%gep.a>, ir<1>
  WIDEN ir<%load.a> = load vp<%3>
  WIDEN ir<%load.a>.1 = load vp<%4>
  WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
  WIDEN-CAST ir<%ext.a>.1 = zext ir<%load.a>.1 to i32
  CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
  vp<%5> = vector-pointer ir<%gep.b>
  vp<%6> = vector-pointer ir<%gep.b>, ir<1>
  WIDEN ir<%load.b> = load vp<%5>
  WIDEN ir<%load.b>.1 = load vp<%6>
  WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
  WIDEN-CAST ir<%ext.b>.1 = zext ir<%load.b>.1 to i32
  WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
  WIDEN ir<%mul>.1 = mul ir<%ext.b>.1, ir<%ext.a>.1
  EMIT vp<%7> = first-order splice ir<%mul>, ir<%mul>.1

A test was added for second-order recurrences just to double-check that
they do indeed have their own FOR phi.
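To illustrate, the scan now looks roughly like this (a simplified sketch
of the change below, with the splice-rewriting details elided):

  for (VPRecipeBase &PhiR : Header->phis()) {
    // Only first-order recurrence phis can feed a
    // FirstOrderRecurrenceSplice, so skip everything else.
    auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&PhiR);
    if (!FOR)
      continue;
    // Lazily create the prev.evl scalar phi on the first FOR found, then
    // rewrite each FirstOrderRecurrenceSplice user of the phi into an
    // experimental_vp_splice intrinsic.
    for (VPUser *User : FOR->users()) {
      // ...
    }
  }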
---
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  78 ++++----
 ...ce-tail-with-evl-fixed-order-recurrence.ll | 171 ++++++++++++++++++
 2 files changed, 213 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0bceb70d8661f..c4ff941cdfded 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2187,42 +2187,48 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // VPTypeAnalysis cache.
   SmallVector<VPRecipeBase *> ToErase;
 
-  // Create a scalar phi to track the previous EVL if fixed-order recurrence is
-  // contained.
-  bool ContainsFORs =
-      any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
-  if (ContainsFORs) {
-    // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
-    VPValue *MaxEVL = &Plan.getVF();
-    // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
-    VPBuilder Builder(LoopRegion->getPreheaderVPBB());
-    MaxEVL = Builder.createScalarZExtOrTrunc(MaxEVL, Type::getInt32Ty(Ctx),
-                                             TypeInfo.inferScalarType(MaxEVL),
-                                             DebugLoc());
-
-    Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    VPValue *PrevEVL =
-        Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
-
-    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
-      for (VPRecipeBase &R : *VPBB) {
-        using namespace VPlanPatternMatch;
-        VPValue *V1, *V2;
-        if (!match(&R,
-                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
-                       m_VPValue(V1), m_VPValue(V2))))
-          continue;
-        VPValue *Imm = Plan.getOrAddLiveIn(
-            ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
-        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
-            Intrinsic::experimental_vp_splice,
-            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
-            TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
-        VPSplice->insertBefore(&R);
-        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
-        ToErase.push_back(&R);
-      }
+  // Fix-up first-order recurrences
+  VPValue *PrevEVL = nullptr;
+  for (VPRecipeBase &PhiR : Header->phis()) {
+    auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&PhiR);
+    if (!FOR)
+      continue;
+
+    // Create a scalar phi to track the previous EVL if fixed-order recurrence
+    // is contained.
+    if (!PrevEVL) {
+      // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
+      VPValue *MaxEVL = &Plan.getVF();
+      // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
+      VPBuilder Builder(LoopRegion->getPreheaderVPBB());
+      MaxEVL = Builder.createScalarZExtOrTrunc(MaxEVL, Type::getInt32Ty(Ctx),
+                                               TypeInfo.inferScalarType(MaxEVL),
+                                               DebugLoc());
+
+      Builder.setInsertPoint(Header, Header->getFirstNonPhi());
+      PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
+    }
+
+    assert(!Plan.isUnrolled() && "When unrolled splices might not use "
+                                 "VPFirstOrederRecurrencePHIRecipe!");
+
+    for (VPUser *User : PhiR.getVPSingleValue()->users()) {
+      auto *R = cast<VPRecipeBase>(User);
+      using namespace VPlanPatternMatch;
+      VPValue *V1, *V2;
+      if (!match(R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                        m_VPValue(V1), m_VPValue(V2))))
+        continue;
+      VPValue *Imm = Plan.getOrAddLiveIn(
+          ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
+      VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
+          Intrinsic::experimental_vp_splice,
+          {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+          TypeInfo.inferScalarType(R->getVPSingleValue()), R->getDebugLoc());
+
+      VPSplice->insertBefore(R);
+      R->getVPSingleValue()->replaceAllUsesWith(VPSplice);
+      ToErase.push_back(R);
     }
   }

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
index 0490d63f67d4e..c4e9b3fefc98a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
@@ -737,6 +737,173 @@ for.end:
   ret void
 }
 
+
+define void @second_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
+; IF-EVL-LABEL: define void @second_order_recurrence_indvar(
+; IF-EVL-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; IF-EVL-NEXT:  [[ENTRY:.*]]:
+; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IF-EVL:       [[VECTOR_PH]]:
+; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP2]]
+; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; IF-EVL-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; IF-EVL-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; IF-EVL-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; IF-EVL-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; IF-EVL-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2
+; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; IF-EVL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP10]]
+; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2
+; IF-EVL-NEXT:    [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; IF-EVL-NEXT:    [[VECTOR_RECUR_INIT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP13]]
+; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IF-EVL:       [[VECTOR_BODY]]:
+; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[VECTOR_RECUR2:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP14]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; IF-EVL-NEXT:    [[TMP16:%.*]] = mul i64 1, [[TMP15]]
+; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i64 0
+; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; IF-EVL-NEXT:    [[TMP17]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; IF-EVL-NEXT:    [[TMP18]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP17]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP14]])
+; IF-EVL-NEXT:    [[TMP19:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR2]], <vscale x 2 x i64> [[TMP18]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP14]])
+; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP20]], i32 0
+; IF-EVL-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP19]], ptr align 8 [[TMP21]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
+; IF-EVL-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP14]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP22]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; IF-EVL:       [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT:    br label %[[FOR_END:.*]]
+; IF-EVL:       [[SCALAR_PH]]:
+; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; IF-EVL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT:    br label %[[FOR_BODY:.*]]
+; IF-EVL:       [[FOR_BODY]]:
+; IF-EVL-NEXT:    [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT:    [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[X:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT:    [[FOR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT3]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT:    [[X]] = add i64 [[INDVARS]], 42
+; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDVARS]]
+; IF-EVL-NEXT:    store i64 [[FOR2]], ptr [[ARRAYIDX]], align 8
+; IF-EVL-NEXT:    [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
+; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; IF-EVL:       [[FOR_END]]:
+; IF-EVL-NEXT:    ret void
+;
+; NO-VP-LABEL: define void @second_order_recurrence_indvar(
+; NO-VP-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; NO-VP-NEXT:  [[ENTRY:.*]]:
+; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]]
+; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-VP:       [[VECTOR_PH]]:
+; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
+; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
+; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
+; NO-VP-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; NO-VP-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; NO-VP-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; NO-VP-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; NO-VP-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; NO-VP-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 2
+; NO-VP-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP10]], 1
+; NO-VP-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP11]]
+; NO-VP-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT:    [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 2
+; NO-VP-NEXT:    [[TMP14:%.*]] = sub i32 [[TMP13]], 1
+; NO-VP-NEXT:    [[VECTOR_RECUR_INIT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP14]]
+; NO-VP-NEXT:    br label %[[VECTOR_BODY:.*]]
+; NO-VP:       [[VECTOR_BODY]]:
+; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[VECTOR_RECUR2:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[TMP15]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; NO-VP-NEXT:    [[TMP16]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP15]], i32 -1)
+; NO-VP-NEXT:    [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR2]], <vscale x 2 x i64> [[TMP16]], i32 -1)
+; NO-VP-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
+; NO-VP-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP18]], i32 0
+; NO-VP-NEXT:    store <vscale x 2 x i64> [[TMP17]], ptr [[TMP19]], align 8
+; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; NO-VP:       [[MIDDLE_BLOCK]]:
+; NO-VP-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT:    [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 2
+; NO-VP-NEXT:    [[TMP23:%.*]] = sub i32 [[TMP22]], 1
+; NO-VP-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 2 x i64> [[TMP15]], i32 [[TMP23]]
+; NO-VP-NEXT:    [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT:    [[TMP25:%.*]] = mul nuw i32 [[TMP24]], 2
+; NO-VP-NEXT:    [[TMP26:%.*]] = sub i32 [[TMP25]], 1
+; NO-VP-NEXT:    [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <vscale x 2 x i64> [[TMP16]], i32 [[TMP26]]
+; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
+; NO-VP-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; NO-VP:       [[SCALAR_PH]]:
+; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; NO-VP-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT:    br label %[[FOR_BODY:.*]]
+; NO-VP:       [[FOR_BODY]]:
+; NO-VP-NEXT:    [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT:    [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[X:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT:    [[FOR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT4]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ]
+; NO-VP-NEXT:    [[X]] = add i64 [[INDVARS]], 42
+; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDVARS]]
+; NO-VP-NEXT:    store i64 [[FOR2]], ptr [[ARRAYIDX]], align 8
+; NO-VP-NEXT:    [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
+; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; NO-VP:       [[FOR_END]]:
+; NO-VP-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.body ]
+  %for1 = phi i64 [ 33, %entry ], [ %x, %for.body ]
+  %for2 = phi i64 [ 33, %entry ], [ %for1, %for.body ]
+
+  %x = add i64 %indvars, 42
+
+  %arrayidx = getelementptr inbounds nuw i64, ptr %A, i64 %indvars
+  store i64 %for2, ptr %arrayidx
+
+  %indvars.next = add nuw nsw i64 %indvars, 1
+  %exitcond.not = icmp eq i64 %indvars.next, %TC
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
 ;.
@@ -753,6 +920,8 @@ for.end:
 ; IF-EVL: [[META10]] = !{!"llvm.loop.vectorize.enable", i1 true}
 ; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]], [[META3]]}
 ; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META1]]}
+; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]], [[META3]]}
+; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META1]]}
 ;.
 ; NO-VP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; NO-VP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -766,4 +935,6 @@ for.end:
 ; NO-VP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
 ; NO-VP: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
 ; NO-VP: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; NO-VP: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; NO-VP: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
 ;.

From 50d68b2bb4c8ad4a3ca124c7e18f5bf8095877de Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 7 Jul 2025 22:04:42 +0800
Subject: [PATCH 2/4] Avoid redundant getVPSingleValue()

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c4ff941cdfded..3c78d4006a30f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2212,7 +2212,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
     assert(!Plan.isUnrolled() && "When unrolled splices might not use "
                                  "VPFirstOrederRecurrencePHIRecipe!");
 
-    for (VPUser *User : PhiR.getVPSingleValue()->users()) {
+    for (VPUser *User : FOR->users()) {
       auto *R = cast<VPRecipeBase>(User);
       using namespace VPlanPatternMatch;
       VPValue *V1, *V2;

From 0f752fe9be3899bab11f591fdc91ecacf352950f Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 8 Jul 2025 15:59:37 +0800
Subject: [PATCH 3/4] Move assert, fix typos and adjust comments

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3c78d4006a30f..b3880f6e0aee2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2187,7 +2187,11 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // VPTypeAnalysis cache.
   SmallVector<VPRecipeBase *> ToErase;
 
-  // Fix-up first-order recurrences
+  // When unrolling splices may not use VPFirstOrderRecurrencePHIRecipes, so the
+  // below transformation will need to be fixed.
+  assert(!Plan.isUnrolled() && "Unrolling not supported with EVL tail folding.");
+
+  // Replace FirstOrderRecurrenceSplice with experimental_vp_splice intrinsics.
   VPValue *PrevEVL = nullptr;
   for (VPRecipeBase &PhiR : Header->phis()) {
     auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&PhiR);
@@ -2209,9 +2213,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
       PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
     }
 
-    assert(!Plan.isUnrolled() && "When unrolled splices might not use "
-                                 "VPFirstOrederRecurrencePHIRecipe!");
-
     for (VPUser *User : FOR->users()) {
       auto *R = cast<VPRecipeBase>(User);
       using namespace VPlanPatternMatch;
       VPValue *V1, *V2;

From 5d4bac97a9954f7f0121f5549e60db5270e4a29d Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 8 Jul 2025 16:05:14 +0800
Subject: [PATCH 4/4] clang-format

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b3880f6e0aee2..68d3c4f03f1ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2189,7 +2189,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
 
   // When unrolling splices may not use VPFirstOrderRecurrencePHIRecipes, so the
   // below transformation will need to be fixed.
-  assert(!Plan.isUnrolled() && "Unrolling not supported with EVL tail folding.");
+  assert(!Plan.isUnrolled() &&
+         "Unrolling not supported with EVL tail folding.");
 
   // Replace FirstOrderRecurrenceSplice with experimental_vp_splice intrinsics.
   VPValue *PrevEVL = nullptr;