
Commit 26ed857

[VPlan] Iterate over header phis to determine FORs that need EVL fixup. NFCI
This is a follow-up to llvm#146672 (comment). We can avoid iterating over every recipe to pick out the splices that need to be fixed up, given that for now all splices must use a VPFirstOrderRecurrencePHIRecipe. An assertion was added since this doesn't hold for unrolled loops:

vector.body:
  EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
  FIRST-ORDER-RECURRENCE-PHI ir<%10> = phi ir<%pre_load>, ir<%11>.1
  CLONE ir<%indvars.iv.next> = add nuw nsw vp<%index>, ir<1>
  CLONE ir<%arrayidx32> = getelementptr inbounds ir<%a>, ir<%indvars.iv.next>
  vp<%3> = vector-pointer ir<%arrayidx32>
  vp<%4> = vector-pointer ir<%arrayidx32>, ir<1>
  WIDEN ir<%11> = load vp<%3>
  WIDEN ir<%11>.1 = load vp<%4>
  EMIT vp<%5> = first-order splice ir<%10>, ir<%11>
  EMIT vp<%6> = first-order splice ir<%11>, ir<%11>.1 <-- doesn't use phi

Sometimes there are also splices in loops without a FOR phi at all:

vector.body:
  EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
  CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
  vp<%3> = vector-pointer ir<%gep.a>
  vp<%4> = vector-pointer ir<%gep.a>, ir<1>
  WIDEN ir<%load.a> = load vp<%3>
  WIDEN ir<%load.a>.1 = load vp<%4>
  WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
  WIDEN-CAST ir<%ext.a>.1 = zext ir<%load.a>.1 to i32
  CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
  vp<%5> = vector-pointer ir<%gep.b>
  vp<%6> = vector-pointer ir<%gep.b>, ir<1>
  WIDEN ir<%load.b> = load vp<%5>
  WIDEN ir<%load.b>.1 = load vp<%6>
  WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
  WIDEN-CAST ir<%ext.b>.1 = zext ir<%load.b>.1 to i32
  WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
  WIDEN ir<%mul>.1 = mul ir<%ext.b>.1, ir<%ext.a>.1
  EMIT vp<%7> = first-order splice ir<%mul>, ir<%mul>.1

A test was added for second-order recurrences, just to double-check that they indeed also have their own FOR phi.
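The sketch below is a minimal scalar model of what the @llvm.experimental.vp.splice call emitted by this transform computes for the offset of -1 used here. It paraphrases the LangRef semantics as I understand them; the function name, element type, and use of std::vector are illustrative assumptions, not LLVM APIs.

#include <cstdint>
#include <vector>

// Scalar model of @llvm.experimental.vp.splice(%va, %vb, -1, mask, %evl1,
// %evl2) with an all-ones mask: concatenate the first EVL1 lanes of VA with
// the first EVL2 lanes of VB, then take the EVL2-lane window starting at
// index EVL1 + (-1). Lane 0 of the result is thus VA's last active lane (the
// recurrence's "previous" value) and the remaining lanes come from VB.
// Assumes EVL1 >= 1 and that VA/VB have at least EVL1/EVL2 lanes.
std::vector<int64_t> vpSpliceMinus1(const std::vector<int64_t> &VA,
                                    const std::vector<int64_t> &VB,
                                    unsigned EVL1, unsigned EVL2) {
  std::vector<int64_t> Concat(VA.begin(), VA.begin() + EVL1);
  Concat.insert(Concat.end(), VB.begin(), VB.begin() + EVL2);
  return std::vector<int64_t>(Concat.begin() + (EVL1 - 1),
                              Concat.begin() + (EVL1 - 1) + EVL2);
}

In the transformed loop, EVL1 is the previous iteration's EVL (the prev.evl phi created in the diff below) and EVL2 is the current one, which is why the transform has to thread prev.evl through the loop instead of reusing a fixed VF.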
1 parent b6e113a

File tree

2 files changed: +213 −36 lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 42 additions & 36 deletions
@@ -2187,42 +2187,48 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // VPTypeAnalysis cache.
   SmallVector<VPRecipeBase *> ToErase;
 
-  // Create a scalar phi to track the previous EVL if fixed-order recurrence is
-  // contained.
-  bool ContainsFORs =
-      any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
-  if (ContainsFORs) {
-    // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
-    VPValue *MaxEVL = &Plan.getVF();
-    // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
-    VPBuilder Builder(LoopRegion->getPreheaderVPBB());
-    MaxEVL = Builder.createScalarZExtOrTrunc(MaxEVL, Type::getInt32Ty(Ctx),
-                                             TypeInfo.inferScalarType(MaxEVL),
-                                             DebugLoc());
-
-    Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    VPValue *PrevEVL =
-        Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
-
-    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
-      for (VPRecipeBase &R : *VPBB) {
-        using namespace VPlanPatternMatch;
-        VPValue *V1, *V2;
-        if (!match(&R,
-                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
-                       m_VPValue(V1), m_VPValue(V2))))
-          continue;
-        VPValue *Imm = Plan.getOrAddLiveIn(
-            ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
-        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
-            Intrinsic::experimental_vp_splice,
-            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
-            TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
-        VPSplice->insertBefore(&R);
-        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
-        ToErase.push_back(&R);
-      }
+  // Fix-up first-order recurrences
+  VPValue *PrevEVL = nullptr;
+  for (VPRecipeBase &PhiR : Header->phis()) {
+    auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&PhiR);
+    if (!FOR)
+      continue;
+
+    // Create a scalar phi to track the previous EVL if fixed-order recurrence
+    // is contained.
+    if (!PrevEVL) {
+      // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
+      VPValue *MaxEVL = &Plan.getVF();
+      // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
+      VPBuilder Builder(LoopRegion->getPreheaderVPBB());
+      MaxEVL = Builder.createScalarZExtOrTrunc(MaxEVL, Type::getInt32Ty(Ctx),
+                                               TypeInfo.inferScalarType(MaxEVL),
+                                               DebugLoc());
+
+      Builder.setInsertPoint(Header, Header->getFirstNonPhi());
+      PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
+    }
+
+    assert(!Plan.isUnrolled() && "When unrolled splices might not use "
+                                 "VPFirstOrderRecurrencePHIRecipe!");
+
+    for (VPUser *User : PhiR.getVPSingleValue()->users()) {
+      auto *R = cast<VPRecipeBase>(User);
+      using namespace VPlanPatternMatch;
+      VPValue *V1, *V2;
+      if (!match(R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                        m_VPValue(V1), m_VPValue(V2))))
+        continue;
+      VPValue *Imm = Plan.getOrAddLiveIn(
+          ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
+      VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
+          Intrinsic::experimental_vp_splice,
+          {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+          TypeInfo.inferScalarType(R->getVPSingleValue()), R->getDebugLoc());
+
+      VPSplice->insertBefore(R);
+      R->getVPSingleValue()->replaceAllUsesWith(VPSplice);
+      ToErase.push_back(R);
     }
   }
 
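To connect the transform above to the IF-EVL checks in the test below (where PREV_EVL is a phi seeded with the truncated VF and updated with each iteration's EVL), here is a rough scalar model of an EVL tail-folded loop containing a generic first-order recurrence. It is a sketch under stated assumptions, not the generated code: all names are invented, and @llvm.experimental.get.vector.length is modeled as a simple min. It shows why prev.evl must be a phi: the lane holding the recurrence's previous value depends on how many lanes the previous iteration processed.

#include <algorithm>
#include <cstdint>
#include <vector>

// Scalar model of an EVL tail-folded loop computing the first-order
// recurrence A[i] = (i == 0 ? Init : B[i - 1]).
void evlFoldedFOR(int64_t *A, const int64_t *B, int64_t TC, unsigned MaxEVL,
                  int64_t Init) {
  // vector.recur: seeded so the last lane holds Init, like the test's
  // preheader insertelement of 33 into lane VF - 1.
  std::vector<int64_t> Prev(MaxEVL, Init);
  unsigned PrevEVL = MaxEVL; // the prev.evl phi starts at VF (MaxEVL)
  for (int64_t I = 0; I < TC;) {
    unsigned EVL = static_cast<unsigned>(std::min<int64_t>(MaxEVL, TC - I));
    std::vector<int64_t> Cur(B + I, B + I + EVL);
    // The vp.splice with offset -1: the last of the *previous* iteration's
    // PrevEVL active lanes, followed by the leading lanes of Cur.
    A[I] = Prev[PrevEVL - 1];
    for (unsigned L = 1; L < EVL; ++L)
      A[I + L] = Cur[L - 1];
    Prev = std::move(Cur);
    PrevEVL = EVL; // becomes prev.evl on the next iteration
    I += EVL;
  }
}

If every iteration used the full VF instead of PrevEVL, the splice would read a lane past the previous iteration's last active element whenever that iteration was shortened by tail folding.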

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 171 additions & 0 deletions
@@ -737,6 +737,173 @@ for.end:
   ret void
 }
 
+
+define void @second_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
+; IF-EVL-LABEL: define void @second_order_recurrence_indvar(
+; IF-EVL-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; IF-EVL-NEXT: [[ENTRY:.*]]:
+; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IF-EVL: [[VECTOR_PH]]:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP2]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; IF-EVL-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; IF-EVL-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2
+; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP10]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2
+; IF-EVL-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP13]]
+; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
+; IF-EVL: [[VECTOR_BODY]]:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP14]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 1, [[TMP15]]
+; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i64 0
+; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; IF-EVL-NEXT: [[TMP17]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; IF-EVL-NEXT: [[TMP18]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP17]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP14]])
+; IF-EVL-NEXT: [[TMP19:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR2]], <vscale x 2 x i64> [[TMP18]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP14]])
+; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP20]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP19]], ptr align 8 [[TMP21]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
+; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP14]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP22]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; IF-EVL: [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT: br label %[[FOR_END:.*]]
+; IF-EVL: [[SCALAR_PH]]:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT: br label %[[FOR_BODY:.*]]
+; IF-EVL: [[FOR_BODY]]:
+; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[X:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[FOR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT3]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[X]] = add i64 [[INDVARS]], 42
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDVARS]]
+; IF-EVL-NEXT: store i64 [[FOR2]], ptr [[ARRAYIDX]], align 8
+; IF-EVL-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; IF-EVL: [[FOR_END]]:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: define void @second_order_recurrence_indvar(
+; NO-VP-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; NO-VP-NEXT: [[ENTRY:.*]]:
+; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]]
+; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-VP: [[VECTOR_PH]]:
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 2
+; NO-VP-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP11]]
+; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 2
+; NO-VP-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP14]]
+; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-VP: [[VECTOR_BODY]]:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP15]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; NO-VP-NEXT: [[TMP16]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP15]], i32 -1)
+; NO-VP-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR2]], <vscale x 2 x i64> [[TMP16]], i32 -1)
+; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
+; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP18]], i32 0
+; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP17]], ptr [[TMP19]], align 8
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; NO-VP: [[MIDDLE_BLOCK]]:
+; NO-VP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 2
+; NO-VP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 2 x i64> [[TMP15]], i32 [[TMP23]]
+; NO-VP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP25:%.*]] = mul nuw i32 [[TMP24]], 2
+; NO-VP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP25]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <vscale x 2 x i64> [[TMP16]], i32 [[TMP26]]
+; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; NO-VP: [[SCALAR_PH]]:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
+; NO-VP: [[FOR_BODY]]:
+; NO-VP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[X:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[FOR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT4]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[X]] = add i64 [[INDVARS]], 42
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDVARS]]
+; NO-VP-NEXT: store i64 [[FOR2]], ptr [[ARRAYIDX]], align 8
+; NO-VP-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
+; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; NO-VP: [[FOR_END]]:
+; NO-VP-NEXT: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.body ]
+  %for1 = phi i64 [ 33, %entry ], [ %x, %for.body ]
+  %for2 = phi i64 [ 33, %entry ], [ %for1, %for.body ]
+
+  %x = add i64 %indvars, 42
+
+  %arrayidx = getelementptr inbounds nuw i64, ptr %A, i64 %indvars
+  store i64 %for2, ptr %arrayidx
+
+  %indvars.next = add nuw nsw i64 %indvars, 1
+  %exitcond.not = icmp eq i64 %indvars.next, %TC
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
 ;.
@@ -753,6 +920,8 @@ for.end:
 ; IF-EVL: [[META10]] = !{!"llvm.loop.vectorize.enable", i1 true}
 ; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]], [[META3]]}
 ; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META1]]}
+; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]], [[META3]]}
+; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META1]]}
 ;.
 ; NO-VP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; NO-VP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -766,4 +935,6 @@ for.end:
 ; NO-VP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
 ; NO-VP: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
 ; NO-VP: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; NO-VP: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; NO-VP: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
 ;.
