Commit 61a0653

[VPlan] Fix first-order splices without header mask not using EVL (#146672)
This fixes a buildbot failure with EVL tail folding after #144666: https://lab.llvm.org/buildbot/#/builders/132/builds/1653

For a first-order recurrence to be correct with EVL tail folding, we need to convert its splices to vp splices that take the EVL operand. Originally we did this by looking for users of the header mask and its users, and converting them in createEVLRecipe.

However, after #144666 a FOR splice might not actually use the header mask if it's based off e.g. an induction variable, and so we wouldn't pick it up in createEVLRecipe.

This patch fixes that by converting FOR splices separately, in a loop over all recipes in the plan, regardless of whether or not they use the header mask.

I think there was some conflation in createEVLRecipe between what was an optimisation and what was needed for correctness. Most of the transforms in it exist only to optimize the mask away, and we should still emit correct code without them, so I've renamed it to optimizeMaskToEVL to make the separation clearer.
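As a rough illustration of why the EVL operand matters (the value names %vector.recur, %cur, %prev.evl and %evl below are hypothetical; the exact checked output is in the test diff further down): without tail folding the recurrence is materialized with llvm.vector.splice, but under EVL tail folding the splice also has to account for how many lanes were active in the previous and current iterations, so it must become llvm.experimental.vp.splice carrying both EVLs.

; Plain vectorization: splice the last element of the previous iteration's vector onto the current one.
%splice = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %vector.recur, <vscale x 2 x i64> %cur, i32 -1)

; EVL tail folding: the splice additionally takes the previous and current explicit vector lengths.
%vp.splice = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> %vector.recur, <vscale x 2 x i64> %cur, i32 -1, <vscale x 2 x i1> splat (i1 true), i32 %prev.evl, i32 %evl)

This conversion is required for correctness whether or not the splice is a user of the header mask, which is why it is now done over all recipes in the plan rather than only in the header-mask walk.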
1 parent 21c4fbd commit 61a0653

2 files changed: +186 −29 lines changed


llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 37 additions & 29 deletions
@@ -2114,21 +2114,19 @@ void VPlanTransforms::addActiveLaneMask(
   HeaderMask->replaceAllUsesWith(LaneMask);
 }
 
-/// Try to convert \p CurRecipe to a corresponding EVL-based recipe. Returns
-/// nullptr if no EVL-based recipe could be created.
+/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
+/// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
+/// recipe could be created.
 /// \p HeaderMask Header Mask.
 /// \p CurRecipe Recipe to be transform.
 /// \p TypeInfo VPlan-based type analysis.
 /// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
 /// \p EVL The explicit vector length parameter of vector-predication
 /// intrinsics.
-/// \p PrevEVL The explicit vector length of the previous iteration. Only
-/// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice.
-static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
-                                     VPRecipeBase &CurRecipe,
-                                     VPTypeAnalysis &TypeInfo,
-                                     VPValue &AllOneMask, VPValue &EVL,
-                                     VPValue *PrevEVL) {
+static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
+                                       VPRecipeBase &CurRecipe,
+                                       VPTypeAnalysis &TypeInfo,
+                                       VPValue &AllOneMask, VPValue &EVL) {
   using namespace llvm::VPlanPatternMatch;
   auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
     assert(OrigMask && "Unmasked recipe when folding tail");
@@ -2153,18 +2151,6 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
         return new VPReductionEVLRecipe(*Red, EVL, NewMask);
       })
       .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
-        if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
-          assert(PrevEVL && "Fixed-order recurrences require previous EVL");
-          VPValue *MinusOneVPV = VPI->getParent()->getPlan()->getOrAddLiveIn(
-              ConstantInt::getSigned(Type::getInt32Ty(TypeInfo.getContext()),
-                                     -1));
-          SmallVector<VPValue *> Ops(VPI->operands());
-          Ops.append({MinusOneVPV, &AllOneMask, PrevEVL, &EVL});
-          return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice,
-                                            Ops, TypeInfo.inferScalarType(VPI),
-                                            VPI->getDebugLoc());
-        }
-
         VPValue *LHS, *RHS;
         // Transform select with a header mask condition
         // select(header_mask, LHS, RHS)
@@ -2197,9 +2183,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
          "User of VF that we can't transform to EVL.");
   Plan.getVF().replaceAllUsesWith(&EVL);
 
+  // Defer erasing recipes till the end so that we don't invalidate the
+  // VPTypeAnalysis cache.
+  SmallVector<VPRecipeBase *> ToErase;
+
   // Create a scalar phi to track the previous EVL if fixed-order recurrence is
   // contained.
-  VPInstruction *PrevEVL = nullptr;
   bool ContainsFORs =
       any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
   if (ContainsFORs) {
@@ -2212,16 +2201,37 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                                              DebugLoc());
 
     Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
+    VPValue *PrevEVL =
+        Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
+
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+      for (VPRecipeBase &R : *VPBB) {
+        using namespace VPlanPatternMatch;
+        VPValue *V1, *V2;
+        if (!match(&R,
+                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                       m_VPValue(V1), m_VPValue(V2))))
+          continue;
+        VPValue *Imm = Plan.getOrAddLiveIn(
+            ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
+        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
+            Intrinsic::experimental_vp_splice,
+            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+            TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
+        VPSplice->insertBefore(&R);
+        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
+        ToErase.push_back(&R);
+      }
+    }
   }
 
-  SmallVector<VPRecipeBase *> ToErase;
-
+  // Try to optimize header mask recipes away to their EVL variants.
   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
     for (VPUser *U : collectUsersRecursively(HeaderMask)) {
       auto *CurRecipe = cast<VPRecipeBase>(U);
-      VPRecipeBase *EVLRecipe = createEVLRecipe(
-          HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL);
+      VPRecipeBase *EVLRecipe =
+          optimizeMaskToEVL(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
       if (!EVLRecipe)
         continue;
 
@@ -2237,8 +2247,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
        VPValue *CurVPV = CurRecipe->getVPSingleValue();
        CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
      }
-      // Defer erasing recipes till the end so that we don't invalidate the
-      // VPTypeAnalysis cache.
       ToErase.push_back(CurRecipe);
     }
   }

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 149 additions & 0 deletions
@@ -592,6 +592,151 @@ for.end:
   ret i32 %for1
 }
 
+define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
+; IF-EVL-LABEL: define void @first_order_recurrence_indvar(
+; IF-EVL-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; IF-EVL-NEXT: [[ENTRY:.*]]:
+; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IF-EVL: [[VECTOR_PH]]:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP2]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2
+; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; IF-EVL-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; IF-EVL-NEXT: [[TMP12:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP12]]
+; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP13]], 2
+; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP19]], 1
+; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP10]]
+; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
+; IF-EVL: [[VECTOR_BODY]]:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
+; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; IF-EVL-NEXT: [[TMP20]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP20]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL: [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT: br label %[[FOR_END:.*]]
+; IF-EVL: [[SCALAR_PH]]:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT: br label %[[FOR_BODY:.*]]
+; IF-EVL: [[FOR_BODY]]:
+; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP14:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP14]] = add i64 [[IV1]], 42
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV1]]
+; IF-EVL-NEXT: store i64 [[FOR1]], ptr [[ARRAYIDX]], align 8
+; IF-EVL-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1
+; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TC]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; IF-EVL: [[FOR_END]]:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: define void @first_order_recurrence_indvar(
+; NO-VP-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; NO-VP-NEXT: [[ENTRY:.*]]:
+; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]]
+; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-VP: [[VECTOR_PH]]:
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP4]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
+; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
+; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP14]], 2
+; NO-VP-NEXT: [[TMP20:%.*]] = sub i32 [[TMP16]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP20]]
+; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-VP: [[VECTOR_BODY]]:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP12]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; NO-VP-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP12]], i32 -1)
+; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
+; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP11]], i32 0
+; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], align 8
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; NO-VP: [[MIDDLE_BLOCK]]:
+; NO-VP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP17]], 2
+; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP21]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 2 x i64> [[TMP12]], i32 [[TMP19]]
+; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; NO-VP: [[SCALAR_PH]]:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
+; NO-VP: [[FOR_BODY]]:
+; NO-VP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[TMP18]] = add i64 [[IV1]], 42
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV1]]
+; NO-VP-NEXT: store i64 [[FOR1]], ptr [[ARRAYIDX]], align 8
+; NO-VP-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1
+; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TC]]
+; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; NO-VP: [[FOR_END]]:
+; NO-VP-NEXT: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.body ]
+  %for1 = phi i64 [ 33, %entry ], [ %x, %for.body ]
+
+  %x = add i64 %indvars, 42
+
+  %arrayidx = getelementptr inbounds nuw i64, ptr %A, i64 %indvars
+  store i64 %for1, ptr %arrayidx
+
+  %indvars.next = add nuw nsw i64 %indvars, 1
+  %exitcond.not = icmp eq i64 %indvars.next, %TC
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
 ;.
@@ -606,6 +751,8 @@ for.end:
 ; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], [[META1]]}
 ; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
 ; IF-EVL: [[META10]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]], [[META3]]}
+; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META1]]}
 ;.
 ; NO-VP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; NO-VP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -617,4 +764,6 @@ for.end:
 ; NO-VP: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
 ; NO-VP: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
 ; NO-VP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; NO-VP: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; NO-VP: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
 ;.
