
Commit ec25a05

[VPlan] Don't convert VPWidenSelectRecipes to vp.select in EVL transform (#146695)
createEVLRecipe tries to optimise recipes that use the header mask by replacing them with their VP equivalents and setting the EVL, allowing the mask to be removed. However, we currently also convert widened selects to vp.select even though they don't necessarily use the header mask. Unlike vp.merge, a vp.select only makes the "unused" lanes past EVL poison, so the conversion isn't needed for correctness.

In the same vein as #127180, this patch removes the transform for VPWidenSelectRecipes and keeps them as plain select instructions to allow for more optimisations. RISCVVLOptimizer will still be able to optimise away any VL toggles, and we end up with better code generation across llvm-test-suite and SPEC CPU 2017.
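To make the semantic point concrete, here is a minimal LLVM IR sketch (the values %m, %a, %b and %evl are hypothetical, not taken from the tests below). Before this patch, the EVL transform rewrote a widened select as:

%r.vp = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl)

After it, the recipe is left as a plain select:

%r.plain = select <vscale x 4 x i1> %m, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b

The two agree on every lane below %evl; vp.select only differs by turning the lanes at or past %evl into poison, so dropping the EVL operand loses nothing and leaves a form that later folds can match.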
1 parent 7891270 commit ec25a05

File tree

8 files changed: +36 -123 lines

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 7 deletions
@@ -2152,13 +2152,6 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
         VPValue *NewMask = GetNewMask(Red->getCondOp());
         return new VPReductionEVLRecipe(*Red, EVL, NewMask);
       })
-      .Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
-        SmallVector<VPValue *> Ops(Sel->operands());
-        Ops.push_back(&EVL);
-        return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops,
-                                          TypeInfo.inferScalarType(Sel),
-                                          Sel->getDebugLoc());
-      })
       .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
         if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
           assert(PrevEVL && "Fixed-order recurrences require previous EVL");

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 1 addition & 1 deletion
@@ -367,7 +367,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
+; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
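A note on why the vp.merge producing [[TMP15]] above is kept while the select feeding it is not: [[TMP15]] is the loop-carried reduction vector, so lanes at or past EVL must still hold the previous [[VEC_PHI]] value on the next iteration. vp.merge guarantees that, whereas vp.select would make those lanes poison. A rough sketch of the two semantics, with hypothetical %m, %a, %b and %evl:

; lane i of %s is (%m[i] ? %a[i] : %b[i]) for i < %evl, poison otherwise
%s = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl)
; lane i of %p is %a[i] when i < %evl and %m[i] is set, %b[i] otherwise --
; lanes past %evl keep %b, which is what a reduction phi needs
%p = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl)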

llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll

Lines changed: 23 additions & 26 deletions
@@ -138,20 +138,19 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
 ; PREDICATED_EVL-NEXT: [[TMP11:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
 ; PREDICATED_EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP11]]
 ; PREDICATED_EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP12]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]
-; PREDICATED_EVL-NEXT: [[TMP14:%.*]] = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> [[TMP13]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP15:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
-; PREDICATED_EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP15]]
-; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP14]], <vscale x 16 x ptr> align 1 [[TMP16]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP17:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP14]]
-; PREDICATED_EVL-NEXT: [[TMP18:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP18]]
-; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP17]], <vscale x 16 x ptr> align 1 [[TMP19]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
+; PREDICATED_EVL-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
+; PREDICATED_EVL-NEXT: [[TMP14:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP14]]
+; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x ptr> align 1 [[TMP15]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
+; PREDICATED_EVL-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP13]]
+; PREDICATED_EVL-NEXT: [[TMP17:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP17]]
+; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP16]], <vscale x 16 x ptr> align 1 [[TMP18]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
 ; PREDICATED_EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
 ; PREDICATED_EVL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; PREDICATED_EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_EVL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; PREDICATED_EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; PREDICATED_EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; PREDICATED_EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PREDICATED_EVL: middle.block:
 ; PREDICATED_EVL-NEXT: br label [[FOR_END:%.*]]
 ; PREDICATED_EVL: scalar.ph:
@@ -364,29 +363,27 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
 ; PREDICATED_EVL-NEXT: [[TMP17:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
 ; PREDICATED_EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP17]]
 ; PREDICATED_EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP18]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP19:%.*]] = icmp slt <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]
-; PREDICATED_EVL-NEXT: [[TMP20:%.*]] = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> [[TMP19]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP21:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP20]]
-; PREDICATED_EVL-NEXT: [[TMP22:%.*]] = icmp slt <vscale x 16 x i8> [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER5]]
-; PREDICATED_EVL-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> [[TMP22]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER5]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER4]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP24:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP23]]
-; PREDICATED_EVL-NEXT: [[TMP25:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
+; PREDICATED_EVL-NEXT: [[TMP20:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP19]]
+; PREDICATED_EVL-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER4]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER5]])
+; PREDICATED_EVL-NEXT: [[TMP22:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP21]]
+; PREDICATED_EVL-NEXT: [[TMP23:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP23]]
+; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP19]], <vscale x 16 x ptr> align 1 [[TMP24]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
+; PREDICATED_EVL-NEXT: [[TMP25:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
 ; PREDICATED_EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP25]]
 ; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP20]], <vscale x 16 x ptr> align 1 [[TMP26]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP27:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP27:%.*]] = zext nneg <vscale x 16 x i32> [[TMP9]] to <vscale x 16 x i64>
 ; PREDICATED_EVL-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP27]]
 ; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP21]], <vscale x 16 x ptr> align 1 [[TMP28]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP29:%.*]] = zext nneg <vscale x 16 x i32> [[TMP9]] to <vscale x 16 x i64>
+; PREDICATED_EVL-NEXT: [[TMP29:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
 ; PREDICATED_EVL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP29]]
-; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP23]], <vscale x 16 x ptr> align 1 [[TMP30]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_EVL-NEXT: [[TMP31:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_EVL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP31]]
-; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP24]], <vscale x 16 x ptr> align 1 [[TMP32]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
+; PREDICATED_EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP22]], <vscale x 16 x ptr> align 1 [[TMP30]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
 ; PREDICATED_EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
 ; PREDICATED_EVL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; PREDICATED_EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_EVL-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; PREDICATED_EVL-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; PREDICATED_EVL-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; PREDICATED_EVL-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; PREDICATED_EVL: middle.block:
 ; PREDICATED_EVL-NEXT: br label [[FOR_END:%.*]]
 ; PREDICATED_EVL: scalar.ph:
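The change above also shows the extra optimisation the commit message alludes to: once the widened select stays a plain select, the icmp slt/select pair matches the usual signed-max pattern and gets folded into @llvm.smax, a fold the vp.select form blocked. A minimal sketch of the fold, with hypothetical %x and %y:

%c = icmp slt <vscale x 16 x i8> %x, %y
%m = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %y, <vscale x 16 x i8> %x
; the pair above canonicalises to a single intrinsic call:
%m.smax = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)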

llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; DEBUGLOC-LABEL: define void @vp_select(
 ; DEBUGLOC: vector.body:
-; DEBUGLOC: = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, i32 %{{.+}}), !dbg ![[SELLOC:[0-9]+]]
+; DEBUGLOC: = select <vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, !dbg ![[SELLOC:[0-9]+]]
 ; DEBUGLOC: loop:
 ; DEBUGLOC: = select i1 %{{.+}}, i32 %{{.+}}, i32 %{{.+}}, !dbg ![[SELLOC]]
 ;

llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
 ; CHECK-NEXT: [[TMP23:%.*]] = ashr <vscale x 8 x i32> [[TMP15]], zeroinitializer
 ; CHECK-NEXT: [[VP_OP3:%.*]] = or <vscale x 8 x i32> [[TMP23]], zeroinitializer
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
+; CHECK-NEXT: [[TMP17:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 8 x i32> [[VP_OP]] to <vscale x 8 x i16>

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 4 additions & 4 deletions
@@ -47,7 +47,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP11]])
+; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[TMP19]], [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
@@ -101,7 +101,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
+; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
@@ -543,7 +543,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_IND]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
+; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
@@ -606,7 +606,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_IND]]
-; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP11]])
+; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-INLOOP-NEXT: [[ADD]] = add i32 [[TMP17]], [[RDX]]
 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
