Skip to content

Commit 7263435

Browse files
committed
Bundle partial reductions with no bin op
1 parent 92e8dbd commit 7263435

File tree

5 files changed

+70
-44
lines changed

5 files changed

+70
-44
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2666,11 +2666,16 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
26662666
case ExpressionTypes::ExtendedReduction: {
26672667
unsigned Opcode = RecurrenceDescriptor::getOpcode(
26682668
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
2669+
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2670+
if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
2671+
return Ctx.TTI.getPartialReductionCost(
2672+
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr, RedTy, VF,
2673+
TargetTransformInfo::getPartialReductionExtendKind(ExtR->getOpcode()),
2674+
TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind);
2675+
}
26692676
return Ctx.TTI.getExtendedReductionCost(
2670-
Opcode,
2671-
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2672-
Instruction::ZExt,
2673-
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2677+
Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy, SrcVecTy,
2678+
std::nullopt, Ctx.CostKind);
26742679
}
26752680
case ExpressionTypes::MulAccReduction:
26762681
return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, false,
@@ -2733,8 +2738,10 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
27332738
switch (ExpressionType) {
27342739
case ExpressionTypes::ExtendedReduction: {
27352740
getOperand(1)->printAsOperand(O, SlotTracker);
2736-
O << " +";
2737-
O << " reduce." << Instruction::getOpcodeName(Opcode) << " (";
2741+
O << " + ";
2742+
if (IsPartialReduction)
2743+
O << "partial.";
2744+
O << "reduce." << Instruction::getOpcodeName(Opcode) << " (";
27382745
getOperand(0)->printAsOperand(O, SlotTracker);
27392746
Red->printFlags(O);
27402747

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2857,15 +2857,26 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
28572857
VPValue *VecOp = Red->getVecOp();
28582858

28592859
// Clamp the range if using extended-reduction is profitable.
2860-
auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
2861-
Type *SrcTy) -> bool {
2860+
auto IsExtendedRedValidAndClampRange =
2861+
[&](unsigned Opcode, Instruction::CastOps ExtOpc, Type *SrcTy) -> bool {
2862+
bool IsZExt = ExtOpc == Instruction::CastOps::ZExt;
28622863
return LoopVectorizationPlanner::getDecisionAndClampRange(
28632864
[&](ElementCount VF) {
28642865
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
28652866
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2866-
InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
2867-
Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
2868-
CostKind);
2867+
2868+
InstructionCost ExtRedCost;
2869+
if (isa<VPPartialReductionRecipe>(Red)) {
2870+
TargetTransformInfo::PartialReductionExtendKind ExtKind =
2871+
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
2872+
ExtRedCost = Ctx.TTI.getPartialReductionCost(
2873+
Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
2874+
llvm::TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind);
2875+
} else {
2876+
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
2877+
Opcode, IsZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
2878+
CostKind);
2879+
}
28692880
InstructionCost ExtCost =
28702881
cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
28712882
InstructionCost RedCost = Red->computeCost(VF, Ctx);
@@ -2879,8 +2890,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
28792890
if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
28802891
IsExtendedRedValidAndClampRange(
28812892
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()),
2882-
cast<VPWidenCastRecipe>(VecOp)->getOpcode() ==
2883-
Instruction::CastOps::ZExt,
2893+
cast<VPWidenCastRecipe>(VecOp)->getOpcode(),
28842894
Ctx.Types.inferScalarType(A)))
28852895
return new VPExpressionRecipe(cast<VPWidenCastRecipe>(VecOp), Red);
28862896

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,11 +1166,11 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) #
11661166
; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
11671167
; CHECK-SVE-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i32 0
11681168
; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
1169-
; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
11701169
; CHECK-SVE-MAXBW-NEXT: [[TMP13:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
11711170
; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
11721171
; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP14]]
11731172
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP16]])
1173+
; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
11741174
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP15]])
11751175
; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
11761176
; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1315,8 +1315,8 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 {
13151315
; CHECK-SVE-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i32 0
13161316
; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
13171317
; CHECK-SVE-MAXBW-NEXT: [[TMP11:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
1318-
; CHECK-SVE-MAXBW-NEXT: [[TMP12:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
13191318
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP11]])
1319+
; CHECK-SVE-MAXBW-NEXT: [[TMP12:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
13201320
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE2]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP12]])
13211321
; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
13221322
; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -456,23 +456,32 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
456456
; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(
457457
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] {
458458
; CHECK-MAXBW-NEXT: entry:
459+
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
460+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
459461
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
460462
; CHECK-MAXBW: vector.ph:
463+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
464+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
465+
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
466+
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
467+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
468+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
461469
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
462470
; CHECK-MAXBW: vector.body:
463471
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
464-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
472+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
465473
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
466474
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
467-
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
468-
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
469-
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP2]])
470-
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
471-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
472-
; CHECK-MAXBW-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
475+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
476+
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
477+
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP11]])
478+
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
479+
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
480+
; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
473481
; CHECK-MAXBW: middle.block:
474-
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
475-
; CHECK-MAXBW-NEXT: br i1 false, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
482+
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]])
483+
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
484+
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
476485
; CHECK-MAXBW: scalar.ph:
477486
;
478487
entry:

llvm/test/Transforms/LoopVectorize/ARM/mve-reductions-interleave.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ define i64 @add_i32_i64(ptr nocapture readonly %x, i32 %n) #0 {
1818
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
1919
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
2020
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
21-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 16
21+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 16
2222
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
23-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
24-
; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
25-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]])
26-
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], [[VEC_PHI]]
23+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
24+
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
25+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]])
26+
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP8]], [[VEC_PHI]]
2727
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[WIDE_LOAD2]] to <4 x i64>
2828
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
2929
; CHECK-NEXT: [[TMP7]] = add i64 [[TMP6]], [[VEC_PHI1]]
@@ -84,20 +84,20 @@ define i64 @mla_i32_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
8484
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
8585
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
8686
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
87-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
88-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 16
89-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
90-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4
9187
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[INDEX]]
92-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 16
93-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
94-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
95-
; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
96-
; CHECK-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD2]]
97-
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
98-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP3]])
99-
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP4]], [[VEC_PHI]]
100-
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[TMP14]] to <4 x i64>
88+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 16
89+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
90+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4
91+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
92+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 16
93+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
94+
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
95+
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
96+
; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD2]]
97+
; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i32> [[TMP4]] to <4 x i64>
98+
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP14]])
99+
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP15]], [[VEC_PHI]]
100+
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[TMP13]] to <4 x i64>
101101
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
102102
; CHECK-NEXT: [[TMP11]] = add i64 [[TMP10]], [[VEC_PHI1]]
103103
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -114,9 +114,9 @@ define i64 @mla_i32_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
114114
; CHECK: loop:
115115
; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
116116
; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
117-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_010]]
117+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[I_010]]
118118
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
119-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[I_010]]
119+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_010]]
120120
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
121121
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
122122
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[MUL]] to i64

0 commit comments

Comments
 (0)