Skip to content

Commit 836f2d2

Browse files
committed
[SimplifyIndVar] Convert ICmp predicates to EQ/NE where possible so that OSR works better (OSR makes only limited changes to the IVUse formula for other predicates). Regression tests are updated accordingly.
1 parent b9c979d commit 836f2d2

File tree

10 files changed

+222
-89
lines changed

10 files changed

+222
-89
lines changed

llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Lines changed: 148 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ namespace {
9797
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
9898
bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
9999
void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
100+
bool forceEqualityForICmp(ICmpInst *ICmp, Instruction *IVOperand);
100101
void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
101102
bool IsSigned);
102103
void replaceRemWithNumerator(BinaryOperator *Rem);
@@ -244,6 +245,128 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
244245
return true;
245246
}
246247

248+
/// Try to change predicate of ICmp to EQ/NE to facilitate better work of OSR.
249+
/// This can be done only if all possible IV values but one lead to the same
250+
/// produced comparison result, while the 'chosen one' value gives the opposite
251+
/// result.
252+
bool SimplifyIndvar::forceEqualityForICmp(ICmpInst *ICmp,
253+
Instruction *IVOperand) {
254+
if (ICmp->isEquality()) {
255+
// nothing to do
256+
return false;
257+
}
258+
259+
unsigned BoundOperandIdx = IVOperand == ICmp->getOperand(0) ? 1 : 0;
260+
const SCEV *BoundSCEV = SE->getSCEV(ICmp->getOperand(BoundOperandIdx));
261+
const SCEVConstant *BoundC = dyn_cast<SCEVConstant>(BoundSCEV);
262+
CmpInst::Predicate OrigPredicate = ICmp->getPredicate();
263+
CmpInst::Predicate NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
264+
Type *Ty = IVOperand->getType();
265+
APInt NewBoundA;
266+
267+
if (BoundC) {
268+
// Try to find the 'chosen one' value basing on predicate type and bound
269+
const APInt &BoundA = BoundC->getAPInt();
270+
ConstantRange ExactCR =
271+
ConstantRange::makeExactICmpRegion(OrigPredicate, BoundA);
272+
if (!ExactCR.getEquivalentICmp(NewPredicate, NewBoundA)) {
273+
NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
274+
}
275+
}
276+
277+
if (!ICmpInst::isEquality(NewPredicate)) {
278+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVOperand));
279+
if (!AR) {
280+
return false;
281+
}
282+
const SCEVConstant *IVStart = dyn_cast<SCEVConstant>(AR->getStart());
283+
const SCEVConstant *IVStep =
284+
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
285+
if (!IVStart || !IVStep || !IVStep->getValue()->getValue()) {
286+
return false;
287+
}
288+
289+
if (BoundC) {
290+
// Check to see the 'chosen one' value is the IV start value
291+
bool HasNoWrap = ICmpInst::isSigned(OrigPredicate)
292+
? AR->hasNoSignedWrap()
293+
: AR->hasNoUnsignedWrap();
294+
if (HasNoWrap) {
295+
const DataLayout &DL = ICmp->getParent()->getDataLayout();
296+
Constant *SecondIterIV =
297+
ConstantInt::get(Ty, IVStart->getAPInt() + IVStep->getAPInt());
298+
Constant *FirstIterResult = ConstantFoldCompareInstOperands(
299+
OrigPredicate, IVStart->getValue(), BoundC->getValue(), DL);
300+
Constant *SecondIterResult = ConstantFoldCompareInstOperands(
301+
OrigPredicate, SecondIterIV, BoundC->getValue(), DL);
302+
if (FirstIterResult != SecondIterResult) {
303+
NewBoundA = IVStart->getAPInt();
304+
NewPredicate = FirstIterResult->isAllOnesValue() ? CmpInst::ICMP_EQ
305+
: CmpInst::ICMP_NE;
306+
}
307+
}
308+
}
309+
310+
if (!ICmpInst::isEquality(NewPredicate)) {
311+
// Check to see the 'chosen one' value is the very last IV value.
312+
// To put it differently, check to see if ICmp directly or indirectly
313+
// defines maximum loop trip count (or simply has aligned behavior by
314+
// accident). This is different from loop exit condition rewriting as here
315+
// not only ICmp instructions directly writing to exiting branch are
316+
// considered.
317+
318+
// check to see if max trip count and IV parameters are constant
319+
const SCEVConstant *MaxBackCount =
320+
dyn_cast<SCEVConstant>(SE->getConstantMaxBackedgeTakenCount(L));
321+
if (!MaxBackCount) {
322+
return false;
323+
}
324+
325+
// compute the number of consecutive iterations in which produced
326+
// predicate value will be the same
327+
bool ExitIfTrue = false;
328+
auto EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
329+
const SCEVConstant *SameIterCount =
330+
dyn_cast<SCEVConstant>(EL.ExactNotTaken);
331+
if (!SameIterCount || SameIterCount->getValue()->isZero()) {
332+
ExitIfTrue = !ExitIfTrue;
333+
EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
334+
SameIterCount = dyn_cast<SCEVConstant>(EL.ExactNotTaken);
335+
}
336+
337+
if (SameIterCount != MaxBackCount) {
338+
// ICmp isn't aligned with maximum trip count
339+
return false;
340+
}
341+
342+
unsigned IVBitWigth = IVStep->getAPInt().getBitWidth();
343+
unsigned CountBitWigth = SameIterCount->getAPInt().getBitWidth();
344+
APInt SameIterCountA = SameIterCount->getAPInt();
345+
if (IVBitWigth < CountBitWigth) {
346+
SameIterCountA = SameIterCountA.trunc(IVBitWigth);
347+
} else if (IVBitWigth > CountBitWigth) {
348+
SameIterCountA = SameIterCountA.zext(IVBitWigth);
349+
}
350+
NewBoundA = IVStart->getAPInt() + (IVStep->getAPInt() * SameIterCountA);
351+
NewPredicate = ExitIfTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
352+
}
353+
}
354+
355+
if (!TTI->isLegalICmpImmediate(NewBoundA.getSExtValue())) {
356+
return false;
357+
}
358+
359+
LLVM_DEBUG(dbgs() << "INDVARS: Force EQ/NE predicate for max trip count: "
360+
<< *ICmp << '\n');
361+
362+
assert(Ty->getPrimitiveSizeInBits() == NewBoundA.getBitWidth() &&
363+
"bit widths should be aligned");
364+
ICmp->setOperand(BoundOperandIdx, ConstantInt::get(Ty, NewBoundA));
365+
ICmp->setPredicate(NewPredicate);
366+
367+
return true;
368+
}
369+
247370
/// SimplifyIVUsers helper for eliminating useless
248371
/// comparisons against an induction variable.
249372
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
@@ -267,33 +390,43 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
267390
// If the condition is always true or always false in the given context,
268391
// replace it with a constant value.
269392
SmallVector<Instruction *, 4> Users;
393+
bool IsDead = false;
270394
for (auto *U : ICmp->users())
271395
Users.push_back(cast<Instruction>(U));
272396
const Instruction *CtxI = findCommonDominator(Users, *DT);
273397
if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
274398
SE->forgetValue(ICmp);
275399
ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
276400
DeadInsts.emplace_back(ICmp);
401+
IsDead = true;
277402
LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
278403
} else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
279-
// fallthrough to end of function
280-
} else if (ICmpInst::isSigned(OriginalPred) &&
281-
SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
404+
IsDead = true;
405+
} else {
282406
// If we were unable to do anything above, all we can do is canonicalize
283407
// the comparison hoping that it will open the doors for other
284-
// optimizations. If we find out that we compare two non-negative values,
285-
// we turn the instruction's predicate to its unsigned version. Note that
286-
// we cannot rely on Pred here unless we check if we have swapped it.
287-
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
288-
LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
289-
<< '\n');
290-
ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
291-
ICmp->setSameSign();
292-
} else
293-
return;
408+
// optimizations.
409+
if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) &&
410+
SE->isKnownNonNegative(X)) {
411+
// If we find out that we compare two non-negative values,
412+
// we turn the instruction's predicate to its unsigned version. Note that
413+
// we cannot rely on Pred here unless we check if we have swapped it.
414+
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
415+
LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
416+
<< '\n');
417+
ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
418+
ICmp->setSameSign();
419+
Changed = true;
420+
}
421+
if (forceEqualityForICmp(ICmp, IVOperand)) {
422+
Changed = true;
423+
}
424+
}
294425

295-
++NumElimCmp;
296-
Changed = true;
426+
if (IsDead) {
427+
NumElimCmp++;
428+
Changed = true;
429+
}
297430
}
298431

299432
bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {

llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define i32 @guards_applied_to_add_rec(ptr %dst) {
1313
; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
1414
; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ]
1515
; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1
16-
; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2
16+
; CHECK-NEXT: [[PRE:%.*]] = icmp samesign eq i32 [[OUTER_IV_1]], 1
1717
; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]]
1818
; CHECK: [[INNER_PREHEADER]]:
1919
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64

llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
9797
; CHECK-LABEL: @test2(
9898
; CHECK-NEXT: entry:
9999
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32
100-
; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
100+
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
101101
; CHECK: for.cond1.preheader.us.preheader:
102102
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1)
103103
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
@@ -110,7 +110,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
110110
; CHECK-NEXT: br label [[FOR_INC13_US]]
111111
; CHECK: for.inc13.us:
112112
; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
113-
; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4
113+
; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT4]], 4
114114
; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
115115
; CHECK: for.body4.us:
116116
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ]
@@ -237,8 +237,7 @@ define i32 @test4(i32 %a) {
237237
; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[OR]] to i8
238238
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]])
239239
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1
240-
; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8
241-
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14
240+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 242
242241
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
243242
; CHECK: for.end:
244243
; CHECK-NEXT: ret i32 0
@@ -517,8 +516,8 @@ define i32 @test10(i32 %v) {
517516
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
518517
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]]
519518
; CHECK-NEXT: call void @consume.i1(i1 [[TMP1]])
519+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT]], 11
520520
; CHECK-NEXT: call void @consume.i64(i64 [[TMP0]])
521-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11
522521
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]]
523522
; CHECK: leave:
524523
; CHECK-NEXT: ret i32 22

llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -S -passes=indvars -indvars-predicate-loops=0 < %s | FileCheck %s
23

34
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,15 +8,43 @@ target triple = "x86_64-apple-macosx10.10.0"
78

89
; Function Attrs: nounwind ssp uwtable
910
define void @fn1() {
10-
; CHECK-LABEL: @fn1(
11+
; CHECK-LABEL: define void @fn1() {
12+
; CHECK-NEXT: [[BB:.*]]:
13+
; CHECK-NEXT: br label %[[BB4_PREHEADER:.*]]
14+
; CHECK: [[BB4_PREHEADER]]:
15+
; CHECK-NEXT: [[B_03:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP17:%.*]], %[[BB16:.*]] ]
16+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[B_03]], 0
17+
; CHECK-NEXT: br i1 [[TMP9]], label %[[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE:.*]], label %[[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE:.*]]
18+
; CHECK: [[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE]]:
19+
; CHECK-NEXT: br label %[[BB4_PREHEADER_SPLIT:.*]]
20+
; CHECK: [[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE]]:
21+
; CHECK-NEXT: store i32 0, ptr @a, align 4
22+
; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT:.*]]
23+
; CHECK: [[BB4_PREHEADER_SPLIT]]:
24+
; CHECK-NEXT: br label %[[BB7:.*]]
25+
; CHECK: [[BB4:.*]]:
26+
; CHECK-NEXT: br i1 false, label %[[BB7]], label %[[BB16]]
27+
; CHECK: [[BB7]]:
28+
; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT:.*]], label %[[BB4]]
29+
; CHECK: [[BB16]]:
30+
; CHECK-NEXT: [[TMP17]] = add nuw nsw i8 [[B_03]], -1
31+
; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT1:.*]], label %[[BB4_PREHEADER]]
32+
; CHECK: [[BB18_LOOPEXIT]]:
33+
; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT]]
34+
; CHECK: [[BB18_LOOPEXIT_SPLIT]]:
35+
; CHECK-NEXT: br label %[[BB18:.*]]
36+
; CHECK: [[BB18_LOOPEXIT1]]:
37+
; CHECK-NEXT: [[TMP14_LCSSA5_LCSSA:%.*]] = phi i32 [ 1, %[[BB16]] ]
38+
; CHECK-NEXT: store i32 [[TMP14_LCSSA5_LCSSA]], ptr @a, align 4
39+
; CHECK-NEXT: br label %[[BB18]]
40+
; CHECK: [[BB18]]:
41+
; CHECK-NEXT: ret void
42+
;
1143
bb:
1244
br label %bb4.preheader
1345

1446
bb4.preheader: ; preds = %bb, %bb16
15-
; CHECK-LABEL: bb4.preheader:
1647
%b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
17-
; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
18-
; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1
1948

2049
%tmp9 = icmp ugt i8 %b.03, 1
2150
br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge

llvm/test/Transforms/IndVarSimplify/ada-loops.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ define void @kinds__urangezero(ptr nocapture %a) nounwind {
133133
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [21 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
134134
; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4
135135
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
136-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 31
136+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign eq i32 [[INDVARS_IV_NEXT]], 31
137137
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[BB]]
138138
; CHECK: return:
139139
; CHECK-NEXT: ret void
Lines changed: 12 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,44 @@
11
# CSIR level Instrumentation Flag
22
:csir
3-
cond.llvm.11253644763537639171
4-
# Func Hash:
5-
1152921517491748863
6-
# Num Counters:
7-
1
8-
# Counter Values:
9-
200000
10-
11-
foo
12-
# Func Hash:
13-
1720106746050921044
14-
# Num Counters:
15-
2
16-
# Counter Values:
17-
100000
18-
1
19-
203
bar
214
# Func Hash:
225
1299757151682747028
236
# Num Counters:
247
2
258
# Counter Values:
26-
0
27-
0
28-
29-
bar
30-
# Func Hash:
31-
29667547796
32-
# Num Counters:
33-
2
34-
# Counter Values:
359
100000
3610
100000
3711

3812
main
3913
# Func Hash:
40-
1152921517491748863
14+
1895182923573755903
4115
# Num Counters:
4216
1
4317
# Counter Values:
4418
1
4519

46-
main
20+
cspgo_bar.c;clobber
4721
# Func Hash:
4822
1895182923573755903
4923
# Num Counters:
5024
1
5125
# Counter Values:
52-
1
26+
200000
5327

54-
cspgo.c:foo
28+
cspgo_bar.c;cond
5529
# Func Hash:
56-
1720106746050921044
30+
1895182923573755903
5731
# Num Counters:
58-
4
59-
# Counter Values:
60-
100000
61-
100000
62-
0
6332
1
33+
# Counter Values:
34+
200000
6435

65-
cspgo_bar.c:cond
36+
cspgo.c;foo
6637
# Func Hash:
67-
12884901887
38+
2216626667076672412
6839
# Num Counters:
69-
1
40+
2
7041
# Counter Values:
71-
200000
42+
100000
43+
1
7244

0 commit comments

Comments
 (0)