Skip to content

Commit c4f8b40

Browse files
committed
Match the inverse of m_UAddWithOverflow
This matches checks for unsigned add overflow whose condition is inverted, i.e. checks that the addition does not overflow. This is also really useful because it lets us fold usubs with constants better. How? For a nonzero constant, the subtraction underflows exactly when the addition of its negation does not overflow (carry is the inverse of borrow). This identity is baked into the silicon of ARM chips, which compute a subtraction with the same adder and expose the carry flag as an inverted borrow. I didn't even write code based on this property: I just negated the icmps and LLVM's existing folding figured the rest out!
1 parent 6df863a commit c4f8b40

File tree

10 files changed

+670
-131
lines changed

10 files changed

+670
-131
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2595,6 +2595,70 @@ struct UAddWithOverflow_match {
25952595
}
25962596
};
25972597

2598+
//===----------------------------------------------------------------------===//
2599+
// Matchers for inverted (no-overflow) check patterns: e.g. (a + b) u>= a,
2600+
// (a ^ -1) u>= b. Note that S might be matched to instructions other than an add.
2601+
//
2602+
2603+
/// Matcher for an icmp that is true when the unsigned addition of two values
/// does NOT overflow. On success L and R are matched against the two addends
/// and S against the instruction the compare is applied to (the add, or the
/// `not` in the xor forms below).
template <typename LHS_t, typename RHS_t, typename Sum_t>
2604+
struct UAddWithOverflowInv_match {
2605+
LHS_t L;
2606+
RHS_t R;
2607+
Sum_t S;
2608+
2609+
UAddWithOverflowInv_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
2610+
: L(L), R(R), S(S) {}
2611+
2612+
template <typename OpTy> bool match(OpTy *V) const {
2613+
// Anything that is not an icmp cannot be a no-overflow check.
Value *ICmpLHS, *ICmpRHS;
2614+
CmpPredicate Pred;
2615+
if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
2616+
return false;
2617+
2618+
Value *AddLHS, *AddRHS;
2619+
auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
2620+
2621+
// (a + b) u>= a, (a + b) u>= b
// The other compare operand must be one of the add's own operands.
2622+
if (Pred == ICmpInst::ICMP_UGE)
2623+
if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
2624+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
2625+
2626+
// a u<= (a + b), b u<= (a + b)
2627+
if (Pred == ICmpInst::ICMP_ULE)
2628+
if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
2629+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
2630+
2631+
// Forms without an explicit add: the `not` must have a single use, and S is
// matched against the `not` itself rather than an add.
Value *Op1;
2632+
auto XorExpr = m_OneUse(m_Not(m_Value(Op1)));
2633+
// (~a) u>= b
2634+
if (Pred == ICmpInst::ICMP_UGE) {
2635+
if (XorExpr.match(ICmpLHS))
2636+
return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
2637+
}
2638+
// b u<= (~a)
2639+
if (Pred == ICmpInst::ICMP_ULE) {
2640+
if (XorExpr.match(ICmpRHS))
2641+
return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
2642+
}
2643+
2644+
// Match special-case for increment-by-1: the sum is non-zero exactly when
// the increment did not wrap around.
2645+
if (Pred == ICmpInst::ICMP_NE) {
2646+
// (a + 1) != 0
2647+
// (1 + a) != 0
2648+
if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
2649+
(m_One().match(AddLHS) || m_One().match(AddRHS)))
2650+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
2651+
// 0 != (a + 1)
2652+
// 0 != (1 + a)
2653+
if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
2654+
(m_One().match(AddLHS) || m_One().match(AddRHS)))
2655+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
2656+
}
2657+
2658+
return false;
2659+
}
2660+
};
2661+
25982662
/// Match an icmp instruction checking for unsigned overflow on addition.
25992663
///
26002664
/// S is matched to the addition whose result is being checked for overflow, and
@@ -2605,6 +2669,17 @@ m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
26052669
return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
26062670
}
26072671

2672+
/// Match an icmp instruction checking for unsigned overflow on addition, but
2673+
/// with the opposite check: the compare is true when the addition does NOT
/// overflow (the inverse of what m_UAddWithOverflow matches).
2674+
///
2675+
/// S is matched to the addition whose result is being checked for overflow, and
2676+
/// L and R are matched to the LHS and RHS of S. For the patterns without an
/// explicit add, S is matched to the `not` operand of the compare instead.
2677+
template <typename LHS_t, typename RHS_t, typename Sum_t>
2678+
UAddWithOverflowInv_match<LHS_t, RHS_t, Sum_t>
2679+
m_UAddWithOverflowInv(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
2680+
return UAddWithOverflowInv_match<LHS_t, RHS_t, Sum_t>(L, R, S);
2681+
}
2682+
26082683
template <typename Opnd_t> struct Argument_match {
26092684
unsigned OpI;
26102685
Opnd_t Val;

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,8 @@ class CodeGenPrepare {
470470

471471
bool tryToSinkFreeOperands(Instruction *I);
472472
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
473-
CmpInst *Cmp, Intrinsic::ID IID);
473+
CmpInst *Cmp, Intrinsic::ID IID,
474+
bool NegateOverflow = false);
474475
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
475476
bool optimizeURem(Instruction *Rem);
476477
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
@@ -1552,7 +1553,8 @@ static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
15521553
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
15531554
Value *Arg0, Value *Arg1,
15541555
CmpInst *Cmp,
1555-
Intrinsic::ID IID) {
1556+
Intrinsic::ID IID,
1557+
bool NegateOverflow) {
15561558
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
15571559
if (!isIVIncrement(BO, LI))
15581560
return false;
@@ -1624,6 +1626,8 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
16241626
assert(BO->hasOneUse() &&
16251627
"Patterns with XOr should use the BO only in the compare");
16261628
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1629+
if (NegateOverflow)
1630+
OV = Builder.CreateNot(OV, "not");
16271631
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
16281632
Cmp->eraseFromParent();
16291633
BO->eraseFromParent();
@@ -1660,20 +1664,63 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
16601664
return false;
16611665
}
16621666

1667+
/// Match special-case patterns that check for unsigned add overflow with the
1668+
/// condition inverted, i.e. the compare is true when the add does not
/// overflow. On success, \p Add is set to the matching add and true is
/// returned; otherwise false is returned.
1669+
static bool
1670+
matchUAddWithOverflowConstantEdgeCasesInverted(CmpInst *Cmp,
1671+
BinaryOperator *&Add) {
1672+
// The parenthesised condition is when the add overflows; the compare tests
// the opposite condition.
// Add = add A, 1; Cmp = icmp ne A,-1 (overflow if A is max val)
1673+
// Add = add A,-1; Cmp = icmp eq A, 0 (overflow if A is non-zero)
1674+
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1675+
1676+
// We are not expecting non-canonical/degenerate code. Just bail out.
1677+
if (isa<Constant>(A))
1678+
return false;
1679+
1680+
// Replace the compare constant with the constant the corresponding add
// would use: -1 becomes 1, and 0 becomes -1.
ICmpInst::Predicate Pred = Cmp->getPredicate();
1681+
if (Pred == ICmpInst::ICMP_NE && match(B, m_AllOnes()))
1682+
B = ConstantInt::get(B->getType(), 1);
1683+
else if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt()))
1684+
B = Constant::getAllOnesValue(B->getType());
1685+
else
1686+
return false;
1687+
1688+
// Check the users of the variable operand of the compare looking for an add
1689+
// with the adjusted constant. The first such user found is returned.
1690+
for (User *U : A->users()) {
1691+
if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1692+
Add = cast<BinaryOperator>(U);
1693+
return true;
1694+
}
1695+
}
1696+
return false;
1697+
}
1698+
16631699
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
16641700
/// intrinsic. Return true if any changes were made.
16651701
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
16661702
ModifyDT &ModifiedDT) {
16671703
bool EdgeCase = false;
16681704
Value *A, *B;
16691705
BinaryOperator *Add;
1706+
bool Negate = false;
16701707
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671-
if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672-
return false;
1673-
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674-
A = Add->getOperand(0);
1675-
B = Add->getOperand(1);
1676-
EdgeCase = true;
1708+
if (matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) {
1709+
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1710+
A = Add->getOperand(0);
1711+
B = Add->getOperand(1);
1712+
EdgeCase = true;
1713+
} else {
1714+
Negate = true;
1715+
if (!match(Cmp,
1716+
m_UAddWithOverflowInv(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1717+
if (!matchUAddWithOverflowConstantEdgeCasesInverted(Cmp, Add))
1718+
return false;
1719+
A = Add->getOperand(0);
1720+
B = Add->getOperand(1);
1721+
EdgeCase = true;
1722+
}
1723+
}
16771724
}
16781725

16791726
if (!TLI->shouldFormOverflowOp(ISD::UADDO,
@@ -1688,7 +1735,7 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
16881735
return false;
16891736

16901737
if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691-
Intrinsic::uadd_with_overflow))
1738+
Intrinsic::uadd_with_overflow, Negate))
16921739
return false;
16931740

16941741
// Reset callers - do not crash by iterating over a dead instruction.
@@ -2218,10 +2265,10 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22182265
if (sinkCmpExpression(Cmp, *TLI))
22192266
return true;
22202267

2221-
if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2268+
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
22222269
return true;
22232270

2224-
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2271+
if (combineToUAddWithOverflow(Cmp, ModifiedDT))
22252272
return true;
22262273

22272274
if (unfoldPowerOf2Test(Cmp))

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7821,7 +7821,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
78217821
isa<IntegerType>(X->getType())) {
78227822
Value *Result;
78237823
Constant *Overflow;
7824-
// m_UAddWithOverflow can match patterns that do not include an explicit
7824+
// m_UAddWithOverflow can match patterns that do not include an explicit
78257825
// "add" instruction, so check the opcode of the matched op.
78267826
if (AddI->getOpcode() == Instruction::Add &&
78277827
OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
@@ -7832,6 +7832,25 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
78327832
}
78337833
}
78347834

7835+
if (match(&I, m_UAddWithOverflowInv(m_Value(X), m_Value(Y),
7836+
m_Instruction(AddI))) &&
7837+
isa<IntegerType>(X->getType())) {
7838+
Value *Result;
7839+
Constant *Overflow;
7840+
// m_UAddWithOverflowInv can match patterns that do not include an
7841+
// explicit "add" instruction, so check the opcode of the matched op.
7842+
if (AddI->getOpcode() == Instruction::Add &&
7843+
OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
7844+
Result, Overflow)) {
7845+
Overflow = Overflow->isNullValue()
7846+
? ConstantInt::getTrue(Overflow->getType())
7847+
: ConstantInt::getFalse(Overflow->getType());
7848+
replaceInstUsesWith(*AddI, Result);
7849+
eraseInstFromFunction(*AddI);
7850+
return replaceInstUsesWith(I, Overflow);
7851+
}
7852+
}
7853+
78357854
// (zext X) * (zext Y) --> llvm.umul.with.overflow.
78367855
if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
78377856
match(Op1, m_APInt(C))) {

llvm/test/CodeGen/AArch64/cgp-usubo.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) nounwind {
108108
define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
109109
; CHECK-LABEL: usubo_eq_constant1_op1_i32:
110110
; CHECK: // %bb.0:
111-
; CHECK-NEXT: cmp w0, #0
112-
; CHECK-NEXT: sub w9, w0, #1
113-
; CHECK-NEXT: cset w8, eq
114-
; CHECK-NEXT: str w9, [x1]
115-
; CHECK-NEXT: mov w0, w8
111+
; CHECK-NEXT: subs w8, w0, #1
112+
; CHECK-NEXT: cset w0, lo
113+
; CHECK-NEXT: str w8, [x1]
116114
; CHECK-NEXT: ret
117115
%s = add i32 %x, -1
118116
%ov = icmp eq i32 %x, 0

llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
196196
; CHECK-NEXT: mov w22, #2 ; =0x2
197197
; CHECK-NEXT: LBB3_6: ; %for.cond
198198
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
199-
; CHECK-NEXT: cbz w22, LBB3_9
199+
; CHECK-NEXT: subs w22, w22, #1
200+
; CHECK-NEXT: b.lo LBB3_9
200201
; CHECK-NEXT: ; %bb.7: ; %for.body
201202
; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1
202-
; CHECK-NEXT: sub w22, w22, #1
203-
; CHECK-NEXT: orr w9, w21, w20
204203
; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
204+
; CHECK-NEXT: orr w9, w21, w20
205205
; CHECK-NEXT: cmp w9, w10
206206
; CHECK-NEXT: b.eq LBB3_6
207207
; CHECK-NEXT: ; %bb.8: ; %if.then
@@ -238,12 +238,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
238238
; OUTLINE-ATOMICS-NEXT: cset w8, eq
239239
; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond
240240
; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1
241-
; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4
241+
; OUTLINE-ATOMICS-NEXT: subs w22, w22, #1
242+
; OUTLINE-ATOMICS-NEXT: b.lo LBB3_4
242243
; OUTLINE-ATOMICS-NEXT: ; %bb.2: ; %for.body
243244
; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
244-
; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1
245-
; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
246245
; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, w22, sxtw #2]
246+
; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
247247
; OUTLINE-ATOMICS-NEXT: cmp w9, w10
248248
; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1
249249
; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then

llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -506,17 +506,15 @@ define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
506506
define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
507507
; X86-LABEL: add_ugecmp_bad_i8_i16:
508508
; X86: # %bb.0:
509-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
510-
; X86-NEXT: subl $-128, %eax
511-
; X86-NEXT: cmpw $127, %ax
512-
; X86-NEXT: seta %al
509+
; X86-NEXT: movw $128, %ax
510+
; X86-NEXT: addw {{[0-9]+}}(%esp), %ax
511+
; X86-NEXT: setae %al
513512
; X86-NEXT: retl
514513
;
515514
; X64-LABEL: add_ugecmp_bad_i8_i16:
516515
; X64: # %bb.0:
517-
; X64-NEXT: subl $-128, %edi
518-
; X64-NEXT: cmpw $127, %di
519-
; X64-NEXT: seta %al
516+
; X64-NEXT: addw $128, %di
517+
; X64-NEXT: setae %al
520518
; X64-NEXT: retq
521519
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
522520
%tmp1 = icmp uge i16 %tmp0, 128 ; 1U << (8-1)

llvm/test/CodeGen/X86/setcc-combine.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -664,8 +664,9 @@ define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
664664
define i64 @cmp_uge_not(i64 %a, i64 %b) {
665665
; CHECK-LABEL: cmp_uge_not:
666666
; CHECK: # %bb.0:
667+
; CHECK-NEXT: notq %rsi
667668
; CHECK-NEXT: xorl %eax, %eax
668-
; CHECK-NEXT: cmpq %rdi, %rsi
669+
; CHECK-NEXT: addq %rdi, %rsi
669670
; CHECK-NEXT: adcq $-1, %rax
670671
; CHECK-NEXT: retq
671672
%na = xor i64 %a, -1
@@ -679,8 +680,8 @@ define i64 @cmp_uge_not_with_constant(i64 %a) {
679680
; CHECK-LABEL: cmp_uge_not_with_constant:
680681
; CHECK: # %bb.0:
681682
; CHECK-NEXT: xorl %eax, %eax
682-
; CHECK-NEXT: cmpq $-42, %rdi
683-
; CHECK-NEXT: sbbq %rax, %rax
683+
; CHECK-NEXT: addq $42, %rdi
684+
; CHECK-NEXT: adcq $-1, %rax
684685
; CHECK-NEXT: retq
685686
%na = xor i64 %a, -1
686687
%c = icmp uge i64 %na, 42
@@ -850,8 +851,9 @@ define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
850851
define i64 @cmp_ule_not(i64 %a, i64 %b) {
851852
; CHECK-LABEL: cmp_ule_not:
852853
; CHECK: # %bb.0:
854+
; CHECK-NEXT: notq %rdi
853855
; CHECK-NEXT: xorl %eax, %eax
854-
; CHECK-NEXT: cmpq %rsi, %rdi
856+
; CHECK-NEXT: addq %rsi, %rdi
855857
; CHECK-NEXT: adcq $-1, %rax
856858
; CHECK-NEXT: retq
857859
%na = xor i64 %a, -1
@@ -983,8 +985,9 @@ define <4 x i32> @cmp_ne_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
983985
define i64 @cmp_uge_not_commute(i64 %b, i64 %a) {
984986
; CHECK-LABEL: cmp_uge_not_commute:
985987
; CHECK: # %bb.0:
988+
; CHECK-NEXT: notq %rdi
986989
; CHECK-NEXT: xorl %eax, %eax
987-
; CHECK-NEXT: cmpq %rsi, %rdi
990+
; CHECK-NEXT: addq %rsi, %rdi
988991
; CHECK-NEXT: adcq $-1, %rax
989992
; CHECK-NEXT: retq
990993
%na = xor i64 %a, -1

0 commit comments

Comments
 (0)