Skip to content

Commit 131b14f

Browse files
committed
[InstCombine] Fold umul.overflow(x, c1) | (x*c1 > c2) to x > c2/c1
The motivation for this pattern is to check whether the product of a variable and a constant would be mathematically (i.e., treating the values as unbounded integers rather than fixed-width bit vectors) greater than a given constant bound. The pattern appears to occur when compiling several Rust projects (it seems to originate from the `smallvec` crate, but I have not checked this further). Unless `c1` is `0`, we can transform this pattern into `x > c2/c1`, with all operations working on unsigned integers. Because a `0` element in a non-splat vector would lead to undefined behavior (a division by zero in `c2/c1`), the transform is only implemented for scalars and splat vectors. Alive proof: https://alive2.llvm.org/ce/z/LawTkm Closes #142674
1 parent c40f406 commit 131b14f

File tree

2 files changed

+42
-39
lines changed

2 files changed

+42
-39
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,6 +3717,30 @@ Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) {
37173717
return nullptr;
37183718
}
37193719

3720+
/// Fold Res, Overflow = (umul.with.overflow x c1); (or Overflow (ugt Res c2))
3721+
/// --> (ugt x (c2/c1)). This code checks whether a multiplication of two
3722+
/// unsigned numbers (one is a constant) is mathematically greater than a
3723+
/// second constant.
3724+
static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
3725+
InstCombiner::BuilderTy &Builder,
3726+
const DataLayout &DL) {
3727+
const WithOverflowInst *WO;
3728+
const Value *WOV;
3729+
const APInt *C1, *C2;
3730+
if (match(&I,
3731+
m_c_Or(m_ExtractValue<1>(
3732+
m_CombineAnd(m_WithOverflowInst(WO), m_Value(WOV))),
3733+
m_OneUse(m_SpecificCmp(ICmpInst::ICMP_UGT,
3734+
m_ExtractValue<0>(m_Deferred(WOV)),
3735+
m_APInt(C2))))) &&
3736+
WO->getIntrinsicID() == Intrinsic::umul_with_overflow &&
3737+
match(WO->getRHS(), m_APInt(C1)) && !C1->isZero() && WO->hasNUses(2)) {
3738+
Constant *NewC = ConstantInt::get(WO->getLHS()->getType(), C2->udiv(*C1));
3739+
return Builder.CreateICmp(ICmpInst::ICMP_UGT, WO->getLHS(), NewC);
3740+
}
3741+
return nullptr;
3742+
}
3743+
37203744
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
37213745
// here. We should standardize that construct where it is needed or choose some
37223746
// other way to ensure that commutated variants of patterns are not missed.
@@ -4150,6 +4174,11 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
41504174
}
41514175
}
41524176

4177+
// Try to fold the pattern "Overflow | icmp ugt Res, C2" into a single
4178+
// comparison instruction for umul.with.overflow.
4179+
if (Value *R = foldOrUnsignedUMulOverflowICmp(I, Builder, DL))
4180+
return replaceInstUsesWith(I, R);
4181+
41534182
// (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
41544183
if (sinkNotIntoOtherHandOfLogicalOp(I))
41554184
return &I;

llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll

Lines changed: 13 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@ declare void @use.i64i1({i64, i1} %x)
88
define i1 @umul_greater_than_or_overflow_const(i64 %in) {
99
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const(
1010
; CHECK-SAME: i64 [[IN:%.*]]) {
11-
; CHECK-NEXT: [[TMP2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 168)
12-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
13-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
14-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], -16
15-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 109802048057794950
1612
; CHECK-NEXT: ret i1 [[TMP6]]
1713
;
1814
%mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 168)
@@ -26,11 +22,7 @@ define i1 @umul_greater_than_or_overflow_const(i64 %in) {
2622
define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
2723
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_i8(
2824
; CHECK-SAME: i8 [[IN:%.*]]) {
29-
; CHECK-NEXT: [[TMP2:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[IN]], i8 24)
30-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP2]], 0
31-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[TMP2]], 1
32-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8 [[TMP3]], -16
33-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
25+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[IN]], 10
3426
; CHECK-NEXT: ret i1 [[TMP6]]
3527
;
3628
%mwo = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %in, i8 24)
@@ -44,11 +36,7 @@ define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
4436
define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
4537
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_commuted(
4638
; CHECK-SAME: i64 [[IN:%.*]]) {
47-
; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
48-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
49-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
50-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
51-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[TMP4]]
39+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
5240
; CHECK-NEXT: ret i1 [[TMP6]]
5341
;
5442
%mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
@@ -62,11 +50,7 @@ define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
6250
define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
6351
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_disjoint(
6452
; CHECK-SAME: i64 [[IN:%.*]]) {
65-
; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 40)
66-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
67-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
68-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
69-
; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i1 [[TMP4]], [[TMP5]]
53+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 230584300921369395
7054
; CHECK-NEXT: ret i1 [[TMP6]]
7155
;
7256
%mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 40)
@@ -80,11 +64,8 @@ define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
8064
define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
8165
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_mul(
8266
; CHECK-SAME: i64 [[IN:%.*]]) {
83-
; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
84-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
85-
; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
86-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
87-
; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
67+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[IN]], 48
68+
; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
8869
; CHECK-NEXT: tail call void @use.i64(i64 [[MUL]])
8970
; CHECK-NEXT: ret i1 [[RET]]
9071
;
@@ -100,11 +81,8 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
10081
define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
10182
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(
10283
; CHECK-SAME: i64 [[IN:%.*]]) {
103-
; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
104-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
105-
; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
106-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
107-
; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
84+
; CHECK-NEXT: [[OVF:%.*]] = icmp ugt i64 [[IN]], 384307168202282325
85+
; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
10886
; CHECK-NEXT: tail call void @use.i1(i1 [[OVF]])
10987
; CHECK-NEXT: ret i1 [[RET]]
11088
;
@@ -120,11 +98,7 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
12098
define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(<2 x i64> %in) {
12199
; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(
122100
; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
123-
; CHECK-NEXT: [[TMP2:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> splat (i64 1424))
124-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 0
125-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 1
126-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i64> [[TMP3]], splat (i64 9223372036854775800)
127-
; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP4]], [[TMP5]]
101+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <2 x i64> [[IN]], splat (i64 6477087104532848)
128102
; CHECK-NEXT: ret <2 x i1> [[TMP6]]
129103
;
130104
%mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 1424, i64 1424>)
@@ -237,10 +211,10 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(i64 %
237211
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(
238212
; CHECK-SAME: i64 [[IN:%.*]]) {
239213
; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
240-
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
241-
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
242-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
243-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
214+
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
215+
; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
216+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
217+
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[OVF]], [[CMP]]
244218
; CHECK-NEXT: tail call void @use.i64i1({ i64, i1 } [[TMP2]])
245219
; CHECK-NEXT: ret i1 [[TMP6]]
246220
;

0 commit comments

Comments
 (0)