Skip to content

Commit 9544bb5

Browse files
authored
[InstCombine] Fold umul.overflow(x, c1) | (x*c1 > c2) to x > c2/c1 (#147327)
The motivation of this pattern is to check whether the product of a variable and a constant would be mathematically (i.e., as integer numbers instead of bit vectors) greater than a given constant bound. The pattern appears to occur when compiling several Rust projects (it seems to originate from the `smallvec` crate but I have not checked this further). Unless `c1` is `0`, we can transform this pattern into `x > c2/c1` with all operations working on unsigned integers. Due to undefined behavior when an element of a non-splat vector is `0`, the transform is only implemented for scalars and splat vectors. Alive proof: https://alive2.llvm.org/ce/z/LawTkm Closes #142674
1 parent c042273 commit 9544bb5

File tree

2 files changed

+267
-0
lines changed

2 files changed

+267
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,6 +3717,30 @@ Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) {
37173717
return nullptr;
37183718
}
37193719

3720+
/// Fold Res, Overflow = (umul.with.overflow x c1); (or Overflow (ugt Res c2))
3721+
/// --> (ugt x (c2/c1)). This code checks whether a multiplication of two
3722+
/// unsigned numbers (one is a constant) is mathematically greater than a
3723+
/// second constant.
3724+
static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
3725+
InstCombiner::BuilderTy &Builder,
3726+
const DataLayout &DL) {
3727+
Value *WOV, *X;
3728+
const APInt *C1, *C2;
3729+
if (match(&I,
3730+
m_c_Or(m_ExtractValue<1>(
3731+
m_CombineAnd(m_Intrinsic<Intrinsic::umul_with_overflow>(
3732+
m_Value(X), m_APInt(C1)),
3733+
m_Value(WOV))),
3734+
m_OneUse(m_SpecificCmp(ICmpInst::ICMP_UGT,
3735+
m_ExtractValue<0>(m_Deferred(WOV)),
3736+
m_APInt(C2))))) &&
3737+
!C1->isZero()) {
3738+
Constant *NewC = ConstantInt::get(X->getType(), C2->udiv(*C1));
3739+
return Builder.CreateICmp(ICmpInst::ICMP_UGT, X, NewC);
3740+
}
3741+
return nullptr;
3742+
}
3743+
37203744
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
37213745
// here. We should standardize that construct where it is needed or choose some
37223746
// other way to ensure that commutated variants of patterns are not missed.
@@ -4150,6 +4174,11 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
41504174
}
41514175
}
41524176

4177+
// Try to fold the pattern "Overflow | icmp pred Res, C2" into a single
4178+
// comparison instruction for umul.with.overflow.
4179+
if (Value *R = foldOrUnsignedUMulOverflowICmp(I, Builder, DL))
4180+
return replaceInstUsesWith(I, R);
4181+
41534182
// (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
41544183
if (sinkNotIntoOtherHandOfLogicalOp(I))
41554184
return &I;
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3+
4+
declare void @use.i1(i1 %x)
5+
declare void @use.i64(i64 %x)
6+
declare void @use.i64i1({i64, i1} %x)
7+
8+
; Scalar case: the overflow bit or'd with (mul ugt -16) folds to a single
; icmp ugt on the multiplicand (expected bound: (2^64 - 16) / 168).
define i1 @umul_greater_than_or_overflow_const(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[IN]], 109802048057794950
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 168)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, -16
  %ret = or i1 %ovf, %cmp
  ret i1 %ret
}
21+
22+
; Same fold on a narrow (i8) type.
define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_i8(
; CHECK-SAME: i8 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[IN]], 10
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %in, i8 24)
  %mul = extractvalue { i8, i1 } %mwo, 0
  %ovf = extractvalue { i8, i1 } %mwo, 1
  %cmp = icmp ugt i8 %mul, -16
  %ret = or i1 %ovf, %cmp
  ret i1 %ret
}
35+
36+
; Commuted 'or' operands (%cmp first) — the matcher uses m_c_Or, so the fold
; must still apply.
define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_commuted(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or i1 %cmp, %ovf
  ret i1 %ret
}
49+
50+
; An 'or disjoint' flag must not block the fold.
define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_disjoint(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[IN]], 230584300921369395
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 40)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or disjoint i1 %ovf, %cmp
  ret i1 %ret
}
63+
64+
; Extra use of the multiplication result: the fold still fires, and the
; product is rematerialized as a plain mul for the other user.
define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_mul(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[IN]], 48
; CHECK-NEXT:    [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT:    tail call void @use.i64(i64 [[MUL]])
; CHECK-NEXT:    ret i1 [[RET]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or i1 %ovf, %cmp
  tail call void @use.i64(i64 %mul)
  ret i1 %ret
}
80+
81+
; Extra use of the overflow bit: both the overflow check and the or fold to
; separate icmps on %in.
define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[OVF:%.*]] = icmp ugt i64 [[IN]], 384307168202282325
; CHECK-NEXT:    [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT:    tail call void @use.i1(i1 [[OVF]])
; CHECK-NEXT:    ret i1 [[RET]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or i1 %ovf, %cmp
  tail call void @use.i1(i1 %ovf)
  ret i1 %ret
}
97+
98+
; Extra use of the whole { i64, i1 } aggregate: the intrinsic call stays, but
; the or is still folded to a single icmp.
define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
; CHECK-NEXT:    [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT:    tail call void @use.i64i1({ i64, i1 } [[MWO]])
; CHECK-NEXT:    ret i1 [[RET]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or i1 %ovf, %cmp
  tail call void @use.i64i1({ i64, i1 } %mwo)
  ret i1 %ret
}
114+
115+
; Splat-vector case: the fold applies and produces a splat bound.
define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(<2 x i64> %in) {
; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(
; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt <2 x i64> [[IN]], splat (i64 6477087104532848)
; CHECK-NEXT:    ret <2 x i1> [[TMP6]]
;
  %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 1424, i64 1424>)
  %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0
  %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1
  %cmp = icmp ugt <2 x i64> %mul, <i64 9223372036854775800, i64 9223372036854775800>
  %ret = or <2 x i1> %ovf, %cmp
  ret <2 x i1> %ret
}
128+
129+
; Negative test: non-splat multiplier (including a 0 lane) must not be folded,
; since x > C2/C1 would divide by zero for that lane.
define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative(<4 x i64> %in) {
; CHECK-LABEL: define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative(
; CHECK-SAME: <4 x i64> [[IN:%.*]]) {
; CHECK-NEXT:    [[MWO:%.*]] = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> [[IN]], <4 x i64> <i64 24, i64 1424, i64 0, i64 -1>)
; CHECK-NEXT:    [[MUL:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 0
; CHECK-NEXT:    [[OVF:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <4 x i64> [[MUL]], <i64 9223372036854775000, i64 9223372036854775800, i64 -16, i64 -16>
; CHECK-NEXT:    [[RET:%.*]] = or <4 x i1> [[OVF]], [[CMP]]
; CHECK-NEXT:    ret <4 x i1> [[RET]]
;
  ; Was @llvm.umul.with.overflow.v2i64 with <4 x i64> operands — the mangled
  ; suffix must match the operand type (v4i64), as the CHECK line expects.
  %mwo = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> %in, <4 x i64> <i64 24, i64 1424, i64 0, i64 -1>)
  %mul = extractvalue { <4 x i64>, <4 x i1> } %mwo, 0
  %ovf = extractvalue { <4 x i64>, <4 x i1> } %mwo, 1
  %cmp = icmp ugt <4 x i64> %mul, <i64 9223372036854775000, i64 9223372036854775800, i64 -16, i64 -16>
  %ret = or <4 x i1> %ovf, %cmp
  ret <4 x i1> %ret
}
147+
148+
; Negative test: a poison lane makes the constant non-splat, so no fold.
define <2 x i1> @umul_greater_than_or_overflow_const_vector_poison_non_splat_negative(<2 x i64> %in) {
; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_poison_non_splat_negative(
; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
; CHECK-NEXT:    [[MWO:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> <i64 poison, i64 1424>)
; CHECK-NEXT:    [[MUL:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 0
; CHECK-NEXT:    [[OVF:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i64> [[MUL]], <i64 9223372036854775800, i64 poison>
; CHECK-NEXT:    [[RET:%.*]] = or <2 x i1> [[OVF]], [[CMP]]
; CHECK-NEXT:    ret <2 x i1> [[RET]]
;
  %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 poison, i64 1424>)
  %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0
  %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1
  %cmp = icmp ugt <2 x i64> %mul, <i64 9223372036854775800, i64 poison>
  %ret = or <2 x i1> %ovf, %cmp
  ret <2 x i1> %ret
}
166+
167+
; Negative test: the combining operation is 'and', not 'or' — no fold.
define i1 @umul_greater_than_and_overflow_const_negative(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_and_overflow_const_negative(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800
; CHECK-NEXT:    [[TMP6:%.*]] = and i1 [[TMP4]], [[TMP5]]
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ult i64 %mul, 9223372036854775800
  %ret = and i1 %ovf, %cmp
  ret i1 %ret
}
185+
186+
; Negative test: the comparison is ult, not ugt — no fold.
define i1 @umul_less_than_or_overflow_const_negative(i64 %in) {
; CHECK-LABEL: define i1 @umul_less_than_or_overflow_const_negative(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800
; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ult i64 %mul, 9223372036854775800
  %ret = or i1 %ovf, %cmp
  ret i1 %ret
}
204+
205+
; Negative test: the icmp has a second use, but the matcher requires the
; compare to be one-use — no fold.
define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
; CHECK-NEXT:    tail call void @use.i1(i1 [[TMP5]])
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 9223372036854775800
  %ret = or i1 %ovf, %cmp
  tail call void @use.i1(i1 %cmp)
  ret i1 %ret
}
225+
226+
; Negative test. The umul.with.overflow should be folded away before.
; With a zero multiplier the whole expression constant-folds to false, so the
; new fold (which would divide by C1) never sees it.
define i1 @umul_greater_than_or_overflow_const_0_negative(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_0_negative(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT:    ret i1 false
;
  %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 0)
  %mul = extractvalue { i64, i1 } %mwo, 0
  %ovf = extractvalue { i64, i1 } %mwo, 1
  %cmp = icmp ugt i64 %mul, 0
  %ret = or i1 %ovf, %cmp
  ret i1 %ret
}

0 commit comments

Comments
 (0)