Skip to content

Commit c9d9c3e

Browse files
authored
[InstCombine] Fold icmp pred X + K, Y -> icmp pred2 X, Y if both X and Y are divisible by K (llvm#147130)
This patch generalizes `icmp ule X +nuw 1, Y -> icmp ult X, Y`-like optimizations to handle the case that the added RHS constant is a common power-of-2 divisor of both X and Y. We can further generalize this pattern to handle non-power-of-2 divisors as well. Alive2: https://alive2.llvm.org/ce/z/QgpeM_ Compile-time improvement (Stage2-O3 -0.09%): https://llvm-compile-time-tracker.com/compare.php?from=0ba59587fa98849ed5107fee4134e810e84b69a3&to=f80e5fe0bb2e63c05401bde7cd42899ea270909b&stat=instructions:u The original case is from the comparison of expanded GEP offsets: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2530/files#r2183005292
1 parent 0bc6d60 commit c9d9c3e

File tree

2 files changed

+165
-55
lines changed

2 files changed

+165
-55
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 63 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5120,6 +5120,18 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
51205120
return nullptr;
51215121
}
51225122

5123+
/// Return true if X is a multiple of C.
/// Only C == 1 and power-of-2 values of C are recognized; for any other C this
/// returns false without inspecting X, so a false result does not prove that X
/// is not a multiple of C. Q is forwarded unchanged to MaskedValueIsZero.
5124+
/// TODO: Handle non-power-of-2 factors.
5125+
static bool isMultipleOf(Value *X, const APInt &C, const SimplifyQuery &Q) {
5126+
// Every value is a multiple of 1, so no query on X is needed.
if (C.isOne())
5127+
return true;
5128+
5129+
// Non-power-of-2 divisors are not supported yet (see the TODO above).
if (!C.isPowerOf2())
5130+
return false;
5131+
5132+
// C is a power of 2 here, so C - 1 is the mask of all bits below C's single
// set bit; X is a multiple of C exactly when those low bits are zero.
return MaskedValueIsZero(X, C - 1, Q);
5133+
}
5134+
51235135
/// Try to fold icmp (binop), X or icmp X, (binop).
51245136
/// TODO: A large part of this logic is duplicated in InstSimplify's
51255137
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -5278,66 +5290,62 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
52785290
return new ICmpInst(Pred, Y, Z);
52795291
}
52805292

5281-
// icmp slt (A + -1), Op1 -> icmp sle A, Op1
5282-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
5283-
match(B, m_AllOnes()))
5284-
return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
5285-
5286-
// icmp sge (A + -1), Op1 -> icmp sgt A, Op1
5287-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
5288-
match(B, m_AllOnes()))
5289-
return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
5290-
5291-
// icmp sle (A + 1), Op1 -> icmp slt A, Op1
5292-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
5293-
return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
5294-
5295-
// icmp sgt (A + 1), Op1 -> icmp sge A, Op1
5296-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
5297-
return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
5293+
if (ICmpInst::isRelational(Pred)) {
5294+
// Return true if both X and Y are divisible by Z/-Z.
5295+
// TODO: Generalize to check if (X - Y) is divisible by Z/-Z.
5296+
auto ShareCommonDivisor = [&Q](Value *X, Value *Y, Value *Z,
5297+
bool IsNegative) -> bool {
5298+
const APInt *OffsetC;
5299+
if (!match(Z, m_APInt(OffsetC)))
5300+
return false;
52985301

5299-
// icmp sgt Op0, (C + -1) -> icmp sge Op0, C
5300-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
5301-
match(D, m_AllOnes()))
5302-
return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
5302+
// Fast path for Z == 1/-1.
5303+
if (IsNegative ? OffsetC->isAllOnes() : OffsetC->isOne())
5304+
return true;
53035305

5304-
// icmp sle Op0, (C + -1) -> icmp slt Op0, C
5305-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
5306-
match(D, m_AllOnes()))
5307-
return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
5306+
APInt C = *OffsetC;
5307+
if (IsNegative)
5308+
C.negate();
5309+
// Note: -INT_MIN is also negative.
5310+
if (!C.isStrictlyPositive())
5311+
return false;
53085312

5309-
// icmp sge Op0, (C + 1) -> icmp sgt Op0, C
5310-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
5311-
return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
5313+
return isMultipleOf(X, C, Q) && isMultipleOf(Y, C, Q);
5314+
};
53125315

5313-
// icmp slt Op0, (C + 1) -> icmp sle Op0, C
5314-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
5315-
return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
5316+
// TODO: The subtraction-related identities shown below also hold, but
5317+
// canonicalization from (X -nuw 1) to (X + -1) means that the combinations
5318+
// wouldn't happen even if they were implemented.
5319+
//
5320+
// icmp ult (A - 1), Op1 -> icmp ule A, Op1
5321+
// icmp uge (A - 1), Op1 -> icmp ugt A, Op1
5322+
// icmp ugt Op0, (C - 1) -> icmp uge Op0, C
5323+
// icmp ule Op0, (C - 1) -> icmp ult Op0, C
5324+
5325+
// icmp slt (A + -1), Op1 -> icmp sle A, Op1
5326+
// icmp sge (A + -1), Op1 -> icmp sgt A, Op1
5327+
// icmp sle (A + 1), Op1 -> icmp slt A, Op1
5328+
// icmp sgt (A + 1), Op1 -> icmp sge A, Op1
5329+
// icmp ule (A + 1), Op1 -> icmp ult A, Op1
5330+
// icmp ugt (A + 1), Op1 -> icmp uge A, Op1
5331+
if (A && NoOp0WrapProblem &&
5332+
ShareCommonDivisor(A, Op1, B,
5333+
ICmpInst::isLT(Pred) || ICmpInst::isGE(Pred)))
5334+
return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), A,
5335+
Op1);
53165336

5317-
// TODO: The subtraction-related identities shown below also hold, but
5318-
// canonicalization from (X -nuw 1) to (X + -1) means that the combinations
5319-
// wouldn't happen even if they were implemented.
5320-
//
5321-
// icmp ult (A - 1), Op1 -> icmp ule A, Op1
5322-
// icmp uge (A - 1), Op1 -> icmp ugt A, Op1
5323-
// icmp ugt Op0, (C - 1) -> icmp uge Op0, C
5324-
// icmp ule Op0, (C - 1) -> icmp ult Op0, C
5325-
5326-
// icmp ule (A + 1), Op0 -> icmp ult A, Op1
5327-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
5328-
return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
5329-
5330-
// icmp ugt (A + 1), Op0 -> icmp uge A, Op1
5331-
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
5332-
return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
5333-
5334-
// icmp uge Op0, (C + 1) -> icmp ugt Op0, C
5335-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
5336-
return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
5337-
5338-
// icmp ult Op0, (C + 1) -> icmp ule Op0, C
5339-
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
5340-
return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
5337+
// icmp sgt Op0, (C + -1) -> icmp sge Op0, C
5338+
// icmp sle Op0, (C + -1) -> icmp slt Op0, C
5339+
// icmp sge Op0, (C + 1) -> icmp sgt Op0, C
5340+
// icmp slt Op0, (C + 1) -> icmp sle Op0, C
5341+
// icmp uge Op0, (C + 1) -> icmp ugt Op0, C
5342+
// icmp ult Op0, (C + 1) -> icmp ule Op0, C
5343+
if (C && NoOp1WrapProblem &&
5344+
ShareCommonDivisor(Op0, C, D,
5345+
ICmpInst::isGT(Pred) || ICmpInst::isLE(Pred)))
5346+
return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), Op0,
5347+
C);
5348+
}
53415349

53425350
// if C1 has greater magnitude than C2:
53435351
// icmp (A + C1), (C + C2) -> icmp (A + C3), C

llvm/test/Transforms/InstCombine/icmp.ll

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,6 +1900,108 @@ define i1 @icmp_add1_sle(i32 %x, i32 %y) {
19001900
ret i1 %cmp
19011901
}
19021902

1903+
define i1 @icmp_slt_offset_with_common_divisor(i64 %x, i64 %y) {
1904+
; CHECK-LABEL: @icmp_slt_offset_with_common_divisor(
1905+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1906+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1907+
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
1908+
; CHECK-NEXT: ret i1 [[CMP]]
1909+
;
1910+
%shlx = shl i64 %x, 4
1911+
%shly = shl i64 %y, 4
1912+
%shlx_offset = add nsw i64 %shlx, -16
1913+
%cmp = icmp slt i64 %shlx_offset, %shly
1914+
ret i1 %cmp
1915+
}
1916+
1917+
define i1 @icmp_slt_offset_with_smaller_common_divisor(i64 %x, i64 %y) {
1918+
; CHECK-LABEL: @icmp_slt_offset_with_smaller_common_divisor(
1919+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1920+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1921+
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
1922+
; CHECK-NEXT: ret i1 [[CMP]]
1923+
;
1924+
%shlx = shl i64 %x, 4
1925+
%shly = shl i64 %y, 4
1926+
%shlx_offset = add nsw i64 %shlx, -8
1927+
%cmp = icmp slt i64 %shlx_offset, %shly
1928+
ret i1 %cmp
1929+
}
1930+
1931+
define i1 @icmp_sle_offset_with_common_divisor(i64 %x, i64 %y) {
1932+
; CHECK-LABEL: @icmp_sle_offset_with_common_divisor(
1933+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1934+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1935+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX]], [[SHLY]]
1936+
; CHECK-NEXT: ret i1 [[CMP]]
1937+
;
1938+
%shlx = shl i64 %x, 4
1939+
%shly = shl i64 %y, 4
1940+
%shlx_offset = add nsw i64 %shlx, 16
1941+
%cmp = icmp sle i64 %shlx_offset, %shly
1942+
ret i1 %cmp
1943+
}
1944+
1945+
define i1 @icmp_ule_offset_with_common_divisor(i64 %x, i64 %y) {
1946+
; CHECK-LABEL: @icmp_ule_offset_with_common_divisor(
1947+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1948+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1949+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[SHLX]], [[SHLY]]
1950+
; CHECK-NEXT: ret i1 [[CMP]]
1951+
;
1952+
%shlx = shl i64 %x, 4
1953+
%shly = shl i64 %y, 4
1954+
%shlx_offset = add nuw i64 %shlx, 16
1955+
%cmp = icmp ule i64 %shlx_offset, %shly
1956+
ret i1 %cmp
1957+
}
1958+
1959+
; TODO: Handle non-power-of-2 divisors
1960+
define i1 @icmp_ule_offset_with_common_non_pow2_divisor(i64 %x, i64 %y) {
1961+
; CHECK-LABEL: @icmp_ule_offset_with_common_non_pow2_divisor(
1962+
; CHECK-NEXT: [[MULX:%.*]] = mul nuw i64 [[X:%.*]], 7
1963+
; CHECK-NEXT: [[MULY:%.*]] = mul nuw i64 [[Y:%.*]], 7
1964+
; CHECK-NEXT: [[MULX_OFFSET:%.*]] = add nuw i64 [[MULX]], 7
1965+
; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MULX_OFFSET]], [[MULY]]
1966+
; CHECK-NEXT: ret i1 [[CMP]]
1967+
;
1968+
%mulx = mul nuw i64 %x, 7
1969+
%muly = mul nuw i64 %y, 7
1970+
%mulx_offset = add nuw i64 %mulx, 7
1971+
%cmp = icmp ule i64 %mulx_offset, %muly
1972+
ret i1 %cmp
1973+
}
1974+
1975+
define i1 @neg_icmp_slt_offset_without_common_divisor(i64 %x, i64 %y) {
1976+
; CHECK-LABEL: @neg_icmp_slt_offset_without_common_divisor(
1977+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1978+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1979+
; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], -32
1980+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
1981+
; CHECK-NEXT: ret i1 [[CMP]]
1982+
;
1983+
%shlx = shl i64 %x, 4
1984+
%shly = shl i64 %y, 4
1985+
%shlx_offset = add nsw i64 %shlx, -32
1986+
%cmp = icmp slt i64 %shlx_offset, %shly
1987+
ret i1 %cmp
1988+
}
1989+
1990+
define i1 @neg_icmp_slt_offset_with_wrong_sign(i64 %x, i64 %y) {
1991+
; CHECK-LABEL: @neg_icmp_slt_offset_with_wrong_sign(
1992+
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
1993+
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
1994+
; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], 16
1995+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
1996+
; CHECK-NEXT: ret i1 [[CMP]]
1997+
;
1998+
%shlx = shl i64 %x, 4
1999+
%shly = shl i64 %y, 4
2000+
%shlx_offset = add nsw i64 %shlx, 16
2001+
%cmp = icmp slt i64 %shlx_offset, %shly
2002+
ret i1 %cmp
2003+
}
2004+
19032005
define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
19042006
; CHECK-LABEL: @icmp_add20_sge_add57(
19052007
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[Y:%.*]], 37

0 commit comments

Comments
 (0)