Skip to content

Commit 889854b

Browse files
authored
[InstCombine] Avoid unprofitable add with remainder transform (#147319)
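If C1 is 1 and the divisor is a power of two, the add-with-remainder transform ends up replacing the `and` that computes the remainder with a multiply, which also lengthens the dependency chain. Skip the transform in that case (unsigned division by a power of two other than 2). Fixes #147176.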
1 parent 6c8c836 commit 889854b

File tree

2 files changed

+126
-1
lines changed

2 files changed

+126
-1
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1194,7 +1194,9 @@ Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
11941194
Value *DivOpV;
11951195
APInt DivOpC;
11961196
if (MatchRem(Rem, X, C0, IsSigned) &&
1197-
MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC) {
1197+
MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC &&
1198+
// Avoid unprofitable replacement of and with mul.
1199+
!(C1.isOne() && !IsSigned && DivOpC.isPowerOf2() && DivOpC != 2)) {
11981200
APInt NewC = C1 - C2 * C0;
11991201
if (!NewC.isZero() && !Rem->hasOneUse())
12001202
return nullptr;

llvm/test/Transforms/InstCombine/add4.ll

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,126 @@ entry:
289289
%add = add i32 %shl, %rem
290290
ret i32 %add
291291
}
292+
293+
; Adds udiv and urem of the same value by 10 (not a power of two), with
; neither operand scaled. The expected output no longer contains the urem:
; it is val + (val udiv 10) * -9.
define i32 @fold_add_udiv_urem_no_mul(i32 noundef %val) {
294+
; CHECK-LABEL: @fold_add_udiv_urem_no_mul(
295+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
296+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], -9
297+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[VAL]]
298+
; CHECK-NEXT: ret i32 [[ADD]]
299+
;
300+
%div = udiv i32 %val, 10
301+
%rem = urem i32 %val, 10
302+
%add = add i32 %div, %rem
303+
ret i32 %add
304+
}
305+
306+
; Adds udiv by 10 to the urem by 10 scaled by 3. The expected output no
; longer contains the urem: it is val * 3 + (val udiv 10) * -29.
define i32 @fold_add_udiv_urem_rem_mul(i32 noundef %val) {
306+
; CHECK-LABEL: @fold_add_udiv_urem_rem_mul(
307+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
308+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL]], 3
309+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[DIV]], -29
310+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[TMP1]]
311+
; CHECK-NEXT: ret i32 [[ADD]]
312+
;
313+
%div = udiv i32 %val, 10
314+
%rem = urem i32 %val, 10
315+
%mul = mul i32 %rem, 3
316+
%add = add i32 %div, %mul
317+
ret i32 %add
318+
}
320+
321+
; Power-of-two case written as lshr by 4 plus and with 15, neither operand
; scaled. The expected output keeps the lshr/and/add sequence and introduces
; no mul; the only difference from the input is nuw nsw on the add.
define i32 @fold_add_udiv_urem_pow2_no_mul(i32 noundef %arg) {
322+
; CHECK-LABEL: @fold_add_udiv_urem_pow2_no_mul(
323+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
324+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG]], 15
325+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[LSHR]], [[AND]]
326+
; CHECK-NEXT: ret i32 [[ADD]]
327+
;
328+
%lshr = lshr i32 %arg, 4
329+
%and = and i32 %arg, 15
330+
%add = add i32 %lshr, %and
331+
ret i32 %add
332+
}
333+
334+
; Power-of-two case where the lshr-by-4 result is scaled by 3 and the
; and-with-15 result is unscaled. The expected output no longer contains the
; and: it is arg + (arg lshr 4) * -13.
define i32 @fold_add_udiv_urem_pow2_div_mul(i32 noundef %arg) {
335+
; CHECK-LABEL: @fold_add_udiv_urem_pow2_div_mul(
336+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
337+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[LSHR]], -13
338+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[ARG]]
339+
; CHECK-NEXT: ret i32 [[ADD]]
340+
;
341+
%lshr = lshr i32 %arg, 4
342+
%mul = mul i32 %lshr, 3
343+
%and = and i32 %arg, 15
344+
%add = add i32 %mul, %and
345+
ret i32 %add
346+
}
347+
348+
; Signed variant: adds sdiv and srem of the same value by 10, with neither
; operand scaled. The expected output no longer contains the srem: it is
; val + (val sdiv 10) * -9.
define i32 @fold_add_sdiv_srem_no_mul(i32 noundef %val) {
349+
; CHECK-LABEL: @fold_add_sdiv_srem_no_mul(
350+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[VAL:%.*]], 10
351+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], -9
352+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[VAL]]
353+
; CHECK-NEXT: ret i32 [[ADD]]
354+
;
355+
%div = sdiv i32 %val, 10
356+
%rem = srem i32 %val, 10
357+
%add = add i32 %div, %rem
358+
ret i32 %add
359+
}
360+
361+
; Power-of-two case where the lshr-by-4 result is unscaled and the
; and-with-15 result is scaled by 3. The expected output keeps the and and
; its mul by 3; the only difference from the input is nuw nsw on the mul
; and the add.
define i32 @fold_add_udiv_urem_pow2_rem_mul(i32 noundef %arg) {
362+
; CHECK-LABEL: @fold_add_udiv_urem_pow2_rem_mul(
363+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
364+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG]], 15
365+
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 3
366+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[LSHR]], [[MUL]]
367+
; CHECK-NEXT: ret i32 [[ADD]]
368+
;
369+
%lshr = lshr i32 %arg, 4
370+
%and = and i32 %arg, 15
371+
%mul = mul i32 %and, 3
372+
%add = add i32 %lshr, %mul
373+
ret i32 %add
374+
}
375+
376+
; Power-of-two case where both operands are scaled: lshr by 4 times 7, plus
; and with 15 times 3. The expected output no longer contains the and: it is
; arg * 3 + (arg lshr 4) * -41.
define i32 @fold_add_udiv_urem_pow2_both_mul(i32 noundef %arg) {
377+
; CHECK-LABEL: @fold_add_udiv_urem_pow2_both_mul(
378+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
379+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[ARG]], 3
380+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[LSHR]], -41
381+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[TMP1]]
382+
; CHECK-NEXT: ret i32 [[ADD]]
383+
;
384+
%lshr = lshr i32 %arg, 4
385+
%mul1 = mul i32 %lshr, 7
386+
%and = and i32 %arg, 15
387+
%mul2 = mul i32 %and, 3
388+
%add = add i32 %mul1, %mul2
389+
ret i32 %add
390+
}
391+
392+
; Divisor of 2 written as lshr by 1 plus and with 1, neither operand scaled.
; The expected output no longer contains the and: it is arg - (arg lshr 1).
define i32 @fold_add_udiv_urem_by_two_no_mul(i32 noundef %arg) {
393+
; CHECK-LABEL: @fold_add_udiv_urem_by_two_no_mul(
394+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 1
395+
; CHECK-NEXT: [[ADD:%.*]] = sub i32 [[ARG]], [[LSHR]]
396+
; CHECK-NEXT: ret i32 [[ADD]]
397+
;
398+
%lshr = lshr i32 %arg, 1
399+
%and = and i32 %arg, 1
400+
%add = add i32 %lshr, %and
401+
ret i32 %add
402+
}
403+
404+
; Signed variant with divisor 2: adds sdiv and srem of the same value, with
; neither operand scaled. The expected output no longer contains the srem:
; it is arg + (arg sdiv -2).
define i32 @fold_add_sdiv_srem_by_two_no_mul(i32 noundef %arg) {
405+
; CHECK-LABEL: @fold_add_sdiv_srem_by_two_no_mul(
406+
; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv i32 [[ARG:%.*]], -2
407+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[DIV_NEG]], [[ARG]]
408+
; CHECK-NEXT: ret i32 [[ADD]]
409+
;
410+
%div = sdiv i32 %arg, 2
411+
%rem = srem i32 %arg, 2
412+
%add = add i32 %div, %rem
413+
ret i32 %add
414+
}

0 commit comments

Comments
 (0)