Skip to content

Commit 3f33d67

Browse files
committed
[InstCombine] fold mul with masked low bit operand to trunc+select
Alive2 proof: https://alive2.llvm.org/ce/z/o7rQ5q — This shows an extra instruction in some cases, but that is caused by an existing canonicalization of trunc -> and+icmp. Codegen should be better for any target where a multiply is more costly than the simplest ALU op. This ends up producing the requested x86 asm from issue #55618, but it's not the same IR: we are missing a canonicalization from the negate+mask pattern to the trunc+select created here.
1 parent abb21b5 commit 3f33d67

File tree

6 files changed

+34
-25
lines changed

6 files changed

+34
-25
lines changed

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,12 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
390390
return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
391391
}
392392

393+
// (and X, 1) * Y --> (trunc X) ? Y : 0
394+
if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
395+
Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
396+
return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
397+
}
398+
393399
// ((ashr X, 31) | 1) * X --> abs(X)
394400
// X * ((ashr X, 31) | 1) --> abs(X)
395401
if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),

llvm/test/Transforms/InstCombine/icmp-mul-and.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,11 @@ define i1 @pr51551_neg1(i32 %x, i32 %y) {
267267

268268
define i1 @pr51551_neg2(i32 %x, i32 %y) {
269269
; CHECK-LABEL: @pr51551_neg2(
270-
; CHECK-NEXT: [[T0:%.*]] = and i32 [[Y:%.*]], 1
271-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[T0]], [[X:%.*]]
272-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7
273-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
270+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], 1
271+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
272+
; CHECK-NEXT: [[X_OP:%.*]] = and i32 [[X:%.*]], 7
273+
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X_OP]], 0
274+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[DOTNOT]], i1 true, i1 [[CMP1]]
274275
; CHECK-NEXT: ret i1 [[CMP]]
275276
;
276277
%t0 = and i32 %y, -7

llvm/test/Transforms/InstCombine/icmp-mul.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -749,16 +749,11 @@ define i1 @not_mul_of_bool_commute(i32 %x, i32 %y) {
749749
ret i1 %r
750750
}
751751

752-
; negative test - no leading zeros for 's'
753-
; TODO: If analysis was generalized for sign bits, we could reduce this to false.
752+
; no leading zeros for 's', but we reduce this with other transforms
754753

755754
define i1 @mul_of_bool_no_lz_other_op(i32 %x, i8 %y) {
756755
; CHECK-LABEL: @mul_of_bool_no_lz_other_op(
757-
; CHECK-NEXT: [[B:%.*]] = and i32 [[X:%.*]], 1
758-
; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
759-
; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[B]], [[S]]
760-
; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 127
761-
; CHECK-NEXT: ret i1 [[R]]
756+
; CHECK-NEXT: ret i1 false
762757
;
763758
%b = and i32 %x, 1
764759
%s = sext i8 %y to i32

llvm/test/Transforms/InstCombine/mul-masked-bits.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
8080

8181
define i8 @one_demanded_bit(i8 %x) {
8282
; CHECK-LABEL: @one_demanded_bit(
83-
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 6
84-
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -65
83+
; CHECK-NEXT: [[M:%.*]] = shl i8 [[X:%.*]], 6
84+
; CHECK-NEXT: [[R:%.*]] = or i8 [[M]], -65
8585
; CHECK-NEXT: ret i8 [[R]]
8686
;
8787
%m = mul i8 %x, 192 ; 0b1100_0000
@@ -91,8 +91,8 @@ define i8 @one_demanded_bit(i8 %x) {
9191

9292
define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) {
9393
; CHECK-LABEL: @one_demanded_bit_splat(
94-
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
95-
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 32, i8 32>
94+
; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
95+
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], <i8 32, i8 32>
9696
; CHECK-NEXT: ret <2 x i8> [[R]]
9797
;
9898
%m = mul <2 x i8> %x, <i8 160, i8 160> ; 0b1010_0000
@@ -201,9 +201,10 @@ define i64 @scalar_mul_bit_x0_y0_uses(i64 %x, i64 %y) {
201201
; Negative test
202202
define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
203203
; CHECK-LABEL: @scalar_mul_bit_x0_y1(
204-
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
205204
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 2
206-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]]
205+
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
206+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
207+
; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
207208
; CHECK-NEXT: ret i64 [[MUL]]
208209
;
209210
%and1 = and i64 %x, 1
@@ -214,9 +215,10 @@ define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
214215

215216
define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) {
216217
; CHECK-LABEL: @scalar_mul_bit_x0_yC(
217-
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
218218
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], [[C:%.*]]
219-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[AND1]], [[AND2]]
219+
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
220+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
221+
; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
220222
; CHECK-NEXT: ret i64 [[MUL]]
221223
;
222224
%and1 = and i64 %x, 1

llvm/test/Transforms/InstCombine/mul.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,9 @@ define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
466466

467467
define i32 @lowbit_mul(i32 %a, i32 %b) {
468468
; CHECK-LABEL: @lowbit_mul(
469-
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1
470-
; CHECK-NEXT: [[E:%.*]] = mul nuw i32 [[D]], [[B:%.*]]
469+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 1
470+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
471+
; CHECK-NEXT: [[E:%.*]] = select i1 [[DOTNOT]], i32 0, i32 [[B:%.*]]
471472
; CHECK-NEXT: ret i32 [[E]]
472473
;
473474
%d = and i32 %a, 1
@@ -480,8 +481,8 @@ define i32 @lowbit_mul(i32 %a, i32 %b) {
480481
define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
481482
; CHECK-LABEL: @lowbit_mul_commute(
482483
; CHECK-NEXT: [[B:%.*]] = xor <2 x i17> [[P:%.*]], <i17 42, i17 43>
483-
; CHECK-NEXT: [[D:%.*]] = and <2 x i17> [[A:%.*]], <i17 1, i17 1>
484-
; CHECK-NEXT: [[E:%.*]] = mul nuw <2 x i17> [[B]], [[D]]
484+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i17> [[A:%.*]] to <2 x i1>
485+
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP1]], <2 x i17> [[B]], <2 x i17> zeroinitializer
485486
; CHECK-NEXT: ret <2 x i17> [[E]]
486487
;
487488
%b = xor <2 x i17> %p, <i17 42, i17 43> ; thwart complexity-based canonicalization
@@ -490,6 +491,8 @@ define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
490491
ret <2 x i17> %e
491492
}
492493

494+
; negative test - extra use
495+
493496
define i32 @lowbit_mul_use(i32 %a, i32 %b) {
494497
; CHECK-LABEL: @lowbit_mul_use(
495498
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1
@@ -503,6 +506,8 @@ define i32 @lowbit_mul_use(i32 %a, i32 %b) {
503506
ret i32 %e
504507
}
505508

509+
; negative test - wrong mask
510+
506511
define i32 @not_lowbit_mul(i32 %a, i32 %b) {
507512
; CHECK-LABEL: @not_lowbit_mul(
508513
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 2

llvm/test/Transforms/InstCombine/or.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,8 +1499,8 @@ define i32 @mul_no_common_bits_const_op(i32 %p) {
14991499

15001500
define <2 x i12> @mul_no_common_bits_commute(<2 x i12> %p) {
15011501
; CHECK-LABEL: @mul_no_common_bits_commute(
1502-
; CHECK-NEXT: [[X:%.*]] = and <2 x i12> [[P:%.*]], <i12 1, i12 1>
1503-
; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <2 x i12> [[X]], <i12 15, i12 17>
1502+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i12> [[P:%.*]] to <2 x i1>
1503+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i12> <i12 15, i12 17>, <2 x i12> zeroinitializer
15041504
; CHECK-NEXT: ret <2 x i12> [[R]]
15051505
;
15061506
%x = and <2 x i12> %p, <i12 1, i12 1>

0 commit comments

Comments
 (0)