Skip to content

Commit 423729d

Browse files
RKSimon authored and memfrob committed
[X86] Fold PMADD(x,0) or PMADD(0,x) -> 0
Pulled out of D108522 - handle zero-operand cases for PMADDWD/VPMADDUBSW ops
1 parent d2b8684 commit 423729d

File tree

2 files changed

+25
-17
lines changed

2 files changed

+25
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51824,6 +51824,21 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
5182451824
return SDValue();
5182551825
}
5182651826

51827+
// Simplify VPMADDUBSW/VPMADDWD operations.
51828+
static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
51829+
TargetLowering::DAGCombinerInfo &DCI) {
51830+
SDValue LHS = N->getOperand(0);
51831+
SDValue RHS = N->getOperand(1);
51832+
51833+
// Multiply by zero.
51834+
// Don't return LHS/RHS as it may contain UNDEFs.
51835+
if (ISD::isBuildVectorAllZeros(LHS.getNode()) ||
51836+
ISD::isBuildVectorAllZeros(RHS.getNode()))
51837+
return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
51838+
51839+
return SDValue();
51840+
}
51841+
5182751842
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
5182851843
TargetLowering::DAGCombinerInfo &DCI,
5182951844
const X86Subtarget &Subtarget) {
@@ -52274,6 +52289,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5227452289
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
5227552290
case X86ISD::PMULDQ:
5227652291
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
52292+
case X86ISD::VPMADDUBSW:
52293+
case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
5227752294
case X86ISD::KSHIFTL:
5227852295
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
5227952296
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);

llvm/test/CodeGen/X86/combine-pmadd.ll

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind
99
define <4 x i32> @combine_pmaddwd_zero(<8 x i16> %a0, <8 x i16> %a1) {
1010
; SSE-LABEL: combine_pmaddwd_zero:
1111
; SSE: # %bb.0:
12-
; SSE-NEXT: pxor %xmm1, %xmm1
13-
; SSE-NEXT: pmaddwd %xmm1, %xmm0
12+
; SSE-NEXT: xorps %xmm0, %xmm0
1413
; SSE-NEXT: retq
1514
;
1615
; AVX-LABEL: combine_pmaddwd_zero:
1716
; AVX: # %bb.0:
18-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19-
; AVX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
17+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
2018
; AVX-NEXT: retq
2119
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> zeroinitializer)
2220
ret <4 x i32> %1
@@ -25,14 +23,12 @@ define <4 x i32> @combine_pmaddwd_zero(<8 x i16> %a0, <8 x i16> %a1) {
2523
define <4 x i32> @combine_pmaddwd_zero_commute(<8 x i16> %a0, <8 x i16> %a1) {
2624
; SSE-LABEL: combine_pmaddwd_zero_commute:
2725
; SSE: # %bb.0:
28-
; SSE-NEXT: pxor %xmm1, %xmm1
29-
; SSE-NEXT: pmaddwd %xmm1, %xmm0
26+
; SSE-NEXT: xorps %xmm0, %xmm0
3027
; SSE-NEXT: retq
3128
;
3229
; AVX-LABEL: combine_pmaddwd_zero_commute:
3330
; AVX: # %bb.0:
34-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35-
; AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
31+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
3632
; AVX-NEXT: retq
3733
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> zeroinitializer, <8 x i16> %a0)
3834
ret <4 x i32> %1
@@ -41,14 +37,12 @@ define <4 x i32> @combine_pmaddwd_zero_commute(<8 x i16> %a0, <8 x i16> %a1) {
4137
define <8 x i16> @combine_pmaddubsw_zero(<16 x i8> %a0, <16 x i8> %a1) {
4238
; SSE-LABEL: combine_pmaddubsw_zero:
4339
; SSE: # %bb.0:
44-
; SSE-NEXT: pxor %xmm1, %xmm1
45-
; SSE-NEXT: pmaddubsw %xmm1, %xmm0
40+
; SSE-NEXT: xorps %xmm0, %xmm0
4641
; SSE-NEXT: retq
4742
;
4843
; AVX-LABEL: combine_pmaddubsw_zero:
4944
; AVX: # %bb.0:
50-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
51-
; AVX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
45+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
5246
; AVX-NEXT: retq
5347
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
5448
ret <8 x i16> %1
@@ -57,15 +51,12 @@ define <8 x i16> @combine_pmaddubsw_zero(<16 x i8> %a0, <16 x i8> %a1) {
5751
define <8 x i16> @combine_pmaddubsw_zero_commute(<16 x i8> %a0, <16 x i8> %a1) {
5852
; SSE-LABEL: combine_pmaddubsw_zero_commute:
5953
; SSE: # %bb.0:
60-
; SSE-NEXT: pxor %xmm1, %xmm1
61-
; SSE-NEXT: pmaddubsw %xmm0, %xmm1
62-
; SSE-NEXT: movdqa %xmm1, %xmm0
54+
; SSE-NEXT: xorps %xmm0, %xmm0
6355
; SSE-NEXT: retq
6456
;
6557
; AVX-LABEL: combine_pmaddubsw_zero_commute:
6658
; AVX: # %bb.0:
67-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
68-
; AVX-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0
59+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
6960
; AVX-NEXT: retq
7061
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> zeroinitializer, <16 x i8> %a0)
7162
ret <8 x i16> %1

0 commit comments

Comments
 (0)