Skip to content

Commit c2426fd

Browse files
committed
[X86][XOP] Add SimplifyDemandedVectorElts handling for XOP shifts
Noticed while investigating how to improve funnel shift codegen.
1 parent 13362ab commit c2426fd

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40082,6 +40082,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4008240082
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
4008340083
break;
4008440084
}
40085+
case X86ISD::VPSHA:
40086+
case X86ISD::VPSHL: {
40087+
APInt LHSUndef, LHSZero;
40088+
APInt RHSUndef, RHSZero;
40089+
SDValue LHS = Op.getOperand(0);
40090+
SDValue RHS = Op.getOperand(1);
40091+
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
40092+
Depth + 1))
40093+
return true;
40094+
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
40095+
Depth + 1))
40096+
return true;
40097+
KnownZero = LHSZero;
40098+
break;
40099+
}
4008540100
case X86ISD::KSHIFTL: {
4008640101
SDValue Src = Op.getOperand(0);
4008740102
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -675,10 +675,11 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
675675
;
676676
; XOP-LABEL: combine_vec_udiv_nonuniform4:
677677
; XOP: # %bb.0:
678-
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
679-
; XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
680-
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
681-
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
678+
; XOP-NEXT: movl $171, %eax
679+
; XOP-NEXT: vmovd %eax, %xmm1
680+
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
681+
; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
682+
; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
682683
; XOP-NEXT: movl $249, %eax
683684
; XOP-NEXT: vmovd %eax, %xmm2
684685
; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1

llvm/test/CodeGen/X86/xop-shifts.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
55
; CHECK-LABEL: demandedelts_vpshab:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
87
; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
98
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
109
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
@@ -18,7 +17,6 @@ define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
1817
define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
1918
; CHECK-LABEL: demandedelts_vpshld:
2019
; CHECK: # %bb.0:
21-
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2220
; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
2321
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2422
; CHECK-NEXT: retq

0 commit comments

Comments
 (0)