Skip to content

Commit 95c6c11

Browse files
authored
[X86] combineConcatVectorOps - only always concat logic ops on AVX512 targets (#145036)
We should only concatenate logic ops if at least one operand will concatenate for free. The remaining regressions on AVX2 targets have now been addressed, but a number of regressions remain on AVX512 targets, which can aggressively use VPTERNLOG in many cases.
1 parent 20d57e7 commit 95c6c11

File tree

3 files changed

+28
-28
lines changed

3 files changed

+28
-28
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58885,7 +58885,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5888558885
case ISD::OR:
5888658886
case ISD::XOR:
5888758887
case X86ISD::ANDNP:
58888-
// TODO: AVX2+ targets should only use CombineSubOperand like AVX1.
58888+
// TODO: AVX512 targets should only use CombineSubOperand like AVX1/2.
5888958889
if (!IsSplat && (VT.is256BitVector() ||
5889058890
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
5889158891
// Don't concatenate root AVX1 NOT patterns.
@@ -58897,7 +58897,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5889758897
break;
5889858898
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
5889958899
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58900-
if (Concat0 || Concat1 || Subtarget.hasInt256())
58900+
if (Concat0 || Concat1 || Subtarget.useAVX512Regs())
5890158901
return DAG.getNode(Opcode, DL, VT,
5890258902
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
5890358903
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -739,15 +739,15 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
739739
; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm6 = [249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,249]
740740
; XOPAVX2-NEXT: vpaddb %xmm6, %xmm5, %xmm7
741741
; XOPAVX2-NEXT: vpshlb %xmm7, %xmm3, %xmm3
742+
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm7
743+
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm7, %xmm5
744+
; XOPAVX2-NEXT: vpor %xmm3, %xmm5, %xmm3
742745
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
743746
; XOPAVX2-NEXT: vpaddb %xmm6, %xmm2, %xmm4
744747
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
745-
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
746-
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
747-
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm3, %xmm3
748748
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
749+
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
749750
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
750-
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
751751
; XOPAVX2-NEXT: retq
752752
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
753753
ret <32 x i8> %res
@@ -1992,17 +1992,17 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
19921992
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
19931993
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
19941994
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
1995+
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
1996+
; XOPAVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
1997+
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm4, %xmm4
1998+
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm6 = [249,250,251,252,253,254,255,0,249,0,255,254,253,252,251,250]
1999+
; XOPAVX2-NEXT: vpshlb %xmm6, %xmm4, %xmm4
2000+
; XOPAVX2-NEXT: vpor %xmm4, %xmm2, %xmm2
19952001
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm0, %xmm0
2002+
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm1, %xmm1
2003+
; XOPAVX2-NEXT: vpshlb %xmm6, %xmm1, %xmm1
2004+
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
19962005
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1997-
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1998-
; XOPAVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
1999-
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
2000-
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [249,250,251,252,253,254,255,0,249,0,255,254,253,252,251,250]
2001-
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm2, %xmm2
2002-
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm1, %xmm1
2003-
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
2004-
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2005-
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
20062006
; XOPAVX2-NEXT: retq
20072007
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
20082008
ret <32 x i8> %res

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -766,18 +766,18 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
766766
; XOPAVX2-NEXT: vpsubb %xmm4, %xmm5, %xmm6
767767
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7
768768
; XOPAVX2-NEXT: vpshlb %xmm6, %xmm7, %xmm6
769+
; XOPAVX2-NEXT: vpxor %xmm3, %xmm4, %xmm4
770+
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm7
771+
; XOPAVX2-NEXT: vpaddb %xmm7, %xmm7, %xmm7
772+
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm7, %xmm4
773+
; XOPAVX2-NEXT: vpor %xmm6, %xmm4, %xmm4
769774
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm5, %xmm5
770775
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm1, %xmm1
771-
; XOPAVX2-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
772-
; XOPAVX2-NEXT: vpxor %xmm3, %xmm4, %xmm4
773-
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm5
774-
; XOPAVX2-NEXT: vpaddb %xmm5, %xmm5, %xmm5
775-
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm5, %xmm4
776776
; XOPAVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
777777
; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
778778
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
779+
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
779780
; XOPAVX2-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
780-
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
781781
; XOPAVX2-NEXT: retq
782782
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
783783
ret <32 x i8> %res
@@ -1793,16 +1793,16 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
17931793
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
17941794
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,255,254,253,252,251,250,249,0,249,250,251,252,253,254,255]
17951795
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
1796+
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
1797+
; XOPAVX2-NEXT: vpaddb %xmm4, %xmm4, %xmm4
1798+
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,7,0,1,2,3,4,5,6]
1799+
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm4, %xmm4
1800+
; XOPAVX2-NEXT: vpor %xmm2, %xmm4, %xmm2
17961801
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm1, %xmm1
1797-
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1798-
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1799-
; XOPAVX2-NEXT: vpaddb %xmm2, %xmm2, %xmm2
1800-
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,6,5,4,3,2,1,0,7,0,1,2,3,4,5,6]
1801-
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
18021802
; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
1803-
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm0, %xmm0
1803+
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm0, %xmm0
1804+
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
18041805
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1805-
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
18061806
; XOPAVX2-NEXT: retq
18071807
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
18081808
ret <32 x i8> %res

0 commit comments

Comments
 (0)