Skip to content

Commit f889d75

Browse files
RKSimon authored and IanWood1 committed
[X86] vector-shuffle-combining-ssse3.ll - add tests showing the failure to merge logical shifts with non-uniform shift amounts into shuffles
1 parent 056d794 commit f889d75

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,75 @@ define <16 x i8> @combine_and_pshufb_or_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
763763
ret <16 x i8> %4
764764
}
765765

766+
; combine_lshr_pshufb: logical right shift of a <4 x i32> by per-element
; (non-uniform) constant amounts, reinterpreted as <16 x i8> and then passed
; through a byte shuffle. The check lines below record the expected llc output
; for each FileCheck prefix:
;  - the SSE and AVX1 prefixes expect one pshufb/vpshufb whose mask contains
;    "zero" entries, with no separate shift instruction;
;  - the AVX2 and AVX512F prefixes expect a vpsrlvd with a constant-pool
;    operand followed by a separate vpshufb, i.e. the shift is not merged into
;    the shuffle for those prefixes.
define <16 x i8> @combine_lshr_pshufb(<4 x i32> %a0) {
767+
; SSE-LABEL: combine_lshr_pshufb:
768+
; SSE: # %bb.0:
769+
; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
770+
; SSE-NEXT: retq
771+
;
772+
; AVX1-LABEL: combine_lshr_pshufb:
773+
; AVX1: # %bb.0:
774+
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
775+
; AVX1-NEXT: retq
776+
;
777+
; AVX2-LABEL: combine_lshr_pshufb:
778+
; AVX2: # %bb.0:
779+
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
780+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
781+
; AVX2-NEXT: retq
782+
;
783+
; AVX512F-LABEL: combine_lshr_pshufb:
784+
; AVX512F: # %bb.0:
785+
; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
786+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
787+
; AVX512F-NEXT: retq
788+
; Per-element shift amounts are 24, 0, 8 and 16 bits (all multiples of 8).
%shr = lshr <4 x i32> %a0, <i32 24, i32 0, i32 8, i32 16>
789+
; Reinterpret the four shifted i32 elements as sixteen bytes.
%bc = bitcast <4 x i32> %shr to <16 x i8>
790+
; Byte mask: the first three 4-byte groups are rotated (1,2,3,0 / 5,6,7,4 /
; 9,10,11,8); the last group (12..15) is kept in order.
%shuffle = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
791+
ret <16 x i8> %shuffle
792+
}
793+
794+
; combine_shl_pshufb: left shift of a <4 x i32> by per-element (non-uniform)
; constant amounts, reinterpreted as <16 x i8> and then passed through a byte
; shuffle. For every FileCheck prefix below, the expected llc output keeps the
; shift as separate instruction(s) ahead of the final pshufb/vpshufb:
;  - SSSE3 prefix: two pmuludq with constant-pool operands, recombined with
;    pshufd/punpckldq;
;  - SSE41 and AVX1 prefixes: one pmulld/vpmulld with a constant-pool operand;
;  - AVX2 and AVX512F prefixes: one vpsllvd with a constant-pool operand.
define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) {
795+
; SSSE3-LABEL: combine_shl_pshufb:
796+
; SSSE3: # %bb.0:
797+
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
798+
; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
799+
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
800+
; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
801+
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
802+
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
803+
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
804+
; SSSE3-NEXT: retq
805+
;
806+
; SSE41-LABEL: combine_shl_pshufb:
807+
; SSE41: # %bb.0:
808+
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
809+
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
810+
; SSE41-NEXT: retq
811+
;
812+
; AVX1-LABEL: combine_shl_pshufb:
813+
; AVX1: # %bb.0:
814+
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
815+
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
816+
; AVX1-NEXT: retq
817+
;
818+
; AVX2-LABEL: combine_shl_pshufb:
819+
; AVX2: # %bb.0:
820+
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
821+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
822+
; AVX2-NEXT: retq
823+
;
824+
; AVX512F-LABEL: combine_shl_pshufb:
825+
; AVX512F: # %bb.0:
826+
; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
827+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
828+
; AVX512F-NEXT: retq
828+
; Per-element shift amounts are 0, 8, 16 and 16 bits (all multiples of 8).
; NOTE(review): this value is named %shr although the operation is shl; the
; name looks carried over from the lshr test - consider renaming to %shl.
%shr = shl <4 x i32> %a0, <i32 0, i32 8, i32 16, i32 16>
830+
; Reinterpret the four shifted i32 elements as sixteen bytes.
%bc = bitcast <4 x i32> %shr to <16 x i8>
831+
; Byte mask: the first three 4-byte groups are rotated (1,2,3,0 / 5,6,7,4 /
; 9,10,11,8); the last group (12..15) is kept in order.
%shuffle = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
832+
ret <16 x i8> %shuffle
833+
}
834+
766835
define <16 x i8> @constant_fold_pshufb() {
767836
; SSE-LABEL: constant_fold_pshufb:
768837
; SSE: # %bb.0:

0 commit comments

Comments
 (0)