Skip to content

Commit 13362ab

Browse files
committed
[X86][XOP] Add tests for missing demanded elts handling for xop shifts
Noticed while investigating how to improve funnel shift codegen
1 parent 138fcc5 commit 13362ab

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

llvm/test/CodeGen/X86/xop-shifts.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s
3+
4+
; Demanded-elements test for the vpshab intrinsic, exercised through its
; VALUE operand (%a0).
;   * The input shuffle keeps element 0 of %a0 in lane 0 and fills lanes 1-15
;     with element 1.
;   * The result shuffle (all-zero mask) broadcasts only lane 0 of the shift
;     result, so lanes 1-15 of the shift are never observed.
; The expected assembly below is the current llc output and still contains the
; leading vpshufb on xmm0; per the commit title this records the missing
; demanded-elts handling rather than the desired final codegen.
; NOTE(review): this presumes vpshab shifts each byte lane independently, which
; would make the input shuffle removable - confirm against the XOP ISA docs.
define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
5+
; CHECK-LABEL: demandedelts_vpshab:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
8+
; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
9+
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
10+
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
11+
; CHECK-NEXT: retq
12+
%shuffle = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
13+
%shift = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %shuffle, <16 x i8> %a1)
14+
%res = shufflevector <16 x i8> %shift, <16 x i8> undef, <16 x i32> zeroinitializer
15+
ret <16 x i8> %res
16+
}
17+
18+
; Demanded-elements test for the vpshld intrinsic, exercised through its
; SHIFT-AMOUNT operand (%a1).
;   * The input shuffle (all-zero mask) broadcasts element 0 of %a1 to every
;     lane before it is used as the second intrinsic operand.
;   * The result shuffle (all-zero mask) broadcasts only lane 0 of the shift
;     result, so lanes 1-3 of the shift are never observed.
; The expected assembly below is the current llc output and still contains the
; leading vpshufd on xmm1; per the commit title this records the missing
; demanded-elts handling rather than the desired final codegen.
; NOTE(review): this presumes vpshld shifts each dword lane by the amount in
; the matching lane of the second operand, which would make the input splat
; removable - confirm against the XOP ISA docs.
define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
19+
; CHECK-LABEL: demandedelts_vpshld:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
22+
; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
23+
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
24+
; CHECK-NEXT: retq
25+
%shuffle = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> zeroinitializer
26+
%shift = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %shuffle)
27+
%result = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> zeroinitializer
28+
ret <4 x i32> %result
29+
}
30+
31+
; Declarations of the XOP shift intrinsics, in two families (vpsha* and vpshl*)
; covering <16 x i8>, <4 x i32>, <2 x i64> and <8 x i16> vectors. Every
; intrinsic takes two vectors of the same type and returns that type.
; Of these, only @llvm.x86.xop.vpshab and @llvm.x86.xop.vpshld are called by
; the tests visible in this file; the remaining six are declared but unused
; here.
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
32+
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
33+
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
34+
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
35+
36+
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
37+
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
38+
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
39+
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

0 commit comments

Comments
 (0)