Skip to content

Commit ff64327

Browse files
committed
[X86] Extend PR53419 test coverage
Test on SSE2/SSE41/AVX1 targets to compare PMOVMSK vs PTEST codegen paths. Add a v8i8 reduction case, and test on X64 and X86 targets to check 32-bit handling.
1 parent 5157f98 commit ff64327

File tree

1 file changed

+178
-19
lines changed

1 file changed

+178
-19
lines changed

llvm/test/CodeGen/X86/pr53419.ll

Lines changed: 178 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,56 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
3-
4-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5-
target triple = "x86_64-unknown-linux-gnu"
2+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
4+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
5+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
6+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86
67

78
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
9+
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
810

911
; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
10-
define i1 @intrinsic_version(i8* align 1 %arg, i8* align 1 %arg1, i32 %arg2) {
11-
; AVX-LABEL: intrinsic_version:
12+
13+
define i1 @intrinsic_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
14+
; SSE2-LABEL: intrinsic_v4i8:
15+
; SSE2: # %bb.0: # %bb
16+
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
17+
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
18+
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
19+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
20+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
21+
; SSE2-NEXT: movmskps %xmm0, %eax
22+
; SSE2-NEXT: cmpb $15, %al
23+
; SSE2-NEXT: sete %al
24+
; SSE2-NEXT: retq
25+
;
26+
; SSE42-LABEL: intrinsic_v4i8:
27+
; SSE42: # %bb.0: # %bb
28+
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
29+
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
30+
; SSE42-NEXT: psubd %xmm1, %xmm0
31+
; SSE42-NEXT: ptest %xmm0, %xmm0
32+
; SSE42-NEXT: sete %al
33+
; SSE42-NEXT: retq
34+
;
35+
; AVX-LABEL: intrinsic_v4i8:
1236
; AVX: # %bb.0: # %bb
1337
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1438
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1539
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1640
; AVX-NEXT: vptest %xmm0, %xmm0
1741
; AVX-NEXT: sete %al
1842
; AVX-NEXT: retq
43+
;
44+
; X86-LABEL: intrinsic_v4i8:
45+
; X86: # %bb.0: # %bb
46+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
47+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
48+
; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
49+
; X86-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
50+
; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0
51+
; X86-NEXT: vptest %xmm0, %xmm0
52+
; X86-NEXT: sete %al
53+
; X86-NEXT: retl
1954
bb:
2055
%ptr1 = bitcast i8* %arg1 to <4 x i8>*
2156
%ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -26,7 +61,72 @@ bb:
2661
ret i1 %all_eq
2762
}
2863

64+
define i1 @intrinsic_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
65+
; SSE-LABEL: intrinsic_v8i8:
66+
; SSE: # %bb.0: # %bb
67+
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
68+
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
69+
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
70+
; SSE-NEXT: pmovmskb %xmm1, %eax
71+
; SSE-NEXT: cmpb $-1, %al
72+
; SSE-NEXT: sete %al
73+
; SSE-NEXT: retq
74+
;
75+
; AVX-LABEL: intrinsic_v8i8:
76+
; AVX: # %bb.0: # %bb
77+
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
78+
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
79+
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
80+
; AVX-NEXT: vpmovmskb %xmm0, %eax
81+
; AVX-NEXT: cmpb $-1, %al
82+
; AVX-NEXT: sete %al
83+
; AVX-NEXT: retq
84+
;
85+
; X86-LABEL: intrinsic_v8i8:
86+
; X86: # %bb.0: # %bb
87+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
88+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
89+
; X86-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
90+
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
91+
; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
92+
; X86-NEXT: vpmovmskb %xmm0, %eax
93+
; X86-NEXT: cmpb $-1, %al
94+
; X86-NEXT: sete %al
95+
; X86-NEXT: retl
96+
bb:
97+
%ptr1 = bitcast i8* %arg1 to <8 x i8>*
98+
%ptr2 = bitcast i8* %arg to <8 x i8>*
99+
%lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
100+
%rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
101+
%cmp = icmp eq <8 x i8> %lhs, %rhs
102+
%all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
103+
ret i1 %all_eq
104+
}
105+
29106
define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
107+
; SSE2-LABEL: vector_version:
108+
; SSE2: # %bb.0: # %bb
109+
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
110+
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
111+
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
112+
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
113+
; SSE2-NEXT: pxor %xmm1, %xmm0
114+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
115+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
116+
; SSE2-NEXT: movmskps %xmm0, %eax
117+
; SSE2-NEXT: testl %eax, %eax
118+
; SSE2-NEXT: sete %al
119+
; SSE2-NEXT: retq
120+
;
121+
; SSE42-LABEL: vector_version:
122+
; SSE42: # %bb.0: # %bb
123+
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
124+
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
125+
; SSE42-NEXT: psubd %xmm1, %xmm0
126+
; SSE42-NEXT: ptest %xmm0, %xmm0
127+
; SSE42-NEXT: sete %al
128+
; SSE42-NEXT: retq
129+
;
30130
; AVX-LABEL: vector_version:
31131
; AVX: # %bb.0: # %bb
32132
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -35,6 +135,17 @@ define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
35135
; AVX-NEXT: vptest %xmm0, %xmm0
36136
; AVX-NEXT: sete %al
37137
; AVX-NEXT: retq
138+
;
139+
; X86-LABEL: vector_version:
140+
; X86: # %bb.0: # %bb
141+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
142+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
143+
; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
144+
; X86-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
145+
; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0
146+
; X86-NEXT: vptest %xmm0, %xmm0
147+
; X86-NEXT: sete %al
148+
; X86-NEXT: retl
38149
bb:
39150
%ptr1 = bitcast i8* %arg1 to <4 x i8>*
40151
%ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -46,13 +157,22 @@ bb:
46157
ret i1 %all_eq
47158
}
48159

49-
define i1 @mixed_version(i8* align 1 %arg, i8* align 1 %arg1) {
50-
; AVX-LABEL: mixed_version:
51-
; AVX: # %bb.0: # %bb
52-
; AVX-NEXT: movl (%rsi), %eax
53-
; AVX-NEXT: cmpl (%rdi), %eax
54-
; AVX-NEXT: sete %al
55-
; AVX-NEXT: retq
160+
define i1 @mixed_version_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
161+
; CHECK-LABEL: mixed_version_v4i8:
162+
; CHECK: # %bb.0: # %bb
163+
; CHECK-NEXT: movl (%rsi), %eax
164+
; CHECK-NEXT: cmpl (%rdi), %eax
165+
; CHECK-NEXT: sete %al
166+
; CHECK-NEXT: retq
167+
;
168+
; X86-LABEL: mixed_version_v4i8:
169+
; X86: # %bb.0: # %bb
170+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
171+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
172+
; X86-NEXT: movl (%ecx), %ecx
173+
; X86-NEXT: cmpl (%eax), %ecx
174+
; X86-NEXT: sete %al
175+
; X86-NEXT: retl
56176
bb:
57177
%ptr1 = bitcast i8* %arg1 to <4 x i8>*
58178
%ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -64,13 +184,52 @@ bb:
64184
ret i1 %all_eq
65185
}
66186

187+
define i1 @mixed_version_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
188+
; CHECK-LABEL: mixed_version_v8i8:
189+
; CHECK: # %bb.0: # %bb
190+
; CHECK-NEXT: movq (%rsi), %rax
191+
; CHECK-NEXT: cmpq (%rdi), %rax
192+
; CHECK-NEXT: sete %al
193+
; CHECK-NEXT: retq
194+
;
195+
; X86-LABEL: mixed_version_v8i8:
196+
; X86: # %bb.0: # %bb
197+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
198+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
199+
; X86-NEXT: movl (%ecx), %edx
200+
; X86-NEXT: movl 4(%ecx), %ecx
201+
; X86-NEXT: xorl 4(%eax), %ecx
202+
; X86-NEXT: xorl (%eax), %edx
203+
; X86-NEXT: orl %ecx, %edx
204+
; X86-NEXT: sete %al
205+
; X86-NEXT: retl
206+
bb:
207+
%ptr1 = bitcast i8* %arg1 to <8 x i8>*
208+
%ptr2 = bitcast i8* %arg to <8 x i8>*
209+
%lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
210+
%rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
211+
%lhs_s = bitcast <8 x i8> %lhs to i64
212+
%rhs_s = bitcast <8 x i8> %rhs to i64
213+
%all_eq = icmp eq i64 %lhs_s, %rhs_s
214+
ret i1 %all_eq
215+
}
216+
67217
define i1 @scalar_version(i8* align 1 %arg, i8* align 1 %arg1) {
68-
; AVX-LABEL: scalar_version:
69-
; AVX: # %bb.0: # %bb
70-
; AVX-NEXT: movl (%rsi), %eax
71-
; AVX-NEXT: cmpl (%rdi), %eax
72-
; AVX-NEXT: sete %al
73-
; AVX-NEXT: retq
218+
; CHECK-LABEL: scalar_version:
219+
; CHECK: # %bb.0: # %bb
220+
; CHECK-NEXT: movl (%rsi), %eax
221+
; CHECK-NEXT: cmpl (%rdi), %eax
222+
; CHECK-NEXT: sete %al
223+
; CHECK-NEXT: retq
224+
;
225+
; X86-LABEL: scalar_version:
226+
; X86: # %bb.0: # %bb
227+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
228+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
229+
; X86-NEXT: movl (%ecx), %ecx
230+
; X86-NEXT: cmpl (%eax), %ecx
231+
; X86-NEXT: sete %al
232+
; X86-NEXT: retl
74233
bb:
75234
%ptr1 = bitcast i8* %arg1 to i32*
76235
%ptr2 = bitcast i8* %arg to i32*

0 commit comments

Comments (0)