; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
-
- target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
- target triple = "x86_64-unknown-linux-gnu"
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+ ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86

declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
- define i1 @intrinsic_version(i8* align 1 %arg, i8* align 1 %arg1, i32 %arg2) {
- ; AVX-LABEL: intrinsic_version:
+
+ define i1 @intrinsic_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
+ ; SSE2-LABEL: intrinsic_v4i8:
+ ; SSE2: # %bb.0: # %bb
+ ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+ ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+ ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+ ; SSE2-NEXT: movmskps %xmm0, %eax
+ ; SSE2-NEXT: cmpb $15, %al
+ ; SSE2-NEXT: sete %al
+ ; SSE2-NEXT: retq
+ ;
+ ; SSE42-LABEL: intrinsic_v4i8:
+ ; SSE42: # %bb.0: # %bb
+ ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; SSE42-NEXT: psubd %xmm1, %xmm0
+ ; SSE42-NEXT: ptest %xmm0, %xmm0
+ ; SSE42-NEXT: sete %al
+ ; SSE42-NEXT: retq
+ ;
+ ; AVX-LABEL: intrinsic_v4i8:
; AVX: # %bb.0: # %bb
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
+ ;
+ ; X86-LABEL: intrinsic_v4i8:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; X86-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+ ; X86-NEXT: vptest %xmm0, %xmm0
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
bb:
  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
  %ptr2 = bitcast i8* %arg to <4 x i8>*
  ret i1 %all_eq
}

+ define i1 @intrinsic_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
+ ; SSE-LABEL: intrinsic_v8i8:
+ ; SSE: # %bb.0: # %bb
+ ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+ ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+ ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
+ ; SSE-NEXT: pmovmskb %xmm1, %eax
+ ; SSE-NEXT: cmpb $-1, %al
+ ; SSE-NEXT: sete %al
+ ; SSE-NEXT: retq
+ ;
+ ; AVX-LABEL: intrinsic_v8i8:
+ ; AVX: # %bb.0: # %bb
+ ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+ ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+ ; AVX-NEXT: vpmovmskb %xmm0, %eax
+ ; AVX-NEXT: cmpb $-1, %al
+ ; AVX-NEXT: sete %al
+ ; AVX-NEXT: retq
+ ;
+ ; X86-LABEL: intrinsic_v8i8:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+ ; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+ ; X86-NEXT: vpmovmskb %xmm0, %eax
+ ; X86-NEXT: cmpb $-1, %al
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
+ bb:
+   %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+   %ptr2 = bitcast i8* %arg to <8 x i8>*
+   %lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
+   %rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
+   %cmp = icmp eq <8 x i8> %lhs, %rhs
+   %all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
+   ret i1 %all_eq
+ }
+
define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
+ ; SSE2-LABEL: vector_version:
+ ; SSE2: # %bb.0: # %bb
+ ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+ ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+ ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+ ; SSE2-NEXT: pxor %xmm1, %xmm0
+ ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+ ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+ ; SSE2-NEXT: movmskps %xmm0, %eax
+ ; SSE2-NEXT: testl %eax, %eax
+ ; SSE2-NEXT: sete %al
+ ; SSE2-NEXT: retq
+ ;
+ ; SSE42-LABEL: vector_version:
+ ; SSE42: # %bb.0: # %bb
+ ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; SSE42-NEXT: psubd %xmm1, %xmm0
+ ; SSE42-NEXT: ptest %xmm0, %xmm0
+ ; SSE42-NEXT: sete %al
+ ; SSE42-NEXT: retq
+ ;
; AVX-LABEL: vector_version:
; AVX: # %bb.0: # %bb
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -35,6 +135,17 @@ define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
+ ;
+ ; X86-LABEL: vector_version:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; X86-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+ ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+ ; X86-NEXT: vptest %xmm0, %xmm0
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
bb:
  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
  %ptr2 = bitcast i8* %arg to <4 x i8>*
  ret i1 %all_eq
}

- define i1 @mixed_version(i8* align 1 %arg, i8* align 1 %arg1) {
- ; AVX-LABEL: mixed_version:
- ; AVX: # %bb.0: # %bb
- ; AVX-NEXT: movl (%rsi), %eax
- ; AVX-NEXT: cmpl (%rdi), %eax
- ; AVX-NEXT: sete %al
- ; AVX-NEXT: retq
+ define i1 @mixed_version_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
+ ; CHECK-LABEL: mixed_version_v4i8:
+ ; CHECK: # %bb.0: # %bb
+ ; CHECK-NEXT: movl (%rsi), %eax
+ ; CHECK-NEXT: cmpl (%rdi), %eax
+ ; CHECK-NEXT: sete %al
+ ; CHECK-NEXT: retq
+ ;
+ ; X86-LABEL: mixed_version_v4i8:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: movl (%ecx), %ecx
+ ; X86-NEXT: cmpl (%eax), %ecx
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
bb:
  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
  %ptr2 = bitcast i8* %arg to <4 x i8>*
  ret i1 %all_eq
}

+ define i1 @mixed_version_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
+ ; CHECK-LABEL: mixed_version_v8i8:
+ ; CHECK: # %bb.0: # %bb
+ ; CHECK-NEXT: movq (%rsi), %rax
+ ; CHECK-NEXT: cmpq (%rdi), %rax
+ ; CHECK-NEXT: sete %al
+ ; CHECK-NEXT: retq
+ ;
+ ; X86-LABEL: mixed_version_v8i8:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: movl (%ecx), %edx
+ ; X86-NEXT: movl 4(%ecx), %ecx
+ ; X86-NEXT: xorl 4(%eax), %ecx
+ ; X86-NEXT: xorl (%eax), %edx
+ ; X86-NEXT: orl %ecx, %edx
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
+ bb:
+   %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+   %ptr2 = bitcast i8* %arg to <8 x i8>*
+   %lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
+   %rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
+   %lhs_s = bitcast <8 x i8> %lhs to i64
+   %rhs_s = bitcast <8 x i8> %rhs to i64
+   %all_eq = icmp eq i64 %lhs_s, %rhs_s
+   ret i1 %all_eq
+ }
+
define i1 @scalar_version(i8* align 1 %arg, i8* align 1 %arg1) {
- ; AVX-LABEL: scalar_version:
- ; AVX: # %bb.0: # %bb
- ; AVX-NEXT: movl (%rsi), %eax
- ; AVX-NEXT: cmpl (%rdi), %eax
- ; AVX-NEXT: sete %al
- ; AVX-NEXT: retq
+ ; CHECK-LABEL: scalar_version:
+ ; CHECK: # %bb.0: # %bb
+ ; CHECK-NEXT: movl (%rsi), %eax
+ ; CHECK-NEXT: cmpl (%rdi), %eax
+ ; CHECK-NEXT: sete %al
+ ; CHECK-NEXT: retq
+ ;
+ ; X86-LABEL: scalar_version:
+ ; X86: # %bb.0: # %bb
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: movl (%ecx), %ecx
+ ; X86-NEXT: cmpl (%eax), %ecx
+ ; X86-NEXT: sete %al
+ ; X86-NEXT: retl
bb:
  %ptr1 = bitcast i8* %arg1 to i32*
  %ptr2 = bitcast i8* %arg to i32*