; Commit bcbad75 (parent 381767a)
; [AArch64][SVE] NFC: Add test file for predicate vector reductions.
;
; This adds some tests for vector reductions which can and should be
; implemented with ptest as opposed to a promoted ANDV/ORV reduction.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

; OR reduction of a fixed-length <16 x i1> compare result with a minimum
; 256-bit SVE register (vscale_range(2, 0)); currently lowered via a promoted
; ORV reduction (note the orv below) rather than a ptest.
define i1 @ptest_v16i1_256bit_min_sve(float* %a, float * %b) vscale_range(2, 0) {
; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #8
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmeq p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z2.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    eor z1.d, z2.d, z1.d
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    orv b0, p0, z1.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}
; As above, but with a minimum 512-bit SVE register (vscale_range(4, 0)) so the
; whole <16 x float> fits in one vector; still lowered via a promoted ORV
; reduction rather than a ptest.
define i1 @ptest_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    orv b0, p0, z0.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}
; As above, but with an exactly 512-bit SVE register (vscale_range(4, 4)), so
; an unpredicated ptrue covers the whole vector; still lowered via a promoted
; ORV reduction rather than a ptest.
define i1 @ptest_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    orv b0, p0, z0.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}
; OR reduction of the OR of two <16 x i1> compare results; still lowered via a
; promoted ORV reduction rather than a ptest.
define i1 @ptest_or_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmeq p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z2.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    eor z1.d, z2.d, z1.d
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    orv b0, p0, z0.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = bitcast float* %b to <16 x float>*
  %v4 = load <16 x float>, <16 x float>* %v3, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = or <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}
; Fixed-length OR reduction intrinsic used by the tests above.
declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)
;
; AND reduction.
;

; AND reduction of the AND of two <16 x i1> compare results with an exactly
; 512-bit SVE register (vscale_range(4, 4)); currently lowered via a promoted
; ANDV reduction (note the andv below) rather than a ptest.
define i1 @ptest_and_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmeq p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    andv b0, p0, z0.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = bitcast float* %b to <16 x float>*
  %v4 = load <16 x float>, <16 x float>* %v3, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = and <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}
; As above, but with a minimum 512-bit SVE register (vscale_range(4, 0)); still
; lowered via a promoted ANDV reduction rather than a ptest.
define i1 @ptest_and_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmeq p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    andv b0, p0, z0.b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v0 = bitcast float* %a to <16 x float>*
  %v1 = load <16 x float>, <16 x float>* %v0, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = bitcast float* %b to <16 x float>*
  %v4 = load <16 x float>, <16 x float>* %v3, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = and <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}
; Fixed-length AND reduction intrinsic used by the tests above.
declare i1 @llvm.vector.reduce.and.i1.v16i1(<16 x i1>)

0 commit comments

Comments
 (0)