Skip to content

Commit 8bc4dd7

Browse files
author
Simon Moll
committed
[VE] packed strided VST splitting test
1 parent 8734f0e commit 8bc4dd7

File tree

1 file changed

+232
-0
lines changed

1 file changed

+232
-0
lines changed
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu,+packed | FileCheck %s
3+
4+
; Declaration of the VP strided-store intrinsic for a <512 x float> value:
; float* base pointer, i64 stride, <512 x i1> mask and i32 explicit vector
; length.
declare void @llvm.experimental.vp.strided.store.v512f32.i64(<512 x float> %val, float* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
5+
6+
; Strided store of <512 x float> where %ptr, %stride, %mask and %evl are all
; function arguments forwarded unchanged to the VP strided-store intrinsic.
; The expected assembly contains two `vstu` stores, one under %vm2 and one
; under %vm3, separated by a `vshf`. Both use a stride register produced by
; `sll ..., 1`, and the base register is advanced by `adds.l` between them.
; NOTE(review): the second `vstu` runs under `lvl %s3` (derived from %s2 + 1)
; while the `vshf` runs under `lvl %s1` (derived from %s2) - confirm this is
; the intended vector length for the second store.
define fastcc void @vp_strided_store_v512f32_rrm(<512 x float> %val, float* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl) {
7+
; CHECK-LABEL: vp_strided_store_v512f32_rrm:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: adds.w.sx %s3, 1, %s2
10+
; CHECK-NEXT: and %s3, %s3, (32)0
11+
; CHECK-NEXT: srl %s3, %s3, 1
12+
; CHECK-NEXT: sll %s4, %s1, 1
13+
; CHECK-NEXT: lvl %s3
14+
; CHECK-NEXT: vstu %v0, %s4, %s0, %vm2
15+
; CHECK-NEXT: adds.l %s0, %s0, %s1
16+
; CHECK-NEXT: and %s1, %s2, (32)0
17+
; CHECK-NEXT: srl %s1, %s1, 1
18+
; CHECK-NEXT: lvl %s1
19+
; CHECK-NEXT: vshf %v0, %v0, %v0, 4
20+
; CHECK-NEXT: lvl %s3
21+
; CHECK-NEXT: vstu %v0, %s4, %s0, %vm3
22+
; CHECK-NEXT: b.l.t (, %s10)
23+
call void @llvm.experimental.vp.strided.store.v512f32.i64(<512 x float> %val, float* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
24+
ret void
25+
}
26+
27+
; Strided store of <512 x float> with %ptr, %stride and %evl taken from the
; function arguments and an all-true mask. The mask is built in the body by
; inserting `i1 1` into lane 0 and splatting it with a zeroinitializer
; shuffle.
; The expected assembly has the same shape as a two-part split (`vstu`,
; `vshf`, `vstu`), but neither `vstu` carries a mask operand.
; NOTE(review): the second `vstu` runs under `lvl %s3` (derived from %s2 + 1)
; while the `vshf` runs under `lvl %s1` (derived from %s2) - confirm this is
; the intended vector length for the second store.
define fastcc void @vp_strided_store_v512f32_rr(<512 x float> %val, float* %ptr, i64 %stride, i32 %evl) {
28+
; CHECK-LABEL: vp_strided_store_v512f32_rr:
29+
; CHECK: # %bb.0:
30+
; CHECK-NEXT: adds.w.sx %s3, 1, %s2
31+
; CHECK-NEXT: and %s3, %s3, (32)0
32+
; CHECK-NEXT: srl %s3, %s3, 1
33+
; CHECK-NEXT: sll %s4, %s1, 1
34+
; CHECK-NEXT: lvl %s3
35+
; CHECK-NEXT: vstu %v0, %s4, %s0
36+
; CHECK-NEXT: adds.l %s0, %s0, %s1
37+
; CHECK-NEXT: and %s1, %s2, (32)0
38+
; CHECK-NEXT: srl %s1, %s1, 1
39+
; CHECK-NEXT: lvl %s1
40+
; CHECK-NEXT: vshf %v0, %v0, %v0, 4
41+
; CHECK-NEXT: lvl %s3
42+
; CHECK-NEXT: vstu %v0, %s4, %s0
43+
; CHECK-NEXT: b.l.t (, %s10)
44+
%one = insertelement <512 x i1> undef, i1 1, i32 0
45+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
46+
call void @llvm.experimental.vp.strided.store.v512f32.i64(<512 x float> %val, float* %ptr, i64 %stride, <512 x i1> %allones, i32 %evl)
47+
ret void
48+
}
49+
50+
; Strided store of <512 x float> with a constant stride of 24 and an all-true
; mask (splat of `i1 1` built in the body); %ptr and %evl come from the
; function arguments.
; In the expected assembly both `vstu` stores use the immediate 48 as their
; stride operand, and the base is advanced with `lea %s0, 24(, %s0)` before
; the second store.
; NOTE(review): the second `vstu` runs under `lvl %s2` (derived from %s1 + 1)
; while the `vshf` runs under `lvl %s1` (derived from the original %s1) -
; confirm this is the intended vector length for the second store.
define fastcc void @vp_strided_store_v512f32_ri(<512 x float> %val, float* %ptr, i32 %evl) {
51+
; CHECK-LABEL: vp_strided_store_v512f32_ri:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: adds.w.sx %s2, 1, %s1
54+
; CHECK-NEXT: and %s2, %s2, (32)0
55+
; CHECK-NEXT: srl %s2, %s2, 1
56+
; CHECK-NEXT: lvl %s2
57+
; CHECK-NEXT: vstu %v0, 48, %s0
58+
; CHECK-NEXT: and %s1, %s1, (32)0
59+
; CHECK-NEXT: srl %s1, %s1, 1
60+
; CHECK-NEXT: lvl %s1
61+
; CHECK-NEXT: vshf %v0, %v0, %v0, 4
62+
; CHECK-NEXT: lea %s0, 24(, %s0)
63+
; CHECK-NEXT: lvl %s2
64+
; CHECK-NEXT: vstu %v0, 48, %s0
65+
; CHECK-NEXT: b.l.t (, %s10)
66+
%one = insertelement <512 x i1> undef, i1 1, i32 0
67+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
68+
call void @llvm.experimental.vp.strided.store.v512f32.i64(<512 x float> %val, float* %ptr, i64 24, <512 x i1> %allones, i32 %evl)
69+
ret void
70+
}
71+
72+
; Declaration of the VP strided-store intrinsic for a <512 x i32> value:
; i32* base pointer, i64 stride, <512 x i1> mask and i32 explicit vector
; length.
declare void @llvm.experimental.vp.strided.store.v512i32.i64(<512 x i32> %val, i32* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
73+
74+
; Strided store of <512 x i32> where %ptr, %stride, %mask and %evl are all
; function arguments forwarded unchanged to the VP strided-store intrinsic.
; The expected assembly contains two `vstl` stores under a single `lvl`: the
; first writes to %s3 (computed as %s0 + %s1) under %vm3, then a `vshf` is
; applied, and the second writes to %s0 under %vm2. Both use the stride
; register after it has been shifted left by one.
define fastcc void @vp_strided_store_v512i32_rrm(<512 x i32> %val, i32* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl) {
75+
; CHECK-LABEL: vp_strided_store_v512i32_rrm:
76+
; CHECK: # %bb.0:
77+
; CHECK-NEXT: adds.l %s3, %s0, %s1
78+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
79+
; CHECK-NEXT: and %s2, %s2, (32)0
80+
; CHECK-NEXT: srl %s2, %s2, 1
81+
; CHECK-NEXT: sll %s1, %s1, 1
82+
; CHECK-NEXT: lvl %s2
83+
; CHECK-NEXT: vstl %v0, %s1, %s3, %vm3
84+
; CHECK-NEXT: vshf %v0, %v0, %v0, 0
85+
; CHECK-NEXT: vstl %v0, %s1, %s0, %vm2
86+
; CHECK-NEXT: b.l.t (, %s10)
87+
call void @llvm.experimental.vp.strided.store.v512i32.i64(<512 x i32> %val, i32* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
88+
ret void
89+
}
90+
91+
; Strided store of <512 x i32> with %ptr, %stride and %evl taken from the
; function arguments and an all-true mask (splat of `i1 1` built in the body
; via insertelement + zeroinitializer shuffle).
; The expected assembly contains two unmasked `vstl` stores under a single
; `lvl`, with a `vshf` between them; the first targets %s3 (%s0 + %s1) and the
; second targets %s0.
define fastcc void @vp_strided_store_v512i32_rr(<512 x i32> %val, i32* %ptr, i64 %stride, i32 %evl) {
92+
; CHECK-LABEL: vp_strided_store_v512i32_rr:
93+
; CHECK: # %bb.0:
94+
; CHECK-NEXT: adds.l %s3, %s0, %s1
95+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
96+
; CHECK-NEXT: and %s2, %s2, (32)0
97+
; CHECK-NEXT: srl %s2, %s2, 1
98+
; CHECK-NEXT: sll %s1, %s1, 1
99+
; CHECK-NEXT: lvl %s2
100+
; CHECK-NEXT: vstl %v0, %s1, %s3
101+
; CHECK-NEXT: vshf %v0, %v0, %v0, 0
102+
; CHECK-NEXT: vstl %v0, %s1, %s0
103+
; CHECK-NEXT: b.l.t (, %s10)
104+
%one = insertelement <512 x i1> undef, i1 1, i32 0
105+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
106+
call void @llvm.experimental.vp.strided.store.v512i32.i64(<512 x i32> %val, i32* %ptr, i64 %stride, <512 x i1> %allones, i32 %evl)
107+
ret void
108+
}
109+
110+
; Strided store of <512 x i32> with a constant stride of 24 and an all-true
; mask (splat of `i1 1` built in the body); %ptr and %evl come from the
; function arguments.
; In the expected assembly both `vstl` stores use the immediate 48 as their
; stride operand; the first targets %s2 (formed by `lea %s2, 24(, %s0)`) and
; the second, after a `vshf`, targets %s0.
define fastcc void @vp_strided_store_v512i32_ri(<512 x i32> %val, i32* %ptr, i32 %evl) {
111+
; CHECK-LABEL: vp_strided_store_v512i32_ri:
112+
; CHECK: # %bb.0:
113+
; CHECK-NEXT: adds.w.sx %s1, 1, %s1
114+
; CHECK-NEXT: and %s1, %s1, (32)0
115+
; CHECK-NEXT: srl %s1, %s1, 1
116+
; CHECK-NEXT: lea %s2, 24(, %s0)
117+
; CHECK-NEXT: lvl %s1
118+
; CHECK-NEXT: vstl %v0, 48, %s2
119+
; CHECK-NEXT: vshf %v0, %v0, %v0, 0
120+
; CHECK-NEXT: vstl %v0, 48, %s0
121+
; CHECK-NEXT: b.l.t (, %s10)
122+
%one = insertelement <512 x i1> undef, i1 1, i32 0
123+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
124+
call void @llvm.experimental.vp.strided.store.v512i32.i64(<512 x i32> %val, i32* %ptr, i64 24, <512 x i1> %allones, i32 %evl)
125+
ret void
126+
}
127+
128+
; Declaration of the VP strided-store intrinsic for a <512 x double> value:
; double* base pointer, i64 stride, <512 x i1> mask and i32 explicit vector
; length.
declare void @llvm.experimental.vp.strided.store.v512f64.i64(<512 x double> %val, double* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
129+
130+
; Strided store of <512 x double> where %ptr, %stride, %mask and %evl are all
; function arguments forwarded unchanged to the VP strided-store intrinsic.
; The expected assembly contains two `vst` stores under a single `lvl`: %v0
; under %vm2, then %v1 under %vm3 after the base is advanced by `adds.l`.
; Both use a stride register produced by `sll ..., 1`; no `vshf` appears.
define fastcc void @vp_strided_store_v512f64_rrm(<512 x double> %val, double* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl) {
131+
; CHECK-LABEL: vp_strided_store_v512f64_rrm:
132+
; CHECK: # %bb.0:
133+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
134+
; CHECK-NEXT: and %s2, %s2, (32)0
135+
; CHECK-NEXT: srl %s2, %s2, 1
136+
; CHECK-NEXT: sll %s3, %s1, 1
137+
; CHECK-NEXT: lvl %s2
138+
; CHECK-NEXT: vst %v0, %s3, %s0, %vm2
139+
; CHECK-NEXT: adds.l %s0, %s0, %s1
140+
; CHECK-NEXT: vst %v1, %s3, %s0, %vm3
141+
; CHECK-NEXT: b.l.t (, %s10)
142+
call void @llvm.experimental.vp.strided.store.v512f64.i64(<512 x double> %val, double* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
143+
ret void
144+
}
145+
146+
; Strided store of <512 x double> with %ptr, %stride and %evl taken from the
; function arguments and an all-true mask (splat of `i1 1` built in the body
; via insertelement + zeroinitializer shuffle).
; The expected assembly contains two unmasked `vst` stores under a single
; `lvl`: %v0 first, then %v1 after the base is advanced by `adds.l`.
define fastcc void @vp_strided_store_v512f64_rr(<512 x double> %val, double* %ptr, i64 %stride, i32 %evl) {
147+
; CHECK-LABEL: vp_strided_store_v512f64_rr:
148+
; CHECK: # %bb.0:
149+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
150+
; CHECK-NEXT: and %s2, %s2, (32)0
151+
; CHECK-NEXT: srl %s2, %s2, 1
152+
; CHECK-NEXT: sll %s3, %s1, 1
153+
; CHECK-NEXT: lvl %s2
154+
; CHECK-NEXT: vst %v0, %s3, %s0
155+
; CHECK-NEXT: adds.l %s0, %s0, %s1
156+
; CHECK-NEXT: vst %v1, %s3, %s0
157+
; CHECK-NEXT: b.l.t (, %s10)
158+
%one = insertelement <512 x i1> undef, i1 1, i32 0
159+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
160+
call void @llvm.experimental.vp.strided.store.v512f64.i64(<512 x double> %val, double* %ptr, i64 %stride, <512 x i1> %allones, i32 %evl)
161+
ret void
162+
}
163+
164+
; Strided store of <512 x double> with a constant stride of 24 and an all-true
; mask (splat of `i1 1` built in the body); %ptr and %evl come from the
; function arguments.
; In the expected assembly both `vst` stores use the immediate 48 as their
; stride operand: %v0 is stored at %s0, then %v1 after the base is advanced
; with `lea %s0, 24(, %s0)`.
define fastcc void @vp_strided_store_v512f64_ri(<512 x double> %val, double* %ptr, i32 %evl) {
165+
; CHECK-LABEL: vp_strided_store_v512f64_ri:
166+
; CHECK: # %bb.0:
167+
; CHECK-NEXT: adds.w.sx %s1, 1, %s1
168+
; CHECK-NEXT: and %s1, %s1, (32)0
169+
; CHECK-NEXT: srl %s1, %s1, 1
170+
; CHECK-NEXT: lvl %s1
171+
; CHECK-NEXT: vst %v0, 48, %s0
172+
; CHECK-NEXT: lea %s0, 24(, %s0)
173+
; CHECK-NEXT: vst %v1, 48, %s0
174+
; CHECK-NEXT: b.l.t (, %s10)
175+
%one = insertelement <512 x i1> undef, i1 1, i32 0
176+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
177+
call void @llvm.experimental.vp.strided.store.v512f64.i64(<512 x double> %val, double* %ptr, i64 24, <512 x i1> %allones, i32 %evl)
178+
ret void
179+
}
180+
181+
; Declaration of the VP strided-store intrinsic for a <512 x i64> value:
; i64* base pointer, i64 stride, <512 x i1> mask and i32 explicit vector
; length.
declare void @llvm.experimental.vp.strided.store.v512i64.i64(<512 x i64> %val, i64* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
182+
183+
; Strided store of <512 x i64> where %ptr, %stride, %mask and %evl are all
; function arguments forwarded unchanged to the VP strided-store intrinsic.
; The expected assembly contains two `vst` stores under a single `lvl`: %v0
; under %vm2, then %v1 under %vm3 after the base is advanced by `adds.l`.
; Both use a stride register produced by `sll ..., 1`; no `vshf` appears.
define fastcc void @vp_strided_store_v512i64_rrm(<512 x i64> %val, i64* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl) {
184+
; CHECK-LABEL: vp_strided_store_v512i64_rrm:
185+
; CHECK: # %bb.0:
186+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
187+
; CHECK-NEXT: and %s2, %s2, (32)0
188+
; CHECK-NEXT: srl %s2, %s2, 1
189+
; CHECK-NEXT: sll %s3, %s1, 1
190+
; CHECK-NEXT: lvl %s2
191+
; CHECK-NEXT: vst %v0, %s3, %s0, %vm2
192+
; CHECK-NEXT: adds.l %s0, %s0, %s1
193+
; CHECK-NEXT: vst %v1, %s3, %s0, %vm3
194+
; CHECK-NEXT: b.l.t (, %s10)
195+
call void @llvm.experimental.vp.strided.store.v512i64.i64(<512 x i64> %val, i64* %ptr, i64 %stride, <512 x i1> %mask, i32 %evl)
196+
ret void
197+
}
198+
199+
; Strided store of <512 x i64> with %ptr, %stride and %evl taken from the
; function arguments and an all-true mask (splat of `i1 1` built in the body
; via insertelement + zeroinitializer shuffle).
; The expected assembly contains two unmasked `vst` stores under a single
; `lvl`: %v0 first, then %v1 after the base is advanced by `adds.l`.
define fastcc void @vp_strided_store_v512i64_rr(<512 x i64> %val, i64* %ptr, i64 %stride, i32 %evl) {
200+
; CHECK-LABEL: vp_strided_store_v512i64_rr:
201+
; CHECK: # %bb.0:
202+
; CHECK-NEXT: adds.w.sx %s2, 1, %s2
203+
; CHECK-NEXT: and %s2, %s2, (32)0
204+
; CHECK-NEXT: srl %s2, %s2, 1
205+
; CHECK-NEXT: sll %s3, %s1, 1
206+
; CHECK-NEXT: lvl %s2
207+
; CHECK-NEXT: vst %v0, %s3, %s0
208+
; CHECK-NEXT: adds.l %s0, %s0, %s1
209+
; CHECK-NEXT: vst %v1, %s3, %s0
210+
; CHECK-NEXT: b.l.t (, %s10)
211+
%one = insertelement <512 x i1> undef, i1 1, i32 0
212+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
213+
call void @llvm.experimental.vp.strided.store.v512i64.i64(<512 x i64> %val, i64* %ptr, i64 %stride, <512 x i1> %allones, i32 %evl)
214+
ret void
215+
}
216+
217+
; Strided store of <512 x i64> with a constant stride of 24 and an all-true
; mask (splat of `i1 1` built in the body); %ptr and %evl come from the
; function arguments.
; In the expected assembly both `vst` stores use the immediate 48 as their
; stride operand: %v0 is stored at %s0, then %v1 after the base is advanced
; with `lea %s0, 24(, %s0)`.
define fastcc void @vp_strided_store_v512i64_ri(<512 x i64> %val, i64* %ptr, i32 %evl) {
218+
; CHECK-LABEL: vp_strided_store_v512i64_ri:
219+
; CHECK: # %bb.0:
220+
; CHECK-NEXT: adds.w.sx %s1, 1, %s1
221+
; CHECK-NEXT: and %s1, %s1, (32)0
222+
; CHECK-NEXT: srl %s1, %s1, 1
223+
; CHECK-NEXT: lvl %s1
224+
; CHECK-NEXT: vst %v0, 48, %s0
225+
; CHECK-NEXT: lea %s0, 24(, %s0)
226+
; CHECK-NEXT: vst %v1, 48, %s0
227+
; CHECK-NEXT: b.l.t (, %s10)
228+
%one = insertelement <512 x i1> undef, i1 1, i32 0
229+
%allones = shufflevector <512 x i1> %one, <512 x i1> undef, <512 x i32> zeroinitializer
230+
call void @llvm.experimental.vp.strided.store.v512i64.i64(<512 x i64> %val, i64* %ptr, i64 24, <512 x i1> %allones, i32 %evl)
231+
ret void
232+
}

0 commit comments

Comments
 (0)