
Commit a7dcedc

[RISCV] Add initial batch of test coverage for zvqdotq codegen
This is not complete coverage, but it's a starting point for working on codegen for this extension.
1 parent 7e86afa commit a7dcedc
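
The CHECK lines in the new test are autogenerated (see the NOTE line in the file). As a usage sketch for refreshing them after a codegen change, assuming an LLVM checkout with llc built under build/bin; the test's path below is a placeholder, since this diff does not show the file name:

    # Rerun every RUN line and rewrite the CHECK blocks in place.
    # The test path is an assumption, not shown in this diff.
    $ python3 llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/RISCV/rvv/zvqdotq.ll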

File tree

1 file changed: +211 -0 lines changed
@@ -0,0 +1,211 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvqdotq -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvqdotq -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @vqdot_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdot_vv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.sext = sext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdot_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vqdot_vx_constant:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    li a0, 23
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, splat (i32 23)
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdot_vx_constant_swapped(<16 x i8> %a) {
+; CHECK-LABEL: vqdot_vx_constant_swapped:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    li a0, 23
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> splat (i32 23), %a.sext
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdotu_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotu_vv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vwredsumu.vs v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.zext = zext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdotu_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vqdotu_vx_constant:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    li a0, 123
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.zext = zext <16 x i8> %a to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.zext, splat (i32 123)
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdotsu_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotsu_vv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vqdotsu_vv_swapped(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotsu_vv_swapped:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %b.zext, %a.sext
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vdotqsu_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vdotqsu_vx_constant:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    li a0, 123
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, splat (i32 123)
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @vdotqus_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vdotqus_vx_constant:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    li a0, -23
+; CHECK-NEXT:    vmv.v.x v14, a0
+; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.zext = zext <16 x i8> %a to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.zext, splat (i32 -23)
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  ret i32 %res
+}
+
+define i32 @reduce_of_sext(<16 x i8> %a) {
+; CHECK-LABEL: reduce_of_sext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vredsum.vs v8, v12, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.ext = sext <16 x i8> %a to <16 x i32>
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
+  ret i32 %res
+}
+
+define i32 @reduce_of_zext(<16 x i8> %a) {
+; CHECK-LABEL: reduce_of_zext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vredsum.vs v8, v12, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.ext = zext <16 x i8> %a to <16 x i32>
+  %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
+  ret i32 %res
+}
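
For context, zvqdotq defines 4-way dot-product instructions (vqdot.vv/.vx, vqdotu.vv/.vx, vqdotsu.vv/.vx, vqdotus.vx) that multiply groups of four 8-bit elements and accumulate the sums into 32-bit elements. A speculative sketch of how the vqdot_vv pattern above might lower once codegen support lands; this is an assumption, not output from this commit:

    # Hypothetical zvqdotq lowering for vqdot_vv (assumed, not generated here).
    # The 16 x i8 operands are treated as 4 x i32 groups of four bytes each.
    vsetivli   zero, 4, e32, m1, ta, ma
    vmv.v.i    v10, 0               # zero the accumulator
    vqdot.vv   v10, v8, v9          # v10[i] += dot(a[4i..4i+3], b[4i..4i+3])
    vmv.s.x    v8, zero
    vredsum.vs v8, v10, v8          # sum the four partial dot products
    vmv.x.s    a0, v8
    ret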
