1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
2
; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-I8MM
3
3
; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
4
- ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING
4
+ ; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
5
+ ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
6
+ ; RUN: llc -mtriple=aarch64 -mattr=+sme -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME
5
7
6
8
define <vscale x 4 x i32 > @udot (<vscale x 4 x i32 > %acc , <vscale x 16 x i8 > %a , <vscale x 16 x i8 > %b ) {
7
9
; CHECK-LABEL: udot:
@@ -196,46 +198,31 @@ define <vscale x 4 x i64> @udot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
196
198
; CHECK-NEXT: add z1.d, z1.d, z3.d
197
199
; CHECK-NEXT: ret
198
200
;
199
- ; CHECK-NEWLOWERING-LABEL: udot_8to64:
200
- ; CHECK-NEWLOWERING: // %bb.0: // %entry
201
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b
202
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
203
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.h, z3.b
204
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z3.b
205
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
206
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
207
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h
208
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h
209
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
210
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
211
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
212
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
213
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
214
- ; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s
215
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s
216
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s
217
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s
218
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s
219
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
220
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s
221
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s
222
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
223
- ; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z4.s
224
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z5.s
225
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
226
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z2.s
227
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z3.s
228
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
229
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
230
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
231
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
232
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d
233
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d
234
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
235
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
236
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d
237
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d
238
- ; CHECK-NEWLOWERING-NEXT: ret
201
+ ; CHECK-NEWLOWERING-SVE-LABEL: udot_8to64:
202
+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
203
+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
204
+ ; CHECK-NEWLOWERING-SVE-NEXT: udot z4.s, z2.b, z3.b
205
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z4.s
206
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.d, z4.s
207
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z2.d, z3.d, z2.d
208
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
209
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
210
+ ;
211
+ ; CHECK-NEWLOWERING-SVE2-LABEL: udot_8to64:
212
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
213
+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
214
+ ; CHECK-NEWLOWERING-SVE2-NEXT: udot z4.s, z2.b, z3.b
215
+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z4.s
216
+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z4.s
217
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
218
+ ;
219
+ ; CHECK-NEWLOWERING-SME-LABEL: udot_8to64:
220
+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
221
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
222
+ ; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
223
+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
224
+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
225
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
239
226
entry:
240
227
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
241
228
%b.wide = zext <vscale x 16 x i8 > %b to <vscale x 16 x i64 >
@@ -256,46 +243,31 @@ define <vscale x 4 x i64> @sdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
256
243
; CHECK-NEXT: add z1.d, z1.d, z3.d
257
244
; CHECK-NEXT: ret
258
245
;
259
- ; CHECK-NEWLOWERING-LABEL: sdot_8to64:
260
- ; CHECK-NEWLOWERING: // %bb.0: // %entry
261
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b
262
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
263
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.h, z3.b
264
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z3.b
265
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
266
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
267
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h
268
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h
269
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
270
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
271
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
272
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
273
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
274
- ; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s
275
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s
276
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s
277
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s
278
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s
279
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
280
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s
281
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s
282
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
283
- ; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z4.s
284
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z5.s
285
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
286
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z2.s
287
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z3.s
288
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
289
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
290
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
291
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
292
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d
293
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d
294
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
295
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
296
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d
297
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d
298
- ; CHECK-NEWLOWERING-NEXT: ret
246
+ ; CHECK-NEWLOWERING-SVE-LABEL: sdot_8to64:
247
+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
248
+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
249
+ ; CHECK-NEWLOWERING-SVE-NEXT: sdot z4.s, z2.b, z3.b
250
+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z2.d, z4.s
251
+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z3.d, z4.s
252
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z2.d, z3.d, z2.d
253
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
254
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
255
+ ;
256
+ ; CHECK-NEWLOWERING-SVE2-LABEL: sdot_8to64:
257
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
258
+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
259
+ ; CHECK-NEWLOWERING-SVE2-NEXT: sdot z4.s, z2.b, z3.b
260
+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
261
+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
262
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
263
+ ;
264
+ ; CHECK-NEWLOWERING-SME-LABEL: sdot_8to64:
265
+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
266
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
267
+ ; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
268
+ ; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
269
+ ; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
270
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
299
271
entry:
300
272
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
301
273
%b.wide = sext <vscale x 16 x i8 > %b to <vscale x 16 x i64 >
0 commit comments