@@ -194,23 +194,16 @@ define <8 x float> @uitofp_i64_float(<8 x i64> %a) {
194
194
define <4 x double > @sitofp_v4i8_double (<4 x i8 > %a ) {
195
195
; CHECK-LABEL: sitofp_v4i8_double:
196
196
; CHECK: // %bb.0:
197
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
198
- ; CHECK-NEXT: umov w8, v0.h[0]
199
- ; CHECK-NEXT: umov w9, v0.h[2]
200
- ; CHECK-NEXT: umov w10, v0.h[1]
201
- ; CHECK-NEXT: fmov s1, w8
202
- ; CHECK-NEXT: umov w8, v0.h[3]
203
- ; CHECK-NEXT: fmov s0, w9
204
- ; CHECK-NEXT: mov v1.s[1], w10
205
- ; CHECK-NEXT: mov v0.s[1], w8
206
- ; CHECK-NEXT: shl v1.2s, v1.2s, #24
197
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
198
+ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
207
199
; CHECK-NEXT: shl v0.2s, v0.2s, #24
208
- ; CHECK-NEXT: sshr v1.2s, v1.2s, #24
209
200
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
201
+ ; CHECK-NEXT: shl v1.2s, v1.2s, #24
202
+ ; CHECK-NEXT: sshll v0.2d, v0.2s, #0
203
+ ; CHECK-NEXT: sshr v1.2s, v1.2s, #24
204
+ ; CHECK-NEXT: scvtf v0.2d, v0.2d
210
205
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
211
- ; CHECK-NEXT: sshll v2.2d, v0.2s, #0
212
- ; CHECK-NEXT: scvtf v0.2d, v1.2d
213
- ; CHECK-NEXT: scvtf v1.2d, v2.2d
206
+ ; CHECK-NEXT: scvtf v1.2d, v1.2d
214
207
; CHECK-NEXT: ret
215
208
%1 = sitofp <4 x i8 > %a to <4 x double >
216
209
ret <4 x double > %1
@@ -333,39 +326,26 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) {
333
326
define <8 x double > @sitofp_i16_double (<8 x i16 > %a ) {
334
327
; CHECK-LABEL: sitofp_i16_double:
335
328
; CHECK: // %bb.0:
336
- ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
337
- ; CHECK-NEXT: umov w8, v0.h[0]
338
- ; CHECK-NEXT: umov w9, v0.h[2]
339
- ; CHECK-NEXT: umov w11, v0.h[1]
340
- ; CHECK-NEXT: umov w10, v1.h[0]
341
- ; CHECK-NEXT: umov w12, v1.h[2]
342
- ; CHECK-NEXT: fmov s2, w8
343
- ; CHECK-NEXT: umov w8, v0.h[3]
344
- ; CHECK-NEXT: fmov s0, w9
345
- ; CHECK-NEXT: umov w9, v1.h[1]
346
- ; CHECK-NEXT: fmov s3, w10
347
- ; CHECK-NEXT: umov w10, v1.h[3]
348
- ; CHECK-NEXT: fmov s1, w12
349
- ; CHECK-NEXT: mov v0.s[1], w8
350
- ; CHECK-NEXT: mov v2.s[1], w11
351
- ; CHECK-NEXT: mov v3.s[1], w9
352
- ; CHECK-NEXT: mov v1.s[1], w10
353
- ; CHECK-NEXT: shl v0.2s, v0.2s, #16
354
- ; CHECK-NEXT: shl v2.2s, v2.2s, #16
355
- ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
356
- ; CHECK-NEXT: shl v3.2s, v3.2s, #16
357
- ; CHECK-NEXT: shl v1.2s, v1.2s, #16
329
+ ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
330
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
331
+ ; CHECK-NEXT: shl v2.2s, v1.2s, #16
332
+ ; CHECK-NEXT: shl v3.2s, v0.2s, #16
333
+ ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
358
334
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
359
- ; CHECK-NEXT: sshll v4.2d, v0.2s, #0
360
- ; CHECK-NEXT: sshr v0.2s, v3.2s, #16
361
- ; CHECK-NEXT: sshr v1.2s, v1.2s, #16
335
+ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
336
+ ; CHECK-NEXT: sshr v3.2s, v3.2s, #16
362
337
; CHECK-NEXT: sshll v2.2d, v2.2s, #0
363
- ; CHECK-NEXT: sshll v3.2d, v0.2s, #0
364
- ; CHECK-NEXT: sshll v5.2d, v1.2s, #0
365
- ; CHECK-NEXT: scvtf v0.2d, v2.2d
366
- ; CHECK-NEXT: scvtf v1.2d, v4.2d
367
- ; CHECK-NEXT: scvtf v2.2d, v3.2d
368
- ; CHECK-NEXT: scvtf v3.2d, v5.2d
338
+ ; CHECK-NEXT: shl v1.2s, v1.2s, #16
339
+ ; CHECK-NEXT: shl v0.2s, v0.2s, #16
340
+ ; CHECK-NEXT: scvtf v2.2d, v2.2d
341
+ ; CHECK-NEXT: sshr v1.2s, v1.2s, #16
342
+ ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
343
+ ; CHECK-NEXT: sshll v3.2d, v3.2s, #0
344
+ ; CHECK-NEXT: sshll v4.2d, v1.2s, #0
345
+ ; CHECK-NEXT: sshll v1.2d, v0.2s, #0
346
+ ; CHECK-NEXT: scvtf v0.2d, v3.2d
347
+ ; CHECK-NEXT: scvtf v1.2d, v1.2d
348
+ ; CHECK-NEXT: scvtf v3.2d, v4.2d
369
349
; CHECK-NEXT: ret
370
350
%1 = sitofp <8 x i16 > %a to <8 x double >
371
351
ret <8 x double > %1
@@ -402,22 +382,15 @@ define <8 x double> @sitofp_i64_double(<8 x i64> %a) {
402
382
define <4 x double > @uitofp_v4i8_double (<4 x i8 > %a ) {
403
383
; CHECK-LABEL: uitofp_v4i8_double:
404
384
; CHECK: // %bb.0:
405
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
406
- ; CHECK-NEXT: umov w8, v0.h[0]
407
- ; CHECK-NEXT: umov w9, v0.h[2]
408
- ; CHECK-NEXT: umov w10, v0.h[1]
385
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
409
386
; CHECK-NEXT: movi d1, #0x0000ff000000ff
410
- ; CHECK-NEXT: fmov s2, w8
411
- ; CHECK-NEXT: umov w8, v0.h[3]
412
- ; CHECK-NEXT: fmov s0, w9
413
- ; CHECK-NEXT: mov v2.s[1], w10
414
- ; CHECK-NEXT: mov v0.s[1], w8
415
- ; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
387
+ ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
416
388
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
417
- ; CHECK-NEXT: ushll v1.2d, v2.2s, #0
418
- ; CHECK-NEXT: ushll v2.2d, v0.2s, #0
419
- ; CHECK-NEXT: ucvtf v0.2d, v1.2d
420
- ; CHECK-NEXT: ucvtf v1.2d, v2.2d
389
+ ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
390
+ ; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
391
+ ; CHECK-NEXT: ushll v1.2d, v1.2s, #0
392
+ ; CHECK-NEXT: ucvtf v0.2d, v0.2d
393
+ ; CHECK-NEXT: ucvtf v1.2d, v1.2d
421
394
; CHECK-NEXT: ret
422
395
%1 = uitofp <4 x i8 > %a to <4 x double >
423
396
ret <4 x double > %1
@@ -530,36 +503,23 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) {
530
503
define <8 x double > @uitofp_i16_double (<8 x i16 > %a ) {
531
504
; CHECK-LABEL: uitofp_i16_double:
532
505
; CHECK: // %bb.0:
533
- ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
534
- ; CHECK-NEXT: umov w8, v0.h[0]
535
- ; CHECK-NEXT: umov w9, v0.h[2]
536
- ; CHECK-NEXT: umov w11, v0.h[1]
537
506
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
538
- ; CHECK-NEXT: umov w10, v2.h[0]
539
- ; CHECK-NEXT: umov w12, v2.h[2]
540
- ; CHECK-NEXT: fmov s3, w8
541
- ; CHECK-NEXT: umov w8, v0.h[3]
542
- ; CHECK-NEXT: fmov s0, w9
543
- ; CHECK-NEXT: umov w9, v2.h[1]
544
- ; CHECK-NEXT: fmov s4, w10
545
- ; CHECK-NEXT: umov w10, v2.h[3]
546
- ; CHECK-NEXT: fmov s2, w12
547
- ; CHECK-NEXT: mov v0.s[1], w8
548
- ; CHECK-NEXT: mov v3.s[1], w11
549
- ; CHECK-NEXT: mov v4.s[1], w9
550
- ; CHECK-NEXT: mov v2.s[1], w10
551
- ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
552
- ; CHECK-NEXT: and v3.8b, v3.8b, v1.8b
553
- ; CHECK-NEXT: ushll v5.2d, v0.2s, #0
554
- ; CHECK-NEXT: and v0.8b, v4.8b, v1.8b
555
- ; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
507
+ ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
508
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
509
+ ; CHECK-NEXT: and v3.8b, v2.8b, v1.8b
510
+ ; CHECK-NEXT: and v4.8b, v0.8b, v1.8b
511
+ ; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8
512
+ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
556
513
; CHECK-NEXT: ushll v3.2d, v3.2s, #0
557
- ; CHECK-NEXT: ushll v2.2d, v0.2s, #0
558
- ; CHECK-NEXT: ushll v4.2d, v1.2s, #0
559
- ; CHECK-NEXT: ucvtf v0.2d, v3.2d
560
- ; CHECK-NEXT: ucvtf v1.2d, v5.2d
561
- ; CHECK-NEXT: ucvtf v2.2d, v2.2d
562
- ; CHECK-NEXT: ucvtf v3.2d, v4.2d
514
+ ; CHECK-NEXT: ushll v4.2d, v4.2s, #0
515
+ ; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
516
+ ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
517
+ ; CHECK-NEXT: ushll v5.2d, v2.2s, #0
518
+ ; CHECK-NEXT: ucvtf v2.2d, v3.2d
519
+ ; CHECK-NEXT: ushll v1.2d, v0.2s, #0
520
+ ; CHECK-NEXT: ucvtf v0.2d, v4.2d
521
+ ; CHECK-NEXT: ucvtf v1.2d, v1.2d
522
+ ; CHECK-NEXT: ucvtf v3.2d, v5.2d
563
523
; CHECK-NEXT: ret
564
524
%1 = uitofp <8 x i16 > %a to <8 x double >
565
525
ret <8 x double > %1
0 commit comments