1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm | FileCheck %s
3
- ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a | FileCheck %s
2
+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3
+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a | FileCheck %s --check-prefixes=CHECK,CHECK-SD
4
+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5
+
6
+ ; CHECK-GI: warning: Instruction selection used fallback path for test_sqrdmlah_extracted_lane_s32
7
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlahq_extracted_lane_s32
8
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_extracted_lane_s32
9
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlshq_extracted_lane_s32
10
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlah_i32
11
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_i32
12
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlah_extract_i32
13
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_extract_i32
14
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_s32
15
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_lane_s32
16
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_laneq_s32
17
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_s32
18
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_lane_s32
19
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_laneq_s32
4
20
5
21
declare <4 x i16 > @llvm.aarch64.neon.sqrdmulh.v4i16 (<4 x i16 >, <4 x i16 >)
6
22
declare <8 x i16 > @llvm.aarch64.neon.sqrdmulh.v8i16 (<8 x i16 >, <8 x i16 >)
@@ -404,15 +420,25 @@ define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
404
420
}
405
421
406
422
define i32 @test_sqrdmlah_v1i32 (i32 %acc , i32 %x , i32 %y ) {
407
- ; CHECK-LABEL: test_sqrdmlah_v1i32:
408
- ; CHECK: // %bb.0:
409
- ; CHECK-NEXT: fmov s0, w1
410
- ; CHECK-NEXT: fmov s1, w2
411
- ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
412
- ; CHECK-NEXT: fmov s1, w0
413
- ; CHECK-NEXT: sqadd v0.4s, v1.4s, v0.4s
414
- ; CHECK-NEXT: fmov w0, s0
415
- ; CHECK-NEXT: ret
423
+ ; CHECK-SD-LABEL: test_sqrdmlah_v1i32:
424
+ ; CHECK-SD: // %bb.0:
425
+ ; CHECK-SD-NEXT: fmov s0, w1
426
+ ; CHECK-SD-NEXT: fmov s1, w2
427
+ ; CHECK-SD-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
428
+ ; CHECK-SD-NEXT: fmov s1, w0
429
+ ; CHECK-SD-NEXT: sqadd v0.4s, v1.4s, v0.4s
430
+ ; CHECK-SD-NEXT: fmov w0, s0
431
+ ; CHECK-SD-NEXT: ret
432
+ ;
433
+ ; CHECK-GI-LABEL: test_sqrdmlah_v1i32:
434
+ ; CHECK-GI: // %bb.0:
435
+ ; CHECK-GI-NEXT: mov v0.s[0], w1
436
+ ; CHECK-GI-NEXT: mov v1.s[0], w2
437
+ ; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
438
+ ; CHECK-GI-NEXT: mov v1.s[0], w0
439
+ ; CHECK-GI-NEXT: sqadd v0.4s, v1.4s, v0.4s
440
+ ; CHECK-GI-NEXT: fmov w0, s0
441
+ ; CHECK-GI-NEXT: ret
416
442
%x_vec = insertelement <4 x i32 > undef , i32 %x , i64 0
417
443
%y_vec = insertelement <4 x i32 > undef , i32 %y , i64 0
418
444
%prod_vec = call <4 x i32 > @llvm.aarch64.neon.sqrdmulh.v4i32 (<4 x i32 > %x_vec , <4 x i32 > %y_vec )
@@ -443,15 +469,25 @@ define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
443
469
}
444
470
445
471
define i32 @test_sqrdmlsh_v1i32 (i32 %acc , i32 %x , i32 %y ) {
446
- ; CHECK-LABEL: test_sqrdmlsh_v1i32:
447
- ; CHECK: // %bb.0:
448
- ; CHECK-NEXT: fmov s0, w1
449
- ; CHECK-NEXT: fmov s1, w2
450
- ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
451
- ; CHECK-NEXT: fmov s1, w0
452
- ; CHECK-NEXT: sqsub v0.4s, v1.4s, v0.4s
453
- ; CHECK-NEXT: fmov w0, s0
454
- ; CHECK-NEXT: ret
472
+ ; CHECK-SD-LABEL: test_sqrdmlsh_v1i32:
473
+ ; CHECK-SD: // %bb.0:
474
+ ; CHECK-SD-NEXT: fmov s0, w1
475
+ ; CHECK-SD-NEXT: fmov s1, w2
476
+ ; CHECK-SD-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
477
+ ; CHECK-SD-NEXT: fmov s1, w0
478
+ ; CHECK-SD-NEXT: sqsub v0.4s, v1.4s, v0.4s
479
+ ; CHECK-SD-NEXT: fmov w0, s0
480
+ ; CHECK-SD-NEXT: ret
481
+ ;
482
+ ; CHECK-GI-LABEL: test_sqrdmlsh_v1i32:
483
+ ; CHECK-GI: // %bb.0:
484
+ ; CHECK-GI-NEXT: mov v0.s[0], w1
485
+ ; CHECK-GI-NEXT: mov v1.s[0], w2
486
+ ; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
487
+ ; CHECK-GI-NEXT: mov v1.s[0], w0
488
+ ; CHECK-GI-NEXT: sqsub v0.4s, v1.4s, v0.4s
489
+ ; CHECK-GI-NEXT: fmov w0, s0
490
+ ; CHECK-GI-NEXT: ret
455
491
%x_vec = insertelement <4 x i32 > undef , i32 %x , i64 0
456
492
%y_vec = insertelement <4 x i32 > undef , i32 %y , i64 0
457
493
%prod_vec = call <4 x i32 > @llvm.aarch64.neon.sqrdmulh.v4i32 (<4 x i32 > %x_vec , <4 x i32 > %y_vec )
@@ -568,21 +604,33 @@ define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
568
604
; Using sqrdmlah intrinsics
569
605
570
606
; Splats element 7 of the <8 x i16> argument %v into a <4 x i16> vector and
; passes it as the third operand of the v4i16 sqrdmlah intrinsic.
; Per the assertions below, the SelectionDAG runs fold the splat into the
; by-element operand (v2.h[7]), while the GlobalISel run materializes the
; splat with a separate dup and then uses the plain vector form.
define <4 x i16 > @test_vqrdmlah_laneq_s16 (<4 x i16 > %a , <4 x i16 > %b , <8 x i16 > %v ) {
571
- ; CHECK-LABEL: test_vqrdmlah_laneq_s16:
572
- ; CHECK: // %bb.0: // %entry
573
- ; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
574
- ; CHECK-NEXT: ret
607
+ ; CHECK-SD-LABEL: test_vqrdmlah_laneq_s16:
608
+ ; CHECK-SD: // %bb.0: // %entry
609
+ ; CHECK-SD-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
610
+ ; CHECK-SD-NEXT: ret
611
+ ;
612
+ ; CHECK-GI-LABEL: test_vqrdmlah_laneq_s16:
613
+ ; CHECK-GI: // %bb.0: // %entry
614
+ ; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
615
+ ; CHECK-GI-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h
616
+ ; CHECK-GI-NEXT: ret
575
617
entry:
576
618
%lane = shufflevector <8 x i16 > %v , <8 x i16 > poison, <4 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 >
577
619
%vqrdmlah_v3.i = tail call <4 x i16 > @llvm.aarch64.neon.sqrdmlah.v4i16 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %lane ) #4
578
620
ret <4 x i16 > %vqrdmlah_v3.i
579
621
}
580
622
581
623
define <2 x i32 > @test_vqrdmlah_laneq_s32 (<2 x i32 > %a , <2 x i32 > %b , <4 x i32 > %v ) {
582
- ; CHECK-LABEL: test_vqrdmlah_laneq_s32:
583
- ; CHECK: // %bb.0: // %entry
584
- ; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
585
- ; CHECK-NEXT: ret
624
+ ; CHECK-SD-LABEL: test_vqrdmlah_laneq_s32:
625
+ ; CHECK-SD: // %bb.0: // %entry
626
+ ; CHECK-SD-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
627
+ ; CHECK-SD-NEXT: ret
628
+ ;
629
+ ; CHECK-GI-LABEL: test_vqrdmlah_laneq_s32:
630
+ ; CHECK-GI: // %bb.0: // %entry
631
+ ; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
632
+ ; CHECK-GI-NEXT: sqrdmlah v0.2s, v1.2s, v2.2s
633
+ ; CHECK-GI-NEXT: ret
586
634
entry:
587
635
%lane = shufflevector <4 x i32 > %v , <4 x i32 > poison, <2 x i32 > <i32 3 , i32 3 >
588
636
%vqrdmlah_v3.i = tail call <2 x i32 > @llvm.aarch64.neon.sqrdmlah.v2i32 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %lane ) #4
@@ -644,14 +692,23 @@ entry:
644
692
}
645
693
646
694
define i16 @test_vqrdmlahh_lane_s16 (i16 %a , i16 %b , <4 x i16 > %c ) {
647
- ; CHECK-LABEL: test_vqrdmlahh_lane_s16:
648
- ; CHECK: // %bb.0: // %entry
649
- ; CHECK-NEXT: fmov s1, w0
650
- ; CHECK-NEXT: fmov s2, w1
651
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
652
- ; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
653
- ; CHECK-NEXT: umov w0, v1.h[0]
654
- ; CHECK-NEXT: ret
695
+ ; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
696
+ ; CHECK-SD: // %bb.0: // %entry
697
+ ; CHECK-SD-NEXT: fmov s1, w0
698
+ ; CHECK-SD-NEXT: fmov s2, w1
699
+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
700
+ ; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
701
+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
702
+ ; CHECK-SD-NEXT: ret
703
+ ;
704
+ ; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
705
+ ; CHECK-GI: // %bb.0: // %entry
706
+ ; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
707
+ ; CHECK-GI-NEXT: fmov s1, w0
708
+ ; CHECK-GI-NEXT: fmov s2, w1
709
+ ; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
710
+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
711
+ ; CHECK-GI-NEXT: ret
655
712
entry:
656
713
%0 = insertelement <4 x i16 > undef , i16 %a , i64 0
657
714
%1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -677,13 +734,22 @@ entry:
677
734
}
678
735
679
736
define i16 @test_vqrdmlahh_laneq_s16 (i16 %a , i16 %b , <8 x i16 > %c ) {
680
- ; CHECK-LABEL: test_vqrdmlahh_laneq_s16:
681
- ; CHECK: // %bb.0: // %entry
682
- ; CHECK-NEXT: fmov s1, w0
683
- ; CHECK-NEXT: fmov s2, w1
684
- ; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
685
- ; CHECK-NEXT: umov w0, v1.h[0]
686
- ; CHECK-NEXT: ret
737
+ ; CHECK-SD-LABEL: test_vqrdmlahh_laneq_s16:
738
+ ; CHECK-SD: // %bb.0: // %entry
739
+ ; CHECK-SD-NEXT: fmov s1, w0
740
+ ; CHECK-SD-NEXT: fmov s2, w1
741
+ ; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
742
+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
743
+ ; CHECK-SD-NEXT: ret
744
+ ;
745
+ ; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
746
+ ; CHECK-GI: // %bb.0: // %entry
747
+ ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
748
+ ; CHECK-GI-NEXT: fmov s1, w0
749
+ ; CHECK-GI-NEXT: fmov s2, w1
750
+ ; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
751
+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
752
+ ; CHECK-GI-NEXT: ret
687
753
entry:
688
754
%0 = insertelement <4 x i16 > undef , i16 %a , i64 0
689
755
%1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -708,21 +774,33 @@ entry:
708
774
}
709
775
710
776
; Splats element 7 of the <8 x i16> argument %v into a <4 x i16> vector and
; passes it as the third operand of the v4i16 sqrdmlsh intrinsic.
; Per the assertions below, the SelectionDAG runs fold the splat into the
; by-element operand (v2.h[7]), while the GlobalISel run materializes the
; splat with a separate dup and then uses the plain vector form.
define <4 x i16 > @test_vqrdmlsh_laneq_s16 (<4 x i16 > %a , <4 x i16 > %b , <8 x i16 > %v ) {
711
- ; CHECK-LABEL: test_vqrdmlsh_laneq_s16:
712
- ; CHECK: // %bb.0: // %entry
713
- ; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
714
- ; CHECK-NEXT: ret
777
+ ; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s16:
778
+ ; CHECK-SD: // %bb.0: // %entry
779
+ ; CHECK-SD-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
780
+ ; CHECK-SD-NEXT: ret
781
+ ;
782
+ ; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s16:
783
+ ; CHECK-GI: // %bb.0: // %entry
784
+ ; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
785
+ ; CHECK-GI-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h
786
+ ; CHECK-GI-NEXT: ret
715
787
entry:
716
788
%lane = shufflevector <8 x i16 > %v , <8 x i16 > poison, <4 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 >
717
789
%vqrdmlsh_v3.i = tail call <4 x i16 > @llvm.aarch64.neon.sqrdmlsh.v4i16 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %lane ) #4
718
790
ret <4 x i16 > %vqrdmlsh_v3.i
719
791
}
720
792
721
793
define <2 x i32 > @test_vqrdmlsh_laneq_s32 (<2 x i32 > %a , <2 x i32 > %b , <4 x i32 > %v ) {
722
- ; CHECK-LABEL: test_vqrdmlsh_laneq_s32:
723
- ; CHECK: // %bb.0: // %entry
724
- ; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
725
- ; CHECK-NEXT: ret
794
+ ; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s32:
795
+ ; CHECK-SD: // %bb.0: // %entry
796
+ ; CHECK-SD-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
797
+ ; CHECK-SD-NEXT: ret
798
+ ;
799
+ ; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s32:
800
+ ; CHECK-GI: // %bb.0: // %entry
801
+ ; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
802
+ ; CHECK-GI-NEXT: sqrdmlsh v0.2s, v1.2s, v2.2s
803
+ ; CHECK-GI-NEXT: ret
726
804
entry:
727
805
%lane = shufflevector <4 x i32 > %v , <4 x i32 > poison, <2 x i32 > <i32 3 , i32 3 >
728
806
%vqrdmlsh_v3.i = tail call <2 x i32 > @llvm.aarch64.neon.sqrdmlsh.v2i32 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %lane ) #4
@@ -784,14 +862,23 @@ entry:
784
862
}
785
863
786
864
define i16 @test_vqrdmlshh_lane_s16 (i16 %a , i16 %b , <4 x i16 > %c ) {
787
- ; CHECK-LABEL: test_vqrdmlshh_lane_s16:
788
- ; CHECK: // %bb.0: // %entry
789
- ; CHECK-NEXT: fmov s1, w0
790
- ; CHECK-NEXT: fmov s2, w1
791
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
792
- ; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
793
- ; CHECK-NEXT: umov w0, v1.h[0]
794
- ; CHECK-NEXT: ret
865
+ ; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
866
+ ; CHECK-SD: // %bb.0: // %entry
867
+ ; CHECK-SD-NEXT: fmov s1, w0
868
+ ; CHECK-SD-NEXT: fmov s2, w1
869
+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
870
+ ; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
871
+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
872
+ ; CHECK-SD-NEXT: ret
873
+ ;
874
+ ; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
875
+ ; CHECK-GI: // %bb.0: // %entry
876
+ ; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
877
+ ; CHECK-GI-NEXT: fmov s1, w0
878
+ ; CHECK-GI-NEXT: fmov s2, w1
879
+ ; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
880
+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
881
+ ; CHECK-GI-NEXT: ret
795
882
entry:
796
883
%0 = insertelement <4 x i16 > undef , i16 %a , i64 0
797
884
%1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -817,13 +904,22 @@ entry:
817
904
}
818
905
819
906
define i16 @test_vqrdmlshh_laneq_s16 (i16 %a , i16 %b , <8 x i16 > %c ) {
820
- ; CHECK-LABEL: test_vqrdmlshh_laneq_s16:
821
- ; CHECK: // %bb.0: // %entry
822
- ; CHECK-NEXT: fmov s1, w0
823
- ; CHECK-NEXT: fmov s2, w1
824
- ; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
825
- ; CHECK-NEXT: umov w0, v1.h[0]
826
- ; CHECK-NEXT: ret
907
+ ; CHECK-SD-LABEL: test_vqrdmlshh_laneq_s16:
908
+ ; CHECK-SD: // %bb.0: // %entry
909
+ ; CHECK-SD-NEXT: fmov s1, w0
910
+ ; CHECK-SD-NEXT: fmov s2, w1
911
+ ; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
912
+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
913
+ ; CHECK-SD-NEXT: ret
914
+ ;
915
+ ; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
916
+ ; CHECK-GI: // %bb.0: // %entry
917
+ ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
918
+ ; CHECK-GI-NEXT: fmov s1, w0
919
+ ; CHECK-GI-NEXT: fmov s2, w1
920
+ ; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
921
+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
922
+ ; CHECK-GI-NEXT: ret
827
923
entry:
828
924
%0 = insertelement <4 x i16 > undef , i16 %a , i64 0
829
925
%1 = insertelement <4 x i16 > undef , i16 %b , i64 0
0 commit comments