@@ -713,3 +713,182 @@ define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
713
713
store i64 %i3 , ptr %out
714
714
ret void
715
715
}
716
+
717
+ ; x1 = x0: lanes 0-3 are rebuilt into one i32 (lane i at bits 8*i..8*i+7), so the expected output is a single 32-bit ldr/str pair.
718
+ define void @scalable_vector_to_i32 (ptr %in , ptr %out , ptr %p ) #0 {
719
+ ; CHECK-LABEL: scalable_vector_to_i32:
720
+ ; CHECK: // %bb.0:
721
+ ; CHECK-NEXT: ldr w8, [x0]
722
+ ; CHECK-NEXT: str w8, [x1]
723
+ ; CHECK-NEXT: ret
724
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
725
+ ; Extract lanes 0..3 of the scalable vector one at a time.
726
+ %e1 = extractelement <vscale x 4 x i8 > %ld , i32 0
727
+ %e2 = extractelement <vscale x 4 x i8 > %ld , i32 1
728
+ %e3 = extractelement <vscale x 4 x i8 > %ld , i32 2
729
+ %e4 = extractelement <vscale x 4 x i8 > %ld , i32 3
730
+ ; Zero-extend each byte to i32 so the upper 24 bits start out clear.
731
+ %z0 = zext i8 %e1 to i32
732
+ %z1 = zext i8 %e2 to i32
733
+ %z2 = zext i8 %e3 to i32
734
+ %z3 = zext i8 %e4 to i32
735
+ ; Shift lanes 1..3 to bit offsets 8, 16 and 24; lane 0 stays at bit 0. The shift by 24 is nuw only because it can reach the sign bit.
736
+ %s1 = shl nuw nsw i32 %z1 , 8
737
+ %s2 = shl nuw nsw i32 %z2 , 16
738
+ %s3 = shl nuw i32 %z3 , 24
739
+ ; OR the four non-overlapping byte fields together.
740
+ %i1 = or i32 %s1 , %z0
741
+ %i2 = or i32 %i1 , %s2
742
+ %i3 = or i32 %i2 , %s3
743
+ ; Store the rebuilt word through %out; %p is never referenced in this body.
744
+ store i32 %i3 , ptr %out
745
+ ret void
746
+ }
747
+ ; Lane 0 is never extracted, so bits 0-7 of the stored i32 are always zero; the expected output keeps the predicated ld1b and assembles the word from lanes 1..3 with mov, lsl and orr.
748
+ define void @scalable_vector_to_i32_unused_low_i8 (ptr %in , ptr %out , ptr %p ) #0 {
749
+ ; CHECK-LABEL: scalable_vector_to_i32_unused_low_i8:
750
+ ; CHECK: // %bb.0:
751
+ ; CHECK-NEXT: ptrue p0.s
752
+ ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
753
+ ; CHECK-NEXT: mov w8, v0.s[1]
754
+ ; CHECK-NEXT: mov w9, v0.s[2]
755
+ ; CHECK-NEXT: mov w10, v0.s[3]
756
+ ; CHECK-NEXT: lsl w8, w8, #8
757
+ ; CHECK-NEXT: orr w8, w8, w9, lsl #16
758
+ ; CHECK-NEXT: orr w8, w8, w10, lsl #24
759
+ ; CHECK-NEXT: str w8, [x1]
760
+ ; CHECK-NEXT: ret
761
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
762
+ ; Extract lanes 1..3 only; lane 0 is deliberately left unused.
763
+ %e2 = extractelement <vscale x 4 x i8 > %ld , i32 1
764
+ %e3 = extractelement <vscale x 4 x i8 > %ld , i32 2
765
+ %e4 = extractelement <vscale x 4 x i8 > %ld , i32 3
766
+ ; Zero-extend each extracted byte to i32.
767
+ %z1 = zext i8 %e2 to i32
768
+ %z2 = zext i8 %e3 to i32
769
+ %z3 = zext i8 %e4 to i32
770
+ ; Shift lanes 1..3 to bit offsets 8, 16 and 24. The shift by 24 is nuw only because it can reach the sign bit.
771
+ %s1 = shl nuw nsw i32 %z1 , 8
772
+ %s2 = shl nuw nsw i32 %z2 , 16
773
+ %s3 = shl nuw i32 %z3 , 24
774
+ ; OR the three byte fields together; nothing is placed in bits 0-7.
775
+ %i2 = or i32 %s1 , %s2
776
+ %i3 = or i32 %i2 , %s3
777
+ ; Store the result through %out; %p is never referenced in this body.
778
+ store i32 %i3 , ptr %out
779
+ ret void
780
+ }
781
+ ; Lane 3 is never extracted, so bits 24-31 of the stored i32 are always zero; the expected output reads lanes 0-1 with one ldrh and still takes lane 2 from a predicated ld1b.
782
+ define void @scalable_vector_to_i32_unused_high_i8 (ptr %in , ptr %out , ptr %p ) #0 {
783
+ ; CHECK-LABEL: scalable_vector_to_i32_unused_high_i8:
784
+ ; CHECK: // %bb.0:
785
+ ; CHECK-NEXT: ptrue p0.s
786
+ ; CHECK-NEXT: ldrh w9, [x0]
787
+ ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
788
+ ; CHECK-NEXT: mov w8, v0.s[2]
789
+ ; CHECK-NEXT: orr w8, w9, w8, lsl #16
790
+ ; CHECK-NEXT: str w8, [x1]
791
+ ; CHECK-NEXT: ret
792
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
793
+ ; Extract lanes 0..2 only; lane 3 is deliberately left unused.
794
+ %e1 = extractelement <vscale x 4 x i8 > %ld , i32 0
795
+ %e2 = extractelement <vscale x 4 x i8 > %ld , i32 1
796
+ %e3 = extractelement <vscale x 4 x i8 > %ld , i32 2
797
+ ; Zero-extend each extracted byte to i32.
798
+ %z0 = zext i8 %e1 to i32
799
+ %z1 = zext i8 %e2 to i32
800
+ %z2 = zext i8 %e3 to i32
801
+ ; Shift lanes 1 and 2 to bit offsets 8 and 16; lane 0 stays at bit 0.
802
+ %s1 = shl nuw nsw i32 %z1 , 8
803
+ %s2 = shl nuw nsw i32 %z2 , 16
804
+ ; OR the three byte fields together; nothing is placed in bits 24-31.
805
+ %i1 = or i32 %s1 , %z0
806
+ %i2 = or i32 %i1 , %s2
807
+ ; Store the result through %out; %p is never referenced in this body.
808
+ store i32 %i2 , ptr %out
809
+ ret void
810
+ }
811
+ ; Lanes 0 and 1 are never extracted, so bits 0-15 of the stored i32 are always zero; the expected output keeps the predicated ld1b and combines lanes 2 and 3 with mov, lsl and orr.
812
+ define void @scalable_vector_to_i32_unused_low_i16 (ptr %in , ptr %out , ptr %p ) #0 {
813
+ ; CHECK-LABEL: scalable_vector_to_i32_unused_low_i16:
814
+ ; CHECK: // %bb.0:
815
+ ; CHECK-NEXT: ptrue p0.s
816
+ ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
817
+ ; CHECK-NEXT: mov w8, v0.s[2]
818
+ ; CHECK-NEXT: mov w9, v0.s[3]
819
+ ; CHECK-NEXT: lsl w8, w8, #16
820
+ ; CHECK-NEXT: orr w8, w8, w9, lsl #24
821
+ ; CHECK-NEXT: str w8, [x1]
822
+ ; CHECK-NEXT: ret
823
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
824
+ ; Extract lanes 2 and 3 only; the low two lanes are deliberately left unused.
825
+ %e3 = extractelement <vscale x 4 x i8 > %ld , i32 2
826
+ %e4 = extractelement <vscale x 4 x i8 > %ld , i32 3
827
+ ; Zero-extend each extracted byte to i32.
828
+ %z2 = zext i8 %e3 to i32
829
+ %z3 = zext i8 %e4 to i32
830
+ ; Shift lanes 2 and 3 to bit offsets 16 and 24. The shift by 24 is nuw only because it can reach the sign bit.
831
+ %s2 = shl nuw nsw i32 %z2 , 16
832
+ %s3 = shl nuw i32 %z3 , 24
833
+ ; OR the two byte fields together; nothing is placed in bits 0-15.
834
+ %i3 = or i32 %s2 , %s3
835
+ ; Store the result through %out; %p is never referenced in this body.
836
+ store i32 %i3 , ptr %out
837
+ ret void
838
+ }
839
+
840
+ ; x1 = x0[0:1]: only lanes 0 and 1 are used, so the expected output is one 16-bit ldrh whose zero-extended result is stored as a full 32-bit word.
841
+ define void @scalable_vector_to_i32_unused_high_i16 (ptr %in , ptr %out , ptr %p ) #0 {
842
+ ; CHECK-LABEL: scalable_vector_to_i32_unused_high_i16:
843
+ ; CHECK: // %bb.0:
844
+ ; CHECK-NEXT: ldrh w8, [x0]
845
+ ; CHECK-NEXT: str w8, [x1]
846
+ ; CHECK-NEXT: ret
847
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
848
+ ; Extract lanes 0 and 1 only; the high two lanes are deliberately left unused.
849
+ %e1 = extractelement <vscale x 4 x i8 > %ld , i32 0
850
+ %e2 = extractelement <vscale x 4 x i8 > %ld , i32 1
851
+ ; Zero-extend each extracted byte to i32.
852
+ %z0 = zext i8 %e1 to i32
853
+ %z1 = zext i8 %e2 to i32
854
+ ; Shift lane 1 to bit offset 8; lane 0 stays at bit 0.
855
+ %s1 = shl nuw nsw i32 %z1 , 8
856
+ ; OR the two byte fields together; nothing is placed in bits 16-31.
857
+ %i1 = or i32 %s1 , %z0
858
+ ; Store the result through %out; %p is never referenced in this body.
859
+ store i32 %i1 , ptr %out
860
+ ret void
861
+ }
862
+
863
+ ; x1 = x0: the same four-lane recombination as @scalable_vector_to_i32 but widened to i64; the expected output is a 32-bit ldr followed by a 64-bit str, leaving bits 32-63 zero.
864
+ define void @scalable_vector_to_i64 (ptr %in , ptr %out , ptr %p ) #0 {
865
+ ; CHECK-LABEL: scalable_vector_to_i64:
866
+ ; CHECK: // %bb.0:
867
+ ; CHECK-NEXT: ldr w8, [x0]
868
+ ; CHECK-NEXT: str x8, [x1]
869
+ ; CHECK-NEXT: ret
870
+ %ld = load <vscale x 4 x i8 >, ptr %in , align 4
871
+ ; Extract lanes 0..3 of the scalable vector one at a time.
872
+ %e1 = extractelement <vscale x 4 x i8 > %ld , i32 0
873
+ %e2 = extractelement <vscale x 4 x i8 > %ld , i32 1
874
+ %e3 = extractelement <vscale x 4 x i8 > %ld , i32 2
875
+ %e4 = extractelement <vscale x 4 x i8 > %ld , i32 3
876
+ ; Zero-extend each byte to i64 so the upper 56 bits start out clear.
877
+ %z0 = zext i8 %e1 to i64
878
+ %z1 = zext i8 %e2 to i64
879
+ %z2 = zext i8 %e3 to i64
880
+ %z3 = zext i8 %e4 to i64
881
+ ; Shift lanes 1..3 to bit offsets 8, 16 and 24; lane 0 stays at bit 0, so only the low 32 bits are ever populated.
882
+ %s1 = shl nuw nsw i64 %z1 , 8
883
+ %s2 = shl nuw nsw i64 %z2 , 16
884
+ %s3 = shl nuw i64 %z3 , 24
885
+ ; OR the four non-overlapping byte fields together.
886
+ %i1 = or i64 %s1 , %z0
887
+ %i2 = or i64 %i1 , %s2
888
+ %i3 = or i64 %i2 , %s3
889
+ ; Store the full 64-bit value through %out; %p is never referenced in this body.
890
+ store i64 %i3 , ptr %out
891
+ ret void
892
+ }
893
+
894
+ attributes #0 = { "target-features" ="+sve" } ; Turns on the SVE target feature for every function that references this attribute group.
0 commit comments