@@ -622,6 +622,30 @@ multiclass sme_vector_to_tile_aliases<Instruction inst,
622
622
(inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
623
623
}
624
624
625
// Instruction-selection patterns for a vector-to-tile write node.
//
//   inst      - instruction emitted when a pattern matches
//   zpr_vt    - value type of the source vector ($zn)
//   ppr_vt    - value type of the governing predicate ($pg)
//   imm_ty    - operand class of the tile immediate ($tile)
//   offset_ty - operand class of the constant slice offset ($imm)
//   op        - the node being selected
//   tileslice - ComplexPattern that matches the i32 slice index and yields a
//               MatrixIndexGPR32Op12_15 register plus an offset_ty immediate
multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
                                       ValueType ppr_vt, Operand imm_ty,
                                       Operand offset_ty,
                                       SDPatternOperator op,
                                       ComplexPattern tileslice> {
  // Fallback: the slice index is passed through in a register and the
  // instruction's offset operand is the constant 0.
  def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
            (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;

  // If `tileslice` matches the slice index, forward the register and the
  // immediate it produces. AddedComplexity ranks this above the fallback.
  let AddedComplexity = 1 in
  def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                              offset_ty:$imm)),
                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
            (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
}
640
+
641
// Pseudo instruction used as the selection target for vector-to-tile writes.
// It has no outputs and no selection pattern of its own. The tile and the
// slice offset are both plain i64 immediates, followed by the slice index
// register, the governing predicate and the source vector.
class sme_mova_insert_pseudo
    : Pseudo<(outs),
             (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx, i64imm:$imm,
                  PPR3bAny:$pg, ZPRAny:$zn),
             []>,
      Sched<[]> {
  // Expanded by the custom inserter; the translation to the actual
  // instructions lives in AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
}
648
+
625
649
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
626
650
def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
627
651
TileVectorOpH8),
@@ -661,6 +685,14 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
661
685
let Inst{3-0} = ZAd;
662
686
}
663
687
688
// Pseudo instructions for lowering intrinsics, using immediates instead of
// tile registers. One is defined per suffix (_PSEUDO_B, _PSEUDO_H, _PSEUDO_S,
// _PSEUDO_D, _PSEUDO_Q); NAME is prepended implicitly as for any other def
// in this multiclass.
foreach sz = ["B", "H", "S", "D", "Q"] in
  def "_PSEUDO_" # sz : sme_mova_insert_pseudo;
695
+
664
696
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
665
697
!if(is_col, TileVectorOpV8,
666
698
TileVectorOpH8),
@@ -681,6 +713,62 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
681
713
!if(is_col, TileVectorOpV128,
682
714
TileVectorOpH128),
683
715
ZPR128, sme_elm_idx0_0>;
716
+
717
// Selection patterns for the write intrinsics. `is_col` picks the vertical
// intrinsic, otherwise the horizontal one is used. Every pattern targets one
// of the _PSEUDO_* instructions; value types that share a pseudo are
// instantiated in a loop, in the same order as a hand-unrolled list.
defvar op = !if(is_col, int_aarch64_sme_write_vert,
                        int_aarch64_sme_write_horiz);

// _PSEUDO_B
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                   nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
                                   op, tileslice8>;

// _PSEUDO_H
foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                     vt, nxv8i1, sme_elm_idx0_1, imm0_7,
                                     op, tileslice16>;

// _PSEUDO_S
foreach vt = [nxv4i32, nxv4f32] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                     vt, nxv4i1, sme_elm_idx0_3, imm0_3,
                                     op, tileslice32>;

// _PSEUDO_D
foreach vt = [nxv2i64, nxv2f64] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                     vt, nxv2i1, sme_elm_idx0_7, imm0_1,
                                     op, tileslice64>;

// The writeq intrinsics all map onto _PSEUDO_Q, whatever the vector type.
// Only the predicate type changes between the groups below.
defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
                         int_aarch64_sme_writeq_horiz);

defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                   nxv16i8, nxv16i1, sme_elm_idx0_15,
                                   sme_elm_idx0_0, opq, tileslice128>;

foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     vt, nxv8i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;

foreach vt = [nxv4i32, nxv4f32] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     vt, nxv4i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;

foreach vt = [nxv2i64, nxv2f64] in
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     vt, nxv2i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
684
772
}
685
773
686
774
multiclass sme_vector_to_tile<string mnemonic> {
@@ -722,6 +810,23 @@ multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
722
810
(inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
723
811
}
724
812
813
// Instruction-selection patterns for a tile-to-vector read node.
//
//   inst      - instruction emitted when a pattern matches
//   zpr_vt    - value type of the result and of the $passthru vector
//   ppr_vt    - value type of the governing predicate ($pg)
//   offset_ty - operand class of the constant slice offset ($imm)
//   imm2tile  - ComplexPattern applied to the tile operand; its result is
//               passed to `inst` as $tile
//   tileslice - ComplexPattern that matches the i32 slice index and yields a
//               MatrixIndexGPR32Op12_15 register plus an offset_ty immediate
//   op        - the node being selected
multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
                                       ValueType ppr_vt, Operand offset_ty,
                                       ComplexPattern imm2tile,
                                       ComplexPattern tileslice,
                                       SDPatternOperator op> {
  // Fallback: the slice index is passed through in a register and the
  // instruction's offset operand is the constant 0.
  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                        (imm2tile untyped:$tile),
                        MatrixIndexGPR32Op12_15:$idx)),
            (inst $passthru, $pg, $tile, $idx, 0)>;

  // If `tileslice` matches the slice index, forward the register and the
  // immediate it produces. AddedComplexity ranks this above the fallback.
  let AddedComplexity = 1 in
  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                        (imm2tile untyped:$tile),
                        (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                        offset_ty:$imm)))),
            (inst $passthru, $pg, $tile, $idx, $imm)>;
}
829
+
725
830
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
726
831
def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
727
832
TileVectorOpH8),
@@ -775,6 +880,62 @@ multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
775
880
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
776
881
!if(is_col, TileVectorOpV128,
777
882
TileVectorOpH128), sme_elm_idx0_0>;
883
+
884
// Selection patterns for the read intrinsics. `is_col` picks the vertical
// intrinsic, otherwise the horizontal one is used. The patterns select the
// real _B/_H/_S/_D/_Q instructions directly; value types that share an
// instruction are instantiated in a loop, in the same order as a
// hand-unrolled list.
defvar op = !if(is_col, int_aarch64_sme_read_vert,
                        int_aarch64_sme_read_horiz);

// _B
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
                                   nxv16i8, nxv16i1, imm0_15,
                                   imm_to_tile8, tileslice8, op>;

// _H
foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                     vt, nxv8i1, imm0_7,
                                     imm_to_tile16, tileslice16, op>;

// _S
foreach vt = [nxv4i32, nxv4f32] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
                                     vt, nxv4i1, imm0_3,
                                     imm_to_tile32, tileslice32, op>;

// _D
foreach vt = [nxv2i64, nxv2f64] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
                                     vt, nxv2i1, imm0_1,
                                     imm_to_tile64, tileslice64, op>;

// The readq intrinsics all map onto _Q, whatever the vector type. Only the
// predicate type changes between the groups below.
defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
                         int_aarch64_sme_readq_horiz);

defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                   nxv16i8, nxv16i1, sme_elm_idx0_0,
                                   imm_to_tile128, tileslice128, opq>;

foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     vt, nxv8i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;

foreach vt = [nxv4i32, nxv4f32] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     vt, nxv4i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;

foreach vt = [nxv2i64, nxv2f64] in
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     vt, nxv2i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
778
939
}
779
940
780
941
multiclass sme_tile_to_vector<string mnemonic> {
0 commit comments