@@ -436,13 +436,23 @@ def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2),
436
436
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>;
437
437
438
438
// AArch64fmla_p: pattern fragment over the operands (pg, za, zn, zm).
// The removed ("-") form listed one AArch64fma_p alternative taking
// (pg, zn, zm, za). The added ("+") form keeps that alternative and lists a
// second one with $zn and $zm swapped, i.e. (pg, zm, zn, za). $za is the last
// AArch64fma_p operand in every alternative.
def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
439
- [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za)]>;
439
+ [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za),
440
+ (AArch64fma_p node:$pg, node:$zm, node:$zn, node:$za)]>;
441
+
442
// AArch64fmlaidx: pattern fragment over (acc, op1, op2, idx), newly added by
// this change. It lists two alternatives:
//   1. AArch64fmla_p under (SVEAllActive) with operands $acc, $op1 and
//      int_aarch64_sve_dup_laneq($op2, $idx);
//   2. the int_aarch64_sve_fmla_lane intrinsic applied to the same four
//      operands.
+ def AArch64fmlaidx : PatFrags<(ops node:$acc, node:$op1, node:$op2, node:$idx),
443
+ [(AArch64fmla_p (SVEAllActive), node:$acc, node:$op1, (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
444
+ (int_aarch64_sve_fmla_lane node:$acc, node:$op1, node:$op2, node:$idx)]>;
440
445
441
446
// AArch64fmls_p: pattern fragment over (pg, za, zn, zm); unchanged context in
// this diff. It lists three alternatives:
//   1. the int_aarch64_sve_fmls_u intrinsic on (pg, za, zn, zm);
//   2. AArch64fma_p whose second operand is AArch64fneg_mt($pg, $zn, (undef)),
//      followed by $zm and $za;
//   3. AArch64fma_p with $zm second and the same AArch64fneg_mt of $zn third,
//      followed by $za.
def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
442
447
[(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm),
443
448
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
444
449
(AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za)]>;
445
450
451
// AArch64fmlsidx: pattern fragment over (acc, op1, op2, idx), newly added by
// this change. It lists two alternatives:
//   1. AArch64fmla_p under (SVEAllActive) with operands $acc,
//      AArch64fneg_mt((SVEAllActive), $op1, (undef)) and
//      int_aarch64_sve_dup_laneq($op2, $idx);
//   2. the int_aarch64_sve_fmls_lane intrinsic on ($acc, $op1, $op2, $idx).
// Only $op1 is wrapped in AArch64fneg_mt here; the reversed operand order is
// not listed explicitly in this fragment.
+ def AArch64fmlsidx : PatFrags<(ops node:$acc, node:$op1, node:$op2, node:$idx),
452
+ [(AArch64fmla_p (SVEAllActive), node:$acc, (AArch64fneg_mt(SVEAllActive), node:$op1, (undef)), (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
453
+ (int_aarch64_sve_fmls_lane node:$acc, node:$op1, node:$op2, node:$idx)]>;
454
+
455
+
446
456
def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
447
457
[(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
448
458
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
@@ -560,7 +570,12 @@ def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
560
570
561
571
// AArch64fmul: pattern fragment over (op1, op2).
// Alternatives before this change: the generic fmul node, and AArch64fmul_p
// under (SVEAllActive) with operands in ($op1, $op2) order.
// The added ("+") lines keep both and list a third alternative: AArch64fmul_p
// under (SVEAllActive) with the operands swapped to ($op2, $op1).
def AArch64fmul : PatFrags<(ops node:$op1, node:$op2),
562
572
[(fmul node:$op1, node:$op2),
563
- (AArch64fmul_p (SVEAllActive), node:$op1, node:$op2)]>;
573
+ (AArch64fmul_p (SVEAllActive), node:$op1, node:$op2),
574
+ (AArch64fmul_p (SVEAllActive), node:$op2, node:$op1)]>;
575
+
576
// AArch64fmulidx: pattern fragment over (op1, op2, idx), newly added by this
// change. It lists two alternatives:
//   1. AArch64fmul of $op1 with int_aarch64_sve_dup_laneq($op2, $idx);
//   2. the int_aarch64_sve_fmul_lane intrinsic on ($op1, $op2, $idx).
+ def AArch64fmulidx : PatFrags<(ops node:$op1, node:$op2, node:$idx),
577
+ [(AArch64fmul node:$op1, (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
578
+ (int_aarch64_sve_fmul_lane node:$op1, node:$op2, node:$idx)]>;
564
579
565
580
def AArch64fsub : PatFrags<(ops node:$op1, node:$op2),
566
581
[(fsub node:$op1, node:$op2),
@@ -872,11 +887,25 @@ let Predicates = [HasSVE] in {
872
887
} // End HasSVE
873
888
874
889
// Definitions guarded by the HasSVE_or_SME predicate.
// In this diff the FMLA_ZZZI, FMLS_ZZZI and FMUL_ZZZI defms stop passing the
// *_lane intrinsics directly and instead pass the AArch64fmlaidx,
// AArch64fmlsidx and AArch64fmulidx pattern fragments. FCMLA_ZZZI is unchanged
// and still takes int_aarch64_sve_fcmla_lane.
let Predicates = [HasSVE_or_SME] in {
875
- defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane >;
876
- defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane >;
890
+ defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", AArch64fmlaidx >;
891
+ defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", AArch64fmlsidx >;
877
892
878
893
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
879
- defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
894
+ defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", AArch64fmulidx>;
895
+
896
// The six added patterns below all operate on nxv2f64. Each matches an
// AArch64fmul / AArch64fmla_p / AArch64fmls_p (the latter two under
// (SVEAllActive)) whose last operand is AArch64trn1 or AArch64trn2 of $R with
// itself, and selects the corresponding *_ZZZI_D instruction with immediate 0
// for trn1 and 1 for trn2.
// NOTE(review): "64B" in the comment below presumably means 64-bit elements,
// since every pattern here is nxv2f64 — confirm with the author.
+ // 64B segmented lane splats currently end up as trn instructions instead.
897
+ def : Pat<(nxv2f64 (AArch64fmul nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
898
+ (FMUL_ZZZI_D $L, $R, 0)>;
899
+ def : Pat<(nxv2f64 (AArch64fmul nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
900
+ (FMUL_ZZZI_D $L, $R, 1)>;
901
+ def : Pat<(nxv2f64 (AArch64fmla_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
902
+ (FMLA_ZZZI_D $Acc, $L, $R, 0)>;
903
+ def : Pat<(nxv2f64 (AArch64fmla_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
904
+ (FMLA_ZZZI_D $Acc, $L, $R, 1)>;
905
+ def : Pat<(nxv2f64 (AArch64fmls_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
906
+ (FMLS_ZZZI_D $Acc, $L, $R, 0)>;
907
+ def : Pat<(nxv2f64 (AArch64fmls_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
908
+ (FMLS_ZZZI_D $Acc, $L, $R, 1)>;
880
909
} // End HasSVE_or_SME
881
910
882
911
let Predicates = [HasSVE] in {
@@ -4349,10 +4378,10 @@ defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bfloat<0b01, "bfmls", "BFMLS_ZPZZZ", AArch
4349
4378
// bfloat defms; the enclosing scope header is outside this hunk.
// BFMLA_ZPZZZ and BFMLS_ZPZZZ (unchanged) are instantiated from
// sve_fp_3op_pred_bfloat with AArch64fmla_p and AArch64fmls_p respectively.
defm BFMLA_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmla_p>;
4350
4379
defm BFMLS_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmls_p>;
4351
4380
4352
// The indexed BFMLA/BFMLS/BFMUL defms below switch from the *_lane intrinsics
// (removed "-" lines) to the AArch64fmlaidx / AArch64fmlsidx / AArch64fmulidx
// pattern fragments (added "+" lines); mnemonics and opcode bits are unchanged.
- defm BFMLA_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmla", 0b10, int_aarch64_sve_fmla_lane >;
4353
- defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, int_aarch64_sve_fmls_lane >;
4381
+ defm BFMLA_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmla", 0b10, AArch64fmlaidx >;
4382
+ defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, AArch64fmlsidx >;
4354
4383
4355
- defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", int_aarch64_sve_fmul_lane >;
4384
+ defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", AArch64fmulidx >;
4356
4385
4357
4386
defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>;
4358
4387
} // End HasSVEB16B16
0 commit comments