Skip to content

Commit d2ca10a

Browse files
authored
[AArch64] Match indexed forms of fmul/fmla/fmls (#144892)
Matches dupq segmented lane splats in one of the operands of the fmul/fmla/fmls instructions, and uses the indexed form.
1 parent 9903c19 commit d2ca10a

File tree

2 files changed

+403
-8
lines changed

2 files changed

+403
-8
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -436,13 +436,23 @@ def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2),
436436
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>;
437437

438438
def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
439-
[(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za)]>;
439+
[(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za),
440+
(AArch64fma_p node:$pg, node:$zm, node:$zn, node:$za)]>;
441+
442+
// Pattern fragment for the indexed (by-element) FMLA forms; it is the operator
// handed to the FMLA_ZZZI and BFMLA_ZZZI multiclasses. Operands are the
// accumulator, the plain multiplicand, the vector supplying the indexed lane,
// and the lane index. Either alternative matches:
//   1. an AArch64fmla_p under an all-active predicate whose last multiplicand
//      is an int_aarch64_sve_dup_laneq splat of $op2 at $idx (AArch64fmla_p
//      lists both multiplicand orders of the underlying AArch64fma_p);
//   2. the explicit int_aarch64_sve_fmla_lane intrinsic.
def AArch64fmlaidx : PatFrags<(ops node:$acc, node:$op1, node:$op2, node:$idx),
443+
[(AArch64fmla_p (SVEAllActive), node:$acc, node:$op1, (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
444+
(int_aarch64_sve_fmla_lane node:$acc, node:$op1, node:$op2, node:$idx)]>;
440445

441446
def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
442447
[(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm),
443448
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
444449
(AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za)]>;
445450

451+
// Pattern fragment for the indexed (by-element) FMLS forms; it is the operator
// handed to the FMLS_ZZZI and BFMLS_ZZZI multiclasses. Operands are the
// accumulator, the plain multiplicand, the vector supplying the indexed lane,
// and the lane index. Either alternative matches:
//   1. an all-active AArch64fmla_p in which $op1 is negated by an all-active
//      AArch64fneg_mt (undef passthru) and the other multiplicand is an
//      int_aarch64_sve_dup_laneq splat of $op2 at $idx;
//   2. the explicit int_aarch64_sve_fmls_lane intrinsic.
def AArch64fmlsidx : PatFrags<(ops node:$acc, node:$op1, node:$op2, node:$idx),
452+
[(AArch64fmla_p (SVEAllActive), node:$acc, (AArch64fneg_mt(SVEAllActive), node:$op1, (undef)), (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
453+
(int_aarch64_sve_fmls_lane node:$acc, node:$op1, node:$op2, node:$idx)]>;
454+
455+
446456
def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
447457
[(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
448458
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
@@ -560,7 +570,12 @@ def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
560570

561571
def AArch64fmul : PatFrags<(ops node:$op1, node:$op2),
562572
[(fmul node:$op1, node:$op2),
563-
(AArch64fmul_p (SVEAllActive), node:$op1, node:$op2)]>;
573+
(AArch64fmul_p (SVEAllActive), node:$op1, node:$op2),
574+
(AArch64fmul_p (SVEAllActive), node:$op2, node:$op1)]>;
575+
576+
// Pattern fragment for the indexed (by-element) FMUL forms; it is the operator
// handed to the FMUL_ZZZI and BFMUL_ZZZI multiclasses. Operands are the plain
// multiplicand, the vector supplying the indexed lane, and the lane index.
// Either alternative matches:
//   1. an AArch64fmul of $op1 with an int_aarch64_sve_dup_laneq splat of $op2
//      at $idx;
//   2. the explicit int_aarch64_sve_fmul_lane intrinsic.
def AArch64fmulidx : PatFrags<(ops node:$op1, node:$op2, node:$idx),
577+
[(AArch64fmul node:$op1, (int_aarch64_sve_dup_laneq node:$op2, node:$idx)),
578+
(int_aarch64_sve_fmul_lane node:$op1, node:$op2, node:$idx)]>;
564579

565580
def AArch64fsub : PatFrags<(ops node:$op1, node:$op2),
566581
[(fsub node:$op1, node:$op2),
@@ -872,11 +887,25 @@ let Predicates = [HasSVE] in {
872887
} // End HasSVE
873888

874889
let Predicates = [HasSVE_or_SME] in {
875-
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane>;
876-
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane>;
890+
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", AArch64fmlaidx>;
891+
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", AArch64fmlsidx>;
877892

878893
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
879-
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
894+
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", AArch64fmulidx>;
895+
896+
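// Segmented lane splats of 64-bit elements currently end up as trn
// instructions instead: trn1 $R, $R corresponds to lane index 0 and
// trn2 $R, $R to lane index 1, so match those forms explicitly.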
897+
def : Pat<(nxv2f64 (AArch64fmul nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
898+
(FMUL_ZZZI_D $L, $R, 0)>;
899+
def : Pat<(nxv2f64 (AArch64fmul nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
900+
(FMUL_ZZZI_D $L, $R, 1)>;
901+
def : Pat<(nxv2f64 (AArch64fmla_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
902+
(FMLA_ZZZI_D $Acc, $L, $R, 0)>;
903+
def : Pat<(nxv2f64 (AArch64fmla_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
904+
(FMLA_ZZZI_D $Acc, $L, $R, 1)>;
905+
def : Pat<(nxv2f64 (AArch64fmls_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn1 nxv2f64:$R, nxv2f64:$R))),
906+
(FMLS_ZZZI_D $Acc, $L, $R, 0)>;
907+
def : Pat<(nxv2f64 (AArch64fmls_p (SVEAllActive), nxv2f64:$Acc, nxv2f64:$L, (AArch64trn2 nxv2f64:$R, nxv2f64:$R))),
908+
(FMLS_ZZZI_D $Acc, $L, $R, 1)>;
880909
} // End HasSVE_or_SME
881910

882911
let Predicates = [HasSVE] in {
@@ -4349,10 +4378,10 @@ defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bfloat<0b01, "bfmls", "BFMLS_ZPZZZ", AArch
43494378
defm BFMLA_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmla_p>;
43504379
defm BFMLS_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmls_p>;
43514380

4352-
defm BFMLA_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmla", 0b10, int_aarch64_sve_fmla_lane>;
4353-
defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, int_aarch64_sve_fmls_lane>;
4381+
defm BFMLA_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmla", 0b10, AArch64fmlaidx>;
4382+
defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, AArch64fmlsidx>;
43544383

4355-
defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", int_aarch64_sve_fmul_lane>;
4384+
defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", AArch64fmulidx>;
43564385

43574386
defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>;
43584387
} // End HasSVEB16B16

0 commit comments

Comments
 (0)