Skip to content

Commit 3728a7d

Browse files
[SVE] Add ISel for fabs(fsub(a,b)) ==> FABD.
Differential Revision: https://reviews.llvm.org/D116227
1 parent 4325fd7 commit 3728a7d

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -277,8 +277,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
277277
return N->hasOneUse();
278278
}]>;
279279

280+
def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
281+
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
282+
280283
def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
281-
(AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
284+
(AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
282285
return N->getFlags().hasNoSignedZeros();
283286
}]>;
284287

@@ -469,6 +472,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
469472
defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>;
470473
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
471474
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
475+
defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>;
472476
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
473477
} // End HasSVEorStreamingSVE
474478

llvm/test/CodeGen/AArch64/sve-fp.ll

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -581,6 +581,74 @@ define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x double> %a) {
581581
ret <vscale x 2 x double> %res
582582
}
583583

584+
; FABD
585+
586+
define <vscale x 8 x half> @fabd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
587+
; CHECK-LABEL: fabd_nxv8f16:
588+
; CHECK: // %bb.0:
589+
; CHECK-NEXT: ptrue p0.h
590+
; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h
591+
; CHECK-NEXT: ret
592+
%sub = fsub <vscale x 8 x half> %a, %b
593+
%res = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %sub)
594+
ret <vscale x 8 x half> %res
595+
}
596+
597+
define <vscale x 4 x half> @fabd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
598+
; CHECK-LABEL: fabd_nxv4f16:
599+
; CHECK: // %bb.0:
600+
; CHECK-NEXT: ptrue p0.s
601+
; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h
602+
; CHECK-NEXT: ret
603+
%sub = fsub <vscale x 4 x half> %a, %b
604+
%res = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %sub)
605+
ret <vscale x 4 x half> %res
606+
}
607+
608+
define <vscale x 2 x half> @fabd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
609+
; CHECK-LABEL: fabd_nxv2f16:
610+
; CHECK: // %bb.0:
611+
; CHECK-NEXT: ptrue p0.d
612+
; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h
613+
; CHECK-NEXT: ret
614+
%sub = fsub <vscale x 2 x half> %a, %b
615+
%res = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %sub)
616+
ret <vscale x 2 x half> %res
617+
}
618+
619+
define <vscale x 4 x float> @fabd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
620+
; CHECK-LABEL: fabd_nxv4f32:
621+
; CHECK: // %bb.0:
622+
; CHECK-NEXT: ptrue p0.s
623+
; CHECK-NEXT: fabd z0.s, p0/m, z0.s, z1.s
624+
; CHECK-NEXT: ret
625+
%sub = fsub <vscale x 4 x float> %a, %b
626+
%res = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %sub)
627+
ret <vscale x 4 x float> %res
628+
}
629+
630+
define <vscale x 2 x float> @fabd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
631+
; CHECK-LABEL: fabd_nxv2f32:
632+
; CHECK: // %bb.0:
633+
; CHECK-NEXT: ptrue p0.d
634+
; CHECK-NEXT: fabd z0.s, p0/m, z0.s, z1.s
635+
; CHECK-NEXT: ret
636+
%sub = fsub <vscale x 2 x float> %a, %b
637+
%res = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %sub)
638+
ret <vscale x 2 x float> %res
639+
}
640+
641+
define <vscale x 2 x double> @fabd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
642+
; CHECK-LABEL: fabd_nxv2f64:
643+
; CHECK: // %bb.0:
644+
; CHECK-NEXT: ptrue p0.d
645+
; CHECK-NEXT: fabd z0.d, p0/m, z0.d, z1.d
646+
; CHECK-NEXT: ret
647+
%sub = fsub <vscale x 2 x double> %a, %b
648+
%res = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %sub)
649+
ret <vscale x 2 x double> %res
650+
}
651+
584652
; maxnum minnum
585653

586654
define <vscale x 16 x half> @maxnum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {

0 commit comments

Comments (0)