Skip to content

Commit 343e3c6

Browse files
[LLVM][CodeGen][SVE] Make bf16 fabs/fneg isel consistent with fp16. (#147543)
Whilst at first glance there appear to be no native bfloat instructions to modify the sign bit, this is only the case when FEAT_AFP is implemented. Without this feature, vector FABS/FNEG do not care about the floating-point format beyond needing to know the position of the sign bit. From what I can see, LLVM has no support for FEAT_AFP in terms of feature detection or ACLE builtins, and so I believe the compiler can work under the assumption that the feature is not enabled. In fact, if FEAT_AFP is enabled then I believe the current isel is likely broken for half, float and double anyway.
1 parent 628c735 commit 343e3c6

File tree

6 files changed

+50
-29
lines changed

6 files changed

+50
-29
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,9 +1748,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17481748
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
17491749
setOperationAction(ISD::BITCAST, VT, Custom);
17501750
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1751-
setOperationAction(ISD::FABS, VT, Legal);
1751+
setOperationAction(ISD::FABS, VT, Custom);
17521752
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1753-
setOperationAction(ISD::FNEG, VT, Legal);
1753+
setOperationAction(ISD::FNEG, VT, Custom);
17541754
setOperationAction(ISD::FP_EXTEND, VT, Custom);
17551755
setOperationAction(ISD::FP_ROUND, VT, Custom);
17561756
setOperationAction(ISD::MLOAD, VT, Custom);

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -737,15 +737,6 @@ let Predicates = [HasSVE_or_SME] in {
737737
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
738738
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
739739

740-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
741-
// No dedicated instruction, so just clear the sign bit.
742-
def : Pat<(VT (fabs VT:$op)),
743-
(AND_ZI $op, (i64 (logical_imm64_XFORM(i64 0x7fff7fff7fff7fff))))>;
744-
// No dedicated instruction, so just invert the sign bit.
745-
def : Pat<(VT (fneg VT:$op)),
746-
(EOR_ZI $op, (i64 (logical_imm64_XFORM(i64 0x8000800080008000))))>;
747-
}
748-
749740
// zext(cmpeq(x, splat(0))) -> cnot(x)
750741
def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))),
751742
(CNOT_ZPmZ_B $Op2, $Pg, $Op2)>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5049,6 +5049,9 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
50495049
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
50505050
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
50515051
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
5052+
def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
5053+
def : SVE_1_Op_Passthru_Pat<nxv4bf16, op, nxv4i1, nxv4bf16, !cast<Instruction>(NAME # _H)>;
5054+
def : SVE_1_Op_Passthru_Pat<nxv2bf16, op, nxv2i1, nxv2bf16, !cast<Instruction>(NAME # _H)>;
50525055

50535056
def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
50545057
def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
@@ -5060,6 +5063,9 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
50605063
defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
50615064
defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
50625065
defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
5066+
defm : SVE_1_Op_PassthruUndef_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Pseudo>(NAME # _H_UNDEF)>;
5067+
defm : SVE_1_Op_PassthruUndef_Pat<nxv4bf16, op, nxv4i1, nxv4bf16, !cast<Pseudo>(NAME # _H_UNDEF)>;
5068+
defm : SVE_1_Op_PassthruUndef_Pat<nxv2bf16, op, nxv2i1, nxv2bf16, !cast<Pseudo>(NAME # _H_UNDEF)>;
50635069
}
50645070

50655071
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {

llvm/test/CodeGen/AArch64/sve-bf16-arith.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ target triple = "aarch64-unknown-linux-gnu"
1313
define <vscale x 2 x bfloat> @fabs_nxv2bf16(<vscale x 2 x bfloat> %a) {
1414
; CHECK-LABEL: fabs_nxv2bf16:
1515
; CHECK: // %bb.0:
16-
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
16+
; CHECK-NEXT: ptrue p0.d
17+
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
1718
; CHECK-NEXT: ret
1819
%res = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %a)
1920
ret <vscale x 2 x bfloat> %res
@@ -22,7 +23,8 @@ define <vscale x 2 x bfloat> @fabs_nxv2bf16(<vscale x 2 x bfloat> %a) {
2223
define <vscale x 4 x bfloat> @fabs_nxv4bf16(<vscale x 4 x bfloat> %a) {
2324
; CHECK-LABEL: fabs_nxv4bf16:
2425
; CHECK: // %bb.0:
25-
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
26+
; CHECK-NEXT: ptrue p0.s
27+
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
2628
; CHECK-NEXT: ret
2729
%res = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %a)
2830
ret <vscale x 4 x bfloat> %res
@@ -31,7 +33,8 @@ define <vscale x 4 x bfloat> @fabs_nxv4bf16(<vscale x 4 x bfloat> %a) {
3133
define <vscale x 8 x bfloat> @fabs_nxv8bf16(<vscale x 8 x bfloat> %a) {
3234
; CHECK-LABEL: fabs_nxv8bf16:
3335
; CHECK: // %bb.0:
34-
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
36+
; CHECK-NEXT: ptrue p0.h
37+
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
3538
; CHECK-NEXT: ret
3639
%res = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %a)
3740
ret <vscale x 8 x bfloat> %res
@@ -586,7 +589,8 @@ define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x
586589
define <vscale x 2 x bfloat> @fneg_nxv2bf16(<vscale x 2 x bfloat> %a) {
587590
; CHECK-LABEL: fneg_nxv2bf16:
588591
; CHECK: // %bb.0:
589-
; CHECK-NEXT: eor z0.h, z0.h, #0x8000
592+
; CHECK-NEXT: ptrue p0.d
593+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
590594
; CHECK-NEXT: ret
591595
%res = fneg <vscale x 2 x bfloat> %a
592596
ret <vscale x 2 x bfloat> %res
@@ -595,7 +599,8 @@ define <vscale x 2 x bfloat> @fneg_nxv2bf16(<vscale x 2 x bfloat> %a) {
595599
define <vscale x 4 x bfloat> @fneg_nxv4bf16(<vscale x 4 x bfloat> %a) {
596600
; CHECK-LABEL: fneg_nxv4bf16:
597601
; CHECK: // %bb.0:
598-
; CHECK-NEXT: eor z0.h, z0.h, #0x8000
602+
; CHECK-NEXT: ptrue p0.s
603+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
599604
; CHECK-NEXT: ret
600605
%res = fneg <vscale x 4 x bfloat> %a
601606
ret <vscale x 4 x bfloat> %res
@@ -604,7 +609,8 @@ define <vscale x 4 x bfloat> @fneg_nxv4bf16(<vscale x 4 x bfloat> %a) {
604609
define <vscale x 8 x bfloat> @fneg_nxv8bf16(<vscale x 8 x bfloat> %a) {
605610
; CHECK-LABEL: fneg_nxv8bf16:
606611
; CHECK: // %bb.0:
607-
; CHECK-NEXT: eor z0.h, z0.h, #0x8000
612+
; CHECK-NEXT: ptrue p0.h
613+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
608614
; CHECK-NEXT: ret
609615
%res = fneg <vscale x 8 x bfloat> %a
610616
ret <vscale x 8 x bfloat> %res

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@ define <vscale x 2 x double> @fabs_d(<vscale x 2 x double> %a, <vscale x 2 x i1>
7575
ret <vscale x 2 x double> %out
7676
}
7777

78+
define <vscale x 8 x bfloat> @fabs_bf(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) {
79+
; CHECK-LABEL: fabs_bf:
80+
; CHECK: // %bb.0:
81+
; CHECK-NEXT: fabs z0.h, p0/m, z1.h
82+
; CHECK-NEXT: ret
83+
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fabs.nxv8bf16(<vscale x 8 x bfloat> %a,
84+
<vscale x 8 x i1> %pg,
85+
<vscale x 8 x bfloat> %b)
86+
ret <vscale x 8 x bfloat> %out
87+
}
88+
7889
;
7990
; FADD
8091
;
@@ -835,6 +846,17 @@ define <vscale x 2 x double> @fneg_d(<vscale x 2 x double> %a, <vscale x 2 x i1>
835846
ret <vscale x 2 x double> %out
836847
}
837848

849+
define <vscale x 8 x bfloat> @fneg_bf(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) {
850+
; CHECK-LABEL: fneg_bf:
851+
; CHECK: // %bb.0:
852+
; CHECK-NEXT: fneg z0.h, p0/m, z1.h
853+
; CHECK-NEXT: ret
854+
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fneg.nxv8bf16(<vscale x 8 x bfloat> %a,
855+
<vscale x 8 x i1> %pg,
856+
<vscale x 8 x bfloat> %b)
857+
ret <vscale x 8 x bfloat> %out
858+
}
859+
838860
;
839861
; FNMAD
840862
;
@@ -1613,6 +1635,7 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>,
16131635
declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
16141636
declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
16151637
declare <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
1638+
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fabs.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
16161639

16171640
declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
16181641
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
@@ -1692,6 +1715,7 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>,
16921715
declare <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
16931716
declare <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
16941717
declare <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
1718+
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fneg.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
16951719

16961720
declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
16971721
declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)

llvm/test/CodeGen/AArch64/sve-merging-unary.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,7 @@ define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x
187187
define <vscale x 2 x bfloat> @fabs_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
188188
; CHECK-LABEL: fabs_nxv2bf16:
189189
; CHECK: // %bb.0:
190-
; CHECK-NEXT: and z1.h, z1.h, #0x7fff
191-
; CHECK-NEXT: mov z0.d, p0/m, z1.d
190+
; CHECK-NEXT: fabs z0.h, p0/m, z1.h
192191
; CHECK-NEXT: ret
193192
%b.op = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %b)
194193
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %b.op, <vscale x 2 x bfloat> %a
@@ -198,8 +197,7 @@ define <vscale x 2 x bfloat> @fabs_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x
198197
define <vscale x 4 x bfloat> @fabs_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
199198
; CHECK-LABEL: fabs_nxv4bf16:
200199
; CHECK: // %bb.0:
201-
; CHECK-NEXT: and z1.h, z1.h, #0x7fff
202-
; CHECK-NEXT: mov z0.s, p0/m, z1.s
200+
; CHECK-NEXT: fabs z0.h, p0/m, z1.h
203201
; CHECK-NEXT: ret
204202
%b.op = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %b)
205203
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %b.op, <vscale x 4 x bfloat> %a
@@ -209,8 +207,7 @@ define <vscale x 4 x bfloat> @fabs_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x
209207
define <vscale x 8 x bfloat> @fabs_nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
210208
; CHECK-LABEL: fabs_nxv8bf16:
211209
; CHECK: // %bb.0:
212-
; CHECK-NEXT: and z1.h, z1.h, #0x7fff
213-
; CHECK-NEXT: mov z0.h, p0/m, z1.h
210+
; CHECK-NEXT: fabs z0.h, p0/m, z1.h
214211
; CHECK-NEXT: ret
215212
%b.op = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %b)
216213
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b.op, <vscale x 8 x bfloat> %a
@@ -545,8 +542,7 @@ define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x
545542
define <vscale x 2 x bfloat> @fneg_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
546543
; CHECK-LABEL: fneg_nxv2bf16:
547544
; CHECK: // %bb.0:
548-
; CHECK-NEXT: eor z1.h, z1.h, #0x8000
549-
; CHECK-NEXT: mov z0.d, p0/m, z1.d
545+
; CHECK-NEXT: fneg z0.h, p0/m, z1.h
550546
; CHECK-NEXT: ret
551547
%b.op = fneg <vscale x 2 x bfloat> %b
552548
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %b.op, <vscale x 2 x bfloat> %a
@@ -556,8 +552,7 @@ define <vscale x 2 x bfloat> @fneg_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x
556552
define <vscale x 4 x bfloat> @fneg_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
557553
; CHECK-LABEL: fneg_nxv4bf16:
558554
; CHECK: // %bb.0:
559-
; CHECK-NEXT: eor z1.h, z1.h, #0x8000
560-
; CHECK-NEXT: mov z0.s, p0/m, z1.s
555+
; CHECK-NEXT: fneg z0.h, p0/m, z1.h
561556
; CHECK-NEXT: ret
562557
%b.op = fneg <vscale x 4 x bfloat> %b
563558
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %b.op, <vscale x 4 x bfloat> %a
@@ -567,8 +562,7 @@ define <vscale x 4 x bfloat> @fneg_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x
567562
define <vscale x 8 x bfloat> @fneg_nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
568563
; CHECK-LABEL: fneg_nxv8bf16:
569564
; CHECK: // %bb.0:
570-
; CHECK-NEXT: eor z1.h, z1.h, #0x8000
571-
; CHECK-NEXT: mov z0.h, p0/m, z1.h
565+
; CHECK-NEXT: fneg z0.h, p0/m, z1.h
572566
; CHECK-NEXT: ret
573567
%b.op = fneg <vscale x 8 x bfloat> %b
574568
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b.op, <vscale x 8 x bfloat> %a

0 commit comments

Comments
 (0)