Skip to content

Commit 5d18d57

Browse files
authored
[AMDGPU] Make fneg/fabs/copysign legal for bf16 (llvm#91676)
These are just bit operations, exactly the same as with f16.
1 parent 2d5634a, commit 5d18d57

File tree

4 files changed

+169
-236
lines changed

4 files changed

+169
-236
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -946,14 +946,14 @@ bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
946946

947947
// Packed operations do not have a fabs modifier.
948948
return VT == MVT::f32 || VT == MVT::f64 ||
949-
(Subtarget->has16BitInsts() && VT == MVT::f16);
949+
(Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16));
950950
}
951951

952952
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
953953
assert(VT.isFloatingPoint());
954954
// Report this based on the end legalized type.
955955
VT = VT.getScalarType();
956-
return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16;
956+
return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16;
957957
}
958958

959959
bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -225,10 +225,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
225225
setOperationAction(ISD::SELECT, MVT::bf16, Promote);
226226
AddPromotedToType(ISD::SELECT, MVT::bf16, MVT::i16);
227227

228-
// TODO: Could make these legal
229-
setOperationAction(ISD::FABS, MVT::bf16, Expand);
230-
setOperationAction(ISD::FNEG, MVT::bf16, Expand);
231-
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
228+
setOperationAction(ISD::FABS, MVT::bf16, Legal);
229+
setOperationAction(ISD::FNEG, MVT::bf16, Legal);
230+
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Legal);
232231

233232
// We only need to custom lower because we can't specify an action for bf16
234233
// sources.

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1908,20 +1908,22 @@ def : GCNPat <
19081908
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
19091909
>;
19101910

1911+
foreach fp16vt = [f16, bf16] in {
19111912
def : GCNPat <
1912-
(UniformUnaryFrag<fneg> (f16 SReg_32:$src)),
1913+
(UniformUnaryFrag<fneg> (fp16vt SReg_32:$src)),
19131914
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
19141915
>;
19151916

19161917
def : GCNPat <
1917-
(UniformUnaryFrag<fabs> (f16 SReg_32:$src)),
1918+
(UniformUnaryFrag<fabs> (fp16vt SReg_32:$src)),
19181919
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
19191920
>;
19201921

19211922
def : GCNPat <
1922-
(UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))),
1923+
(UniformUnaryFrag<fneg> (fabs (fp16vt SReg_32:$src))),
19231924
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
19241925
>;
1926+
} // End foreach fp16vt = ...
19251927

19261928
def : GCNPat <
19271929
(UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)),
@@ -2030,20 +2032,22 @@ def : GCNPat <
20302032
(V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
20312033
>;
20322034

2035+
foreach fp16vt = [f16, bf16] in {
20332036
def : GCNPat <
2034-
(fabs (f16 VGPR_32:$src)),
2037+
(fabs (fp16vt VGPR_32:$src)),
20352038
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
20362039
>;
20372040

20382041
def : GCNPat <
2039-
(fneg (f16 VGPR_32:$src)),
2042+
(fneg (fp16vt VGPR_32:$src)),
20402043
(V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
20412044
>;
20422045

20432046
def : GCNPat <
2044-
(fneg (fabs (f16 VGPR_32:$src))),
2047+
(fneg (fabs (fp16vt VGPR_32:$src))),
20452048
(V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
20462049
>;
2050+
} // End foreach fp16vt = ...
20472051

20482052
def : GCNPat <
20492053
(fneg (v2f16 VGPR_32:$src)),

0 commit comments

Comments
 (0)