Skip to content

Commit 125e737

Browse files
committed
[X86] Remove LowerFCanonicalize and use generic expansion
1 parent 4be3e95 commit 125e737

File tree

6 files changed

+65
-58
lines changed

6 files changed

+65
-58
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5013,6 +5013,10 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
50135013
return DL.isLittleEndian();
50145014
}
50155015

5016+
/// Return true if the vector legalizer should expand a vector FCANONICALIZE
/// node itself, via expandFCanonicalizeWithStrictFmul (a strict multiplication
/// of the whole vector by 1.0), rather than leaving it to the legalizer's
/// default handling of the node. Targets override this to opt in; the default
/// implementation returns false.
virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
5017+
return false;
5018+
}
5019+
50165020
/// Returns a 0 terminated array of registers that can be safely used as
50175021
/// scratch registers.
50185022
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -5675,6 +5679,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
56755679
/// only the first Count elements of the vector are used.
56765680
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
56775681

5682+
/// This implements llvm.canonicalize.f* by multiplication with 1.0, as
5683+
/// suggested in
5684+
/// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
5685+
/// It uses strict_fp operations even outside a strict_fp context in order
5686+
/// to guarantee that the canonicalization is not optimized away by later
5687+
/// passes. The result chain introduced by that is intentionally ignored
5688+
/// since no ordering requirement is intended here.
5689+
SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
5690+
SelectionDAG &DAG) const;
5691+
56785692
/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
56795693
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
56805694

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
33563356
break;
33573357
}
33583358
case ISD::FCANONICALIZE: {
3359-
// This implements llvm.canonicalize.f* by multiplication with 1.0, as
3360-
// suggested in
3361-
// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
3362-
// It uses strict_fp operations even outside a strict_fp context in order
3363-
// to guarantee that the canonicalization is not optimized away by later
3364-
// passes. The result chain introduced by that is intentionally ignored
3365-
// since no ordering requirement is intended here.
3366-
3367-
// Create strict multiplication by 1.0.
3368-
SDValue Operand = Node->getOperand(0);
3369-
EVT VT = Operand.getValueType();
3370-
SDValue One = DAG.getConstantFP(1.0, dl, VT);
3371-
SDValue Chain = DAG.getEntryNode();
3372-
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
3373-
{Chain, Operand, One});
3374-
3375-
// Propagate existing flags on canonicalize, and additionally set
3376-
// NoFPExcept.
3377-
SDNodeFlags CanonicalizeFlags = Node->getFlags();
3378-
CanonicalizeFlags.setNoFPExcept(true);
3379-
Mul->setFlags(CanonicalizeFlags);
3380-
3381-
Results.push_back(Mul);
3359+
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
3360+
Results.push_back(Result);
33823361
break;
33833362
}
33843363
case ISD::SIGN_EXTEND_INREG: {

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
13091309
return;
13101310
}
13111311
break;
1312+
case ISD::FCANONICALIZE: {
1313+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1314+
if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
1315+
SDLoc dl(Node);
1316+
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
1317+
Results.push_back(Result);
1318+
return;
1319+
}
1320+
}
13121321
}
13131322

13141323
SDValue Unrolled = DAG.UnrollVectorOp(Node);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
1158011580
return Res;
1158111581
}
1158211582

11583+
// Expand the FCANONICALIZE node \p Node into a STRICT_FMUL of its operand by
// the constant 1.0, built at location \p DL, and return the new multiply.
// The multiply has the operand's value type plus a chain result, carries the
// flags of \p Node, and additionally has NoFPExcept set.
SDValue
11584+
TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
11585+
SelectionDAG &DAG) const {
11586+
// Create strict multiplication by 1.0.
11587+
SDValue Operand = Node->getOperand(0);
11588+
EVT VT = Operand.getValueType();
11589+
SDValue One = DAG.getConstantFP(1.0, DL, VT);
11590+
// The strict node needs an input chain; the DAG entry node is used, so the
// multiply is not ordered after any other chained node.
SDValue Chain = DAG.getEntryNode();
11591+
11592+
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
11593+
{Chain, Operand, One});
11594+
11595+
// Propagate existing flags on canonicalize, and additionally set NoFPExcept.
11596+
SDNodeFlags Flags = Node->getFlags();
11597+
Flags.setNoFPExcept(true);
11598+
Mul->setFlags(Flags);
11599+
11600+
// Only the multiply itself is handed back; its output chain is not wired to
// anything here.
return Mul;
11601+
}
11602+
1158311603
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
1158411604
SDLoc dl(Node);
1158511605
SDValue AccOp = Node->getOperand(0);

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
316316
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
317317
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
318318
}
319-
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
320-
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
321319
if (Subtarget.is64Bit()) {
322320
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
323321
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
@@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
348346
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
349347
if (!Subtarget.hasSSE2()) {
350348
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
351-
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
352-
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
353-
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
349+
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
354350
if (Subtarget.is64Bit()) {
355351
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
356352
// Without SSE, i64->f64 goes through memory.
@@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
716712
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
717713
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
718714
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
719-
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
720715
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
721716
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
722717
setOperationAction(ISD::LRINT, MVT::f16, Expand);
@@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
871866
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
872867
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
873868
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
874-
setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
869+
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
875870
if (isTypeLegal(MVT::f16)) {
876871
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
877872
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
934929
if (isTypeLegal(MVT::f80)) {
935930
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
936931
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
937-
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
932+
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
938933
}
939934

940935
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
10701065
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
10711066
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
10721067
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1073-
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
1068+
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);
10741069

10751070
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
10761071
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1077-
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
1072+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);
10781073

10791074
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
10801075
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
11371132
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
11381133

11391134
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1140-
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
1135+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
11411136
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
11421137
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
11431138

@@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14731468
setOperationAction(ISD::FMINIMUM, VT, Custom);
14741469
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
14751470
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
1476-
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
1471+
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
14771472
}
14781473

14791474
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
17411736
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
17421737
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
17431738
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1744-
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
1745-
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
1746-
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
1739+
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
1740+
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
1741+
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);
17471742

17481743
// There is no byte sized k-register load or store without AVX512DQ.
17491744
if (!Subtarget.hasDQI()) {
@@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
18251820
setOperationAction(ISD::FMA, VT, Legal);
18261821
setOperationAction(ISD::STRICT_FMA, VT, Legal);
18271822
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1828-
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
1823+
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
18291824
}
18301825
setOperationAction(ISD::LRINT, MVT::v16f32,
18311826
Subtarget.hasDQI() ? Legal : Custom);
@@ -3318,6 +3313,13 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
33183313
return true;
33193314
}
33203315

3316+
// X86 opts in to expanding vector FCANONICALIZE in the vector legalizer, as a
3317+
// strict multiply of the whole vector by 1.0, so the node is not unrolled.
3318+
bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
3319+
const {
3320+
return true;
3321+
}
3322+
33213323
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
33223324
// If we are using XMM registers in the ABI and the condition of the select is
33233325
// a floating-point compare and we have blendv or conditional move, then it is
@@ -33426,24 +33428,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
3342633428
return Op;
3342733429
}
3342833430

33429-
// Lower FCANONICALIZE to a STRICT_FMUL of the operand by 1.0, chained to the
// DAG entry node.
static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
33430-
SDNode *N = Op.getNode();
33431-
SDValue Operand = N->getOperand(0);
33432-
EVT VT = Operand.getValueType();
33433-
SDLoc dl(N);
33434-
33435-
SDValue One = DAG.getConstantFP(1.0, dl, VT);
33436-
33437-
// TODO: Fix crash for bf16 when generating strict_fmul, as it
33438-
// leads to an error: SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
33439-
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
33440-
// promote this operator's result!
33441-
SDValue Chain = DAG.getEntryNode();
33442-
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
33443-
{Chain, Operand, One});
33444-
return StrictFmul;
33445-
}
33446-
3344733431
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
3344833432
unsigned OpNo) {
3344933433
const APInt Operand(32, OpNo);
@@ -33583,7 +33567,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3358333567
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
3358433568
case ISD::FSHL:
3358533569
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
33586-
case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
3358733570
case ISD::STRICT_SINT_TO_FP:
3358833571
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
3358933572
case ISD::STRICT_UINT_TO_FP:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,6 +1522,8 @@ namespace llvm {
15221522
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
15231523
Type *Ty) const override;
15241524

1525+
bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;
1526+
15251527
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
15261528

15271529
bool convertSelectOfConstantsToMath(EVT VT) const override;

0 commit comments

Comments
 (0)