Skip to content

[X86] Remove LowerFCanonicalize and use generic expansion #147877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5013,6 +5013,10 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
return DL.isLittleEndian();
}

virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
return false;
}

/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
Expand Down Expand Up @@ -5675,6 +5679,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

/// This implements llvm.canonicalize.f* by multiplication with 1.0, as
/// suggested in
/// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
/// It uses strict_fp operations even outside a strict_fp context in order
/// to guarantee that the canonicalization is not optimized away by later
/// passes. The result chain introduced by that is intentionally ignored
/// since no ordering requirement is intended here.
SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
SelectionDAG &DAG) const;

/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;

Expand Down
25 changes: 2 additions & 23 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
// This implements llvm.canonicalize.f* by multiplication with 1.0, as
// suggested in
// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
// It uses strict_fp operations even outside a strict_fp context in order
// to guarantee that the canonicalization is not optimized away by later
// passes. The result chain introduced by that is intentionally ignored
// since no ordering requirement is intended here.

// Create strict multiplication by 1.0.
SDValue Operand = Node->getOperand(0);
EVT VT = Operand.getValueType();
SDValue One = DAG.getConstantFP(1.0, dl, VT);
SDValue Chain = DAG.getEntryNode();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
{Chain, Operand, One});

// Propagate existing flags on canonicalize, and additionally set
// NoFPExcept.
SDNodeFlags CanonicalizeFlags = Node->getFlags();
CanonicalizeFlags.setNoFPExcept(true);
Mul->setFlags(CanonicalizeFlags);

Results.push_back(Mul);
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
Results.push_back(Result);
break;
}
case ISD::SIGN_EXTEND_INREG: {
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
case ISD::FCANONICALIZE: {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't TLI already available?

if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
SDLoc dl(Node);
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
Results.push_back(Result);
return;
}
}
}

SDValue Unrolled = DAG.UnrollVectorOp(Node);
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}

SDValue
TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
SelectionDAG &DAG) const {
// Create strict multiplication by 1.0.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you've lost all the comments when you moved the expansion code to here

SDValue Operand = Node->getOperand(0);
EVT VT = Operand.getValueType();
SDValue One = DAG.getConstantFP(1.0, DL, VT);
SDValue Chain = DAG.getEntryNode();

SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
{Chain, Operand, One});

// Propagate existing flags on canonicalize, and additionally set NoFPExcept.
SDNodeFlags Flags = Node->getFlags();
Flags.setNoFPExcept(true);
Mul->setFlags(Flags);

return Mul;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
Expand Down
53 changes: 18 additions & 35 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
Expand Down Expand Up @@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
Expand Down Expand Up @@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
Expand Down Expand Up @@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
Expand Down Expand Up @@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}

setOperationAction(ISD::SETCC, MVT::f128, Custom);
Expand Down Expand Up @@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);

setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);

setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
Expand Down Expand Up @@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);

setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);

Expand Down Expand Up @@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}

setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
Expand Down Expand Up @@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);

// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
Expand Down Expand Up @@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
Expand Down Expand Up @@ -3318,6 +3313,13 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}

// X86 prefers to defer vector FCANONICALIZE to DAG legalization
// to avoid scalarization during vector legalization.
bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
const {
return true;
}

bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
Expand Down Expand Up @@ -33426,24 +33428,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}

static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
SDValue Operand = N->getOperand(0);
EVT VT = Operand.getValueType();
SDLoc dl(N);

SDValue One = DAG.getConstantFP(1.0, dl, VT);

// TODO: Fix Crash for bf16 when generating strict_fmul as it
// leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
SDValue Chain = DAG.getEntryNode();
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
{Chain, Operand, One});
return StrictFmul;
}

static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
Expand Down Expand Up @@ -33583,7 +33567,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1522,6 +1522,8 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;

bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;

bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

bool convertSelectOfConstantsToMath(EVT VT) const override;
Expand Down
Loading