Skip to content

[X86] Remove LowerFCanonicalize and use generic expansion #147877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5675,6 +5675,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

/// This implements llvm.canonicalize.f* by multiplication with 1.0, as
/// suggested in
/// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
/// It uses strict_fp operations even outside a strict_fp context in order
/// to guarantee that the canonicalization is not optimized away by later
/// passes. The result chain introduced by that is intentionally ignored
/// since no ordering requirement is intended here.
SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
SelectionDAG &DAG) const;

/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;

Expand Down
25 changes: 2 additions & 23 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
// This implements llvm.canonicalize.f* by multiplication with 1.0, as
// suggested in
// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
// It uses strict_fp operations even outside a strict_fp context in order
// to guarantee that the canonicalization is not optimized away by later
// passes. The result chain introduced by that is intentionally ignored
// since no ordering requirement is intended here.

// Create strict multiplication by 1.0.
SDValue Operand = Node->getOperand(0);
EVT VT = Operand.getValueType();
SDValue One = DAG.getConstantFP(1.0, dl, VT);
SDValue Chain = DAG.getEntryNode();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
{Chain, Operand, One});

// Propagate existing flags on canonicalize, and additionally set
// NoFPExcept.
SDNodeFlags CanonicalizeFlags = Node->getFlags();
CanonicalizeFlags.setNoFPExcept(true);
Mul->setFlags(CanonicalizeFlags);

Results.push_back(Mul);
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
Results.push_back(Result);
break;
}
case ISD::SIGN_EXTEND_INREG: {
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
case ISD::FCANONICALIZE: {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't TLI already available?

SDLoc dl(Node);
SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
Results.push_back(Result);
return;
}
}

SDValue Unrolled = DAG.UnrollVectorOp(Node);
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}

/// Expand an FCANONICALIZE node (llvm.canonicalize.f*) into a multiplication
/// of its operand by 1.0, as suggested in
/// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
///
/// A STRICT_FMUL is used even outside a strict_fp context in order to
/// guarantee that the canonicalization is not optimized away by later passes.
/// The chain result this introduces is intentionally ignored, since no
/// ordering requirement is intended here.
///
/// \param Node the node being expanded; operand 0 is the value to
///             canonicalize and its flags are copied onto the new node.
/// \param DL   debug location used for the nodes created here.
/// \param DAG  the SelectionDAG in which the new nodes are created.
/// \returns the newly created STRICT_FMUL.
SDValue
TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
SelectionDAG &DAG) const {
// Create strict multiplication by 1.0.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you've lost all the comments when you moved the expansion code to here

SDValue Operand = Node->getOperand(0);
EVT VT = Operand.getValueType();
SDValue One = DAG.getConstantFP(1.0, DL, VT);
// The strict node needs an input chain; the DAG entry node is used so that
// the multiplication is not ordered against any other chained operation.
SDValue Chain = DAG.getEntryNode();

// The node produces two results: the product (VT) and an output chain
// (MVT::Other).
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
{Chain, Operand, One});

// Propagate existing flags on canonicalize, and additionally set NoFPExcept.
SDNodeFlags Flags = Node->getFlags();
Flags.setNoFPExcept(true);
Mul->setFlags(Flags);

return Mul;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
Expand Down
46 changes: 11 additions & 35 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
Expand Down Expand Up @@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
Expand Down Expand Up @@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
Expand Down Expand Up @@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
Expand Down Expand Up @@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}

setOperationAction(ISD::SETCC, MVT::f128, Custom);
Expand Down Expand Up @@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);

setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);

setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
Expand Down Expand Up @@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);

setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);

Expand Down Expand Up @@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}

setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
Expand Down Expand Up @@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);

// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
Expand Down Expand Up @@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
Expand Down Expand Up @@ -33426,24 +33421,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}

/// Custom lowering for ISD::FCANONICALIZE: rewrites the node as a
/// STRICT_FMUL of its operand by the constant 1.0, chained to the DAG entry
/// node, and returns the resulting multiplication.
static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
  SDNode *CanonNode = Op.getNode();
  SDLoc Loc(CanonNode);

  SDValue Src = CanonNode->getOperand(0);
  EVT SrcVT = Src.getValueType();
  SDValue FPOne = DAG.getConstantFP(1.0, Loc, SrcVT);

  // TODO: Fix the crash for bf16. Emitting strict_fmul for that type fails
  // during type legalization with:
  //   SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
  //   ConstantFP:bf16<APFloat(16256)>, t5
  //   LLVM ERROR: Do not know how to soft promote this operator's result!
  SDValue EntryChain = DAG.getEntryNode();
  return DAG.getNode(ISD::STRICT_FMUL, Loc, {SrcVT, MVT::Other},
                     {EntryChain, Src, FPOne});
}

static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
Expand Down Expand Up @@ -33583,7 +33560,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
Expand Down
Loading