-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[X86] Remove LowerFCanonicalize and use generic expansion #147877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[X86] Remove LowerFCanonicalize and use generic expansion #147877
Conversation
@llvm/pr-subscribers-backend-x86 Author: woruyu (woruyu) Changes: Summary: This PR resolves #143862. Remove LowerFCanonicalize for x86 to use generic expansion. Full diff: https://github.com/llvm/llvm-project/pull/147877.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fa46d296bf533..7d884299a3ba8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5013,6 +5013,10 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
return DL.isLittleEndian();
}
+ virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
+ return false;
+ }
+
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -5675,6 +5679,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
+ /// This implements llvm.canonicalize.f* by multiplication with 1.0, as
+ /// suggested in
+ /// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
+ /// It uses strict_fp operations even outside a strict_fp context in order
+ /// to guarantee that the canonicalization is not optimized away by later
+ /// passes. The result chain introduced by that is intentionally ignored
+ /// since no ordering requirement is intended here.
+ SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const;
+
/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528136a55f14a..9877015c96269 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
- // This implements llvm.canonicalize.f* by multiplication with 1.0, as
- // suggested in
- // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
- // It uses strict_fp operations even outside a strict_fp context in order
- // to guarantee that the canonicalization is not optimized away by later
- // passes. The result chain introduced by that is intentionally ignored
- // since no ordering requirement is intended here.
-
- // Create strict multiplication by 1.0.
- SDValue Operand = Node->getOperand(0);
- EVT VT = Operand.getValueType();
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
- SDValue Chain = DAG.getEntryNode();
- SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
-
- // Propagate existing flags on canonicalize, and additionally set
- // NoFPExcept.
- SDNodeFlags CanonicalizeFlags = Node->getFlags();
- CanonicalizeFlags.setNoFPExcept(true);
- Mul->setFlags(CanonicalizeFlags);
-
- Results.push_back(Mul);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
break;
}
case ISD::SIGN_EXTEND_INREG: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f908a66128ec8..fccd1cb61b7d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1309,6 +1309,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FCANONICALIZE: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
+ SDLoc dl(Node);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
+ return;
+ }
+ }
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b90c80002151f..47783e27647e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}
+SDValue
+TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const {
+ // Create strict multiplication by 1.0.
+ SDValue Operand = Node->getOperand(0);
+ EVT VT = Operand.getValueType();
+ SDValue One = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Chain = DAG.getEntryNode();
+
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
+ {Chain, Operand, One});
+
+ // Propagate existing flags on canonicalize, and additionally set NoFPExcept.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ Mul->setFlags(Flags);
+
+ return Mul;
+}
+
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 347ba1262b66b..07457095cdf6c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
@@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
@@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
@@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
@@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
@@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
@@ -3318,6 +3313,13 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+// X86 prefers to defer vector FCANONICALIZE to DAG legalization
+// to avoid scalarization during vector legalization.
+bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
+ const {
+ return true;
+}
+
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
@@ -33426,24 +33428,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
-static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
- SDNode *N = Op.getNode();
- SDValue Operand = N->getOperand(0);
- EVT VT = Operand.getValueType();
- SDLoc dl(N);
-
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
-
- // TODO: Fix Crash for bf16 when generating strict_fmul as it
- // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
- // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
- // promote this operator's result!
- SDValue Chain = DAG.getEntryNode();
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
- return StrictFmul;
-}
-
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
@@ -33583,7 +33567,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
- case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..d9c72b27b46fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1522,6 +1522,8 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;
+
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override;
|
@llvm/pr-subscribers-llvm-selectiondag Author: woruyu (woruyu) Changes: Summary: This PR resolves #143862. Remove LowerFCanonicalize for x86 to use generic expansion. Full diff: https://github.com/llvm/llvm-project/pull/147877.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fa46d296bf533..7d884299a3ba8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5013,6 +5013,10 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
return DL.isLittleEndian();
}
+ virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
+ return false;
+ }
+
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -5675,6 +5679,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
+ /// This implements llvm.canonicalize.f* by multiplication with 1.0, as
+ /// suggested in
+ /// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
+ /// It uses strict_fp operations even outside a strict_fp context in order
+ /// to guarantee that the canonicalization is not optimized away by later
+ /// passes. The result chain introduced by that is intentionally ignored
+ /// since no ordering requirement is intended here.
+ SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const;
+
/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528136a55f14a..9877015c96269 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
- // This implements llvm.canonicalize.f* by multiplication with 1.0, as
- // suggested in
- // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
- // It uses strict_fp operations even outside a strict_fp context in order
- // to guarantee that the canonicalization is not optimized away by later
- // passes. The result chain introduced by that is intentionally ignored
- // since no ordering requirement is intended here.
-
- // Create strict multiplication by 1.0.
- SDValue Operand = Node->getOperand(0);
- EVT VT = Operand.getValueType();
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
- SDValue Chain = DAG.getEntryNode();
- SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
-
- // Propagate existing flags on canonicalize, and additionally set
- // NoFPExcept.
- SDNodeFlags CanonicalizeFlags = Node->getFlags();
- CanonicalizeFlags.setNoFPExcept(true);
- Mul->setFlags(CanonicalizeFlags);
-
- Results.push_back(Mul);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
break;
}
case ISD::SIGN_EXTEND_INREG: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f908a66128ec8..fccd1cb61b7d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1309,6 +1309,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FCANONICALIZE: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
+ SDLoc dl(Node);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
+ return;
+ }
+ }
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b90c80002151f..47783e27647e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}
+SDValue
+TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const {
+ // Create strict multiplication by 1.0.
+ SDValue Operand = Node->getOperand(0);
+ EVT VT = Operand.getValueType();
+ SDValue One = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Chain = DAG.getEntryNode();
+
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
+ {Chain, Operand, One});
+
+ // Propagate existing flags on canonicalize, and additionally set NoFPExcept.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ Mul->setFlags(Flags);
+
+ return Mul;
+}
+
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 347ba1262b66b..07457095cdf6c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
@@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
@@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
@@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
@@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
@@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
@@ -3318,6 +3313,13 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+// X86 prefers to defer vector FCANONICALIZE to DAG legalization
+// to avoid scalarization during vector legalization.
+bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
+ const {
+ return true;
+}
+
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
@@ -33426,24 +33428,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
-static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
- SDNode *N = Op.getNode();
- SDValue Operand = N->getOperand(0);
- EVT VT = Operand.getValueType();
- SDLoc dl(N);
-
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
-
- // TODO: Fix Crash for bf16 when generating strict_fmul as it
- // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
- // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
- // promote this operator's result!
- SDValue Chain = DAG.getEntryNode();
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
- return StrictFmul;
-}
-
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
@@ -33583,7 +33567,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
- case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..d9c72b27b46fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1522,6 +1522,8 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;
+
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override;
|
By the way, @wzssyqa. Currently, MIPS is the only backend that provides a custom lowering for FCANONICALIZE on f32 and f64. I'm not sure if this behavior should be updated or unified with the generic STRICT_FMUL-based expansion. Just FYI. |
@RKSimon This PR should now be ready for review — welcome to take a look! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do not add a new TLI hook for this
The hook is used to distinguish between X86 and (AMDGPU, SystemZ) behavior for vector FCANONICALIZE. |
You should not be trying to distinguish these, and this problem has nothing to do with canonicalize and everything to do with how much of a mess the vector legalization logic and process is. The legalizer has a bias towards just scalarizing everything, so the AMDGPU workaround is to just custom lower all the vectors and manually split them for the cases where there are legal vector types. Dealing with this is beyond the scope of this patch, just remove what's there and take whatever regression happens. |
I've removed the target hook as suggested.
I’ll analyze the failures in more detail and post the results shortly. |
@@ -1309,6 +1309,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { | |||
return; | |||
} | |||
break; | |||
case ISD::FCANONICALIZE: { | |||
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isn't TLI already available?
SDValue | ||
TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL, | ||
SelectionDAG &DAG) const { | ||
// Create strict multiplication by 1.0. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you've lost all the comments when you moved the expansion code to here
Summary
This PR resolves #143862
Remove LowerFCanonicalize for x86 to use generic expansion.