@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140
140
RTLIB::Libcall Call_F128,
141
141
RTLIB::Libcall Call_PPCF128,
142
142
SmallVectorImpl<SDValue> &Results);
143
- SDValue ExpandIntLibCall (SDNode *Node, bool isSigned,
144
- RTLIB::Libcall Call_I8,
145
- RTLIB::Libcall Call_I16,
146
- RTLIB::Libcall Call_I32,
147
- RTLIB::Libcall Call_I64,
148
- RTLIB::Libcall Call_I128);
143
+
144
+ void
145
+ ExpandFastFPLibCall (SDNode *Node, bool IsFast,
146
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151
+ SmallVectorImpl<SDValue> &Results);
152
+
153
+ SDValue ExpandIntLibCall (SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155
+ RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149
156
void ExpandArgFPLibCall (SDNode *Node,
150
157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151
158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2229,6 +2236,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
2229
2236
ExpandFPLibCall (Node, LC, Results);
2230
2237
}
2231
2238
2239
+ void SelectionDAGLegalize::ExpandFastFPLibCall (
2240
+ SDNode *Node, bool IsFast,
2241
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2242
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2243
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2244
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2245
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2246
+ SmallVectorImpl<SDValue> &Results) {
2247
+
2248
+ EVT VT = Node->getSimpleValueType (0 );
2249
+
2250
+ RTLIB::Libcall LC;
2251
+
2252
+ // FIXME: Probably should define fast to respect nan/inf and only be
2253
+ // approximate functions.
2254
+
2255
+ if (IsFast) {
2256
+ LC = RTLIB::getFPLibCall (VT, Call_F32.first , Call_F64.first , Call_F80.first ,
2257
+ Call_F128.first , Call_PPCF128.first );
2258
+ }
2259
+
2260
+ if (!IsFast || TLI.getLibcallImpl (LC) == RTLIB::Unsupported) {
2261
+ // Fall back if we don't have a fast implementation.
2262
+ LC = RTLIB::getFPLibCall (VT, Call_F32.second , Call_F64.second ,
2263
+ Call_F80.second , Call_F128.second ,
2264
+ Call_PPCF128.second );
2265
+ }
2266
+
2267
+ ExpandFPLibCall (Node, LC, Results);
2268
+ }
2269
+
2232
2270
SDValue SelectionDAGLegalize::ExpandIntLibCall (SDNode* Node, bool isSigned,
2233
2271
RTLIB::Libcall Call_I8,
2234
2272
RTLIB::Libcall Call_I16,
@@ -4489,6 +4527,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
4489
4527
return true ;
4490
4528
}
4491
4529
4530
+ // / Return if we can use the FAST_* variant of a math libcall for the node.
4531
+ // / FIXME: This is just guessing, we probably should have unique specific sets
4532
+ // / flags required per libcall.
4533
+ static bool canUseFastMathLibcall (const SDNode *Node) {
4534
+ // FIXME: Probably should define fast to respect nan/inf and only be
4535
+ // approximate functions.
4536
+
4537
+ SDNodeFlags Flags = Node->getFlags ();
4538
+ return Flags.hasApproximateFuncs () && Flags.hasNoNaNs () &&
4539
+ Flags.hasNoInfs () && Flags.hasNoSignedZeros ();
4540
+ }
4541
+
4492
4542
void SelectionDAGLegalize::ConvertNodeToLibcall (SDNode *Node) {
4493
4543
LLVM_DEBUG (dbgs () << " Trying to convert node to libcall\n " );
4494
4544
SmallVector<SDValue, 8 > Results;
@@ -4609,11 +4659,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4609
4659
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
4610
4660
break ;
4611
4661
case ISD::FSQRT:
4612
- case ISD::STRICT_FSQRT:
4613
- ExpandFPLibCall (Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4614
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4615
- RTLIB::SQRT_PPCF128, Results);
4662
+ case ISD::STRICT_FSQRT: {
4663
+ // FIXME: Probably should define fast to respect nan/inf and only be
4664
+ // approximate functions.
4665
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4666
+ {RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4667
+ {RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4668
+ {RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4669
+ {RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4670
+ {RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4671
+ Results);
4616
4672
break ;
4673
+ }
4617
4674
case ISD::FCBRT:
4618
4675
ExpandFPLibCall (Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
4619
4676
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4850,11 +4907,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4850
4907
RTLIB::LLRINT_PPCF128, Results);
4851
4908
break ;
4852
4909
case ISD::FDIV:
4853
- case ISD::STRICT_FDIV:
4854
- ExpandFPLibCall (Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4855
- RTLIB::DIV_F80, RTLIB::DIV_F128,
4856
- RTLIB::DIV_PPCF128, Results);
4910
+ case ISD::STRICT_FDIV: {
4911
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4912
+ {RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4913
+ {RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4914
+ {RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4915
+ {RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4916
+ {RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
4857
4917
break ;
4918
+ }
4858
4919
case ISD::FREM:
4859
4920
case ISD::STRICT_FREM:
4860
4921
ExpandFPLibCall (Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4868,17 +4929,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4868
4929
RTLIB::FMA_PPCF128, Results);
4869
4930
break ;
4870
4931
case ISD::FADD:
4871
- case ISD::STRICT_FADD:
4872
- ExpandFPLibCall (Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4873
- RTLIB::ADD_F80, RTLIB::ADD_F128,
4874
- RTLIB::ADD_PPCF128, Results);
4932
+ case ISD::STRICT_FADD: {
4933
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4934
+ {RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4935
+ {RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4936
+ {RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4937
+ {RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4938
+ {RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
4875
4939
break ;
4940
+ }
4876
4941
case ISD::FMUL:
4877
- case ISD::STRICT_FMUL:
4878
- ExpandFPLibCall (Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4879
- RTLIB::MUL_F80, RTLIB::MUL_F128,
4880
- RTLIB::MUL_PPCF128, Results);
4942
+ case ISD::STRICT_FMUL: {
4943
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4944
+ {RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4945
+ {RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4946
+ {RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4947
+ {RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4948
+ {RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
4881
4949
break ;
4950
+ }
4882
4951
case ISD::FP16_TO_FP:
4883
4952
if (Node->getValueType (0 ) == MVT::f32 ) {
4884
4953
Results.push_back (ExpandLibCall (RTLIB::FPEXT_F16_F32, Node, false ).first );
@@ -5051,11 +5120,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
5051
5120
break ;
5052
5121
}
5053
5122
case ISD::FSUB:
5054
- case ISD::STRICT_FSUB:
5055
- ExpandFPLibCall (Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5056
- RTLIB::SUB_F80, RTLIB::SUB_F128,
5057
- RTLIB::SUB_PPCF128, Results);
5123
+ case ISD::STRICT_FSUB: {
5124
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
5125
+ {RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5126
+ {RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5127
+ {RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5128
+ {RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5129
+ {RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
5058
5130
break ;
5131
+ }
5059
5132
case ISD::SREM:
5060
5133
Results.push_back (ExpandIntLibCall (Node, true ,
5061
5134
RTLIB::SREM_I8,
0 commit comments