@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140
140
RTLIB::Libcall Call_F128,
141
141
RTLIB::Libcall Call_PPCF128,
142
142
SmallVectorImpl<SDValue> &Results);
143
- SDValue ExpandIntLibCall (SDNode *Node, bool isSigned,
144
- RTLIB::Libcall Call_I8,
145
- RTLIB::Libcall Call_I16,
146
- RTLIB::Libcall Call_I32,
147
- RTLIB::Libcall Call_I64,
148
- RTLIB::Libcall Call_I128);
143
+
144
+ void
145
+ ExpandFastFPLibCall (SDNode *Node, bool IsFast,
146
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151
+ SmallVectorImpl<SDValue> &Results);
152
+
153
+ SDValue ExpandIntLibCall (SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155
+ RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149
156
void ExpandArgFPLibCall (SDNode *Node,
150
157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151
158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2228,6 +2235,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
2228
2235
ExpandFPLibCall (Node, LC, Results);
2229
2236
}
2230
2237
2238
+ void SelectionDAGLegalize::ExpandFastFPLibCall (
2239
+ SDNode *Node, bool IsFast,
2240
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2241
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2242
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2243
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2244
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2245
+ SmallVectorImpl<SDValue> &Results) {
2246
+
2247
+ EVT VT = Node->getSimpleValueType (0 );
2248
+
2249
+ RTLIB::Libcall LC;
2250
+
2251
+ // FIXME: Probably should define fast to respect nan/inf and only be
2252
+ // approximate functions.
2253
+
2254
+ if (IsFast) {
2255
+ LC = RTLIB::getFPLibCall (VT, Call_F32.first , Call_F64.first , Call_F80.first ,
2256
+ Call_F128.first , Call_PPCF128.first );
2257
+ }
2258
+
2259
+ if (!IsFast || TLI.getLibcallImpl (LC) == RTLIB::Unsupported) {
2260
+ // Fall back if we don't have a fast implementation.
2261
+ LC = RTLIB::getFPLibCall (VT, Call_F32.second , Call_F64.second ,
2262
+ Call_F80.second , Call_F128.second ,
2263
+ Call_PPCF128.second );
2264
+ }
2265
+
2266
+ ExpandFPLibCall (Node, LC, Results);
2267
+ }
2268
+
2231
2269
SDValue SelectionDAGLegalize::ExpandIntLibCall (SDNode* Node, bool isSigned,
2232
2270
RTLIB::Libcall Call_I8,
2233
2271
RTLIB::Libcall Call_I16,
@@ -4514,6 +4552,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
4514
4552
return true ;
4515
4553
}
4516
4554
4555
+ // / Return if we can use the FAST_* variant of a math libcall for the node.
4556
+ // / FIXME: This is just guessing, we probably should have unique specific sets
4557
+ // / flags required per libcall.
4558
+ static bool canUseFastMathLibcall (const SDNode *Node) {
4559
+ // FIXME: Probably should define fast to respect nan/inf and only be
4560
+ // approximate functions.
4561
+
4562
+ SDNodeFlags Flags = Node->getFlags ();
4563
+ return Flags.hasApproximateFuncs () && Flags.hasNoNaNs () &&
4564
+ Flags.hasNoInfs () && Flags.hasNoSignedZeros ();
4565
+ }
4566
+
4517
4567
void SelectionDAGLegalize::ConvertNodeToLibcall (SDNode *Node) {
4518
4568
LLVM_DEBUG (dbgs () << " Trying to convert node to libcall\n " );
4519
4569
SmallVector<SDValue, 8 > Results;
@@ -4634,11 +4684,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4634
4684
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
4635
4685
break ;
4636
4686
case ISD::FSQRT:
4637
- case ISD::STRICT_FSQRT:
4638
- ExpandFPLibCall (Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4639
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4640
- RTLIB::SQRT_PPCF128, Results);
4687
+ case ISD::STRICT_FSQRT: {
4688
+ // FIXME: Probably should define fast to respect nan/inf and only be
4689
+ // approximate functions.
4690
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4691
+ {RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4692
+ {RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4693
+ {RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4694
+ {RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4695
+ {RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4696
+ Results);
4641
4697
break ;
4698
+ }
4642
4699
case ISD::FCBRT:
4643
4700
ExpandFPLibCall (Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
4644
4701
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4875,11 +4932,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4875
4932
RTLIB::LLRINT_PPCF128, Results);
4876
4933
break ;
4877
4934
case ISD::FDIV:
4878
- case ISD::STRICT_FDIV:
4879
- ExpandFPLibCall (Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4880
- RTLIB::DIV_F80, RTLIB::DIV_F128,
4881
- RTLIB::DIV_PPCF128, Results);
4935
+ case ISD::STRICT_FDIV: {
4936
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4937
+ {RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4938
+ {RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4939
+ {RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4940
+ {RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4941
+ {RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
4882
4942
break ;
4943
+ }
4883
4944
case ISD::FREM:
4884
4945
case ISD::STRICT_FREM:
4885
4946
ExpandFPLibCall (Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4893,17 +4954,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
4893
4954
RTLIB::FMA_PPCF128, Results);
4894
4955
break ;
4895
4956
case ISD::FADD:
4896
- case ISD::STRICT_FADD:
4897
- ExpandFPLibCall (Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4898
- RTLIB::ADD_F80, RTLIB::ADD_F128,
4899
- RTLIB::ADD_PPCF128, Results);
4957
+ case ISD::STRICT_FADD: {
4958
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4959
+ {RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4960
+ {RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4961
+ {RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4962
+ {RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4963
+ {RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
4900
4964
break ;
4965
+ }
4901
4966
case ISD::FMUL:
4902
- case ISD::STRICT_FMUL:
4903
- ExpandFPLibCall (Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4904
- RTLIB::MUL_F80, RTLIB::MUL_F128,
4905
- RTLIB::MUL_PPCF128, Results);
4967
+ case ISD::STRICT_FMUL: {
4968
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
4969
+ {RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4970
+ {RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4971
+ {RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4972
+ {RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4973
+ {RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
4906
4974
break ;
4975
+ }
4907
4976
case ISD::FP16_TO_FP:
4908
4977
if (Node->getValueType (0 ) == MVT::f32 ) {
4909
4978
Results.push_back (ExpandLibCall (RTLIB::FPEXT_F16_F32, Node, false ).first );
@@ -5076,11 +5145,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
5076
5145
break ;
5077
5146
}
5078
5147
case ISD::FSUB:
5079
- case ISD::STRICT_FSUB:
5080
- ExpandFPLibCall (Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5081
- RTLIB::SUB_F80, RTLIB::SUB_F128,
5082
- RTLIB::SUB_PPCF128, Results);
5148
+ case ISD::STRICT_FSUB: {
5149
+ ExpandFastFPLibCall (Node, canUseFastMathLibcall (Node),
5150
+ {RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5151
+ {RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5152
+ {RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5153
+ {RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5154
+ {RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
5083
5155
break ;
5156
+ }
5084
5157
case ISD::SREM:
5085
5158
Results.push_back (ExpandIntLibCall (Node, true ,
5086
5159
RTLIB::SREM_I8,
0 commit comments