Skip to content

Commit 83257ed

Browse files
committed
DAG: Use fast variants of fast math libcalls
Hexagon currently has an untested global flag to control the fast-math variants of libcalls. Add the fast variants as explicit libcall options so that selecting them becomes a lowering decision based on the node's fast-math flags, and implement that lowering. I have no idea which fast-math flags the Hexagon case requires, so I picked the largest potentially relevant set of flags (afn, nnan, ninf and nsz), although this can probably be refined per call. Looking in compiler-rt, I'm not sure whether the fast variants are anything more than aliases.
1 parent 469114e commit 83257ed

File tree

4 files changed

+501
-60
lines changed

4 files changed

+501
-60
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
6262

6363
foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
6464
def ADD_#FPTy : RuntimeLibcall;
65+
def FAST_ADD_#FPTy : RuntimeLibcall;
66+
6567
def SUB_#FPTy : RuntimeLibcall;
68+
def FAST_SUB_#FPTy : RuntimeLibcall;
69+
6670
def MUL_#FPTy : RuntimeLibcall;
71+
def FAST_MUL_#FPTy : RuntimeLibcall;
72+
6773
def DIV_#FPTy : RuntimeLibcall;
74+
def FAST_DIV_#FPTy : RuntimeLibcall;
75+
6876
def REM_#FPTy : RuntimeLibcall;
6977
def FMA_#FPTy : RuntimeLibcall;
7078
def POWI_#FPTy : RuntimeLibcall;
79+
7180
def SQRT_#FPTy : RuntimeLibcall;
81+
def FAST_SQRT_#FPTy : RuntimeLibcall;
82+
7283
def CBRT_#FPTy : RuntimeLibcall;
7384
def LOG_#FPTy : RuntimeLibcall;
7485
def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -1411,27 +1422,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
14111422
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
14121423
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
14131424

1414-
// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
14151425
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1416-
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1426+
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
14171427

14181428
def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1419-
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1429+
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
14201430

14211431
def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1422-
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1432+
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
14231433

14241434
def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1425-
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1435+
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
14261436

14271437
def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1428-
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1438+
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
14291439

14301440
def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1431-
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1441+
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
14321442

14331443
// This is the only fast library function for sqrtd.
1434-
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
1444+
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
14351445

14361446
def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
14371447
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 99 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140140
RTLIB::Libcall Call_F128,
141141
RTLIB::Libcall Call_PPCF128,
142142
SmallVectorImpl<SDValue> &Results);
143-
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
144-
RTLIB::Libcall Call_I8,
145-
RTLIB::Libcall Call_I16,
146-
RTLIB::Libcall Call_I32,
147-
RTLIB::Libcall Call_I64,
148-
RTLIB::Libcall Call_I128);
143+
144+
/// Expand \p Node into a floating-point runtime library call, choosing
/// between a fast and a regular libcall. Each Call_* pair is ordered
/// {fast variant, regular variant} for the corresponding FP type. \p IsFast
/// requests the fast variant; the regular variant is used when \p IsFast is
/// false or the fast libcall has no implementation. \p Results is forwarded
/// to ExpandFPLibCall.
void
145+
ExpandFastFPLibCall(SDNode *Node, bool IsFast,
146+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151+
SmallVectorImpl<SDValue> &Results);
152+
153+
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154+
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155+
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149156
void ExpandArgFPLibCall(SDNode *Node,
150157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2229,6 +2236,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
22292236
ExpandFPLibCall(Node, LC, Results);
22302237
}
22312238

2239+
/// Pick a libcall for \p Node from the {fast, regular} pairs and expand it.
///
/// For every FP type the pair's `first` member is the FAST_* libcall and the
/// `second` member is the regular one. The fast libcall is used only when
/// \p IsFast is set and TLI reports an implementation for it; otherwise the
/// regular libcall is used. The chosen libcall, \p Node and \p Results are
/// then handed to ExpandFPLibCall.
void SelectionDAGLegalize::ExpandFastFPLibCall(
2240+
SDNode *Node, bool IsFast,
2241+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2242+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2243+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2244+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2245+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2246+
SmallVectorImpl<SDValue> &Results) {
2247+
2248+
// The libcall is selected by the type of the node's first result.
EVT VT = Node->getSimpleValueType(0);
2249+
2250+
// Deliberately not initialized here: one of the two branches below always
// assigns it before it is read.
RTLIB::Libcall LC;
2251+
2252+
// FIXME: Probably should define fast to respect nan/inf and only be
2253+
// approximate functions.
2254+
2255+
// First try the fast variant (the `first` member of each pair).
if (IsFast) {
2256+
LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
2257+
Call_F128.first, Call_PPCF128.first);
2258+
}
2259+
2260+
// `!IsFast ||` short-circuits, so LC is only inspected when the branch above
// has assigned it.
if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
2261+
// Fall back if we don't have a fast implementation.
2262+
LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
2263+
Call_F80.second, Call_F128.second,
2264+
Call_PPCF128.second);
2265+
}
2266+
2267+
ExpandFPLibCall(Node, LC, Results);
2268+
}
2269+
22322270
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
22332271
RTLIB::Libcall Call_I8,
22342272
RTLIB::Libcall Call_I16,
@@ -4515,6 +4553,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
45154553
return true;
45164554
}
45174555

4556+
/// Return true if we can use the FAST_* variant of a math libcall for the node.
4557+
/// FIXME: This is just guessing; we probably should have a specific set of
4558+
/// flags required per libcall.
4559+
static bool canUseFastMathLibcall(const SDNode *Node) {
4560+
// FIXME: Probably should define fast to respect nan/inf and only be
4561+
// approximate functions.
4562+
4563+
// All four flags must be present on the node: approximate functions, no
// NaNs, no infinities and no signed zeros.
SDNodeFlags Flags = Node->getFlags();
4564+
return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
4565+
Flags.hasNoInfs() && Flags.hasNoSignedZeros();
4566+
}
4567+
45184568
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
45194569
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
45204570
SmallVector<SDValue, 8> Results;
@@ -4635,11 +4685,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
46354685
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
46364686
break;
46374687
case ISD::FSQRT:
4638-
case ISD::STRICT_FSQRT:
4639-
ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4640-
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4641-
RTLIB::SQRT_PPCF128, Results);
4688+
case ISD::STRICT_FSQRT: {
4689+
// FIXME: Probably should define fast to respect nan/inf and only be
4690+
// approximate functions.
4691+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4692+
{RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4693+
{RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4694+
{RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4695+
{RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4696+
{RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4697+
Results);
46424698
break;
4699+
}
46434700
case ISD::FCBRT:
46444701
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
46454702
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4876,11 +4933,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48764933
RTLIB::LLRINT_PPCF128, Results);
48774934
break;
48784935
case ISD::FDIV:
4879-
case ISD::STRICT_FDIV:
4880-
ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4881-
RTLIB::DIV_F80, RTLIB::DIV_F128,
4882-
RTLIB::DIV_PPCF128, Results);
4936+
case ISD::STRICT_FDIV: {
4937+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4938+
{RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4939+
{RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4940+
{RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4941+
{RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4942+
{RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
48834943
break;
4944+
}
48844945
case ISD::FREM:
48854946
case ISD::STRICT_FREM:
48864947
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4894,17 +4955,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48944955
RTLIB::FMA_PPCF128, Results);
48954956
break;
48964957
case ISD::FADD:
4897-
case ISD::STRICT_FADD:
4898-
ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4899-
RTLIB::ADD_F80, RTLIB::ADD_F128,
4900-
RTLIB::ADD_PPCF128, Results);
4958+
case ISD::STRICT_FADD: {
4959+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4960+
{RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4961+
{RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4962+
{RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4963+
{RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4964+
{RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
49014965
break;
4966+
}
49024967
case ISD::FMUL:
4903-
case ISD::STRICT_FMUL:
4904-
ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4905-
RTLIB::MUL_F80, RTLIB::MUL_F128,
4906-
RTLIB::MUL_PPCF128, Results);
4968+
case ISD::STRICT_FMUL: {
4969+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4970+
{RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4971+
{RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4972+
{RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4973+
{RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4974+
{RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
49074975
break;
4976+
}
49084977
case ISD::FP16_TO_FP:
49094978
if (Node->getValueType(0) == MVT::f32) {
49104979
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5077,11 +5146,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
50775146
break;
50785147
}
50795148
case ISD::FSUB:
5080-
case ISD::STRICT_FSUB:
5081-
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5082-
RTLIB::SUB_F80, RTLIB::SUB_F128,
5083-
RTLIB::SUB_PPCF128, Results);
5149+
case ISD::STRICT_FSUB: {
5150+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
5151+
{RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5152+
{RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5153+
{RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5154+
{RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5155+
{RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
50845156
break;
5157+
}
50855158
case ISD::SREM:
50865159
Results.push_back(ExpandIntLibCall(Node, true,
50875160
RTLIB::SREM_I8,

llvm/lib/IR/RuntimeLibcalls.cpp

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ using namespace RTLIB;
1818
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
1919
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
2020

21-
static cl::opt<bool>
22-
HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
23-
cl::desc("Enable Fast Math processing"));
24-
2521
static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
2622
FloatABI::ABIType FloatABIType,
2723
EABI EABIVersion) {
@@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
268264
setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
269265
setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);
270266

271-
const bool FastMath = HexagonEnableFastMathRuntimeCalls;
272-
// This is the only fast library function for sqrtd.
273-
if (FastMath)
274-
setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
275-
276267
// Prefix is: nothing for "slow-math",
277268
// "fast2_" for V5+ fast-math double-precision
278269
// (actually, keep fast-math and fast-math2 separate for now)
279-
if (FastMath) {
280-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
281-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
282-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
283-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
284-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
285-
} else {
286-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
287-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
288-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
289-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
290-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
291-
}
292270

293-
if (FastMath)
294-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
295-
else
296-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
271+
setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
272+
setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
273+
setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
274+
setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
275+
setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
276+
setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
277+
// This is the only fast library function for sqrtd.
278+
setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
279+
280+
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
281+
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
282+
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
283+
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
284+
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
285+
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
297286

298287
setLibcallImpl(
299288
RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,

0 commit comments

Comments
 (0)