Skip to content

Commit 24ff719

Browse files
committed
DAG: Use fast variants of fast math libcalls
Hexagon currently has an untested global flag to control fast-math variants of libcalls. Add the fast variants as explicit libcall options so that choosing them can be a flag-based lowering decision, and implement that lowering. I have no idea which fast-math flags the Hexagon case requires, so I picked the maximal set of potentially relevant flags, although this can probably be refined per call. Looking in compiler-rt, I'm not sure whether the fast variants are anything more than aliases.
1 parent 711189f commit 24ff719

File tree

4 files changed

+501
-60
lines changed

4 files changed

+501
-60
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
6262

6363
foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
6464
def ADD_#FPTy : RuntimeLibcall;
65+
def FAST_ADD_#FPTy : RuntimeLibcall;
66+
6567
def SUB_#FPTy : RuntimeLibcall;
68+
def FAST_SUB_#FPTy : RuntimeLibcall;
69+
6670
def MUL_#FPTy : RuntimeLibcall;
71+
def FAST_MUL_#FPTy : RuntimeLibcall;
72+
6773
def DIV_#FPTy : RuntimeLibcall;
74+
def FAST_DIV_#FPTy : RuntimeLibcall;
75+
6876
def REM_#FPTy : RuntimeLibcall;
6977
def FMA_#FPTy : RuntimeLibcall;
7078
def POWI_#FPTy : RuntimeLibcall;
79+
7180
def SQRT_#FPTy : RuntimeLibcall;
81+
def FAST_SQRT_#FPTy : RuntimeLibcall;
82+
7283
def CBRT_#FPTy : RuntimeLibcall;
7384
def LOG_#FPTy : RuntimeLibcall;
7485
def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -1412,27 +1423,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
14121423
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
14131424
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
14141425

1415-
// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
14161426
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1417-
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1427+
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
14181428

14191429
def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1420-
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1430+
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
14211431

14221432
def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1423-
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1433+
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
14241434

14251435
def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1426-
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1436+
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
14271437

14281438
def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1429-
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1439+
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
14301440

14311441
def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1432-
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1442+
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
14331443

14341444
// This is the only fast library function for sqrtd.
1435-
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
1445+
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
14361446

14371447
def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
14381448
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 99 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140140
RTLIB::Libcall Call_F128,
141141
RTLIB::Libcall Call_PPCF128,
142142
SmallVectorImpl<SDValue> &Results);
143-
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
144-
RTLIB::Libcall Call_I8,
145-
RTLIB::Libcall Call_I16,
146-
RTLIB::Libcall Call_I32,
147-
RTLIB::Libcall Call_I64,
148-
RTLIB::Libcall Call_I128);
143+
144+
void
145+
ExpandFastFPLibCall(SDNode *Node, bool IsFast,
146+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151+
SmallVectorImpl<SDValue> &Results);
152+
153+
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154+
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155+
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149156
void ExpandArgFPLibCall(SDNode *Node,
150157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2229,6 +2236,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
22292236
ExpandFPLibCall(Node, LC, Results);
22302237
}
22312238

2239+
/// Expand \p Node into a floating-point runtime library call, preferring the
/// fast variant of the libcall when \p IsFast is set.
///
/// Each Call_* argument is a pair whose `first` member is the fast libcall and
/// whose `second` member is the regular libcall for that floating-point type.
/// The libcall is chosen by RTLIB::getFPLibCall from the type of result 0 of
/// \p Node. If \p IsFast is false, or the target reports the selected fast
/// libcall as RTLIB::Unsupported, the regular libcall is used instead. The
/// chosen libcall is then forwarded to ExpandFPLibCall together with
/// \p Results.
void SelectionDAGLegalize::ExpandFastFPLibCall(
2240+
SDNode *Node, bool IsFast,
2241+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2242+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2243+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2244+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2245+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2246+
SmallVectorImpl<SDValue> &Results) {
2247+
2248+
EVT VT = Node->getSimpleValueType(0);
2249+
2250+
// LC is deliberately left unset here: it is assigned in the IsFast branch
// below and is only read afterwards when IsFast is true (the `!IsFast ||`
// test short-circuits), otherwise it is assigned by the fallback.
RTLIB::Libcall LC;
2251+
2252+
// FIXME: Probably should define fast to respect nan/inf and only be
2253+
// approximate functions.
2254+
2255+
// First try the fast variant (the `first` member of each pair).
if (IsFast) {
2256+
LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
2257+
Call_F128.first, Call_PPCF128.first);
2258+
}
2259+
2260+
if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
2261+
// Fall back if we don't have a fast implementation.
2262+
LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
2263+
Call_F80.second, Call_F128.second,
2264+
Call_PPCF128.second);
2265+
}
2266+
2267+
ExpandFPLibCall(Node, LC, Results);
2268+
}
2269+
22322270
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
22332271
RTLIB::Libcall Call_I8,
22342272
RTLIB::Libcall Call_I16,
@@ -4489,6 +4527,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
44894527
return true;
44904528
}
44914529

4530+
/// Return true if we can use the FAST_* variant of a math libcall for \p Node.
4531+
/// FIXME: This is just guessing; we probably should have a unique, specific set
4532+
/// of flags required per libcall.
4533+
static bool canUseFastMathLibcall(const SDNode *Node) {
4534+
// FIXME: Probably should define fast to respect nan/inf and only be
4535+
// approximate functions.
4536+
4537+
// All four flags must be present on the node: approximate-functions,
// no-NaNs, no-infs and no-signed-zeros.
SDNodeFlags Flags = Node->getFlags();
4538+
return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
4539+
Flags.hasNoInfs() && Flags.hasNoSignedZeros();
4540+
}
4541+
44924542
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
44934543
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
44944544
SmallVector<SDValue, 8> Results;
@@ -4609,11 +4659,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
46094659
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
46104660
break;
46114661
case ISD::FSQRT:
4612-
case ISD::STRICT_FSQRT:
4613-
ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4614-
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4615-
RTLIB::SQRT_PPCF128, Results);
4662+
case ISD::STRICT_FSQRT: {
4663+
// FIXME: Probably should define fast to respect nan/inf and only be
4664+
// approximate functions.
4665+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4666+
{RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4667+
{RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4668+
{RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4669+
{RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4670+
{RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4671+
Results);
46164672
break;
4673+
}
46174674
case ISD::FCBRT:
46184675
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
46194676
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4850,11 +4907,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48504907
RTLIB::LLRINT_PPCF128, Results);
48514908
break;
48524909
case ISD::FDIV:
4853-
case ISD::STRICT_FDIV:
4854-
ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4855-
RTLIB::DIV_F80, RTLIB::DIV_F128,
4856-
RTLIB::DIV_PPCF128, Results);
4910+
case ISD::STRICT_FDIV: {
4911+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4912+
{RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4913+
{RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4914+
{RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4915+
{RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4916+
{RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
48574917
break;
4918+
}
48584919
case ISD::FREM:
48594920
case ISD::STRICT_FREM:
48604921
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4868,17 +4929,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48684929
RTLIB::FMA_PPCF128, Results);
48694930
break;
48704931
case ISD::FADD:
4871-
case ISD::STRICT_FADD:
4872-
ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4873-
RTLIB::ADD_F80, RTLIB::ADD_F128,
4874-
RTLIB::ADD_PPCF128, Results);
4932+
case ISD::STRICT_FADD: {
4933+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4934+
{RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4935+
{RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4936+
{RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4937+
{RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4938+
{RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
48754939
break;
4940+
}
48764941
case ISD::FMUL:
4877-
case ISD::STRICT_FMUL:
4878-
ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4879-
RTLIB::MUL_F80, RTLIB::MUL_F128,
4880-
RTLIB::MUL_PPCF128, Results);
4942+
case ISD::STRICT_FMUL: {
4943+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4944+
{RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4945+
{RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4946+
{RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4947+
{RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4948+
{RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
48814949
break;
4950+
}
48824951
case ISD::FP16_TO_FP:
48834952
if (Node->getValueType(0) == MVT::f32) {
48844953
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5051,11 +5120,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
50515120
break;
50525121
}
50535122
case ISD::FSUB:
5054-
case ISD::STRICT_FSUB:
5055-
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5056-
RTLIB::SUB_F80, RTLIB::SUB_F128,
5057-
RTLIB::SUB_PPCF128, Results);
5123+
case ISD::STRICT_FSUB: {
5124+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
5125+
{RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5126+
{RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5127+
{RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5128+
{RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5129+
{RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
50585130
break;
5131+
}
50595132
case ISD::SREM:
50605133
Results.push_back(ExpandIntLibCall(Node, true,
50615134
RTLIB::SREM_I8,

llvm/lib/IR/RuntimeLibcalls.cpp

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ using namespace RTLIB;
1818
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
1919
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
2020

21-
static cl::opt<bool>
22-
HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
23-
cl::desc("Enable Fast Math processing"));
24-
2521
static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
2622
FloatABI::ABIType FloatABIType,
2723
EABI EABIVersion) {
@@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
268264
setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
269265
setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);
270266

271-
const bool FastMath = HexagonEnableFastMathRuntimeCalls;
272-
// This is the only fast library function for sqrtd.
273-
if (FastMath)
274-
setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
275-
276267
// Prefix is: nothing for "slow-math",
277268
// "fast2_" for V5+ fast-math double-precision
278269
// (actually, keep fast-math and fast-math2 separate for now)
279-
if (FastMath) {
280-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
281-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
282-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
283-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
284-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
285-
} else {
286-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
287-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
288-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
289-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
290-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
291-
}
292270

293-
if (FastMath)
294-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
295-
else
296-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
271+
setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
272+
setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
273+
setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
274+
setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
275+
setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
276+
setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
277+
// This is the only fast library function for sqrtd.
278+
setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
279+
280+
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
281+
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
282+
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
283+
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
284+
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
285+
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
297286

298287
setLibcallImpl(
299288
RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,

0 commit comments

Comments
 (0)