diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index cd676e1661d62..3ef1803018756 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
 
 foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
   def ADD_#FPTy : RuntimeLibcall;
+  def FAST_ADD_#FPTy : RuntimeLibcall;
+
   def SUB_#FPTy : RuntimeLibcall;
+  def FAST_SUB_#FPTy : RuntimeLibcall;
+
   def MUL_#FPTy : RuntimeLibcall;
+  def FAST_MUL_#FPTy : RuntimeLibcall;
+
   def DIV_#FPTy : RuntimeLibcall;
+  def FAST_DIV_#FPTy : RuntimeLibcall;
+
   def REM_#FPTy : RuntimeLibcall;
   def FMA_#FPTy : RuntimeLibcall;
   def POWI_#FPTy : RuntimeLibcall;
+
   def SQRT_#FPTy : RuntimeLibcall;
+  def FAST_SQRT_#FPTy : RuntimeLibcall;
+
   def CBRT_#FPTy : RuntimeLibcall;
   def LOG_#FPTy : RuntimeLibcall;
   def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -1411,27 +1422,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
 def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
 def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
 
-// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
 def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
-def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
+def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
 
 def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
-def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
+def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
 
 def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
-def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
+def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
 
 def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
-def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
+def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
 
 def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
-def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
+def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
 
 def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
-def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
+def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
 
 // This is the only fast library function for sqrtd.
-def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
+def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
 
 def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
     : RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a48dd0e5fedba..10f49c2b65768 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
                        RTLIB::Libcall Call_F128,
                        RTLIB::Libcall Call_PPCF128,
                        SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
-                           RTLIB::Libcall Call_I8,
-                           RTLIB::Libcall Call_I16,
-                           RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64,
-                           RTLIB::Libcall Call_I128);
+
+  void
+  ExpandFastFPLibCall(SDNode *Node, bool IsFast,
+                      std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
+                      std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
+                      std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
+                      std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
+                      std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
+                      SmallVectorImpl<SDValue> &Results);
+
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
   void ExpandArgFPLibCall(SDNode *Node,
                           RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
                           RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2229,6 +2236,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
   ExpandFPLibCall(Node, LC, Results);
 }
 
+void SelectionDAGLegalize::ExpandFastFPLibCall(
+    SDNode *Node, bool IsFast,
+    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
+    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
+    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
+    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
+    std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
+    SmallVectorImpl<SDValue> &Results) {
+
+  EVT VT = Node->getSimpleValueType(0);
+
+  RTLIB::Libcall LC;
+
+  // FIXME: Probably should define fast to respect nan/inf and only be
+  // approximate functions.
+
+  if (IsFast) {
+    LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
+                             Call_F128.first, Call_PPCF128.first);
+  }
+
+  if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
+    // Fall back if we don't have a fast implementation.
+    LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
+                             Call_F80.second, Call_F128.second,
+                             Call_PPCF128.second);
+  }
+
+  ExpandFPLibCall(Node, LC, Results);
+}
+
 SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
                                                RTLIB::Libcall Call_I8,
                                                RTLIB::Libcall Call_I16,
@@ -4515,6 +4553,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   return true;
 }
 
+/// Return true if we can use the FAST_* variant of a math libcall for the
+/// node. FIXME: This is just guessing, we probably should have unique specific
+/// sets of flags required per libcall.
+static bool canUseFastMathLibcall(const SDNode *Node) {
+  // FIXME: Probably should define fast to respect nan/inf and only be
+  // approximate functions.
+
+  SDNodeFlags Flags = Node->getFlags();
+  return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
+         Flags.hasNoInfs() && Flags.hasNoSignedZeros();
+}
+
 void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
   LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
   SmallVector<SDValue, 8> Results;
@@ -4635,11 +4685,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                         RTLIB::FMAXIMUM_NUM_PPCF128, Results);
     break;
   case ISD::FSQRT:
-  case ISD::STRICT_FSQRT:
-    ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
-                    RTLIB::SQRT_F80, RTLIB::SQRT_F128,
-                    RTLIB::SQRT_PPCF128, Results);
+  case ISD::STRICT_FSQRT: {
+    // FIXME: Probably should define fast to respect nan/inf and only be
+    // approximate functions.
+    ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+                        {RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
+                        {RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
+                        {RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
+                        {RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
+                        {RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
+                        Results);
     break;
+  }
   case ISD::FCBRT:
     ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
                     RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4876,11 +4933,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::LLRINT_PPCF128, Results);
     break;
   case ISD::FDIV:
-  case ISD::STRICT_FDIV:
-    ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
-                    RTLIB::DIV_F80, RTLIB::DIV_F128,
-                    RTLIB::DIV_PPCF128, Results);
+  case ISD::STRICT_FDIV: {
+    ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+                        {RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
+                        {RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
+                        {RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
+                        {RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
+                        {RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
     break;
+  }
   case ISD::FREM:
   case ISD::STRICT_FREM:
     ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4894,17 +4955,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::FMA_PPCF128, Results);
     break;
   case ISD::FADD:
-  case ISD::STRICT_FADD:
-    ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
-                    RTLIB::ADD_F80, RTLIB::ADD_F128,
-                    RTLIB::ADD_PPCF128, Results);
+  case ISD::STRICT_FADD: {
+    ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+                        {RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
+                        {RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
+                        {RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
+                        {RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
+                        {RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
     break;
+  }
   case ISD::FMUL:
-  case ISD::STRICT_FMUL:
-    ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
-                    RTLIB::MUL_F80, RTLIB::MUL_F128,
-                    RTLIB::MUL_PPCF128, Results);
+  case ISD::STRICT_FMUL: {
+    ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+                        {RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
+                        {RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
+                        {RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
+                        {RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
+                        {RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
     break;
+  }
   case ISD::FP16_TO_FP:
     if (Node->getValueType(0) == MVT::f32) {
       Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5077,11 +5146,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     break;
   }
   case ISD::FSUB:
-  case ISD::STRICT_FSUB:
-    ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
-                    RTLIB::SUB_F80, RTLIB::SUB_F128,
-                    RTLIB::SUB_PPCF128, Results);
+  case ISD::STRICT_FSUB: {
+    ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+                        {RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
+                        {RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
+                        {RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
+                        {RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
+                        {RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
     break;
+  }
   case ISD::SREM:
     Results.push_back(ExpandIntLibCall(Node, true,
                                        RTLIB::SREM_I8,
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 64c9415c54d4d..c4fd40f313077 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -18,10 +18,6 @@ using namespace RTLIB;
 #undef GET_INIT_RUNTIME_LIBCALL_NAMES
 #undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
 
-static cl::opt<bool>
-    HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
-                                      cl::desc("Enable Fast Math processing"));
-
 static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
                                FloatABI::ABIType FloatABIType,
                               EABI EABIVersion) {
@@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
     setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
     setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);
 
-    const bool FastMath = HexagonEnableFastMathRuntimeCalls;
-    // This is the only fast library function for sqrtd.
-    if (FastMath)
-      setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
-
     // Prefix is: nothing for "slow-math",
     // "fast2_" for V5+ fast-math double-precision
     // (actually, keep fast-math and fast-math2 separate for now)
-    if (FastMath) {
-      setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
-      setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
-      setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
-      setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
-      setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
-    } else {
-      setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
-      setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
-      setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
-      setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
-      setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
-    }
-    if (FastMath)
-      setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
-    else
-      setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
+    setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
+    setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
+    setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
+    setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
+    setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
+    setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
+    // This is the only fast library function for sqrtd.
+    setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
+
+    setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
+    setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
+    setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
+    setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
+    setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
+    setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
 
     setLibcallImpl(
         RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,
        RTLIB::__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes);
diff --git a/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
new file mode 100644
index 0000000000000..6bc60132d3e6a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+;---------------------------------------------------------------------
+; fast sqrt
+;---------------------------------------------------------------------
+
+define float @fast_sqrt_f32(float %x) {
+; CHECK-LABEL: fast_sqrt_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast2_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call nnan ninf nsz afn float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+define double @fast_sqrt_f64(double %x) {
+; CHECK-LABEL: fast_sqrt_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast2_sqrtdf2
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call nnan ninf nsz afn double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fadd
+;---------------------------------------------------------------------
+
+define float @fast_add_f32(float %x, float %y) {
+; CHECK-LABEL: fast_add_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd nnan ninf nsz afn float %x, %y
+  ret float %result
+}
+
+define double @fast_add_f64(double %x, double %y) {
+; CHECK-LABEL: fast_add_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_adddf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = fadd nnan ninf nsz afn double %x, %y
+  ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fsub
+;---------------------------------------------------------------------
+
+define float @fast_sub_f32(float %x, float %y) {
+; CHECK-LABEL: fast_sub_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfsub(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fsub nnan ninf nsz afn float %x, %y
+  ret float %result
+}
+
+define double @fast_sub_f64(double %x, double %y) {
+; CHECK-LABEL: fast_sub_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_subdf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = fsub nnan ninf nsz afn double %x, %y
+  ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fmul
+;---------------------------------------------------------------------
+
+define float @fast_mul_f32(float %x, float %y) {
+; CHECK-LABEL: fast_mul_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfmpy(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fmul nnan ninf nsz afn float %x, %y
+  ret float %result
+}
+
+define double @fast_mul_f64(double %x, double %y) {
+; CHECK-LABEL: fast_mul_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_muldf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = fmul nnan ninf nsz afn double %x, %y
+  ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fdiv
+;---------------------------------------------------------------------
+
+define float @fast_div_f32(float %x, float %y) {
+; CHECK-LABEL: fast_div_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = sffixupn(r0,r1)
+; CHECK-NEXT: r4,p0 = sfrecipa(r0,r1)
+; CHECK-NEXT: r5 = ##1065353216
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = sffixupd(r0,r1)
+; CHECK-NEXT: r6 = ##1065353216
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 -= sfmpy(r1,r4):lib
+; CHECK-NEXT: r0 = and(r2,##-2147483648)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 += sfmpy(r5,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r2,r4):lib
+; CHECK-NEXT: r6 -= sfmpy(r1,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 -= sfmpy(r1,r0):lib
+; CHECK-NEXT: r4 += sfmpy(r6,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r3,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 -= sfmpy(r0,r1):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r2,r4,p0):scale
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fdiv nnan ninf nsz afn float %x, %y
+  ret float %result
+}
+
+define double @fast_div_f64(double %x, double %y) {
+; CHECK-LABEL: fast_div_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_divdf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = fdiv nnan ninf nsz afn double %x, %y
+  ret double %result
+}
+
+;---------------------------------------------------------------------
+; Negative tests sqrt
+;---------------------------------------------------------------------
+
+; TODO: What flags do we really need here?
+define float @sqrt_f32__afn(float %x) {
+; CHECK-LABEL: sqrt_f32__afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call afn float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+define float @sqrt_f32__afn_ninf(float %x) {
+; CHECK-LABEL: sqrt_f32__afn_ninf:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call afn ninf float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+define float @sqrt_f32__afn_nnan(float %x) {
+; CHECK-LABEL: sqrt_f32__afn_nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call afn nnan float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+define float @sqrt_f32__nnan(float %x) {
+; CHECK-LABEL: sqrt_f32__nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call nnan float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+define float @sqrt_f32_nnan_ninf_afn(float %x) {
+; CHECK-LABEL: sqrt_f32_nnan_ninf_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+  %result = call nnan ninf afn float @llvm.sqrt.f32(float %x)
+  ret float %result
+}
+
+;---------------------------------------------------------------------
+; Negative tests fadd
+;---------------------------------------------------------------------
+
+; TODO: What flags do we really need here?
+define float @fadd_f32_afn(float %x, float %y) {
+; CHECK-LABEL: fadd_f32_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd afn float %x, %y
+  ret float %result
+}
+
+define float @fadd_f32__afn_ninf(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__afn_ninf:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd afn ninf float %x, %y
+  ret float %result
+}
+
+define float @fadd_f32__afn_nnan(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__afn_nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd afn nnan float %x, %y
+  ret float %result
+}
+
+define float @fadd_f32__nnan(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd nnan float %x, %y
+  ret float %result
+}
+
+define float @fadd_f32__nnan_ninf_afn(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__nnan_ninf_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %result = fadd nnan ninf afn float %x, %y
+  ret float %result
+}
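
Note on the selection scheme the patch introduces: ExpandFastFPLibCall prefers the FAST_* entry only when the node's fast-math flags permit it (canUseFastMathLibcall) and the target actually registered a fast implementation; otherwise it falls back to the standard libcall. Below is a minimal standalone C++ sketch of that fast-with-fallback logic; the enum values and the hasImpl lookup are simplified stand-ins for illustration, not the real RTLIB API.

#include <cassert>
#include <utility>

// Toy stand-ins for RTLIB::Libcall entries; the real enum is generated
// from RuntimeLibcalls.td.
enum Libcall { FAST_ADD_F64, ADD_F64, UNSUPPORTED };

// Stand-in for the TLI.getLibcallImpl(LC) == RTLIB::Unsupported check:
// whether the target registered an implementation for this libcall.
static bool hasImpl(Libcall LC) { return LC != UNSUPPORTED; }

// Mirrors ExpandFastFPLibCall: Call is a {fast, fallback} pair and IsFast
// is the result of the canUseFastMathLibcall(Node) flag query.
static Libcall selectLibcall(bool IsFast, std::pair<Libcall, Libcall> Call) {
  if (IsFast && hasImpl(Call.first))
    return Call.first; // e.g. __hexagon_fast_adddf3
  return Call.second;  // e.g. __hexagon_adddf3
}

int main() {
  // fadd double carrying nnan ninf nsz afn -> fast variant.
  assert(selectLibcall(true, {FAST_ADD_F64, ADD_F64}) == FAST_ADD_F64);
  // Plain fadd double -> standard variant.
  assert(selectLibcall(false, {FAST_ADD_F64, ADD_F64}) == ADD_F64);
}

Because an unregistered FAST_* entry simply falls through to the standard call, targets other than Hexagon need no changes: they register no fast implementations and keep using the ordinary routines.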