diff --git a/base/div.jl b/base/div.jl index 9c2187e662ee9..69d3e146deddb 100644 --- a/base/div.jl +++ b/base/div.jl @@ -364,7 +364,66 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer return d + (((x > 0) == (y > 0)) & (d * y != x)) end -# Real -# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division, -# so it is used here as the basis of float div(). -div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y)) +function mw_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} + (x_hi, x_lo) = x + + # "DWDivFP3" AKA "Algorithm 15" from a paper by Joldes, Muller and + # Popescu: https://doi.org/10.1145/3121432 + hi = x_hi / y + π = Base.Math.two_mul(hi, y) + δ_hi = x_hi - first(π) # exact operation + δ_t = δ_hi - last(π) # exact operation + δ = δ_t + x_lo + lo = δ / y + canonicalize2(hi, lo) +end + +div_impl4(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + T(div_impl(widen(x), widen(y), r)) + +div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + round(x / y, r) + +div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} = + round(first(mw_div(x, y)), RoundNearest) + +# Approximately rounded x (with respect to y and r) +frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + add12(x, -rem(x, y, r)) + +div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + div_impl2(frac_round(x, y, r), y) + +div_impl0(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + ifelse( + isfinite(abs(x) + abs(y)), + div_impl1(x, y, r), + div_impl3(x, y, r), # try to avoid overflow + ) + +# Other rounding modes are assumed not to have `rem`. +const RoundingModesWithRem = Union{ + RoundingMode{:Nearest}, + RoundingMode{:Up}, + RoundingMode{:Down}, + RoundingMode{:FromZero}, + RoundingMode{:ToZero}, +} + +div_impl(x::T, y::T, r::RoundingModesWithRem) where {T<:AbstractFloat} = + div_impl0(x, y, r) +div_impl(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + div_impl3(x, y, r) + +# The only way to achieve close-to-perfect results with `Float16` seems +# to be to widen to `Float32`. +div_impl(x::Float16, y::Float16, r::RoundingModesWithRem) = + div_impl4(x, y, r) +div_impl(x::Float16, y::Float16, r::RoundingMode) = + div_impl4(x, y, r) + +function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} + isnan(x) && (return x) + isnan(y) && (return y) + div_impl(x, y, r) +end diff --git a/test/numbers.jl b/test/numbers.jl index efb2702aff1c2..5832d24c69d7d 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -1702,6 +1702,110 @@ end @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0 @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0 end + + @testset "correctness despite possible numerical issues: $Small, $rounding_mode_with_rem" for + Small ∈ (Float16, Float32, Float64), + rounding_mode_with_rem ∈ (RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero), + xy = ( + (4.445, 0.00675), + (2.941, -0.00301), + (-0.1852, 0.0001928), + (4.56, 0.006817), + (-0.178, -0.0001851), + (-6.51e4, -4.06e3), + (0.008255, -1.01e-5), + (-276.0, -0.1678), + (-39.7, -0.02045), + (0.7246, 0.0003948), + (-6.55e4, -29.81), + (-6.55e4, -27.81), + (-6.55e4, -24.25), + (6.55e4, 29.2), + (-6.55e4, -30.14), + (76.8, -0.11804), + (5.252e3, -2.607), + (4.365e4, -22.03), + (229.0, 0.1162), + (1.009, -0.0006504), + (-6.55e4, 29.81), + (6.55e4, -27.08), + (6.55e4, -31.62), + (6.55e4, -21.34), + (6.55e4, -31.27), + (6.307e4, 2.4e4), + (2.804e3, 3.502), + (-5.786e4, 5.005e4), + (-4.97e4, -2.387e4), + (-2.217, -0.001557), + (6.55e4, 26.6), + (6.55e4, 17.06), + (-6.55e4, -22.73), + (-6.55e4, 29.25), + (6.55e4, 29.28), + (1.169, -0.001584), + (20.94, 0.01427), + (0.1521, 0.0001633), + (2.406e4, -11.78), + (0.0766, 4.73e-5), + (-1.3231137e30, 2.4482906e23), + (-4.8544084e-25, 7.407144e-32), + (3.103397e26, 4.4375036e19), + (-4.7189755e-22, -6.6208687e-29), + (1.4102157e10, 1819.6719), + (1.1742472e13, -1.4370354e6), + (1.483496e-18, 9.9707963e-26), + (-5.016058e24, -4.956325e17), + (4.040856e22, 2.7740254e15), + (3.277167e14, -2.6412792e7), + (-1.849337e32, 1.7634905e25), + (-1.625298e-17, 1.0709875e-24), + (1.1791839e-31, 1.0768432e-38), + (-2.3660772e-30, -1.6153781e-37), + (-1.0070932e29, 7.4530127e21), + (-0.00061855844, -4.1053827e-11), + (2.6379836e12, -173271.67), + (9.8551985e-27, -1.48112095e-33), + (1.858432e26, 1.3008654e19), + (4.271524e19, -3.1128782e12), + (9.1970936e29, 1.2572408e23), + (-36199.926, -0.0023891365), + (2.2035416e20, -1.3350574e13), + (-6.933978e-26, -1.06443724e-32), + (-8.4979565e19, -7.333614e12), + (1.0172350506694424e17, 23.315505841986997), + (-1.47773180470849e-22, -4.0998853221425914e-38), + (-4.816178860121429e-125, 1.5705370217600465e-140), + (1.438033388799865e129, 3.8802573321302744e113), + (5.630449277879664e-200, -2.076135363156452e-215), + (-3.29867858971227e241, -8.079378598133822e225), + (-9.287161662516348e12, -0.00261425024965316), + (-4.017546451947597e-21, -6.334812648091802e-37), + (1.3802702942552817e-103, -1.5665085059273602e-119), + (2.2912868470550055e-76, -2.7514392143441017e-92), + (1.7074328973956706e151, -2.0403029505651566e135), + (-9.290503413404926e-267, -1.0856248919388625e-282), + (1.635940091056537e-143, -3.771680017287185e-159), + (1.1031633123212033e-289, 1.4605623613009784e-305), + (-5.984102559690793e-25, 1.3308245167101786e-40), + (-2.89964579258566e-97, -4.34310409713307e-113), + (-4.895423533487687e-224, -5.77559167362022e-240), + (3.2050860779440005e-66, 4.23835027226126e-82), + (-2.046347683435319e-194, 5.5954131552084545e-210), + (-9.223282007712624e-73, 1.0588268730479068e-88), + (-6.996263999884589e-36, 1.848582153492107e-51), + (-2.42983435949937e178, -6.557998479200877e162), + (3.402679467861051e45, -5.068403522082035e29), + (-1.9402396639482086e35, 2.7453450266399908e19), + (7.114570134851155e264, 1.6742521824981587e249), + ) + ab = map(Small, xy) + AB = map(BigFloat, ab) + tested = div(ab..., rounding_mode_with_rem) + correct = div(AB..., rounding_mode_with_rem) + correct_small = Small(correct) + # Only test when the correct result is representable + (correct == correct_small) && @test tested == correct_small + end end @testset "return types" begin for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)