From cd4d79622351d2c287e79f4b90b176a5ea9ecd2d Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Fri, 28 Apr 2023 20:37:23 +0200 Subject: [PATCH] Base: fix floating-point div MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Double-word arithmetics are used, except for rounding modes without `rem`, which now get simple fallback implementations (on master their `div` methods fail when called). The idea to fix `Float16` by widening to `Float32` is taken from Simon Byrne's #49637 A script used for assessing the correctness of `div`: ```julia using Random accurate_div(x, y, r::RoundingMode) = div(BigFloat(x), BigFloat(y), r) function count_wrong_floats( div_fun::Fun, r::RoundingMode, ::Type{F}, ::Type{U}, n::Int, m::Int, ) where {Fun <: Function, F, U} count_wrong_huge_quo = 0 count_wrong_friendly_quo = 0 count_total_huge_quo = 0 count_total_friendly_quo = 0 vec_x = zeros(U, m) vec_y = zeros(U, m) for i ∈ 1:n Random.rand!(vec_x) Random.rand!(vec_y) for (x_raw, y_raw) ∈ zip(vec_x, vec_y) x = reinterpret(F, x_raw) y = reinterpret(F, y_raw) (!isfinite(x) | !isfinite(y)) && continue quo_is_huge = (maxintfloat(F) < abs(x / y)) acc_big = accurate_div(x, y, r) acc = F(acc_big) # Skip cases when the result isn't representable, the # correct result is not specified for this case, and # it's not clear what a user would expect either. (acc == acc_big) || continue if quo_is_huge count_total_huge_quo += true else count_total_friendly_quo += true end d = div_fun(x, y, r) is_ok = (d == acc) | (isnan(d) & isnan(acc)) if !is_ok if quo_is_huge count_wrong_huge_quo += true else count_wrong_friendly_quo += true end end end end ( bad_ratio_huge_quotient = count_wrong_huge_quo / count_total_huge_quo, bad_ratio_friendly_quotient = count_wrong_friendly_quo / count_total_friendly_quo, total_huge_quotient_count = count_total_huge_quo, total_friendly_quotient_count = count_total_friendly_quo, ) end const float_types = (Float16, Float32, Float64) const bits_types = (UInt16, UInt32, UInt64) # not supported on master const rounding_modes_other = ( RoundNearestTiesAway, RoundNearestTiesUp ) const rounding_modes = ( RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero ) function experiment(itcnt::Int, itcnt_inner::Int) for (F, U) ∈ zip(float_types, bits_types) println("$F $U") for rm ∈ rounding_modes println(" $rm") flush(stdout) res = count_wrong_floats( div, rm, F, U, itcnt, itcnt_inner ) ch = res.total_huge_quotient_count cf = res.total_friendly_quotient_count rh = res.bad_ratio_huge_quotient rf = res.bad_ratio_friendly_quotient println(" quotients of huge magnitude:") println(" total count: $ch") println(" ratio of incorrect results: $rh") println(" quotients of more normal magnitude:") println(" total count: $cf") println(" ratio of incorrect results: $rf") flush(stdout) end println() flush(stdout) end nothing end experiment(20, 2^20) ``` Results on master: ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 371923 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15566588 ratio of incorrect results: 0.0017671823780522746 RoundingMode{:Up}() quotients of huge magnitude: total count: 375420 ratio of incorrect results: 5.327366682648766e-6 quotients of more normal magnitude: total count: 15567018 ratio of incorrect results: 0.004671543387436181 RoundingMode{:Down}() quotients of huge magnitude: total count: 375158 ratio of incorrect results: 2.665543584303147e-6 quotients of more normal magnitude: total count: 15565611 ratio of incorrect results: 0.004713146178457113 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 376802 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564500 ratio of incorrect results: 0.005449195284140191 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374335 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563355 ratio of incorrect results: 0.003891191841347833 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73547 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12270911 ratio of incorrect results: 0.0003218994905920188 RoundingMode{:Up}() quotients of huge magnitude: total count: 73215 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12272614 ratio of incorrect results: 0.0009024972186039583 RoundingMode{:Down}() quotients of huge magnitude: total count: 73355 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269080 ratio of incorrect results: 0.0009091961255448657 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73961 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273238 ratio of incorrect results: 0.0009261614579624383 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73327 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12268024 ratio of incorrect results: 0.0008781365279363653 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9807 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009917 ratio of incorrect results: 4.677601111797664e-5 RoundingMode{:Up}() quotients of huge magnitude: total count: 10015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11012903 ratio of incorrect results: 0.00013012009639965048 RoundingMode{:Down}() quotients of huge magnitude: total count: 10048 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11011455 ratio of incorrect results: 0.00012886580383791242 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9902 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009214 ratio of incorrect results: 0.00012843787031481085 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9879 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009033 ratio of incorrect results: 0.00013797760439086704 ``` Results after this change (everything is correct): ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 372015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15568185 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 375936 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564509 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 375737 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564872 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 377102 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563927 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374618 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567337 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73567 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12267866 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 73190 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271827 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 73491 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12266797 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73504 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269685 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73552 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269727 ratio of incorrect results: 0.0 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9750 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007512 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 10046 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009576 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 9939 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11014458 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9855 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007247 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9798 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11006320 ratio of incorrect results: 0.0 ``` Results for methods that weren't implemented at all on master (those that miss a `rem` method): ``` Float16 UInt16 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 373333 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564325 ratio of incorrect results: 0.0 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 374334 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567551 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 73722 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271459 ratio of incorrect results: 0.00301610427904294 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 73861 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273032 ratio of incorrect results: 0.003042850373078144 Float64 UInt64 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 9981 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009598 ratio of incorrect results: 0.00044415790658296515 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 9926 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11013978 ratio of incorrect results: 0.00046023335074756825 ``` Fixes #49450 --- base/div.jl | 67 +++++++++++++++++++++++++++++-- test/numbers.jl | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 4 deletions(-) diff --git a/base/div.jl b/base/div.jl index 9c2187e662ee9..69d3e146deddb 100644 --- a/base/div.jl +++ b/base/div.jl @@ -364,7 +364,66 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer return d + (((x > 0) == (y > 0)) & (d * y != x)) end -# Real -# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division, -# so it is used here as the basis of float div(). -div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y)) +function mw_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} + (x_hi, x_lo) = x + + # "DWDivFP3" AKA "Algorithm 15" from a paper by Joldes, Muller and + # Popescu: https://doi.org/10.1145/3121432 + hi = x_hi / y + π = Base.Math.two_mul(hi, y) + δ_hi = x_hi - first(π) # exact operation + δ_t = δ_hi - last(π) # exact operation + δ = δ_t + x_lo + lo = δ / y + canonicalize2(hi, lo) +end + +div_impl4(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + T(div_impl(widen(x), widen(y), r)) + +div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + round(x / y, r) + +div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} = + round(first(mw_div(x, y)), RoundNearest) + +# Approximately rounded x (with respect to y and r) +frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + add12(x, -rem(x, y, r)) + +div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + div_impl2(frac_round(x, y, r), y) + +div_impl0(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + ifelse( + isfinite(abs(x) + abs(y)), + div_impl1(x, y, r), + div_impl3(x, y, r), # try to avoid overflow + ) + +# Other rounding modes are assumed not to have `rem`. +const RoundingModesWithRem = Union{ + RoundingMode{:Nearest}, + RoundingMode{:Up}, + RoundingMode{:Down}, + RoundingMode{:FromZero}, + RoundingMode{:ToZero}, +} + +div_impl(x::T, y::T, r::RoundingModesWithRem) where {T<:AbstractFloat} = + div_impl0(x, y, r) +div_impl(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = + div_impl3(x, y, r) + +# The only way to achieve close-to-perfect results with `Float16` seems +# to be to widen to `Float32`. +div_impl(x::Float16, y::Float16, r::RoundingModesWithRem) = + div_impl4(x, y, r) +div_impl(x::Float16, y::Float16, r::RoundingMode) = + div_impl4(x, y, r) + +function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} + isnan(x) && (return x) + isnan(y) && (return y) + div_impl(x, y, r) +end diff --git a/test/numbers.jl b/test/numbers.jl index efb2702aff1c2..5832d24c69d7d 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -1702,6 +1702,110 @@ end @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0 @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0 end + + @testset "correctness despite possible numerical issues: $Small, $rounding_mode_with_rem" for + Small ∈ (Float16, Float32, Float64), + rounding_mode_with_rem ∈ (RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero), + xy = ( + (4.445, 0.00675), + (2.941, -0.00301), + (-0.1852, 0.0001928), + (4.56, 0.006817), + (-0.178, -0.0001851), + (-6.51e4, -4.06e3), + (0.008255, -1.01e-5), + (-276.0, -0.1678), + (-39.7, -0.02045), + (0.7246, 0.0003948), + (-6.55e4, -29.81), + (-6.55e4, -27.81), + (-6.55e4, -24.25), + (6.55e4, 29.2), + (-6.55e4, -30.14), + (76.8, -0.11804), + (5.252e3, -2.607), + (4.365e4, -22.03), + (229.0, 0.1162), + (1.009, -0.0006504), + (-6.55e4, 29.81), + (6.55e4, -27.08), + (6.55e4, -31.62), + (6.55e4, -21.34), + (6.55e4, -31.27), + (6.307e4, 2.4e4), + (2.804e3, 3.502), + (-5.786e4, 5.005e4), + (-4.97e4, -2.387e4), + (-2.217, -0.001557), + (6.55e4, 26.6), + (6.55e4, 17.06), + (-6.55e4, -22.73), + (-6.55e4, 29.25), + (6.55e4, 29.28), + (1.169, -0.001584), + (20.94, 0.01427), + (0.1521, 0.0001633), + (2.406e4, -11.78), + (0.0766, 4.73e-5), + (-1.3231137e30, 2.4482906e23), + (-4.8544084e-25, 7.407144e-32), + (3.103397e26, 4.4375036e19), + (-4.7189755e-22, -6.6208687e-29), + (1.4102157e10, 1819.6719), + (1.1742472e13, -1.4370354e6), + (1.483496e-18, 9.9707963e-26), + (-5.016058e24, -4.956325e17), + (4.040856e22, 2.7740254e15), + (3.277167e14, -2.6412792e7), + (-1.849337e32, 1.7634905e25), + (-1.625298e-17, 1.0709875e-24), + (1.1791839e-31, 1.0768432e-38), + (-2.3660772e-30, -1.6153781e-37), + (-1.0070932e29, 7.4530127e21), + (-0.00061855844, -4.1053827e-11), + (2.6379836e12, -173271.67), + (9.8551985e-27, -1.48112095e-33), + (1.858432e26, 1.3008654e19), + (4.271524e19, -3.1128782e12), + (9.1970936e29, 1.2572408e23), + (-36199.926, -0.0023891365), + (2.2035416e20, -1.3350574e13), + (-6.933978e-26, -1.06443724e-32), + (-8.4979565e19, -7.333614e12), + (1.0172350506694424e17, 23.315505841986997), + (-1.47773180470849e-22, -4.0998853221425914e-38), + (-4.816178860121429e-125, 1.5705370217600465e-140), + (1.438033388799865e129, 3.8802573321302744e113), + (5.630449277879664e-200, -2.076135363156452e-215), + (-3.29867858971227e241, -8.079378598133822e225), + (-9.287161662516348e12, -0.00261425024965316), + (-4.017546451947597e-21, -6.334812648091802e-37), + (1.3802702942552817e-103, -1.5665085059273602e-119), + (2.2912868470550055e-76, -2.7514392143441017e-92), + (1.7074328973956706e151, -2.0403029505651566e135), + (-9.290503413404926e-267, -1.0856248919388625e-282), + (1.635940091056537e-143, -3.771680017287185e-159), + (1.1031633123212033e-289, 1.4605623613009784e-305), + (-5.984102559690793e-25, 1.3308245167101786e-40), + (-2.89964579258566e-97, -4.34310409713307e-113), + (-4.895423533487687e-224, -5.77559167362022e-240), + (3.2050860779440005e-66, 4.23835027226126e-82), + (-2.046347683435319e-194, 5.5954131552084545e-210), + (-9.223282007712624e-73, 1.0588268730479068e-88), + (-6.996263999884589e-36, 1.848582153492107e-51), + (-2.42983435949937e178, -6.557998479200877e162), + (3.402679467861051e45, -5.068403522082035e29), + (-1.9402396639482086e35, 2.7453450266399908e19), + (7.114570134851155e264, 1.6742521824981587e249), + ) + ab = map(Small, xy) + AB = map(BigFloat, ab) + tested = div(ab..., rounding_mode_with_rem) + correct = div(AB..., rounding_mode_with_rem) + correct_small = Small(correct) + # Only test when the correct result is representable + (correct == correct_small) && @test tested == correct_small + end end @testset "return types" begin for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)