Base: fix floating-point div

nsajko · nsajko · commit bf6357b3e35f · 2023-05-05T20:12:36.000+02:00
Double-word arithmetics are used, except for rounding modes without `rem`, which now get simple fallback implementations (on master their `div` methods fail when called). The idea to fix `Float16` by widening to `Float32` is taken from Simon Byrne's JuliaLang#49637 A script used for assessing the correctness of `div`: ```julia using Random accurate_div(x, y, r::RoundingMode) = div(BigFloat(x), BigFloat(y), r) function count_wrong_floats( div_fun::Fun, r::RoundingMode, ::Type{F}, ::Type{U}, n::Int, m::Int, ) where {Fun <: Function, F, U} count_wrong_huge_quo = 0 count_wrong_friendly_quo = 0 count_total_huge_quo = 0 count_total_friendly_quo = 0 vec_x = zeros(U, m) vec_y = zeros(U, m) for i ∈ 1:n Random.rand!(vec_x) Random.rand!(vec_y) for (x_raw, y_raw) ∈ zip(vec_x, vec_y) x = reinterpret(F, x_raw) y = reinterpret(F, y_raw) (!isfinite(x) | !isfinite(y)) && continue quo_is_huge = (maxintfloat(F) < abs(x / y)) acc_big = accurate_div(x, y, r) acc = F(acc_big) # Skip cases when the result isn't representable, the # correct result is not specified for this case, and # it's not clear what a user would expect either. (acc == acc_big) || continue if quo_is_huge count_total_huge_quo += true else count_total_friendly_quo += true end d = div_fun(x, y, r) is_ok = (d == acc) | (isnan(d) & isnan(acc)) if !is_ok if quo_is_huge count_wrong_huge_quo += true else count_wrong_friendly_quo += true end end end end ( bad_ratio_huge_quotient = count_wrong_huge_quo / count_total_huge_quo, bad_ratio_friendly_quotient = count_wrong_friendly_quo / count_total_friendly_quo, total_huge_quotient_count = count_total_huge_quo, total_friendly_quotient_count = count_total_friendly_quo, ) end const float_types = (Float16, Float32, Float64) const bits_types = (UInt16, UInt32, UInt64) # not supported on master const rounding_modes_other = ( RoundNearestTiesAway, RoundNearestTiesUp ) const rounding_modes = ( RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero ) function experiment(itcnt::Int, itcnt_inner::Int) for (F, U) ∈ zip(float_types, bits_types) println("$F $U") for rm ∈ rounding_modes println(" $rm") flush(stdout) res = count_wrong_floats( div, rm, F, U, itcnt, itcnt_inner ) ch = res.total_huge_quotient_count cf = res.total_friendly_quotient_count rh = res.bad_ratio_huge_quotient rf = res.bad_ratio_friendly_quotient println(" quotients of huge magnitude:") println(" total count: $ch") println(" ratio of incorrect results: $rh") println(" quotients of more normal magnitude:") println(" total count: $cf") println(" ratio of incorrect results: $rf") flush(stdout) end println() flush(stdout) end nothing end experiment(20, 2^20) ``` Results on master: ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 371923 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15566588 ratio of incorrect results: 0.0017671823780522746 RoundingMode{:Up}() quotients of huge magnitude: total count: 375420 ratio of incorrect results: 5.327366682648766e-6 quotients of more normal magnitude: total count: 15567018 ratio of incorrect results: 0.004671543387436181 RoundingMode{:Down}() quotients of huge magnitude: total count: 375158 ratio of incorrect results: 2.665543584303147e-6 quotients of more normal magnitude: total count: 15565611 ratio of incorrect results: 0.004713146178457113 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 376802 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564500 ratio of incorrect results: 0.005449195284140191 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374335 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563355 ratio of incorrect results: 0.003891191841347833 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73547 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 1227091 ratio of incorrect results: 0.0003218994905920188 RoundingMode{:Up}() quotients of huge magnitude: total count: 73215 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12272614 ratio of incorrect results: 0.0009024972186039583 RoundingMode{:Down}() quotients of huge magnitude: total count: 73355 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269080 ratio of incorrect results: 0.0009091961255448657 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73961 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273238 ratio of incorrect results: 0.0009261614579624383 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73327 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12268024 ratio of incorrect results: 0.0008781365279363653 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9807 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009917 ratio of incorrect results: 4.677601111797664e-5 RoundingMode{:Up}() quotients of huge magnitude: total count: 10015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11012903 ratio of incorrect results: 0.00013012009639965048 RoundingMode{:Down}() quotients of huge magnitude: total count: 10048 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11011455 ratio of incorrect results: 0.00012886580383791242 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9902 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009214 ratio of incorrect results: 0.00012843787031481085 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9879 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009033 ratio of incorrect results: 0.00013797760439086704 ``` Results after this change (everything is correct): ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 372015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15568185 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 375936 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564509 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 375737 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564872 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 377102 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563927 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374618 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567337 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73567 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12267866 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 73190 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271827 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 73491 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12266797 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73504 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269685 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73552 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269727 ratio of incorrect results: 0.0 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9750 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007512 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 10046 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009576 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 9939 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11014458 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9855 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007247 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9798 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11006320 ratio of incorrect results: 0.0 ``` Results for methods that weren't implemented at all on master (those that miss a `rem` method): ``` Float16 UInt16 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 373333 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564325 ratio of incorrect results: 0.0 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 374334 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567551 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 73722 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271459 ratio of incorrect results: 0.00301610427904294 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 73861 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273032 ratio of incorrect results: 0.003042850373078144 Float64 UInt64 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 9981 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009598 ratio of incorrect results: 0.00044415790658296515 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 9926 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11013978 ratio of incorrect results: 0.00046023335074756825 ``` Fixes JuliaLang#49450
diff --git a/base/div.jl b/base/div.jl
@@ -364,7 +364,66 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer
     return d + (((x > 0) == (y > 0)) & (d * y != x))
 end
 
-# Real
-# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
-# so it is used here as the basis of float div().
-div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y))
+function mw_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+
+    # "DWDivFP3" AKA "Algorithm 15" from a paper by Joldes, Muller and
+    # Popescu: https://doi.org/10.1145/3121432
+    hi = x_hi / y
+    π = Base.Math.two_mul(hi, y)
+    δ_hi = x_hi - first(π)  # exact operation
+    δ_t = δ_hi - last(π)    # exact operation
+    δ = δ_t + x_lo
+    lo = δ / y
+    canonicalize2(hi, lo)
+end
+
+div_impl4(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    T(div_impl(widen(x), widen(y), r))
+
+div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    round(x / y, r)
+
+div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} =
+    round(first(mw_div(x, y)), RoundNearest)
+
+# Approximately rounded x (with respect to y and r)
+frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    add12(x, -rem(x, y, r))
+
+div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    div_impl2(frac_round(x, y, r), y)
+
+div_impl0(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    ifelse(
+        isfinite(abs(x) + abs(y)),
+        div_impl1(x, y, r),
+        div_impl3(x, y, r),  # try to avoid overflow
+    )
+
+# Other rounding modes are assumed not to have `rem`.
+const RoundingModesWithRem = Union{
+    RoundingMode{:Nearest},
+    RoundingMode{:Up},
+    RoundingMode{:Down},
+    RoundingMode{:FromZero},
+    RoundingMode{:ToZero},
+}
+
+div_impl(x::T, y::T, r::RoundingModesWithRem) where {T<:AbstractFloat} =
+    div_impl0(x, y, r)
+div_impl(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
+    div_impl3(x, y, r)
+
+# The only way to achieve close-to-perfect results with `Float16` seems
+# to be to widen to `Float32`.
+div_impl(x::Float16, y::Float16, r::RoundingModesWithRem) where {T<:AbstractFloat} =
+    div_impl4(x, y, r)
+div_impl(x::Float16, y::Float16, r::RoundingMode) where {T<:AbstractFloat} =
+    div_impl4(x, y, r)
+
+function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
+    isnan(x) && (return x)
+    isnan(y) && (return y)
+    div_impl(x, y, r)
+end
diff --git a/test/numbers.jl b/test/numbers.jl
@@ -1702,6 +1702,107 @@ end
         @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp)   ==  ceil(big(-1.1)/big(0.1)) == -11.0
         @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
     end
+
+    @testset "correctness despite possible numerical issues: $Small, $rounding_mode_with_rem" for
+        Small ∈ (Float16, Float32, Float64),
+        rounding_mode_with_rem ∈ (RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero),
+        xy = (
+            (4.445, 0.00675),
+            (2.941, -0.00301),
+            (-0.1852, 0.0001928),
+            (4.56, 0.006817),
+            (-0.178, -0.0001851),
+            (-6.51e4, -4.06e3),
+            (0.008255, -1.01e-5),
+            (-276.0, -0.1678),
+            (-39.7, -0.02045),
+            (0.7246, 0.0003948),
+            (-6.55e4, -29.81),
+            (-6.55e4, -27.81),
+            (-6.55e4, -24.25),
+            (6.55e4, 29.2),
+            (-6.55e4, -30.14),
+            (76.8, -0.11804),
+            (5.252e3, -2.607),
+            (4.365e4, -22.03),
+            (229.0, 0.1162),
+            (1.009, -0.0006504),
+            (-6.55e4, 29.81),
+            (6.55e4, -27.08),
+            (6.55e4, -31.62),
+            (6.55e4, -21.34),
+            (6.55e4, -31.27),
+            (6.307e4, 2.4e4),
+            (2.804e3, 3.502),
+            (-5.786e4, 5.005e4),
+            (-4.97e4, -2.387e4),
+            (-2.217, -0.001557),
+            (6.55e4, 26.6),
+            (6.55e4, 17.06),
+            (-6.55e4, -22.73),
+            (-6.55e4, 29.25),
+            (6.55e4, 29.28),
+            (1.169, -0.001584),
+            (20.94, 0.01427),
+            (0.1521, 0.0001633),
+            (2.406e4, -11.78),
+            (0.0766, 4.73e-5),
+            (-1.3231137e30, 2.4482906e23),
+            (-4.8544084e-25, 7.407144e-32),
+            (3.103397e26, 4.4375036e19),
+            (-4.7189755e-22, -6.6208687e-29),
+            (1.4102157e10, 1819.6719),
+            (1.1742472e13, -1.4370354e6),
+            (1.483496e-18, 9.9707963e-26),
+            (-5.016058e24, -4.956325e17),
+            (4.040856e22, 2.7740254e15),
+            (3.277167e14, -2.6412792e7),
+            (-1.849337e32, 1.7634905e25),
+            (-1.625298e-17, 1.0709875e-24),
+            (1.1791839e-31, 1.0768432e-38),
+            (-2.3660772e-30, -1.6153781e-37),
+            (-1.0070932e29, 7.4530127e21),
+            (-0.00061855844, -4.1053827e-11),
+            (2.6379836e12, -173271.67),
+            (9.8551985e-27, -1.48112095e-33),
+            (1.858432e26, 1.3008654e19),
+            (4.271524e19, -3.1128782e12),
+            (9.1970936e29, 1.2572408e23),
+            (-36199.926, -0.0023891365),
+            (2.2035416e20, -1.3350574e13),
+            (-6.933978e-26, -1.06443724e-32),
+            (-8.4979565e19, -7.333614e12),
+            (1.0172350506694424e17, 23.315505841986997),
+            (-1.47773180470849e-22, -4.0998853221425914e-38),
+            (-4.816178860121429e-125, 1.5705370217600465e-140),
+            (1.438033388799865e129, 3.8802573321302744e113),
+            (5.630449277879664e-200, -2.076135363156452e-215),
+            (-3.29867858971227e241, -8.079378598133822e225),
+            (-9.287161662516348e12, -0.00261425024965316),
+            (-4.017546451947597e-21, -6.334812648091802e-37),
+            (1.3802702942552817e-103, -1.5665085059273602e-119),
+            (2.2912868470550055e-76, -2.7514392143441017e-92),
+            (1.7074328973956706e151, -2.0403029505651566e135),
+            (-9.290503413404926e-267, -1.0856248919388625e-282),
+            (1.635940091056537e-143, -3.771680017287185e-159),
+            (1.1031633123212033e-289, 1.4605623613009784e-305),
+            (-5.984102559690793e-25, 1.3308245167101786e-40),
+            (-2.89964579258566e-97, -4.34310409713307e-113),
+            (-4.895423533487687e-224, -5.77559167362022e-240),
+            (3.2050860779440005e-66, 4.23835027226126e-82),
+            (-2.046347683435319e-194, 5.5954131552084545e-210),
+            (-9.223282007712624e-73, 1.0588268730479068e-88),
+            (-6.996263999884589e-36, 1.848582153492107e-51),
+            (-2.42983435949937e178, -6.557998479200877e162),
+            (3.402679467861051e45, -5.068403522082035e29),
+            (-1.9402396639482086e35, 2.7453450266399908e19),
+            (7.114570134851155e264, 1.6742521824981587e249),
+        )
+        ab = map(Small, xy)
+        AB = map(BigFloat, ab)
+        @test div(ab..., rounding_mode_with_rem) ==
+              div(AB..., rounding_mode_with_rem)
+    end
 end
 @testset "return types" begin
     for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)