Base: fix floating-point div

nsajko · nsajko · commit 7648a97cff9c · 2023-04-29T02:08:57.000+02:00
Experimental script to show improvements in correctness: ``` using Random accurate_div(x, y, r::RoundingMode) = div(BigFloat(x), BigFloat(y), r) function count_wrong_floats( div_fun::Fun, r::RoundingMode, ::Type{F}, ::Type{U}, n::Int, m::Int, ) where {Fun <: Function, F, U} count_wrong = 0 sample_size = 0 vec_x = zeros(U, m) vec_y = zeros(U, m) for i ∈ 1:n Random.rand!(vec_x) Random.rand!(vec_y) for (x_raw, y_raw) ∈ zip(vec_x, vec_y) x = reinterpret(F, x_raw) y = reinterpret(F, y_raw) # skip huge quotients, they're difficult to round correctly (maxintfloat(F) < abs(x / y)) && continue acc_big = accurate_div(x, y, r) acc = F(acc_big) # skip cases when the result isn't representable (acc == acc_big) || continue sample_size += true d = div_fun(x, y, r) is_ok = (d == acc) | (isnan(d) & isnan(acc)) count_wrong += !is_ok end end ( bad_ratio = count_wrong / sample_size, sample_size = sample_size, ) end const float_types = (Float16, Float32, Float64) const bits_types = (UInt16, UInt32, UInt64) const rounding_modes = (RoundUp, RoundDown, RoundFromZero, RoundToZero, RoundNearest) function experiment(itcnt::Int, itcnt_inner::Int = 2^24) for (F, U) ∈ zip(float_types, bits_types) println("$F $U") for rm ∈ rounding_modes println(" $rm") flush(stdout) res = count_wrong_floats(div, rm, F, U, itcnt, itcnt_inner) println(" sample size: $(res.sample_size)") println(" ratio of bad results among all results: $(res.bad_ratio)") flush(stdout) end println() end nothing end experiment(16) ``` The script only checks pairs of numbers whose quotient isn't huge, with respect to `maxintfloat(F)`, and also skips cases where the correct answer isn't representable in the type. 
The `RoundNearestTiesAway` and `RoundNearestTiesUp` rounding modes were not tested: they don't work for `BigFloat` because the corresponding `rem` methods are missing, for example `rem(::BigFloat, ::BigFloat, ::RoundingMode{:NearestTiesUp})`. Results for master: ``` Float16 UInt16 RoundingMode{:Up}() sample size: 199242282 ratio of bad results among all results: 0.0046785199940643125 RoundingMode{:Down}() sample size: 199236001 ratio of bad results among all results: 0.004678426566090332 RoundingMode{:FromZero}() sample size: 199231891 ratio of bad results among all results: 0.005443671665998492 RoundingMode{:ToZero}() sample size: 199220943 ratio of bad results among all results: 0.0039044690196050323 RoundingMode{:Nearest}() sample size: 199243950 ratio of bad results among all results: 0.0017626532700240082 Float32 UInt32 RoundingMode{:Up}() sample size: 157054574 ratio of bad results among all results: 0.00089996742151553 RoundingMode{:Down}() sample size: 157035223 ratio of bad results among all results: 0.0009016130094583939 RoundingMode{:FromZero}() sample size: 157037914 ratio of bad results among all results: 0.000918612558748074 RoundingMode{:ToZero}() sample size: 157052945 ratio of bad results among all results: 0.0008894962141588621 RoundingMode{:Nearest}() sample size: 157056698 ratio of bad results among all results: 0.00032412498574241 Float64 UInt64 RoundingMode{:Up}() sample size: 140927221 ratio of bad results among all results: 0.00013208236044049998 RoundingMode{:Down}() sample size: 140958292 ratio of bad results among all results: 0.00013300388174397005 RoundingMode{:FromZero}() sample size: 140941597 ratio of bad results among all results: 0.000134701184065624 RoundingMode{:ToZero}() sample size: 140939689 ratio of bad results among all results: 0.0001340360556634973 RoundingMode{:Nearest}() sample size: 140928865 ratio of bad results among all results: 4.791069593869219e-5 ``` Results after this commit: ``` Float16 UInt16 
RoundingMode{:Up}() sample size: 199235658 ratio of bad results among all results: 7.583983786677382e-6 RoundingMode{:Down}() sample size: 199234976 ratio of bad results among all results: 7.257761809854109e-6 RoundingMode{:FromZero}() sample size: 199239270 ratio of bad results among all results: 7.588865387832429e-6 RoundingMode{:ToZero}() sample size: 199233698 ratio of bad results among all results: 7.584058395583261e-6 RoundingMode{:Nearest}() sample size: 199231993 ratio of bad results among all results: 8.266744588556116e-6 Float32 UInt32 RoundingMode{:Up}() sample size: 157061442 ratio of bad results among all results: 0.0 RoundingMode{:Down}() sample size: 157048602 ratio of bad results among all results: 0.0 RoundingMode{:FromZero}() sample size: 157056325 ratio of bad results among all results: 0.0 RoundingMode{:ToZero}() sample size: 157061341 ratio of bad results among all results: 0.0 RoundingMode{:Nearest}() sample size: 157053623 ratio of bad results among all results: 0.0 Float64 UInt64 RoundingMode{:Up}() sample size: 140945983 ratio of bad results among all results: 0.0 RoundingMode{:Down}() sample size: 140933327 ratio of bad results among all results: 0.0 RoundingMode{:FromZero}() sample size: 140940387 ratio of bad results among all results: 0.0 RoundingMode{:ToZero}() sample size: 140952881 ratio of bad results among all results: 0.0 RoundingMode{:Nearest}() sample size: 140932846 ratio of bad results among all results: 0.0 ``` Experimental function for checking the status regarding #49450: ``` function count_wrong_floats() wrong_fld_count = 0 wrong_cld_count = 0 for i in 0x0000:0xffff, j in 0x0000:0xffff x = reinterpret(Float16, i) y = reinterpret(Float16, j) quotient = x / y f = fld(x, y) c = cld(x, y) floor_is_wrong = (f > quotient) | (isnan(f) & !isnan(quotient)) ceil_is_wrong = (c < quotient) | (isnan(c) & !isnan(quotient)) wrong_fld_count += floor_is_wrong wrong_cld_count += ceil_is_wrong end n = Int(0x10000)^2 ( fld_bad_ratio = 
wrong_fld_count / n, cld_bad_ratio = wrong_cld_count / n, ) end ``` Result of `count_wrong_floats()` on master: ``` (fld_bad_ratio = 0.0003659049980342388, cld_bad_ratio = 0.0003659049980342388) ``` Result of `count_wrong_floats()` after this commit: ``` (fld_bad_ratio = 7.445923984050751e-7, cld_bad_ratio = 7.445923984050751e-7) ``` Fixes #49450
diff --git a/base/div.jl b/base/div.jl
@@ -364,7 +364,58 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer
     return d + (((x > 0) == (y > 0)) & (d * y != x))
 end
 
-# Real
-# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
-# so it is used here as the basis of float div().
-div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y))
+# Fast2Sum step: returns `(s, t)` with `s = a + b` and the correction `t = b - (s - a)`.
+function fast_two_sum(a::T, b::T) where {T<:AbstractFloat}
+    total = a + b
+    err = b - (total - a)  # NOTE(review): presumably relies on |a| >= |b| — confirm
+    return (total, err)
+end
+
+# TwoSum pattern: returns `(s, t)` where `s = a + b` and `t` collects what each operand
+# lost when the sum was formed (computed for `a` and `b` separately, then added).
+function two_sum(a::T, b::T) where {T<:AbstractFloat}
+    total = a + b
+    a_seen = total - b       # `a` as recovered from the sum
+    b_seen = total - a_seen  # `b` as recovered from the sum
+    err = (a - a_seen) + (b - b_seen)
+    return (total, err)
+end
+
+# Divides the double-word number `x = (x_hi, x_lo)` by `y`; the result is again a
+# two-word tuple. This is "DWDivFP3" AKA "Algorithm 15" from
+# https://doi.org/10.1145/3121432 by Joldes, Muller, Popescu.
+# Assumes that `rounding(T)` is one of the "Nearest" rounding modes.
+function two_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    q_hi = x_hi / y
+    (p_hi, p_lo) = Math.two_mul(q_hi, y)
+    # Subtract `q_hi * y` from `x_hi` word by word; both subtractions are exact operations.
+    head = (x_hi - p_hi) - p_lo
+    resid = head + x_lo
+    q_lo = resid / y
+    return fast_two_sum(q_hi, q_lo)
+end
+
+# Fallback path: round the ordinary floating-point quotient `x / y` according to `r`.
+div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = round(x / y, r)
+
+# Rounds the leading word of the double-word quotient `two_div(x, y)` to the nearest integer.
+div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} = round(two_div(x, y)[1], RoundNearest)
+
+# Approximately rounded x (with respect to y and r): the two-word sum of `x` and
+# `-rem(x, y, r)`, as produced by `two_sum`.
+frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = two_sum(x, -rem(x, y, r))
+
+# Main path: build the two-word numerator with `frac_round`, then hand it to `div_impl2`.
+div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = div_impl2(frac_round(x, y, r), y)
+
+# Rounded division of floats. NaN operands are returned as-is; the double-word path
+# (`div_impl1`) is used unless `abs(x) + abs(y)` overflows, which `div_impl3` handles.
+function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
+    isnan(x) && return x
+    isnan(y) && return y
+    q = x / y
+    trivial = isinf(x) | iszero(x) | isinf(y) | iszero(y) | isinf(q)
+    trivial && return q  # zero/infinite operand or infinite quotient: return `x / y` as is
+    return isfinite(abs(x) + abs(y)) ? div_impl1(x, y, r) : div_impl3(x, y, r)
+end
diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -1149,6 +1149,11 @@ function lerpi(j::Integer, d::Integer, a::BigFloat, b::BigFloat)
     fma(t, b, fma(-t, a, a))
 end
 
+function Base.Math.two_mul(x::BigFloat, y::BigFloat)
+    prod_hi = x * y  # leading word: the `BigFloat` product
+    return (prod_hi, fma(x, y, -prod_hi))  # trailing word: `fma` of `x`, `y`, `-prod_hi`
+end
+
 # flags
 clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ())
 had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0