Skip to content

Commit 7648a97

Browse files
committed
Base: fix floating-point div
Experimental script to show improvements in correctness: ``` using Random accurate_div(x, y, r::RoundingMode) = div(BigFloat(x), BigFloat(y), r) function count_wrong_floats( div_fun::Fun, r::RoundingMode, ::Type{F}, ::Type{U}, n::Int, m::Int, ) where {Fun <: Function, F, U} count_wrong = 0 sample_size = 0 vec_x = zeros(U, m) vec_y = zeros(U, m) for i ∈ 1:n Random.rand!(vec_x) Random.rand!(vec_y) for (x_raw, y_raw) ∈ zip(vec_x, vec_y) x = reinterpret(F, x_raw) y = reinterpret(F, y_raw) # skip huge quotients, they're difficult to round correctly (maxintfloat(F) < abs(x / y)) && continue acc_big = accurate_div(x, y, r) acc = F(acc_big) # skip cases when the result isn't representable (acc == acc_big) || continue sample_size += true d = div_fun(x, y, r) is_ok = (d == acc) | (isnan(d) & isnan(acc)) count_wrong += !is_ok end end ( bad_ratio = count_wrong / sample_size, sample_size = sample_size, ) end const float_types = (Float16, Float32, Float64) const bits_types = (UInt16, UInt32, UInt64) const rounding_modes = (RoundUp, RoundDown, RoundFromZero, RoundToZero, RoundNearest) function experiment(itcnt::Int, itcnt_inner::Int = 2^24) for (F, U) ∈ zip(float_types, bits_types) println("$F $U") for rm ∈ rounding_modes println(" $rm") flush(stdout) res = count_wrong_floats(div, rm, F, U, itcnt, itcnt_inner) println(" sample size: $(res.sample_size)") println(" ratio of bad results among all results: $(res.bad_ratio)") flush(stdout) end println() end nothing end experiment(16) ``` The script only checks pairs of numbers whose quotient isn't huge, with respect to `maxintfloat(F)`, and also skips cases where the correct answer isn't representable in the type. The `RoundNearestTiesAway` and `RoundNearestTiesUp` rounding modes were not tested because they don't work for `BigFloat`, because the corresponding `rem` methods are missing, for example, `rem(::BigFloat, ::BigFloat, ::RoundingMode{:NearestTiesUp})` Results for master: ``` Float16 UInt16 RoundingMode{:Up}() sample size: 199242282 ratio of bad results among all results: 0.0046785199940643125 RoundingMode{:Down}() sample size: 199236001 ratio of bad results among all results: 0.004678426566090332 RoundingMode{:FromZero}() sample size: 199231891 ratio of bad results among all results: 0.005443671665998492 RoundingMode{:ToZero}() sample size: 199220943 ratio of bad results among all results: 0.0039044690196050323 RoundingMode{:Nearest}() sample size: 199243950 ratio of bad results among all results: 0.0017626532700240082 Float32 UInt32 RoundingMode{:Up}() sample size: 157054574 ratio of bad results among all results: 0.00089996742151553 RoundingMode{:Down}() sample size: 157035223 ratio of bad results among all results: 0.0009016130094583939 RoundingMode{:FromZero}() sample size: 157037914 ratio of bad results among all results: 0.000918612558748074 RoundingMode{:ToZero}() sample size: 157052945 ratio of bad results among all results: 0.0008894962141588621 RoundingMode{:Nearest}() sample size: 157056698 ratio of bad results among all results: 0.00032412498574241 Float64 UInt64 RoundingMode{:Up}() sample size: 140927221 ratio of bad results among all results: 0.00013208236044049998 RoundingMode{:Down}() sample size: 140958292 ratio of bad results among all results: 0.00013300388174397005 RoundingMode{:FromZero}() sample size: 140941597 ratio of bad results among all results: 0.000134701184065624 RoundingMode{:ToZero}() sample size: 140939689 ratio of bad results among all results: 0.0001340360556634973 RoundingMode{:Nearest}() sample size: 140928865 ratio of bad results among all results: 4.791069593869219e-5 ``` Results after this commit: ``` Float16 UInt16 RoundingMode{:Up}() sample size: 199235658 ratio of bad results among all results: 7.583983786677382e-6 RoundingMode{:Down}() sample size: 199234976 ratio of bad results among all results: 7.257761809854109e-6 RoundingMode{:FromZero}() sample size: 199239270 ratio of bad results among all results: 7.588865387832429e-6 RoundingMode{:ToZero}() sample size: 199233698 ratio of bad results among all results: 7.584058395583261e-6 RoundingMode{:Nearest}() sample size: 199231993 ratio of bad results among all results: 8.266744588556116e-6 Float32 UInt32 RoundingMode{:Up}() sample size: 157061442 ratio of bad results among all results: 0.0 RoundingMode{:Down}() sample size: 157048602 ratio of bad results among all results: 0.0 RoundingMode{:FromZero}() sample size: 157056325 ratio of bad results among all results: 0.0 RoundingMode{:ToZero}() sample size: 157061341 ratio of bad results among all results: 0.0 RoundingMode{:Nearest}() sample size: 157053623 ratio of bad results among all results: 0.0 Float64 UInt64 RoundingMode{:Up}() sample size: 140945983 ratio of bad results among all results: 0.0 RoundingMode{:Down}() sample size: 140933327 ratio of bad results among all results: 0.0 RoundingMode{:FromZero}() sample size: 140940387 ratio of bad results among all results: 0.0 RoundingMode{:ToZero}() sample size: 140952881 ratio of bad results among all results: 0.0 RoundingMode{:Nearest}() sample size: 140932846 ratio of bad results among all results: 0.0 ``` Experimental function for checking the status regarding #49450: ``` function count_wrong_floats() wrong_fld_count = 0 wrong_cld_count = 0 for i in 0x0000:0xffff, j in 0x0000:0xffff x = reinterpret(Float16, i) y = reinterpret(Float16, j) quotient = x / y f = fld(x, y) c = cld(x, y) floor_is_wrong = (f > quotient) | (isnan(f) & !isnan(quotient)) ceil_is_wrong = (c < quotient) | (isnan(c) & !isnan(quotient)) wrong_fld_count += floor_is_wrong wrong_cld_count += ceil_is_wrong end n = Int(0x10000)^2 ( fld_bad_ratio = wrong_fld_count / n, cld_bad_ratio = wrong_cld_count / n, ) end ``` Result of `count_wrong_floats()` on master: ``` (fld_bad_ratio = 0.0003659049980342388, cld_bad_ratio = 0.0003659049980342388) ``` Result of `count_wrong_floats()` after this commit: ``` (fld_bad_ratio = 7.445923984050751e-7, cld_bad_ratio = 7.445923984050751e-7) ``` Fixes #49450
1 parent 4158640 commit 7648a97

File tree

2 files changed

+60
-4
lines changed

2 files changed

+60
-4
lines changed

base/div.jl

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,58 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer
364364
return d + (((x > 0) == (y > 0)) & (d * y != x))
365365
end
366366

367-
# Real
368-
# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
369-
# so it is used here as the basis of float div().
370-
div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y))
367+
function fast_two_sum(a::T, b::T) where {T<:AbstractFloat}
368+
s = a + b
369+
z = s - a
370+
t = b - z
371+
(s, t)
372+
end
373+
374+
function two_sum(a::T, b::T) where {T<:AbstractFloat}
375+
s = a + b
376+
a_ = s - b
377+
b_ = s - a_
378+
δa = a - a_
379+
δb = b - b_
380+
t = δa + δb
381+
(s, t)
382+
end
383+
384+
# Assumes that `rounding(T)` is one of the "Nearest" rounding modes.
385+
#
386+
# "DWDivFP3" AKA "Algorithm 15" from
387+
# https://doi.org/10.1145/3121432 by Joldes, Muller, Popescu.
388+
function two_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
389+
(x_hi, x_lo) = x
390+
hi = x_hi / y
391+
π = Math.two_mul(hi, y)
392+
δ_hi = x_hi - first(π) # exact operation
393+
δ_t = δ_hi - last(π) # exact operation
394+
δ = δ_t + x_lo
395+
lo = δ / y
396+
fast_two_sum(hi, lo)
397+
end
398+
399+
div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
400+
round(x / y, r)
401+
402+
div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} =
403+
round(first(two_div(x, y)), RoundNearest)
404+
405+
# Approximately rounded x (with respect to y and r)
406+
frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
407+
two_sum(x, -rem(x, y, r))
408+
409+
div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
410+
div_impl2(frac_round(x, y, r), y)
411+
412+
function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
413+
isnan(x) && (return x)
414+
isnan(y) && (return y)
415+
q = x / y
416+
(isinf(x) | iszero(x) | isinf(y) | iszero(y) | isinf(q)) && (return q)
417+
isfinite(abs(x) + abs(y)) ||
418+
# prevent overflow
419+
(return div_impl3(x, y, r))
420+
div_impl1(x, y, r)
421+
end

base/mpfr.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,11 @@ function lerpi(j::Integer, d::Integer, a::BigFloat, b::BigFloat)
11491149
fma(t, b, fma(-t, a, a))
11501150
end
11511151

1152+
function Base.Math.two_mul(x::BigFloat, y::BigFloat)
1153+
xy = x*y
1154+
xy, fma(x, y, -xy)
1155+
end
1156+
11521157
# flags
11531158
clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ())
11541159
had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0

0 commit comments

Comments
 (0)