Skip to content

Commit bf6357b

Browse files
committed
Base: fix floating-point div
Double-word arithmetics are used, except for rounding modes without `rem`, which now get simple fallback implementations (on master their `div` methods fail when called). The idea to fix `Float16` by widening to `Float32` is taken from Simon Byrne's JuliaLang#49637 A script used for assessing the correctness of `div`: ```julia using Random accurate_div(x, y, r::RoundingMode) = div(BigFloat(x), BigFloat(y), r) function count_wrong_floats( div_fun::Fun, r::RoundingMode, ::Type{F}, ::Type{U}, n::Int, m::Int, ) where {Fun <: Function, F, U} count_wrong_huge_quo = 0 count_wrong_friendly_quo = 0 count_total_huge_quo = 0 count_total_friendly_quo = 0 vec_x = zeros(U, m) vec_y = zeros(U, m) for i ∈ 1:n Random.rand!(vec_x) Random.rand!(vec_y) for (x_raw, y_raw) ∈ zip(vec_x, vec_y) x = reinterpret(F, x_raw) y = reinterpret(F, y_raw) (!isfinite(x) | !isfinite(y)) && continue quo_is_huge = (maxintfloat(F) < abs(x / y)) acc_big = accurate_div(x, y, r) acc = F(acc_big) # Skip cases when the result isn't representable, the # correct result is not specified for this case, and # it's not clear what a user would expect either. (acc == acc_big) || continue if quo_is_huge count_total_huge_quo += true else count_total_friendly_quo += true end d = div_fun(x, y, r) is_ok = (d == acc) | (isnan(d) & isnan(acc)) if !is_ok if quo_is_huge count_wrong_huge_quo += true else count_wrong_friendly_quo += true end end end end ( bad_ratio_huge_quotient = count_wrong_huge_quo / count_total_huge_quo, bad_ratio_friendly_quotient = count_wrong_friendly_quo / count_total_friendly_quo, total_huge_quotient_count = count_total_huge_quo, total_friendly_quotient_count = count_total_friendly_quo, ) end const float_types = (Float16, Float32, Float64) const bits_types = (UInt16, UInt32, UInt64) # not supported on master const rounding_modes_other = ( RoundNearestTiesAway, RoundNearestTiesUp ) const rounding_modes = ( RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero ) function experiment(itcnt::Int, itcnt_inner::Int) for (F, U) ∈ zip(float_types, bits_types) println("$F $U") for rm ∈ rounding_modes println(" $rm") flush(stdout) res = count_wrong_floats( div, rm, F, U, itcnt, itcnt_inner ) ch = res.total_huge_quotient_count cf = res.total_friendly_quotient_count rh = res.bad_ratio_huge_quotient rf = res.bad_ratio_friendly_quotient println(" quotients of huge magnitude:") println(" total count: $ch") println(" ratio of incorrect results: $rh") println(" quotients of more normal magnitude:") println(" total count: $cf") println(" ratio of incorrect results: $rf") flush(stdout) end println() flush(stdout) end nothing end experiment(20, 2^20) ``` Results on master: ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 371923 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15566588 ratio of incorrect results: 0.0017671823780522746 RoundingMode{:Up}() quotients of huge magnitude: total count: 375420 ratio of incorrect results: 5.327366682648766e-6 quotients of more normal magnitude: total count: 15567018 ratio of incorrect results: 0.004671543387436181 RoundingMode{:Down}() quotients of huge magnitude: total count: 375158 ratio of incorrect results: 2.665543584303147e-6 quotients of more normal magnitude: total count: 15565611 ratio of incorrect results: 0.004713146178457113 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 376802 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564500 ratio of incorrect results: 0.005449195284140191 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374335 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563355 ratio of incorrect results: 0.003891191841347833 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73547 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 1227091 ratio of incorrect results: 0.0003218994905920188 RoundingMode{:Up}() quotients of huge magnitude: total count: 73215 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12272614 ratio of incorrect results: 0.0009024972186039583 RoundingMode{:Down}() quotients of huge magnitude: total count: 73355 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269080 ratio of incorrect results: 0.0009091961255448657 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73961 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273238 ratio of incorrect results: 0.0009261614579624383 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73327 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12268024 ratio of incorrect results: 0.0008781365279363653 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9807 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009917 ratio of incorrect results: 4.677601111797664e-5 RoundingMode{:Up}() quotients of huge magnitude: total count: 10015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11012903 ratio of incorrect results: 0.00013012009639965048 RoundingMode{:Down}() quotients of huge magnitude: total count: 10048 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11011455 ratio of incorrect results: 0.00012886580383791242 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9902 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009214 ratio of incorrect results: 0.00012843787031481085 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9879 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009033 ratio of incorrect results: 0.00013797760439086704 ``` Results after this change (everything is correct): ``` Float16 UInt16 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 372015 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15568185 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 375936 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564509 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 375737 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564872 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 377102 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15563927 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 374618 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567337 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 73567 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12267866 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 73190 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271827 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 73491 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12266797 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 73504 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269685 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 73552 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12269727 ratio of incorrect results: 0.0 Float64 UInt64 RoundingMode{:Nearest}() quotients of huge magnitude: total count: 9750 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007512 ratio of incorrect results: 0.0 RoundingMode{:Up}() quotients of huge magnitude: total count: 10046 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009576 ratio of incorrect results: 0.0 RoundingMode{:Down}() quotients of huge magnitude: total count: 9939 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11014458 ratio of incorrect results: 0.0 RoundingMode{:FromZero}() quotients of huge magnitude: total count: 9855 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11007247 ratio of incorrect results: 0.0 RoundingMode{:ToZero}() quotients of huge magnitude: total count: 9798 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11006320 ratio of incorrect results: 0.0 ``` Results for methods that weren't implemented at all on master (those that miss a `rem` method): ``` Float16 UInt16 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 373333 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15564325 ratio of incorrect results: 0.0 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 374334 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 15567551 ratio of incorrect results: 0.0 Float32 UInt32 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 73722 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12271459 ratio of incorrect results: 0.00301610427904294 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 73861 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 12273032 ratio of incorrect results: 0.003042850373078144 Float64 UInt64 RoundingMode{:NearestTiesAway}() quotients of huge magnitude: total count: 9981 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11009598 ratio of incorrect results: 0.00044415790658296515 RoundingMode{:NearestTiesUp}() quotients of huge magnitude: total count: 9926 ratio of incorrect results: 0.0 quotients of more normal magnitude: total count: 11013978 ratio of incorrect results: 0.00046023335074756825 ``` Fixes JuliaLang#49450
1 parent b8c347a commit bf6357b

File tree

2 files changed

+164
-4
lines changed

2 files changed

+164
-4
lines changed

base/div.jl

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,66 @@ function div(x::T, y::T, ::typeof(RoundUp)) where T<:Integer
364364
return d + (((x > 0) == (y > 0)) & (d * y != x))
365365
end
366366

367-
# Real
368-
# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
369-
# so it is used here as the basis of float div().
370-
div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y))
367+
function mw_div(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
368+
(x_hi, x_lo) = x
369+
370+
# "DWDivFP3" AKA "Algorithm 15" from a paper by Joldes, Muller and
371+
# Popescu: https://doi.org/10.1145/3121432
372+
hi = x_hi / y
373+
π = Base.Math.two_mul(hi, y)
374+
δ_hi = x_hi - first(π) # exact operation
375+
δ_t = δ_hi - last(π) # exact operation
376+
δ = δ_t + x_lo
377+
lo = δ / y
378+
canonicalize2(hi, lo)
379+
end
380+
381+
div_impl4(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
382+
T(div_impl(widen(x), widen(y), r))
383+
384+
div_impl3(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
385+
round(x / y, r)
386+
387+
div_impl2(x::NTuple{2,T}, y::T) where {T<:AbstractFloat} =
388+
round(first(mw_div(x, y)), RoundNearest)
389+
390+
# Approximately rounded x (with respect to y and r)
391+
frac_round(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
392+
add12(x, -rem(x, y, r))
393+
394+
div_impl1(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
395+
div_impl2(frac_round(x, y, r), y)
396+
397+
div_impl0(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
398+
ifelse(
399+
isfinite(abs(x) + abs(y)),
400+
div_impl1(x, y, r),
401+
div_impl3(x, y, r), # try to avoid overflow
402+
)
403+
404+
# Other rounding modes are assumed not to have `rem`.
405+
const RoundingModesWithRem = Union{
406+
RoundingMode{:Nearest},
407+
RoundingMode{:Up},
408+
RoundingMode{:Down},
409+
RoundingMode{:FromZero},
410+
RoundingMode{:ToZero},
411+
}
412+
413+
div_impl(x::T, y::T, r::RoundingModesWithRem) where {T<:AbstractFloat} =
414+
div_impl0(x, y, r)
415+
div_impl(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} =
416+
div_impl3(x, y, r)
417+
418+
# The only way to achieve close-to-perfect results with `Float16` seems
419+
# to be to widen to `Float32`.
420+
div_impl(x::Float16, y::Float16, r::RoundingModesWithRem) where {T<:AbstractFloat} =
421+
div_impl4(x, y, r)
422+
div_impl(x::Float16, y::Float16, r::RoundingMode) where {T<:AbstractFloat} =
423+
div_impl4(x, y, r)
424+
425+
function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
426+
isnan(x) && (return x)
427+
isnan(y) && (return y)
428+
div_impl(x, y, r)
429+
end

test/numbers.jl

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,6 +1702,107 @@ end
17021702
@test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0
17031703
@test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
17041704
end
1705+
1706+
@testset "correctness despite possible numerical issues: $Small, $rounding_mode_with_rem" for
1707+
Small (Float16, Float32, Float64),
1708+
rounding_mode_with_rem (RoundNearest, RoundUp, RoundDown, RoundFromZero, RoundToZero),
1709+
xy = (
1710+
(4.445, 0.00675),
1711+
(2.941, -0.00301),
1712+
(-0.1852, 0.0001928),
1713+
(4.56, 0.006817),
1714+
(-0.178, -0.0001851),
1715+
(-6.51e4, -4.06e3),
1716+
(0.008255, -1.01e-5),
1717+
(-276.0, -0.1678),
1718+
(-39.7, -0.02045),
1719+
(0.7246, 0.0003948),
1720+
(-6.55e4, -29.81),
1721+
(-6.55e4, -27.81),
1722+
(-6.55e4, -24.25),
1723+
(6.55e4, 29.2),
1724+
(-6.55e4, -30.14),
1725+
(76.8, -0.11804),
1726+
(5.252e3, -2.607),
1727+
(4.365e4, -22.03),
1728+
(229.0, 0.1162),
1729+
(1.009, -0.0006504),
1730+
(-6.55e4, 29.81),
1731+
(6.55e4, -27.08),
1732+
(6.55e4, -31.62),
1733+
(6.55e4, -21.34),
1734+
(6.55e4, -31.27),
1735+
(6.307e4, 2.4e4),
1736+
(2.804e3, 3.502),
1737+
(-5.786e4, 5.005e4),
1738+
(-4.97e4, -2.387e4),
1739+
(-2.217, -0.001557),
1740+
(6.55e4, 26.6),
1741+
(6.55e4, 17.06),
1742+
(-6.55e4, -22.73),
1743+
(-6.55e4, 29.25),
1744+
(6.55e4, 29.28),
1745+
(1.169, -0.001584),
1746+
(20.94, 0.01427),
1747+
(0.1521, 0.0001633),
1748+
(2.406e4, -11.78),
1749+
(0.0766, 4.73e-5),
1750+
(-1.3231137e30, 2.4482906e23),
1751+
(-4.8544084e-25, 7.407144e-32),
1752+
(3.103397e26, 4.4375036e19),
1753+
(-4.7189755e-22, -6.6208687e-29),
1754+
(1.4102157e10, 1819.6719),
1755+
(1.1742472e13, -1.4370354e6),
1756+
(1.483496e-18, 9.9707963e-26),
1757+
(-5.016058e24, -4.956325e17),
1758+
(4.040856e22, 2.7740254e15),
1759+
(3.277167e14, -2.6412792e7),
1760+
(-1.849337e32, 1.7634905e25),
1761+
(-1.625298e-17, 1.0709875e-24),
1762+
(1.1791839e-31, 1.0768432e-38),
1763+
(-2.3660772e-30, -1.6153781e-37),
1764+
(-1.0070932e29, 7.4530127e21),
1765+
(-0.00061855844, -4.1053827e-11),
1766+
(2.6379836e12, -173271.67),
1767+
(9.8551985e-27, -1.48112095e-33),
1768+
(1.858432e26, 1.3008654e19),
1769+
(4.271524e19, -3.1128782e12),
1770+
(9.1970936e29, 1.2572408e23),
1771+
(-36199.926, -0.0023891365),
1772+
(2.2035416e20, -1.3350574e13),
1773+
(-6.933978e-26, -1.06443724e-32),
1774+
(-8.4979565e19, -7.333614e12),
1775+
(1.0172350506694424e17, 23.315505841986997),
1776+
(-1.47773180470849e-22, -4.0998853221425914e-38),
1777+
(-4.816178860121429e-125, 1.5705370217600465e-140),
1778+
(1.438033388799865e129, 3.8802573321302744e113),
1779+
(5.630449277879664e-200, -2.076135363156452e-215),
1780+
(-3.29867858971227e241, -8.079378598133822e225),
1781+
(-9.287161662516348e12, -0.00261425024965316),
1782+
(-4.017546451947597e-21, -6.334812648091802e-37),
1783+
(1.3802702942552817e-103, -1.5665085059273602e-119),
1784+
(2.2912868470550055e-76, -2.7514392143441017e-92),
1785+
(1.7074328973956706e151, -2.0403029505651566e135),
1786+
(-9.290503413404926e-267, -1.0856248919388625e-282),
1787+
(1.635940091056537e-143, -3.771680017287185e-159),
1788+
(1.1031633123212033e-289, 1.4605623613009784e-305),
1789+
(-5.984102559690793e-25, 1.3308245167101786e-40),
1790+
(-2.89964579258566e-97, -4.34310409713307e-113),
1791+
(-4.895423533487687e-224, -5.77559167362022e-240),
1792+
(3.2050860779440005e-66, 4.23835027226126e-82),
1793+
(-2.046347683435319e-194, 5.5954131552084545e-210),
1794+
(-9.223282007712624e-73, 1.0588268730479068e-88),
1795+
(-6.996263999884589e-36, 1.848582153492107e-51),
1796+
(-2.42983435949937e178, -6.557998479200877e162),
1797+
(3.402679467861051e45, -5.068403522082035e29),
1798+
(-1.9402396639482086e35, 2.7453450266399908e19),
1799+
(7.114570134851155e264, 1.6742521824981587e249),
1800+
)
1801+
ab = map(Small, xy)
1802+
AB = map(BigFloat, ab)
1803+
@test div(ab..., rounding_mode_with_rem) ==
1804+
div(AB..., rounding_mode_with_rem)
1805+
end
17051806
end
17061807
@testset "return types" begin
17071808
for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)

0 commit comments

Comments
 (0)