Base: make TwicePrecision more accurate using standard algorithms

nsajko · nsajko · commit 05121ef6b7ef · 2023-04-29T18:02:29.000+02:00
It seems that the compensated arithmetic code was previously designed ad hoc instead of using the standard algorithms. Some of the introduced code will also be used in `base/div.jl` to fix a separate issue. I didn't check how much this improves the situation. In particular, the example in #33677 still gives the same result, and I wasn't able to evaluate #23497 because of how much Julia has changed in the meantime.
diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -986,6 +986,8 @@ isfinite(x::BigFloat) = !isinf(x) && !isnan(x)
 iszero(x::BigFloat) = x == Clong(0)
 isone(x::BigFloat) = x == Clong(1)
 
+Base.add12(x::BigFloat, y::BigFloat) = Base.add12_branchful(x, y)
+
 @eval typemax(::Type{BigFloat}) = $(BigFloat(Inf))
 @eval typemin(::Type{BigFloat}) = $(BigFloat(-Inf))
 
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
@@ -38,16 +38,50 @@ truncbits(x, nb) = x
 
 ## Dekker arithmetic
 
+# Reference for double-word floating-point arithmetic:
+#
+# Tight and Rigorous Error Bounds for Basic Building Blocks of
+# Double-Word Arithmetic
+#
+# ACM Transactions on Mathematical Software, Vol. 44, No. 2, Article
+# 15res. Publication date: October 2017
+#
+# Mioara Joldes, Jean-Michel Muller, and Valentina Popescu
+#
+# https://doi.org/10.1145/3121432
+
+function fast_two_sum(big::T, little::T) where {T<:AbstractFloat}
+    h = big + little  # rounded sum; returned as the high word
+    (h, (big - h) + little)  # low word: rounding error of `h` if abs(big) >= abs(little)
+end
+
 """
     hi, lo = canonicalize2(big, little)
 
 Generate a representation where all the nonzero bits in `hi` are more
 significant than any of the nonzero bits in `lo`. `big` must be larger
 in absolute value than `little`.
 """
-function canonicalize2(big, little)
-    h = big+little
-    h, (big - h) + little
+canonicalize2(big::T, little::T) where {T<:AbstractFloat} = fast_two_sum(big, little)
+canonicalize2(big::T, little::T) where {T} = (big + little, zero(big))  # low word is zero
+
+# `add12_branchful` and `add12_branchless` are equivalent, known to
+# produce the same results as long as there's no overflow and no
+# underflow
+
+function add12_branchful(x::T, y::T) where {T<:AbstractFloat}
+    x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))  # put the larger magnitude first
+    fast_two_sum(x, y)  # returns (rounded sum, low-order remainder)
+end
+
+function add12_branchless(a::T, b::T) where {T<:AbstractFloat}
+    s = a + b     # rounded sum; returned as the high word
+    a_ = s - b    # portion of `s` attributed to `a`
+    b_ = s - a_   # portion of `s` attributed to `b`
+    δa = a - a_   # what `a` lost when `s` was rounded
+    δb = b - b_   # what `b` lost when `s` was rounded
+    t = δa + δb   # low word: combined rounding error
+    (s, t)
 end
 
 """
@@ -80,10 +114,8 @@ julia> big(hi) + big(lo)
 `lo` differs from 1.0e-19 because `hi` is not exactly equal to
 the first 16 decimal digits of the answer.
 """
-function add12(x::T, y::T) where {T}
-    x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))
-    canonicalize2(x, y)
-end
+add12(x::T, y::T) where {T<:AbstractFloat} = add12_branchless(x, y)  # no abs() comparison
+add12(x::T, y::T) where {T} = (x + y, zero(x))  # non-float types: low word is zero
 add12(x, y) = add12(promote(x, y)...)
 
 """
@@ -118,6 +150,76 @@ end
 mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
 mul12(x, y) = mul12(promote(x, y)...)
 
+# "DWPlusFP" AKA "Algorithm 4" from Joldes, Muller, Popescu
+function dw_plus_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    (s_hi, s_lo) = add12(x_hi, y)  # high word of `x` plus `y`, with the error term
+    v = x_lo + s_lo                # fold in the low word of `x`
+    fast_two_sum(s_hi, v)          # combine into the (hi, lo) result
+end
+
+# "AccurateDWPlusDW" AKA "Algorithm 6" from Joldes, Muller, Popescu
+function dw_plus_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    (y_hi, y_lo) = y
+    (s_hi, s_lo) = add12(x_hi, y_hi)      # sum of the high words, with error term
+    (t_hi, t_lo) = add12(x_lo, y_lo)      # sum of the low words, with error term
+    c = s_lo + t_hi                       # middle-order contributions
+    (v_hi, v_lo) = fast_two_sum(s_hi, c)  # first combination step
+    w = t_lo + v_lo                       # remaining low-order contributions
+    fast_two_sum(v_hi, w)                 # combine into the (hi, lo) result
+end
+
+# "DWTimesFP1" AKA "Algorithm 7" from Joldes, Muller, Popescu
+function dw_times_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    (c_hi, c_l1) = mul12(x_hi, y)            # high word times `y`, with error term
+    c_l2 = x_lo * y                          # low word times `y`
+    (t_hi, t_l1) = fast_two_sum(c_hi, c_l2)  # add the low-word product
+    t_l2 = t_l1 + c_l1                       # gather the low-order terms
+    fast_two_sum(t_hi, t_l2)                 # combine into the (hi, lo) result
+end
+
+# "DWTimesDW3" AKA "Algorithm 12" from Joldes, Muller, Popescu
+function dw_times_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    (y_hi, y_lo) = y
+    (c_hi, c_l1) = mul12(x_hi, y_hi)  # product of the high words, with error term
+    t_l0 = x_lo * y_lo                # low * low
+    t_l1 = fma(x_hi, y_lo, t_l0)      # ... plus high(x) * low(y)
+    c_l2 = fma(x_lo, y_hi, t_l1)      # ... plus low(x) * high(y)
+    c_l3 = c_l1 + c_l2                # all low-order contributions
+    fast_two_sum(c_hi, c_l3)          # combine into the (hi, lo) result
+end
+
+# "DWDivFP3" AKA "Algorithm 15" from Joldes, Muller, Popescu
+function dw_div_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    q_hi = x_hi / y                            # first approximation of the quotient
+    (p_hi, p_lo) = Base.Math.two_mul(q_hi, y)  # q_hi * y as a two-word product
+    r_hi = x_hi - p_hi  # exact operation
+    r_t = r_hi - p_lo   # exact operation
+    r = r_t + x_lo                             # remainder, including the low word of `x`
+    q_lo = r / y                               # correction to the quotient
+    return fast_two_sum(q_hi, q_lo)
+end
+
+# "DWDivDW3" AKA "Algorithm 18" from Joldes, Muller, Popescu
+function dw_div_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
+    (x_hi, x_lo) = x
+    (y_hi, y_lo) = y
+    t_hi = 1 / y_hi                # approximate reciprocal of the divisor's high word
+    r_hi = fma(-y_hi, t_hi, true) # exact operation
+    r_lo = -y_lo * t_hi            # low-word contribution to the residual
+    e = fast_two_sum(r_hi, r_lo)   # two-word residual, approximately 1 - y * t_hi
+    δ = dw_times_fp(e, t_hi)       # correction to the reciprocal
+    m = dw_plus_fp(δ, t_hi)        # refined two-word reciprocal of `y`
+    dw_times_dw(x, m)              # quotient computed as x * (1 / y)
+end
+
+div12_kernel(x::T, y::T) where {T<:AbstractFloat} =
+    dw_div_fp((x, zero(x)), y)  # treat `x` as a double word whose low word is zero
+
 """
     zhi, zlo = div12(x, y)
 
@@ -149,7 +251,7 @@ function div12(x::T, y::T) where {T<:AbstractFloat}
     xs, xe = frexp(x)
     ys, ye = frexp(y)
     r = xs / ys
-    rh, rl = canonicalize2(r, -fma(r, ys, -xs)/ys)
+    rh, rl = div12_kernel(xs, ys)
     ifelse(iszero(r) | !isfinite(r), (r, r), (ldexp(rh, xe-ye), ldexp(rl, xe-ye)))
 end
 div12(x::T, y::T) where {T} = (p = x / y; (p, zero(p)))
@@ -198,6 +300,8 @@ struct TwicePrecision{T}
     lo::T    # least significant bits
 end
 
+(::Type{<:Tuple})(t::TwicePrecision) = (t.hi, t.lo)  # NOTE(review): matches every Tuple subtype, not only `Tuple` — confirm intended
+
 TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T))
 
 function TwicePrecision{T}(x) where {T}
@@ -288,23 +392,23 @@ end
 
 # Arithmetic
 
-function +(x::TwicePrecision, y::Number)
-    s_hi, s_lo = add12(x.hi, y)
-    TwicePrecision(canonicalize2(s_hi, s_lo+x.lo)...)
-end
++(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =  # same-type float scalar only
+    TwicePrecision{T}(dw_plus_fp(Tuple(x), y)...)  # NOTE(review): the `::Number` method is removed — confirm mixed-type calls still dispatch
+
 +(x::Number, y::TwicePrecision) = y+x
 
-function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T
-    r = x.hi + y.hi
-    s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo
-    TwicePrecision(canonicalize2(r, s)...)
-end
++(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat} =  # float element types only
+    TwicePrecision{T}(dw_plus_dw(Tuple(x), Tuple(y))...)  # NOTE(review): non-float `T` now only reaches the promote-based method — confirm it terminates
+
 +(x::TwicePrecision, y::TwicePrecision) = +(promote(x, y)...)
 
 -(x::TwicePrecision, y::TwicePrecision) = x + (-y)
 -(x::TwicePrecision, y::Number) = x + (-y)
 -(x::Number, y::TwicePrecision) = x + (-y)
 
+*(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =  # same-type float scalar only
+    TwicePrecision{T}(dw_times_fp(Tuple(x), y)...)  # NOTE(review): no Inf/NaN guard here, unlike TwicePrecision * TwicePrecision — confirm
+
 function *(x::TwicePrecision, v::Number)
     v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
     x * TwicePrecision(oftype(x.hi*v, v))
@@ -317,23 +421,31 @@ function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
 end
 *(v::Number, x::TwicePrecision) = x*v
 
-function *(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T}
-    zh, zl = mul12(x.hi, y.hi)
-    ret = TwicePrecision{T}(canonicalize2(zh, (x.hi * y.lo + x.lo * y.hi) + zl)...)
-    ifelse(iszero(zh) | !isfinite(zh), TwicePrecision{T}(zh, zh), ret)
-end
+*(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat} =
+    let zh = x.hi * y.hi  # plain product of the high words, used only for the guard
+        ifelse(  # both arms are evaluated; `ifelse` only selects one
+            !isfinite(zh),  # Inf/NaN product: return it in both words
+            TwicePrecision{T}(zh, zh),
+            TwicePrecision{T}(dw_times_dw(Tuple(x), Tuple(y))...),
+        )
+    end
+
 *(x::TwicePrecision, y::TwicePrecision) = *(promote(x, y)...)
 
+/(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =  # same-type float scalar only
+    TwicePrecision{T}(dw_div_fp(Tuple(x), y)...)  # NOTE(review): zero / non-finite quotient is unguarded — confirm y = 0 and y = ±Inf
+
 function /(x::TwicePrecision, v::Number)
     x / TwicePrecision(oftype(x.hi/v, v))
 end
 
-function /(x::TwicePrecision, y::TwicePrecision)
+function /(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat}
     hi = x.hi / y.hi
-    uh, ul = mul12(hi, y.hi)
-    lo = ((((x.hi - uh) - ul) + x.lo) - hi*y.lo)/y.hi
-    ret = TwicePrecision(canonicalize2(hi, lo)...)
-    ifelse(iszero(hi) | !isfinite(hi), TwicePrecision(hi, hi), ret)
+    ifelse(  # both arms are evaluated; `ifelse` only selects one
+        iszero(hi) | !isfinite(hi),  # zero covers infinite y.hi, where dw_div_dw yields NaN
+        TwicePrecision{T}(hi, hi),   # special value goes into both words, as before
+        TwicePrecision{T}(dw_div_dw(Tuple(x), Tuple(y))...),
+    )
 end
 
 ## StepRangeLen