Skip to content

Commit 979d8ec

Browse files
committed
Base: make TwicePrecision more accurate using standard algorithms
It seems that previously the compensated arithmetic code was all designed ad-hoc instead of using the standard algorithms. Some of the introduced code will also be used in `base/div.jl` to fix issue #49450. I didn't check how much this improves the situation. In particular, the example in #33677 still gives the same result, and I wasn't able to evaluate #23497 because of how much Julia changed in the meantime.
1 parent 219dc10 commit 979d8ec

File tree

2 files changed

+141
-27
lines changed

2 files changed

+141
-27
lines changed

base/mpfr.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,8 @@ isfinite(x::BigFloat) = !isinf(x) && !isnan(x)
986986
iszero(x::BigFloat) = x == Clong(0)
987987
isone(x::BigFloat) = x == Clong(1)
988988

989+
Base.add12(x::T, y::T) where {T<:BigFloat} = Base.add12_branchful(x, y)
990+
989991
@eval typemax(::Type{BigFloat}) = $(BigFloat(Inf))
990992
@eval typemin(::Type{BigFloat}) = $(BigFloat(-Inf))
991993

base/twiceprecision.jl

Lines changed: 139 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,50 @@ truncbits(x, nb) = x
3838

3939
## Dekker arithmetic
4040

41+
# Reference for double-word floating-point arithmetic:
42+
#
43+
# Tight and Rigorous Error Bounds for Basic Building Blocks of
44+
# Double-Word Arithmetic
45+
#
46+
# ACM Transactions on Mathematical Software, Vol. 44, No. 2, Article
47+
# 15res. Publication date: October 2017
48+
#
49+
# Mioara Joldes, Jean-Michel Muller, and Valentina Popescu
50+
#
51+
# https://doi.org/10.1145/3121432
52+
53+
function fast_two_sum(big::T, little::T) where {T<:AbstractFloat}
54+
h = big + little
55+
(h, (big - h) + little)
56+
end
57+
4158
"""
4259
hi, lo = canonicalize2(big, little)
4360
4461
Generate a representation where all the nonzero bits in `hi` are more
4562
significant than any of the nonzero bits in `lo`. `big` must be larger
4663
in absolute value than `little`.
4764
"""
48-
function canonicalize2(big, little)
49-
h = big+little
50-
h, (big - h) + little
65+
canonicalize2(big::T, little::T) where {T<:AbstractFloat} = fast_two_sum(big, little)
66+
canonicalize2(big::T, little::T) where {T} = (big + little, zero(big))
67+
68+
# `add12_branchful` and `add12_branchless` are equivalent, known to
69+
# produce the same results as long as there's no overflow and no
70+
# underflow
71+
72+
function add12_branchful(x::T, y::T) where {T<:AbstractFloat}
73+
x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))
74+
fast_two_sum(x, y)
75+
end
76+
77+
function add12_branchless(a::T, b::T) where {T<:AbstractFloat}
78+
s = a + b
79+
a_ = s - b
80+
b_ = s - a_
81+
δa = a - a_
82+
δb = b - b_
83+
t = δa + δb
84+
(s, t)
5185
end
5286

5387
"""
@@ -80,10 +114,8 @@ julia> big(hi) + big(lo)
80114
`lo` differs from 1.0e-19 because `hi` is not exactly equal to
81115
the first 16 decimal digits of the answer.
82116
"""
83-
function add12(x::T, y::T) where {T}
84-
x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))
85-
canonicalize2(x, y)
86-
end
117+
add12(x::T, y::T) where {T<:AbstractFloat} = add12_branchless(x, y)
118+
add12(x::T, y::T) where {T} = (x + y, zero(x))
87119
add12(x, y) = add12(promote(x, y)...)
88120

89121
"""
@@ -118,6 +150,76 @@ end
118150
mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
119151
mul12(x, y) = mul12(promote(x, y)...)
120152

153+
# "DWPlusFP" AKA "Algorithm 4" from Joldes, Muller, Popescu
154+
function dw_plus_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
155+
(x_hi, x_lo) = x
156+
(s_hi, s_lo) = add12(x_hi, y)
157+
v = x_lo + s_lo
158+
fast_two_sum(s_hi, v)
159+
end
160+
161+
# "AccurateDWPlusDW" AKA "Algorithm 6" from Joldes, Muller, Popescu
162+
function dw_plus_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
163+
(x_hi, x_lo) = x
164+
(y_hi, y_lo) = y
165+
(s_hi, s_lo) = add12(x_hi, y_hi)
166+
(t_hi, t_lo) = add12(x_lo, y_lo)
167+
c = s_lo + t_hi
168+
(v_hi, v_lo) = fast_two_sum(s_hi, c)
169+
w = t_lo + v_lo
170+
fast_two_sum(v_hi, w)
171+
end
172+
173+
# "DWTimesFP1" AKA "Algorithm 7" from Joldes, Muller, Popescu
174+
function dw_times_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
175+
(x_hi, x_lo) = x
176+
(c_hi, c_l1) = mul12(x_hi, y)
177+
c_l2 = x_lo * y
178+
(t_hi, t_l1) = fast_two_sum(c_hi, c_l2)
179+
t_l2 = t_l1 + c_l1
180+
fast_two_sum(t_hi, t_l2)
181+
end
182+
183+
# "DWTimesDW3" AKA "Algorithm 12" from Joldes, Muller, Popescu
184+
function dw_times_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
185+
(x_hi, x_lo) = x
186+
(y_hi, y_lo) = y
187+
(c_hi, c_l1) = mul12(x_hi, y_hi)
188+
t_l0 = x_lo * y_lo
189+
t_l1 = fma(x_hi, y_lo, t_l0)
190+
c_l2 = fma(x_lo, y_hi, t_l1)
191+
c_l3 = c_l1 + c_l2
192+
fast_two_sum(c_hi, c_l3)
193+
end
194+
195+
# "DWDivFP3" AKA "Algorithm 15" from Joldes, Muller, Popescu
196+
function dw_div_fp(x::NTuple{2,T}, y::T) where {T<:AbstractFloat}
197+
(x_hi, x_lo) = x
198+
hi = x_hi / y
199+
π = Base.Math.two_mul(hi, y)
200+
δ_hi = x_hi - first(π) # exact operation
201+
δ_t = δ_hi - last(π) # exact operation
202+
δ = δ_t + x_lo
203+
lo = δ / y
204+
fast_two_sum(hi, lo)
205+
end
206+
207+
# "DWDivDW3" AKA "Algorithm 18" from Joldes, Muller, Popescu
208+
function dw_div_dw(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:AbstractFloat}
209+
(x_hi, x_lo) = x
210+
(y_hi, y_lo) = y
211+
t_hi = 1 / y_hi
212+
r_hi = fma(-y_hi, t_hi, true) # exact operation
213+
r_lo = -y_lo * t_hi
214+
e = fast_two_sum(r_hi, r_lo)
215+
δ = dw_times_fp(e, t_hi)
216+
m = dw_plus_fp(δ, t_hi)
217+
dw_times_dw(x, m)
218+
end
219+
220+
div12_kernel(x::T, y::T) where {T<:AbstractFloat} =
221+
dw_div_fp((x, zero(x)), y)
222+
121223
"""
122224
zhi, zlo = div12(x, y)
123225
@@ -149,7 +251,7 @@ function div12(x::T, y::T) where {T<:AbstractFloat}
149251
xs, xe = frexp(x)
150252
ys, ye = frexp(y)
151253
r = xs / ys
152-
rh, rl = canonicalize2(r, -fma(r, ys, -xs)/ys)
254+
rh, rl = div12_kernel(xs, ys)
153255
ifelse(iszero(r) | !isfinite(r), (r, r), (ldexp(rh, xe-ye), ldexp(rl, xe-ye)))
154256
end
155257
div12(x::T, y::T) where {T} = (p = x / y; (p, zero(p)))
@@ -198,6 +300,8 @@ struct TwicePrecision{T}
198300
lo::T # least significant bits
199301
end
200302

303+
(::Type{<:Tuple})(t::TwicePrecision) = (t.hi, t.lo)
304+
201305
TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T))
202306

203307
function TwicePrecision{T}(x) where {T}
@@ -288,23 +392,23 @@ end
288392

289393
# Arithmetic
290394

291-
function +(x::TwicePrecision, y::Number)
292-
s_hi, s_lo = add12(x.hi, y)
293-
TwicePrecision(canonicalize2(s_hi, s_lo+x.lo)...)
294-
end
395+
+(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =
396+
TwicePrecision{T}(dw_plus_fp(Tuple(x), y)...)
397+
295398
+(x::Number, y::TwicePrecision) = y+x
296399

297-
function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T
298-
r = x.hi + y.hi
299-
s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo
300-
TwicePrecision(canonicalize2(r, s)...)
301-
end
400+
+(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat} =
401+
TwicePrecision{T}(dw_plus_dw(Tuple(x), Tuple(y))...)
402+
302403
+(x::TwicePrecision, y::TwicePrecision) = +(promote(x, y)...)
303404

304405
-(x::TwicePrecision, y::TwicePrecision) = x + (-y)
305406
-(x::TwicePrecision, y::Number) = x + (-y)
306407
-(x::Number, y::TwicePrecision) = x + (-y)
307408

409+
*(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =
410+
TwicePrecision{T}(dw_times_fp(Tuple(x), y)...)
411+
308412
function *(x::TwicePrecision, v::Number)
309413
v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
310414
x * TwicePrecision(oftype(x.hi*v, v))
@@ -317,23 +421,31 @@ function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
317421
end
318422
*(v::Number, x::TwicePrecision) = x*v
319423

320-
function *(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T}
321-
zh, zl = mul12(x.hi, y.hi)
322-
ret = TwicePrecision{T}(canonicalize2(zh, (x.hi * y.lo + x.lo * y.hi) + zl)...)
323-
ifelse(iszero(zh) | !isfinite(zh), TwicePrecision{T}(zh, zh), ret)
324-
end
424+
*(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat} =
425+
let zh = x.hi * y.hi
426+
ifelse(
427+
!isfinite(zh),
428+
TwicePrecision{T}(zh, zh),
429+
TwicePrecision{T}(dw_times_dw(Tuple(x), Tuple(y))...),
430+
)
431+
end
432+
325433
*(x::TwicePrecision, y::TwicePrecision) = *(promote(x, y)...)
326434

435+
/(x::TwicePrecision{T}, y::T) where {T<:AbstractFloat} =
436+
TwicePrecision{T}(dw_div_fp(Tuple(x), y)...)
437+
327438
function /(x::TwicePrecision, v::Number)
328439
x / TwicePrecision(oftype(x.hi/v, v))
329440
end
330441

331-
function /(x::TwicePrecision, y::TwicePrecision)
442+
function /(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T<:AbstractFloat}
332443
hi = x.hi / y.hi
333-
uh, ul = mul12(hi, y.hi)
334-
lo = ((((x.hi - uh) - ul) + x.lo) - hi*y.lo)/y.hi
335-
ret = TwicePrecision(canonicalize2(hi, lo)...)
336-
ifelse(iszero(hi) | !isfinite(hi), TwicePrecision(hi, hi), ret)
444+
ifelse(
445+
!isfinite(hi),
446+
TwicePrecision(hi, hi),
447+
TwicePrecision{T}(dw_div_dw(Tuple(x), Tuple(y))...),
448+
)
337449
end
338450

339451
## StepRangeLen

0 commit comments

Comments
 (0)