@@ -38,16 +38,50 @@ truncbits(x, nb) = x
38
38
39
39
# # Dekker arithmetic
40
40
41
+ # Reference for double-word floating-point arithmetic:
42
+ #
43
+ # Tight and Rigorous Error Bounds for Basic Building Blocks of
44
+ # Double-Word Arithmetic
45
+ #
46
+ # ACM Transactions on Mathematical Software, Vol. 44, No. 2, Article
47
+ # 15res. Publication date: October 2017
48
+ #
49
+ # Mioara Joldes, Jean-Michel Muller, and Valentina Popescu
50
+ #
51
+ # https://doi.org/10.1145/3121432
52
+
53
+ function fast_two_sum (big:: T , little:: T ) where {T<: AbstractFloat }
54
+ h = big + little
55
+ (h, (big - h) + little)
56
+ end
57
+
41
58
"""
42
59
hi, lo = canonicalize2(big, little)
43
60
44
61
Generate a representation where all the nonzero bits in `hi` are more
45
62
significant than any of the nonzero bits in `lo`. `big` must be larger
46
63
in absolute value than `little`.
47
64
"""
48
- function canonicalize2 (big, little)
49
- h = big+ little
50
- h, (big - h) + little
65
+ canonicalize2 (big:: T , little:: T ) where {T<: AbstractFloat } = fast_two_sum (big, little)
66
+ canonicalize2 (big:: T , little:: T ) where {T} = (big + little, zero (big))
67
+
68
+ # `add12_branchful` and `add12_branchless` are equivalent, known to
69
+ # produce the same results as long as there's no overflow and no
70
+ # underflow
71
+
72
+ function add12_branchful (x:: T , y:: T ) where {T<: AbstractFloat }
73
+ x, y = ifelse (abs (y) > abs (x), (y, x), (x, y))
74
+ fast_two_sum (x, y)
75
+ end
76
+
77
+ function add12_branchless (a:: T , b:: T ) where {T<: AbstractFloat }
78
+ s = a + b
79
+ a_ = s - b
80
+ b_ = s - a_
81
+ δa = a - a_
82
+ δb = b - b_
83
+ t = δa + δb
84
+ (s, t)
51
85
end
52
86
53
87
"""
@@ -80,10 +114,8 @@ julia> big(hi) + big(lo)
80
114
`lo` differs from 1.0e-19 because `hi` is not exactly equal to
81
115
the first 16 decimal digits of the answer.
82
116
"""
83
- function add12 (x:: T , y:: T ) where {T}
84
- x, y = ifelse (abs (y) > abs (x), (y, x), (x, y))
85
- canonicalize2 (x, y)
86
- end
117
+ add12 (x:: T , y:: T ) where {T<: AbstractFloat } = add12_branchless (x, y)
118
+ add12 (x:: T , y:: T ) where {T} = (x + y, zero (x))
87
119
add12 (x, y) = add12 (promote (x, y)... )
88
120
89
121
"""
118
150
mul12 (x:: T , y:: T ) where {T} = (p = x * y; (p, zero (p)))
119
151
mul12 (x, y) = mul12 (promote (x, y)... )
120
152
153
+ # "DWPlusFP" AKA "Algorithm 4" from Joldes, Muller, Popescu
154
+ function dw_plus_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
155
+ (x_hi, x_lo) = x
156
+ (s_hi, s_lo) = add12 (x_hi, y)
157
+ v = x_lo + s_lo
158
+ fast_two_sum (s_hi, v)
159
+ end
160
+
161
+ # "AccurateDWPlusDW" AKA "Algorithm 6" from Joldes, Muller, Popescu
162
+ function dw_plus_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
163
+ (x_hi, x_lo) = x
164
+ (y_hi, y_lo) = y
165
+ (s_hi, s_lo) = add12 (x_hi, y_hi)
166
+ (t_hi, t_lo) = add12 (x_lo, y_lo)
167
+ c = s_lo + t_hi
168
+ (v_hi, v_lo) = fast_two_sum (s_hi, c)
169
+ w = t_lo + v_lo
170
+ fast_two_sum (v_hi, w)
171
+ end
172
+
173
+ # "DWTimesFP1" AKA "Algorithm 7" from Joldes, Muller, Popescu
174
+ function dw_times_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
175
+ (x_hi, x_lo) = x
176
+ (c_hi, c_l1) = mul12 (x_hi, y)
177
+ c_l2 = x_lo * y
178
+ (t_hi, t_l1) = fast_two_sum (c_hi, c_l2)
179
+ t_l2 = t_l1 + c_l1
180
+ fast_two_sum (t_hi, t_l2)
181
+ end
182
+
183
+ # "DWTimesDW3" AKA "Algorithm 12" from Joldes, Muller, Popescu
184
+ function dw_times_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
185
+ (x_hi, x_lo) = x
186
+ (y_hi, y_lo) = y
187
+ (c_hi, c_l1) = mul12 (x_hi, y_hi)
188
+ t_l0 = x_lo * y_lo
189
+ t_l1 = fma (x_hi, y_lo, t_l0)
190
+ c_l2 = fma (x_lo, y_hi, t_l1)
191
+ c_l3 = c_l1 + c_l2
192
+ fast_two_sum (c_hi, c_l3)
193
+ end
194
+
195
+ # "DWDivFP3" AKA "Algorithm 15" from Joldes, Muller, Popescu
196
+ function dw_div_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
197
+ (x_hi, x_lo) = x
198
+ hi = x_hi / y
199
+ π = Base. Math. two_mul (hi, y)
200
+ δ_hi = x_hi - first (π) # exact operation
201
+ δ_t = δ_hi - last (π) # exact operation
202
+ δ = δ_t + x_lo
203
+ lo = δ / y
204
+ fast_two_sum (hi, lo)
205
+ end
206
+
207
+ # "DWDivDW3" AKA "Algorithm 18" from Joldes, Muller, Popescu
208
+ function dw_div_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
209
+ (x_hi, x_lo) = x
210
+ (y_hi, y_lo) = y
211
+ t_hi = 1 / y_hi
212
+ r_hi = fma (- y_hi, t_hi, true ) # exact operation
213
+ r_lo = - y_lo * t_hi
214
+ e = fast_two_sum (r_hi, r_lo)
215
+ δ = dw_times_fp (e, t_hi)
216
+ m = dw_plus_fp (δ, t_hi)
217
+ dw_times_dw (x, m)
218
+ end
219
+
220
+ div12_kernel (x:: T , y:: T ) where {T<: AbstractFloat } =
221
+ dw_div_fp ((x, zero (x)), y)
222
+
121
223
"""
122
224
zhi, zlo = div12(x, y)
123
225
@@ -149,7 +251,7 @@ function div12(x::T, y::T) where {T<:AbstractFloat}
149
251
xs, xe = frexp (x)
150
252
ys, ye = frexp (y)
151
253
r = xs / ys
152
- rh, rl = canonicalize2 (r, - fma (r, ys, - xs) / ys)
254
+ rh, rl = div12_kernel (xs, ys)
153
255
ifelse (iszero (r) | ! isfinite (r), (r, r), (ldexp (rh, xe- ye), ldexp (rl, xe- ye)))
154
256
end
155
257
div12 (x:: T , y:: T ) where {T} = (p = x / y; (p, zero (p)))
@@ -198,6 +300,8 @@ struct TwicePrecision{T}
198
300
lo:: T # least significant bits
199
301
end
200
302
303
+ (:: Type{<:Tuple} )(t:: TwicePrecision ) = (t. hi, t. lo)
304
+
201
305
TwicePrecision {T} (x:: T ) where {T} = TwicePrecision {T} (x, zero (T))
202
306
203
307
function TwicePrecision {T} (x) where {T}
@@ -288,23 +392,23 @@ end
288
392
289
393
# Arithmetic
290
394
291
- function + (x:: TwicePrecision , y:: Number )
292
- s_hi, s_lo = add12 (x. hi, y)
293
- TwicePrecision (canonicalize2 (s_hi, s_lo+ x. lo)... )
294
- end
395
+ + (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
396
+ TwicePrecision {T} (dw_plus_fp (Tuple (x), y)... )
397
+
295
398
+ (x:: Number , y:: TwicePrecision ) = y+ x
296
399
297
- function + (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where T
298
- r = x. hi + y. hi
299
- s = abs (x. hi) > abs (y. hi) ? (((x. hi - r) + y. hi) + y. lo) + x. lo : (((y. hi - r) + x. hi) + x. lo) + y. lo
300
- TwicePrecision (canonicalize2 (r, s)... )
301
- end
400
+ + (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T<: AbstractFloat } =
401
+ TwicePrecision {T} (dw_plus_dw (Tuple (x), Tuple (y))... )
402
+
302
403
+ (x:: TwicePrecision , y:: TwicePrecision ) = + (promote (x, y)... )
303
404
304
405
- (x:: TwicePrecision , y:: TwicePrecision ) = x + (- y)
305
406
- (x:: TwicePrecision , y:: Number ) = x + (- y)
306
407
- (x:: Number , y:: TwicePrecision ) = x + (- y)
307
408
409
+ * (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
410
+ TwicePrecision {T} (dw_times_fp (Tuple (x), y)... )
411
+
308
412
function * (x:: TwicePrecision , v:: Number )
309
413
v == 0 && return TwicePrecision (x. hi* v, x. lo* v)
310
414
x * TwicePrecision (oftype (x. hi* v, v))
@@ -317,23 +421,31 @@ function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
317
421
end
318
422
* (v:: Number , x:: TwicePrecision ) = x* v
319
423
320
- function * (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T}
321
- zh, zl = mul12 (x. hi, y. hi)
322
- ret = TwicePrecision {T} (canonicalize2 (zh, (x. hi * y. lo + x. lo * y. hi) + zl)... )
323
- ifelse (iszero (zh) | ! isfinite (zh), TwicePrecision {T} (zh, zh), ret)
324
- end
424
+ * (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T<: AbstractFloat } =
425
+ let zh = x. hi * y. hi
426
+ ifelse (
427
+ ! isfinite (zh),
428
+ TwicePrecision {T} (zh, zh),
429
+ TwicePrecision {T} (dw_times_dw (Tuple (x), Tuple (y))... ),
430
+ )
431
+ end
432
+
325
433
* (x:: TwicePrecision , y:: TwicePrecision ) = * (promote (x, y)... )
326
434
435
+ / (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
436
+ TwicePrecision {T} (dw_div_fp (Tuple (x), y)... )
437
+
327
438
function / (x:: TwicePrecision , v:: Number )
328
439
x / TwicePrecision (oftype (x. hi/ v, v))
329
440
end
330
441
331
- function / (x:: TwicePrecision , y:: TwicePrecision )
442
+ function / (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T <: AbstractFloat }
332
443
hi = x. hi / y. hi
333
- uh, ul = mul12 (hi, y. hi)
334
- lo = ((((x. hi - uh) - ul) + x. lo) - hi* y. lo)/ y. hi
335
- ret = TwicePrecision (canonicalize2 (hi, lo)... )
336
- ifelse (iszero (hi) | ! isfinite (hi), TwicePrecision (hi, hi), ret)
444
+ ifelse (
445
+ ! isfinite (hi),
446
+ TwicePrecision (hi, hi),
447
+ TwicePrecision {T} (dw_div_dw (Tuple (x), Tuple (y))... ),
448
+ )
337
449
end
338
450
339
451
# # StepRangeLen
0 commit comments