@@ -34,37 +34,40 @@ import Base: reinterpret, zero, one, abs, sign, ==, <, <=, +, -, /, *, div, rem,
34
34
realmin, realmax, print, show, string, convert, parse, promote_rule, min, max,
35
35
trunc, round, floor, ceil, eps, float, widemul
36
36
37
- const IEEEFloat = Union{Float16, Float32, Float64}
37
+ const BitInteger = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
38
+ UInt64, Int128, UInt128}
39
+
40
+ # floats that support fma and are roughly IEEE-like
41
+ const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
38
42
39
43
for fn in [:trunc , :floor , :ceil ]
40
44
fnname = Symbol (fn, " mul" )
41
45
42
46
@eval begin
43
47
@doc """
44
- $($ fnname) (x, y) :: Integer
48
+ $($ fnname) (I, x, y) :: I
45
49
46
- Compute `$($ fn) (x * y)`. For floating point values, this function can
47
- be more accurate than `$($ fn) (x * y)`. The boundary behavior of this
48
- function (e.g. at large values of `x`, `y`) is untested and possibly
49
- incorrect.
50
+ Compute `$($ fn) (I, x * y)`, returning the result as type `I`. For
51
+ floating point values, this function can be more accurate than
52
+ `$($ fn) (x * y)`.
50
53
""" function $ fnname end
51
54
52
- $ fnname {T <: Number} (x:: T , y:: T ) = $ fn (x * y)
55
+ $ fnname {I, T <: Number} (:: Type{I} , x:: T , y:: T ) = $ fn (I, x * y)
53
56
54
- $ fnname ( x:: Number , y:: Number ) = $ fnname (promote (x, y)... )
57
+ $ fnname {I} ( :: Type{I} , x:: Number , y:: Number ) = $ fnname (I, promote (x, y)... )
55
58
end
56
59
57
60
if fn === :trunc
58
61
# trunc a little different, implement in terms of floor
59
- @eval function $fnname {T <: IEEEFloat} ( x:: T , y:: T )
60
- copysign (floormul (abs (x), abs (y)), x* y)
62
+ @eval function $fnname {I, T <: FMAFloat} ( :: Type{I} , x:: T , y:: T )
63
+ copysign (floormul (I, abs (x), abs (y)), x* y)
61
64
end
62
65
else
63
66
# floor and ceil can be implemented the same way
64
- @eval function $fnname {T <: IEEEFloat} ( x:: T , y:: T )
67
+ @eval function $fnname {I, T <: FMAFloat} ( :: Type{I} , x:: T , y:: T )
65
68
a = x * y
66
69
b = fma (x, y, - a)
67
- ifelse (isinteger (a), a + $ fn (b), $ fn (a ))
70
+ $ fn (I, a) + ifelse (isinteger (a), $ fn (I, b), zero (I ))
68
71
end
69
72
end
70
73
end
@@ -190,14 +193,46 @@ function ceil{T, f}(x::FD{T, f})
190
193
end
191
194
end
192
195
196
+ """
197
+ required_precision(n::Integer)
198
+
199
+ Compute the number of bits of precision needed to represent an integer exactly
200
+ as a floating point number.
201
+
202
+ This is equivalent to counting the number of bits needed to represent the
203
+ integer, excluding any trailing zeros.
204
+ """
205
+ required_precision (n:: Integer ) = ndigits (n, 2 ) - trailing_zeros (n)
206
+
207
+ """
208
+ _apply_exact_float(f, T, x::Real, i::Integer)
209
+
210
+ Compute `f(T, x, i)::T` but avoiding possible loss of precision from an
211
+ intermediate conversion of `i` to a floating point type by instead using a
212
+ `BigFloat` with sufficient precision if necessary.
213
+ """
214
+ function _apply_exact_float {T} (f, :: Type{T} , x:: FMAFloat , i:: Integer )
215
+ prec = required_precision (i)
216
+ if prec > 53
217
+ setprecision (BigFloat, prec) do
218
+ f (T, x, BigFloat (i))
219
+ end
220
+ else
221
+ f (T, x, Float64 (i))
222
+ end
223
+ end
224
+
225
+ _apply_exact_float {T} (f, :: Type{T} , x:: Real , i:: Integer ) = f (T, x, i)
226
+
193
227
for fn in [:trunc , :floor , :ceil ]
194
228
@eval $ fn {TI <: Integer} (:: Type{TI} , x:: FD ):: TI = $ fn (x)
195
229
196
230
# round/trunc/ceil/flooring to FD; generic
197
- # TODO . we may need to check overflow and boundary conditions here.
198
231
@eval function $fn {T, f} (:: Type{FD{T, f}} , x:: Real )
199
232
powt = coefficient (FD{T, f})
200
- val = trunc (T, $ (Symbol (fn, " mul" ))(x, powt))
233
+ # Use machine Float64 if possible, but fall back to BigFloat if we need
234
+ # more precision. 4f bits suffices.
235
+ val = _apply_exact_float ($ (Symbol (fn, " mul" )), T, x, powt)
201
236
reinterpret (FD{T, f}, val)
202
237
end
203
238
end
0 commit comments