Skip to content

Commit 0b23bea

Browse files
TotalVerbomus
authored andcommitted
Fix float conversion overflow problems
1 parent 61fcdb6 commit 0b23bea

File tree

2 files changed

+62
-18
lines changed

2 files changed

+62
-18
lines changed

src/FixedPointDecimals.jl

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,37 +34,40 @@ import Base: reinterpret, zero, one, abs, sign, ==, <, <=, +, -, /, *, div, rem,
3434
realmin, realmax, print, show, string, convert, parse, promote_rule, min, max,
3535
trunc, round, floor, ceil, eps, float, widemul
3636

37-
const IEEEFloat = Union{Float16, Float32, Float64}
37+
const BitInteger = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
38+
UInt64, Int128, UInt128}
39+
40+
# floats that support fma and are roughly IEEE-like
41+
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
3842

3943
for fn in [:trunc, :floor, :ceil]
4044
fnname = Symbol(fn, "mul")
4145

4246
@eval begin
4347
@doc """
44-
$($fnname)(x, y) :: Integer
48+
$($fnname)(I, x, y) :: I
4549
46-
Compute `$($fn)(x * y)`. For floating point values, this function can
47-
be more accurate than `$($fn)(x * y)`. The boundary behavior of this
48-
function (e.g. at large values of `x`, `y`) is untested and possibly
49-
incorrect.
50+
Compute `$($fn)(I, x * y)`, returning the result as type `I`. For
51+
floating point values, this function can be more accurate than
52+
`$($fn)(x * y)`.
5053
""" function $fnname end
5154

52-
$fnname{T <: Number}(x::T, y::T) = $fn(x * y)
55+
$fnname{I, T <: Number}(::Type{I}, x::T, y::T) = $fn(I, x * y)
5356

54-
$fnname(x::Number, y::Number) = $fnname(promote(x, y)...)
57+
$fnname{I}(::Type{I}, x::Number, y::Number) = $fnname(I, promote(x, y)...)
5558
end
5659

5760
if fn === :trunc
5861
# trunc a little different, implement in terms of floor
59-
@eval function $fnname{T <: IEEEFloat}(x::T, y::T)
60-
copysign(floormul(abs(x), abs(y)), x*y)
62+
@eval function $fnname{I, T <: FMAFloat}(::Type{I}, x::T, y::T)
63+
copysign(floormul(I, abs(x), abs(y)), x*y)
6164
end
6265
else
6366
# floor and ceil can be implemented the same way
64-
@eval function $fnname{T <: IEEEFloat}(x::T, y::T)
67+
@eval function $fnname{I, T <: FMAFloat}(::Type{I}, x::T, y::T)
6568
a = x * y
6669
b = fma(x, y, -a)
67-
ifelse(isinteger(a), a + $fn(b), $fn(a))
70+
$fn(I, a) + ifelse(isinteger(a), $fn(I, b), zero(I))
6871
end
6972
end
7073
end
@@ -190,14 +193,46 @@ function ceil{T, f}(x::FD{T, f})
190193
end
191194
end
192195

196+
"""
197+
required_precision(n::Integer)
198+
199+
Compute the number of bits of precision needed to represent an integer exactly
200+
as a floating point number.
201+
202+
This is equivalent to counting the number of bits needed to represent the
203+
integer, excluding any trailing zeros.
204+
"""
205+
required_precision(n::Integer) = ndigits(n, 2) - trailing_zeros(n)
206+
207+
"""
208+
_apply_exact_float(f, T, x::Real, i::Integer)
209+
210+
Compute `f(T, x, i)::T` but avoiding possible loss of precision from an
211+
intermediate conversion of `i` to a floating point type by instead using a
212+
`BigFloat` with sufficient precision if necessary.
213+
"""
214+
function _apply_exact_float{T}(f, ::Type{T}, x::FMAFloat, i::Integer)
215+
prec = required_precision(i)
216+
if prec > 53
217+
setprecision(BigFloat, prec) do
218+
f(T, x, BigFloat(i))
219+
end
220+
else
221+
f(T, x, Float64(i))
222+
end
223+
end
224+
225+
_apply_exact_float{T}(f, ::Type{T}, x::Real, i::Integer) = f(T, x, i)
226+
193227
for fn in [:trunc, :floor, :ceil]
194228
@eval $fn{TI <: Integer}(::Type{TI}, x::FD)::TI = $fn(x)
195229

196230
# round/trunc/ceil/flooring to FD; generic
197-
# TODO. we may need to check overflow and boundary conditions here.
198231
@eval function $fn{T, f}(::Type{FD{T, f}}, x::Real)
199232
powt = coefficient(FD{T, f})
200-
val = trunc(T, $(Symbol(fn, "mul"))(x, powt))
233+
# Use machine Float64 if possible, but fall back to BigFloat if we need
234+
# more precision. 4f bits suffices.
235+
val = _apply_exact_float($(Symbol(fn, "mul")), T, x, powt)
201236
reinterpret(FD{T, f}, val)
202237
end
203238
end

test/runtests.jl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ const INTS = Dict(
5353
2.2 => "220000000000000017763568394002504646778106689453125",
5454
2.3 => "229999999999999982236431605997495353221893310546875"
5555
)
56+
for (k, v) in INTS
57+
INTS[k] = rpad(v, 201, "0")
58+
end
5659
const smaller_than_decimal = [1.22, 1.23, 2.3]
5760
const bigger_than_decimal = [1.51, 2.2]
5861

@@ -603,16 +606,22 @@ end
603606
@test trunc(FD2, x) == FD2(x - 0.01)
604607
@test trunc(FD3, x) == FD3(x - 0.001)
605608

606-
for f in 0:12
609+
for f in 0:18
607610
@test trunc(FD{Int64, f}, x) == parse_int(FD{Int64, f}, INTS[x])
608611
end
612+
for f in 0:200
613+
@test trunc(FD{BigInt, f}, x) == parse_int(FD{BigInt, f}, INTS[x])
614+
end
609615
end
610616

611617
for x in bigger_than_decimal
612618
exactval = FD3(x)
613-
for f in 3:12
619+
for f in 3:14
614620
@test trunc(FD{Int64, f}, x) == exactval
615621
end
622+
for f in 0:18
623+
@test trunc(FD{Int64, f}, x) == parse_int(FD{Int64, f}, INTS[x])
624+
end
616625
end
617626
end
618627

@@ -661,7 +670,7 @@ epsi{T}(::Type{T}) = eps(T)
661670
@test floor(FD2, x) == FD2(x - 0.01)
662671
@test floor(FD3, x) == FD3(x - 0.001)
663672

664-
for f in 0:12
673+
for f in 0:18
665674
@test floor(FD{Int64, f}, x) == parse_int(FD{Int64, f}, INTS[x])
666675
end
667676

@@ -673,7 +682,7 @@ epsi{T}(::Type{T}) = eps(T)
673682
@test ceil(FD2, x) == FD2(x + 0.01)
674683
@test ceil(FD3, x) == FD3(x + 0.001)
675684

676-
for f in 0:12
685+
for f in 0:18
677686
@test ceil(FD{Int64, f}, x) == parse_int(FD{Int64, f}, INTS[x], ceil=true)
678687
end
679688

0 commit comments

Comments
 (0)