Skip to content

Commit 428d6f7

Browse files
committed
Finish removing the BigInts from * for FD{Int128}!
Finally implements the fast-multiplication optimization from #45, but this time for 128-bit FixedDecimals! :) This is a follow-up to #93, which introduced an Int256 type for widemul. However, after #93 the fldmod still required 2 BigInt allocations. This PR instead uses a custom implementation of the LLVM div-by-const optimization for (U)Int256, which briefly widens to Int512 (😅) to perform the fldmod by the constant 10^f coefficient. This brings 128-bit FD multiply to the same performance as 64-bit. :)
1 parent be4883d commit 428d6f7

File tree

2 files changed

+17
-37
lines changed

2 files changed

+17
-37
lines changed

src/FixedPointDecimals.jl

Lines changed: 11 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ export checked_abs, checked_add, checked_cld, checked_div, checked_fld,
3636

3737
using Base: decompose, BitInteger
3838

39-
import BitIntegers # For 128-bit _widemul / _widen
39+
using BitIntegers: BitIntegers, UInt256, Int256
4040
import Parsers
4141

42+
include("fldmod-by-const.jl")
43+
4244
# floats that support fma and are roughly IEEE-like
4345
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
4446

@@ -129,8 +131,10 @@ _widemul(x::Unsigned,y::Signed) = signed(_widen(x)) * _widen(y)
129131

130132
# Custom widen implementation to avoid the cost of widening to BigInt.
131133
# FD{Int128} operations should widen to 256 bits internally, rather than to a BigInt.
132-
_widen(::Type{Int128}) = BitIntegers.Int256
133-
_widen(::Type{UInt128}) = BitIntegers.UInt256
134+
_widen(::Type{Int128}) = Int256
135+
_widen(::Type{UInt128}) = UInt256
136+
_widen(::Type{Int256}) = BitIntegers.Int512
137+
_widen(::Type{UInt256}) = BitIntegers.UInt512
134138
_widen(t::Type) = widen(t)
135139
_widen(x::T) where {T} = (_widen(T))(x)
136140

@@ -196,41 +200,12 @@ function _round_to_nearest(quotient::T,
196200
end
197201
_round_to_nearest(q, r, d, m=RoundNearest) = _round_to_nearest(promote(q, r, d)..., m)
198202

199-
# In many of our calls to fldmod, `y` is a constant (the coefficient, 10^f). However, since
200-
# `fldmod` is sometimes not being inlined, that constant information is not available to the
201-
# optimizer. We need an inlined version of fldmod so that the compiler can replace expensive
202-
# divide-by-power-of-ten instructions with the cheaper multiply-by-inverse-coefficient.
203-
@inline fldmodinline(x,::Val{y}) where {y} = (fld(x,y), mod(x,y))
204-
205-
# Note that LLVM *can* handle *div-by-const* for 256-bit integers, so we override the
206-
# implementation from BitInteger, which calls out to big().
207-
const BitInteger256 = Union{BitIntegers.Int256, BitIntegers.UInt256}
208-
@inline function fldmodinline(x::T, ::Val{y}) where {T<:BitInteger256, y}
209-
(_fld(x,T(y)), _mod(x,T(y)))
210-
end
211-
@inline _fld(x, y) = _div(promote(x, y)..., RoundDown)
212-
@inline function _mod(x::T, y) where T<:Integer
213-
y == -1 && return T(0) # avoid potential overflow in fld
214-
return x - _fld(x, y) * y
215-
end
216-
@inline _mod(x::T, y::T) where {T<:Unsigned} = _rem(x, y)
217-
@inline _rem(x::T, y::T) where {T<:Signed} = checked_srem_int(x, y)
218-
@inline _rem(x::T, y::T) where {T<:Unsigned} = checked_urem_int(x, y)
219-
220-
# fld(x, y) == div(x, y) - ((x >= 0) != (y >= 0) && rem(x, y) != 0 ? 1 : 0)
221-
@inline _div(x::T, y::T, ::typeof(RoundDown)) where {T<:Unsigned} = Base.checked_udiv_int(x, y)
222-
@inline function _div(x::T, y::T, ::typeof(RoundDown)) where T<:Integer
223-
d = Base.checked_sdiv_int(x, y) # Explicitly call out
224-
return d - (signbit(x ⊻ y) & (d * y != x))
225-
end
226-
227-
228203
# multiplication rounds to nearest even representation
229204
# TODO: can we use floating point to speed this up? after we build a
230205
# correctness test suite.
231206
function Base.:*(x::FD{T, f}, y::FD{T, f}) where {T, f}
232207
powt = coefficient(FD{T, f})
233-
quotient, remainder = fldmodinline(_widemul(x.i, y.i), Val(powt))
208+
quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), Val(powt))
234209
reinterpret(FD{T, f}, _round_to_nearest(quotient, remainder, powt))
235210
end
236211

@@ -257,12 +232,12 @@ function Base.round(x::FD{T, f},
257232
RoundingMode{:NearestTiesUp},
258233
RoundingMode{:NearestTiesAway}}=RoundNearest) where {T, f}
259234
powt = coefficient(FD{T, f})
260-
quotient, remainder = fldmodinline(x.i, Val(powt))
235+
quotient, remainder = fldmod_by_const(x.i, Val(powt))
261236
FD{T, f}(_round_to_nearest(quotient, remainder, powt, m))
262237
end
263238
function Base.ceil(x::FD{T, f}) where {T, f}
264239
powt = coefficient(FD{T, f})
265-
quotient, remainder = fldmodinline(x.i, Val(powt))
240+
quotient, remainder = fldmod_by_const(x.i, Val(powt))
266241
if remainder > 0
267242
FD{T, f}(quotient + one(quotient))
268243
else
@@ -458,7 +433,7 @@ function Base.checked_sub(x::T, y::T) where {T<:FD}
458433
end
459434
function Base.checked_mul(x::FD{T,f}, y::FD{T,f}) where {T<:Integer,f}
460435
powt = coefficient(FD{T, f})
461-
quotient, remainder = fldmodinline(_widemul(x.i, y.i), Val(powt))
436+
quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), Val(powt))
462437
v = _round_to_nearest(quotient, remainder, powt)
463438
typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:*, x, y)
464439
return reinterpret(FD{T, f}, T(v))

test/runtests.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,9 @@ include(joinpath(pkg_path, "test", "utils.jl"))
1010

1111
@testset "FixedPointDecimals" begin
1212
include("FixedDecimal.jl")
13-
end # global testset
13+
end
14+
15+
@testset "FixedPointDecimals" begin
16+
include("fldmod-by-const_tests.jl")
17+
end
18+

0 commit comments

Comments
 (0)