|
| 1 | +# NOTE: We apply this optimization to values of type (U)Int128 and (U)Int256, which covers |
| 2 | +# FixedDecimal{(U)Int64} and FixedDecimal{(U)Int128}. |
| 3 | +# Julia+LLVM have built-in optimizations that apply this already for FD{(U)Int64}, however |
| 4 | +# this custom implementation appears to produce even faster code than LLVM does on |
| 5 | +# its own, so we apply it to both sizes. |
| 6 | +# Before: |
| 7 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234)) |
| 8 | +# 247.709 μs (0 allocations: 0 bytes) |
| 9 | +# FixedDecimal{Int64,3}(4230510070790917.029) |
| 10 | +# After: |
| 11 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234)) |
| 12 | +# 106.125 μs (0 allocations: 0 bytes) |
| 13 | +# FixedDecimal{Int64,3}(4230510070790917.029) |
1 | 14 |
|
2 |
# Integer types that are routed through the custom constant-division code path below.
const BigBitIntegers = Union{Int128, Int256, UInt128, UInt256}
3 | 16 |
|
@inline function fldmod_by_const(x, y)
    # Generic path: return `(fld(x, y), mod(x, y))`.
    # For small-to-normal integers, LLVM can optimize the division away when it knows
    # the divisor is a constant. `Base.fldmod` is not inlined, so its two halves are
    # written out explicitly here instead of calling it.
    quotient = fld(x, y)
    remainder = mod(x, y)
    return (quotient, remainder)
end
|
10 |
| -@inline function fldmod_by_const(x::BitInteger256, y) |
| 23 | +@inline function fldmod_by_const(x::BigBitIntegers, y) |
11 | 24 | # For large or non-standard Int types, LLVM doesn't optimize
|
12 | 25 | # well, so we use a custom implementation of fldmod.
|
13 | 26 | d = fld_by_const(x, Val(y))
|
|
# Compute `fld(x, y)` where the divisor `y` is supplied as a `Val` constant.
# This mirrors the implementation of `Base.fld(x, y)`, with `div_by_const` standing in
# for `div`.

# Unsigned inputs: the result of `div_by_const` is returned unchanged.
fld_by_const(x::Unsigned, y::Val{C}) where {C} = div_by_const(x, y)

# Signed inputs: adjust the quotient downward by one when the operands have opposite
# signs and the division was inexact.
# NOTE(review): presumes `div_by_const` truncates like `div` -- confirm against its
# definition.
function fld_by_const(x::Signed, y::Val{C}) where {C}
    q = div_by_const(x, y)
    opposite_signs = signbit(x ⊻ C)
    inexact = q * C != x
    return q - (opposite_signs & inexact)
end
|
25 | 38 |
|
# Recover `mod(x, y)` from a `quotient` that was already obtained as `fld(x, y)`, by
# subtracting `quotient * y` from `x`.
# REQUIRES:
# - `y != -1`
@inline function manual_mod(x::T, y::T, quotient::T) where T<:BigBitIntegers
    whole_part = quotient * y
    return x - whole_part
end
|
32 | 45 |
|
|
94 | 107 | # https://github.com/rfourquet/BitIntegers.jl/pull/2
|
# Fallback: defer to `Base.unsigned` for anything without a dedicated method below.
_unsigned(x) = unsigned(x)

# Type-level mappings for the wide integer types: each (signed, unsigned) pair maps both
# of its members to the unsigned type.
for (S, U) in ((Int256, UInt256), (Int128, UInt128))
    @eval _unsigned(::Type{$S}) = $U
    @eval _unsigned(::Type{$U}) = $U
end
97 | 113 |
|
# Size of `x` in bits: `sizeof(x)` (bytes) scaled by 8.
nbits(x) = 8 * sizeof(x)
|
0 commit comments