|
1 |
| -# NOTE: We apply this optimization to values of type (U)Int128 and (U)Int256, which covers |
2 |
| -# FixedDecimal{(U)Int64} and FixedDecimal{(U)Int128}. |
3 |
| -# Julia+LLVM have built-in optimizations that apply this already for FD{(U)Int64}, however |
4 |
| -# this customized implementation appears to produce still even faster code than LLVM can |
5 |
| -# produce on its own. So we apply this for both sizes. |
| 1 | +# NOTE: Surprisingly, even though LLVM implements a version of this optimization on its own |
| 2 | +# for smaller integer sizes (<=64-bits), using the code in this file produces faster |
| 3 | +# multiplications for *all* types of integers. So we use our custom fldmod_by_const for all |
| 4 | +# bit integer types. |
6 | 5 | # Before:
|
| 6 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234)) |
| 7 | +# 84.959 μs (0 allocations: 0 bytes) |
| 8 | +# FixedDecimal{Int32,3}(1700943.280) |
| 9 | +# |
7 | 10 | # julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
|
8 | 11 | # 247.709 μs (0 allocations: 0 bytes)
|
9 | 12 | # FixedDecimal{Int64,3}(4230510070790917.029)
|
| 13 | +# |
| 14 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234)) |
| 15 | +# 4.077 ms (160798 allocations: 3.22 MiB) |
| 16 | +# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324) |
| 17 | +# |
10 | 18 | # After:
|
| 19 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234)) |
| 20 | +# 68.416 μs (0 allocations: 0 bytes) |
| 21 | +# FixedDecimal{Int32,3}(1700943.280) |
| 22 | +# |
11 | 23 | # julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
|
12 | 24 | # 106.125 μs (0 allocations: 0 bytes)
|
13 | 25 | # FixedDecimal{Int64,3}(4230510070790917.029)
|
| 26 | +# |
| 27 | +# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234)) |
| 28 | +# 204.125 μs (0 allocations: 0 bytes) |
| 29 | +# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324) |
14 | 30 |
|
15 |
| -const BigBitIntegers = Union{UInt128, Int128, UInt256, Int256} |
| 31 | +""" |
| 32 | + ShouldUseCustomFldmodByConst(::Type{<:MyCustomIntType}) = true |
| 33 | +A trait to control opt-in for the custom `fldmod_by_const` implementation. To use this for a |
| 34 | +given integer type, you can define this overload for your integer type. |
| 35 | +You will also need to implement some parts of the interface below, including _widen(). |
| 36 | +""" |
| 37 | +ShouldUseCustomFldmodByConst(::Type{<:Base.BitInteger}) = true |
| 38 | +ShouldUseCustomFldmodByConst(::Type{<:Union{Int256,Int128}}) = true |
| 39 | +ShouldUseCustomFldmodByConst(::Type) = false |
16 | 40 |
|
17 | 41 | @inline function fldmod_by_const(x, y)
|
18 |
| - # For small-to-normal integers, LLVM can correctly optimize away the division, if it |
19 |
| - # knows it's dividing by a const. We cannot call `Base.fldmod` since it's not |
20 |
| - # inlined, so here we have explictly inlined it instead. |
21 |
| - return (fld(x,y), mod(x,y)) |
22 |
| -end |
23 |
| -@inline function fldmod_by_const(x::BigBitIntegers, y) |
24 |
| - # For large or non-standard Int types, LLVM doesn't optimize |
25 |
| - # well, so we use a custom implementation of fldmod. |
26 |
| - d = fld_by_const(x, Val(y)) |
27 |
| - return d, manual_mod(promote(x, y, d)...) |
| 42 | + if ShouldUseCustomFldmodByConst(typeof(x)) |
| 43 | + # For large Int types, LLVM doesn't optimize well, so we use a custom implementation |
| 44 | + # of fldmod, which extends that optimization to those larger integer types. |
| 45 | + d = fld_by_const(x, Val(y)) |
| 46 | + return d, manual_mod(promote(x, y, d)...) |
| 47 | + else |
| 48 | + # For other integers, LLVM might be able to correctly optimize away the division, if |
| 49 | + # it knows it's dividing by a const. We cannot call `Base.fldmod` since it's not |
| 50 | + # inlined, so here we have explicitly inlined it instead. |
| 51 | + return (fld(x,y), mod(x,y)) |
| 52 | + end |
28 | 53 | end
|
29 | 54 |
|
30 | 55 | # Calculate fld(x,y) when y is a Val constant.
|
|
39 | 64 | # Calculate `mod(x,y)` after you've already acquired quotient, the result of `fld(x,y)`.
|
40 | 65 | # REQUIRES:
|
41 | 66 | # - `y != -1`
|
42 |
| -@inline function manual_mod(x::T, y::T, quotient::T) where T<:BigBitIntegers |
| 67 | +@inline function manual_mod(x::T, y::T, quotient::T) where T<:Integer |
43 | 68 | return x - quotient * y
|
44 | 69 | end
|
45 | 70 |
|
|
0 commit comments