Skip to content

Commit 3f39b8a

Browse files
committed
Adjust to run for all integer types!
1 parent 4ed8ebf commit 3f39b8a

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed

src/fldmod-by-const.jl

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,55 @@
1-
# NOTE: We apply this optimization to values of type (U)Int128 and (U)Int256, which covers
2-
# FixedDecimal{(U)Int64} and FixedDecimal{(U)Int128}.
3-
# Julia+LLVM have built-in optimizations that apply this already for FD{(U)Int64}, however
4-
# this customized implementation appears to produce even faster code than LLVM can
5-
# produce on its own. So we apply this for both sizes.
1+
# NOTE: Surprisingly, even though LLVM implements a version of this optimization on its own
2+
# for smaller integer sizes (<=64-bits), using the code in this file produces faster
3+
# multiplications for *all* types of integers. So we use our custom fldmod_by_const for all
4+
# bit integer types.
65
# Before:
6+
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234))
7+
# 84.959 μs (0 allocations: 0 bytes)
8+
# FixedDecimal{Int32,3}(1700943.280)
9+
#
710
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
811
# 247.709 μs (0 allocations: 0 bytes)
912
# FixedDecimal{Int64,3}(4230510070790917.029)
13+
#
14+
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234))
15+
# 4.077 ms (160798 allocations: 3.22 MiB)
16+
# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324)
17+
#
1018
# After:
19+
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234))
20+
# 68.416 μs (0 allocations: 0 bytes)
21+
# FixedDecimal{Int32,3}(1700943.280)
22+
#
1123
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
1224
# 106.125 μs (0 allocations: 0 bytes)
1325
# FixedDecimal{Int64,3}(4230510070790917.029)
26+
#
27+
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234))
28+
# 204.125 μs (0 allocations: 0 bytes)
29+
# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324)
1430

15-
const BigBitIntegers = Union{UInt128, Int128, UInt256, Int256}
31+
"""
32+
ShouldUseCustomFldmodByConst(::Type{<:MyCustomIntType}) = true
33+
A trait to control opt-in for the custom `fldmod_by_const` implementation. To use this for a
34+
given integer type, you can define this overload for your integer type.
35+
You will also need to implement some parts of the interface below, including _widen().
36+
"""
37+
ShouldUseCustomFldmodByConst(::Type{<:Base.BitInteger}) = true
38+
# Opt the 256-bit integer types in to the custom implementation. `Int128` and `UInt128` are
# already covered by the `Base.BitInteger` method, so only the 256-bit types are listed here.
ShouldUseCustomFldmodByConst(::Type{<:Union{Int256,UInt256}}) = true
39+
ShouldUseCustomFldmodByConst(::Type) = false
1640

1741
@inline function fldmod_by_const(x, y)
18-
# For small-to-normal integers, LLVM can correctly optimize away the division, if it
19-
# knows it's dividing by a const. We cannot call `Base.fldmod` since it's not
20-
# inlined, so here we have explicitly inlined it instead.
21-
return (fld(x,y), mod(x,y))
22-
end
23-
@inline function fldmod_by_const(x::BigBitIntegers, y)
24-
# For large or non-standard Int types, LLVM doesn't optimize
25-
# well, so we use a custom implementation of fldmod.
26-
d = fld_by_const(x, Val(y))
27-
return d, manual_mod(promote(x, y, d)...)
42+
if ShouldUseCustomFldmodByConst(typeof(x))
43+
# For large Int types, LLVM doesn't optimize well, so we use a custom implementation
44+
# of fldmod, which extends that optimization to those larger integer types.
45+
d = fld_by_const(x, Val(y))
46+
return d, manual_mod(promote(x, y, d)...)
47+
else
48+
# For other integers, LLVM might be able to correctly optimize away the division, if
49+
# it knows it's dividing by a const. We cannot call `Base.fldmod` since it's not
50+
# inlined, so here we have explicitly inlined it instead.
51+
return (fld(x,y), mod(x,y))
52+
end
2853
end
2954

3055
# Calculate fld(x,y) when y is a Val constant.
@@ -39,7 +64,7 @@ end
3964
# Calculate `mod(x,y)` after you've already acquired quotient, the result of `fld(x,y)`.
4065
# REQUIRES:
4166
# - `y != -1`
42-
@inline function manual_mod(x::T, y::T, quotient::T) where T<:BigBitIntegers
67+
@inline function manual_mod(x::T, y::T, quotient::T) where T<:Integer
4368
return x - quotient * y
4469
end
4570

0 commit comments

Comments
 (0)