Skip to content

Commit a970e16

Browse files
authored
Integer can be hashed rapidly as well (#58440)
As noted in #58388 (comment), using `hash_bytes` for `BigInt` limbs in isolation will break some hashing invariants. Accordingly, this PR also updates the `hash_integer` fallback to use rapidhash. Some surgery was needed to make `BigFloat`, `Rational`, and such still match, but I think (hope?) I got it all. Below are some benchmarks. The `y` axis is nanoseconds and the `x` axis is an input of size `1234^x`, so `length = 10` means the input `hash(big(1234^10))`. Hashing `BigFloat` got a small bit slower, but this already allocates and already seems less common than hashing `BigInt` (whose hashing remains non-allocating). ![image](https://github.com/user-attachments/assets/df6de62d-5c8e-4779-b5fc-67143f868572) ![image](https://github.com/user-attachments/assets/5c7201b2-78a4-4e80-bee6-989fd29cfc86)
1 parent 8567a3a commit a970e16

File tree

5 files changed

+101
-39
lines changed

5 files changed

+101
-39
lines changed

base/gmp.jl

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -843,24 +843,25 @@ Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), st
843843

844844
## streamlined hashing for BigInt, by avoiding allocation from shifts ##
845845

846+
Base._hash_shl!(x::BigInt, n) = MPZ.mul_2exp!(x, n)
847+
846848
if Limb === UInt64 === UInt
847849
# On 64 bit systems we can define
848850
# an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}),
849851
# and of hash
850852

851-
using .Base: hash_finalizer
853+
using .Base: HASH_SECRET, hash_bytes, hash_finalizer
852854

853855
function hash_integer(n::BigInt, h::UInt)
854856
GC.@preserve n begin
855857
s = n.size
856-
s == 0 && return hash_integer(0, h)
857-
p = convert(Ptr{UInt64}, n.d)
858-
b = unsafe_load(p)
859-
h ⊻= hash_finalizer(ifelse(s < 0, -b, b) ⊻ h)
860-
for k = 2:abs(s)
861-
h ⊻= hash_finalizer(unsafe_load(p, k) ⊻ h)
862-
end
863-
return h
858+
h ⊻= (s < 0)
859+
hash_bytes(
860+
Ptr{UInt8}(n.d),
861+
8 * abs(s),
862+
h,
863+
HASH_SECRET
864+
)
864865
end
865866
end
866867

@@ -892,21 +893,16 @@ if Limb === UInt64 === UInt
892893
return hash(ldexp(flipsign(Float64(limb), sz), pow), h)
893894
end
894895
h = hash_integer(pow, h)
895-
h ⊻= hash_finalizer(flipsign(limb, sz) ⊻ h)
896-
for idx = idx+1:asz
897-
if shift == 0
898-
limb = unsafe_load(ptr, idx)
899-
else
900-
limb1 = limb2
901-
if idx == asz
902-
limb = limb1 >> shift
903-
limb == 0 && break # don't hash leading zeros
904-
else
905-
limb2 = unsafe_load(ptr, idx+1)
906-
limb = limb2 << upshift | limb1 >> shift
907-
end
908-
end
909-
h ⊻= hash_finalizer(limb ⊻ h)
896+
897+
h ⊻= (sz < 0)
898+
trailing_zero_bytes = div(pow, 8)
899+
GC.@preserve x begin
900+
h = hash_bytes(
901+
Ptr{UInt8}(x.d) + 8 * trailing_zero_bytes,
902+
8 * (asz - trailing_zero_bytes),
903+
h,
904+
HASH_SECRET
905+
)
910906
end
911907
return h
912908
end

base/hashing.jl

Lines changed: 70 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,72 @@ hash(x::UInt64, h::UInt) = hash_uint64(hash_mix_linear(x, h))
6969
hash(x::Int64, h::UInt) = hash(bitcast(UInt64, x), h)
7070
hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt) = hash(Int64(x), h)
7171

72-
function hash_integer(n::Integer, h::UInt)
73-
h ⊻= hash_uint((n % UInt) ⊻ h)
74-
n = abs(n)
75-
n >>>= sizeof(UInt) << 3
76-
while n != 0
77-
h ⊻= hash_uint((n % UInt) ⊻ h)
78-
n >>>= sizeof(UInt) << 3
72+
hash_integer(x::Integer, h::UInt) = _hash_integer(x, UInt64(h)) % UInt
73+
function _hash_integer(
74+
x::Integer,
75+
seed::UInt64 = HASH_SEED,
76+
secret::NTuple{3, UInt64} = HASH_SECRET
77+
)
78+
seed ⊻= (x < 0)
79+
u = abs(x)
80+
81+
# always left-pad to multiple of 8 bytes
82+
buflen = UInt(cld(top_set_bit(u), 64) * 8)
83+
seed = seed ⊻ (hash_mix(seed ⊻ secret[1], secret[2]) ⊻ buflen)
84+
85+
a = zero(UInt64)
86+
b = zero(UInt64)
87+
88+
if buflen ≤ 16
89+
a = (UInt64(u % UInt32) << 32) |
90+
UInt64((u >>> ((buflen - 4) * 8)) % UInt32)
91+
92+
delta = (buflen & 24) >>> (buflen >>> 3)
93+
94+
b = (UInt64((u >>> (8 * delta)) % UInt32) << 32) |
95+
UInt64((u >>> (8 * (buflen - 4 - delta))) % UInt32)
96+
else
97+
a = (u >>> 8(buflen - 16)) % UInt
98+
b = (u >>> 8(buflen - 8)) % UInt
99+
100+
i = buflen
101+
if i > 48
102+
see1 = seed
103+
see2 = seed
104+
while i ≥ 48
105+
l0 = u % UInt; u >>>= 64
106+
l1 = u % UInt; u >>>= 64
107+
l2 = u % UInt; u >>>= 64
108+
l3 = u % UInt; u >>>= 64
109+
l4 = u % UInt; u >>>= 64
110+
l5 = u % UInt; u >>>= 64
111+
112+
seed = hash_mix(l0 ⊻ secret[1], l1 ⊻ seed)
113+
see1 = hash_mix(l2 ⊻ secret[2], l3 ⊻ see1)
114+
see2 = hash_mix(l4 ⊻ secret[3], l5 ⊻ see2)
115+
end
116+
seed = seed ⊻ see1 ⊻ see2
117+
i -= 48
118+
end
119+
if i > 16
120+
l0 = u % UInt; u >>>= 64
121+
l1 = u % UInt; u >>>= 64
122+
seed = hash_mix(l0 ⊻ secret[3], l1 ⊻ seed ⊻ secret[2])
123+
if i > 32
124+
l2 = u % UInt; u >>>= 64
125+
l3 = u % UInt; u >>>= 64
126+
seed = hash_mix(l2 ⊻ secret[3], l3 ⊻ seed)
127+
end
128+
end
79129
end
80-
return h
130+
131+
a = a ⊻ secret[2]
132+
b = b ⊻ seed
133+
b, a = mul_parts(a, b)
134+
return hash_mix(a ⊻ secret[1] ⊻ buflen, b ⊻ secret[2])
81135
end
82136

137+
83138
## efficient value-based hashing of floats ##
84139

85140
const hx_NaN = hash(reinterpret(UInt64, NaN))
@@ -117,6 +172,7 @@ function hash(x::Float16, h::UInt)
117172
end
118173

119174
## generic hashing for rational values ##
175+
_hash_shl!(x, n) = (x << n)
120176
function hash(x::Real, h::UInt)
121177
# decompose x as num*2^pow/den
122178
num, pow, den = decompose(x)
@@ -132,6 +188,7 @@ function hash(x::Real, h::UInt)
132188
den = -den
133189
end
134190
num_z = trailing_zeros(num)
191+
135192
num >>= num_z
136193
den_z = trailing_zeros(den)
137194
den >>= den_z
@@ -156,7 +213,10 @@ function hash(x::Real, h::UInt)
156213
end
157214
# handle generic rational values
158215
h = hash_integer(pow, h)
159-
h = hash_integer(num, h)
216+
217+
# trimming only whole bytes of trailing zeros simplifies greatly
218+
# some specializations for memory-backed bitintegers
219+
h = hash_integer((pow > 0) ? _hash_shl!(num, pow % 8) : num, h)
160220
return h
161221
end
162222

@@ -209,7 +269,7 @@ end
209269
else
210270
pos = 1
211271
i = buflen
212-
while i ≥ 48
272+
if i > 48
213273
see1 = seed
214274
see2 = seed
215275
while i ≥ 48

base/irrationals.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ isinteger(::AbstractIrrational) = false
182182
iszero(::AbstractIrrational) = false
183183
isone(::AbstractIrrational) = false
184184

185-
hash(x::Irrational, h::UInt) = 3*objectid(x) - h
185+
hash(x::Irrational, h::UInt) = 3h - objectid(x)
186186

187187
widen(::Type{T}) where {T<:Irrational} = T
188188

base/rational.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
620620
end
621621
end
622622
h = hash_integer(pow, h)
623-
h = hash_integer(num, h)
623+
h = hash_integer((pow > 0) ? (num << (pow % 64)) : num, h)
624624
return h
625625
end
626626

test/gmp.jl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,14 @@ end
811811

812812
@testset "hashing" begin
813813
for i in 1:10:100
814-
bint = big(11)^i
815-
bfloat = big(11.0)^i
816-
@test (hash(bint) == hash(bfloat)) == (bint == bfloat)
814+
for shift in 0:3
815+
bint = big(11)^i << shift
816+
bfloat = float(bint)
817+
@test (hash(bint) == hash(bfloat)) == (bint == bfloat)
818+
@test hash(bint, Base.HASH_SEED) ==
819+
@invoke(hash(bint::Real, Base.HASH_SEED))
820+
@test Base.hash_integer(bint, Base.HASH_SEED) ==
821+
@invoke(Base.hash_integer(bint::Integer, Base.HASH_SEED))
822+
end
817823
end
818824
end

0 commit comments

Comments
 (0)