Skip to content

Commit 1998d5b

Browse files
authored
Chained hash pipelining in array hashing (#58252)
The proposed switch in #57509 from `3h - hash_finalizer(x)` to `hash_finalizer(3h - x)` should increase the hash quality of chained hashes, as the expanded expression goes from something like `sum((-3)^k * hash(x) for k in ...)` to a non-simplifiable composition. This does have the unfortunate impact of making long chains of hashes a bit slower, as there is more data dependency and the CPU cannot work on the next element's hash before combining the previous one (I think --- I'm not particularly an expert on this low-level stuff). As far as I know this only really impacts `AbstractArray`, so I've implemented a proposal that does some unrolling / pipelining manually to recover `AbstractArray` hashing performance. In fact, it's quite a lot faster now for most lengths. I tuned the thresholds (8 accumulators, certain length breakpoints) by hand on my own machine.
1 parent 40c6d1b commit 1998d5b

File tree

3 files changed

+103
-77
lines changed

3 files changed

+103
-77
lines changed

base/abstractarray.jl

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -3567,81 +3567,6 @@ pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b)
35673567
# sizehint! does nothing by default
35683568
sizehint!(a::AbstractVector, _) = a
35693569

3570-
## hashing AbstractArray ##
3571-
3572-
const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
3573-
function hash(A::AbstractArray, h::UInt)
3574-
h ⊻= hash_abstractarray_seed
3575-
# Axes are themselves AbstractArrays, so hashing them directly would stack overflow
3576-
# Instead hash the tuple of firsts and lasts along each dimension
3577-
h = hash(map(first, axes(A)), h)
3578-
h = hash(map(last, axes(A)), h)
3579-
3580-
# For short arrays, it's not worth doing anything complicated
3581-
if length(A) < 8192
3582-
for x in A
3583-
h = hash(x, h)
3584-
end
3585-
return h
3586-
end
3587-
3588-
# Goal: Hash approximately log(N) entries with a higher density of hashed elements
3589-
# weighted towards the end and special consideration for repeated values. Colliding
3590-
# hashes will often subsequently be compared by equality -- and equality between arrays
3591-
# works elementwise forwards and is short-circuiting. This means that a collision
3592-
# between arrays that differ by elements at the beginning is cheaper than one where the
3593-
# difference is towards the end. Furthermore, choosing `log(N)` arbitrary entries from a
3594-
# sparse array will likely only choose the same element repeatedly (zero in this case).
3595-
3596-
# To achieve this, we work backwards, starting by hashing the last element of the
3597-
# array. After hashing each element, we skip `fibskip` elements, where `fibskip`
3598-
# is pulled from the Fibonacci sequence -- Fibonacci was chosen as a simple
3599-
# ~O(log(N)) algorithm that ensures we don't hit a common divisor of a dimension
3600-
# and only end up hashing one slice of the array (as might happen with powers of
3601-
# two). Finally, we find the next distinct value from the one we just hashed.
3602-
3603-
# This is a little tricky since skipping an integer number of values inherently works
3604-
# with linear indices, but `findprev` uses `keys`. Hoist out the conversion "maps":
3605-
ks = keys(A)
3606-
key_to_linear = LinearIndices(ks) # Index into this map to compute the linear index
3607-
linear_to_key = vec(ks) # And vice-versa
3608-
3609-
# Start at the last index
3610-
keyidx = last(ks)
3611-
linidx = key_to_linear[keyidx]
3612-
fibskip = prevfibskip = oneunit(linidx)
3613-
first_linear = first(LinearIndices(linear_to_key))
3614-
n = 0
3615-
while true
3616-
n += 1
3617-
# Hash the element
3618-
elt = A[keyidx]
3619-
h = hash(keyidx=>elt, h)
3620-
3621-
# Skip backwards a Fibonacci number of indices -- this is a linear index operation
3622-
linidx = key_to_linear[keyidx]
3623-
linidx < fibskip + first_linear && break
3624-
linidx -= fibskip
3625-
keyidx = linear_to_key[linidx]
3626-
3627-
# Only increase the Fibonacci skip once every N iterations. This was chosen
3628-
# to be big enough that all elements of small arrays get hashed while
3629-
# obscenely large arrays are still tractable. With a choice of N=4096, an
3630-
# entirely-distinct 8000-element array will have ~75% of its elements hashed,
3631-
# with every other element hashed in the first half of the array. At the same
3632-
# time, hashing a `typemax(Int64)`-length Float64 range takes about a second.
3633-
if rem(n, 4096) == 0
3634-
fibskip, prevfibskip = fibskip + prevfibskip, fibskip
3635-
end
3636-
3637-
# Find a key index with a value distinct from `elt` -- might be `keyidx` itself
3638-
keyidx = findprev(!isequal(elt), A, keyidx)
3639-
keyidx === nothing && break
3640-
end
3641-
3642-
return h
3643-
end
3644-
36453570
# The semantics of `collect` are weird. Better to write our own
36463571
function rest(a::AbstractArray{T}, state...) where {T}
36473572
v = Vector{T}(undef, 0)
@@ -3650,7 +3575,6 @@ function rest(a::AbstractArray{T}, state...) where {T}
36503575
return foldl(push!, Iterators.rest(a, state...), init=v)
36513576
end
36523577

3653-
36543578
## keepat! ##
36553579

36563580
# NOTE: since these use `@inbounds`, they are actually only intended for Vector and BitVector

base/hashing.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ end
4545
# Combine `a` and `b` by xor-ing together the parts returned by `mul_parts(a, b)`.
hash_mix(a::UInt64, b::UInt64) = ⊻(mul_parts(a, b)...)
4646

4747
# faster-but-weaker than hash_mix intended for small keys
48-
hash_mix_linear(x::UInt64, h::UInt) = 3h - x
48+
# Fold `x` into the running hash `h` as `3h - x`; unsigned arithmetic wraps on overflow.
function hash_mix_linear(x::Union{UInt64, UInt32}, h::UInt)
    return 3 * h - x
end
4949
function hash_finalizer(x::UInt64)
5050
x ⊻= (x >> 32)
5151
x *= 0x63652a4cd374b267

base/multidimensional.jl

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,3 +2017,105 @@ end
20172017

20182018
getindex(b::Ref, ::CartesianIndex{0}) = getindex(b)
20192019
setindex!(b::Ref, x, ::CartesianIndex{0}) = setindex!(b, x)
2020+
2021+
## hashing AbstractArray ## can't be put in abstractarray.jl due to bootstrapping problems with the use of @nexprs
2022+
2023+
# Sampled hash of `A`, continuing from the running hash `h`.
#
# Rather than visiting every element, this hashes roughly log(N) (index, value) pairs,
# sampled more densely towards the end of the array and with special handling of
# repeated values. Rationale:
#   * Arrays whose hashes collide are usually compared with `isequal` afterwards, and
#     that comparison walks forwards and short-circuits. A collision between arrays
#     that differ near the beginning is therefore cheaper than one between arrays that
#     differ near the end.
#   * Picking `log(N)` arbitrary entries of a sparse array would likely hit the same
#     value (zero) over and over.
#
# Strategy: start at the last element and walk backwards. After hashing an element,
# jump back by `gap` positions, where `gap` follows the Fibonacci sequence (a simple
# ~O(log(N)) schedule that avoids locking onto a common divisor of a dimension and
# hashing a single slice only, as powers of two might). Then move on to the nearest
# earlier element whose value differs from the one just hashed.
function _hash_fib(A, h::UInt)
    # Jumping by a number of positions is a linear-index operation, whereas `findprev`
    # speaks in `keys`. Build both conversion maps once, outside the loop.
    idxs = keys(A)
    to_linear = LinearIndices(idxs)   # key -> linear index
    to_key = vec(idxs)                # linear index -> key

    # Begin at the final index.
    key = last(idxs)
    lin = to_linear[key]
    gap = prev_gap = oneunit(lin)
    lin_start = first(LinearIndices(to_key))

    # Four accumulators, all seeded with the incoming hash; samples are dealt to them
    # round-robin.
    @nexprs 4 i -> acc_i = h

    visited = 0
    while true
        visited += 1
        val = A[key]

        # Mix this (key, value) sample into the accumulator whose turn it is.
        lane = mod1(visited, 4)
        @nexprs 4 i -> lane == i && (acc_i = hash_mix_linear(hash(key, acc_i), hash(val, acc_i)))

        # Jump backwards by the current Fibonacci gap, stopping once the jump would
        # land before the first linear index.
        lin = to_linear[key]
        if lin < gap + lin_start
            break
        end
        lin -= gap
        key = to_key[lin]

        # Advance the Fibonacci gap only once per 4096 samples: large enough that
        # small arrays are covered densely, while enormous arrays stay tractable.
        if visited % 4096 == 0
            gap, prev_gap = gap + prev_gap, gap
        end

        # Continue from the nearest position at or before `key` whose value differs
        # from `val` -- possibly `key` itself. Stop when there is none.
        key = findprev(!isequal(val), A, key)
        if key === nothing
            break
        end
    end

    # Fold the four accumulators back into `h` and pass the result through `hash_uint`.
    @nexprs 4 i -> h = hash_mix_linear(acc_i, h)
    return hash_uint(h)
end
end
2085+
2086+
# Hash the shape and the contents of `A`, continuing from the running hash `h`.
#
# The first and last index of every axis are mixed in first. The elements are then
# handled according to `length(A)`:
#   * fewer than 8 elements: each element is chained directly onto `h`;
#   * fewer than 32768 elements: elements are dealt to 8 separate accumulators that
#     are folded back together at the end;
#   * otherwise: the work is handed to `_hash_fib`.
#
# Returns the resulting `UInt` hash.
function hash_shaped(A, h::UInt)
    # Axes are themselves AbstractArrays, so hashing them directly would stack overflow
    # Instead hash the tuple of firsts and lasts along each dimension
    h = hash(map(first, axes(A)), h)
    h = hash(map(last, axes(A)), h)
    len = length(A)

    if len < 8
        # for the shortest arrays we chain directly
        for elt in A
            h = hash(elt, h)
        end
        return h
    elseif len < 32768
        # separate accumulator streams, unrolled
        @nexprs 8 i -> p_i = h
        # Linear indices do not necessarily start at 1: a vector with offset axes is
        # linearly indexed by its own axis. Walk from `firstindex` to `lastindex`
        # instead of assuming `1:len`; for 1-based arrays this is the same range.
        n = firstindex(A)
        stop = lastindex(A)
        limit = stop - 7
        while n <= limit
            @nexprs 8 i -> p_i = hash(A[n + i - 1], p_i)
            n += 8
        end
        # leftover elements (fewer than 8) all go to the first stream
        while n <= stop
            p_1 = hash(A[n], p_1)
            n += 1
        end
        # fold all streams back together
        @nexprs 8 i -> h = hash_mix_linear(p_i, h)
        return hash_uint(h)
    else
        return _hash_fib(A, h)
    end
end
end
2119+
2120+
const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
2121+
# Entry point for hashing any AbstractArray: xor the array-specific seed into `h`, then
# hash the shape and contents with `hash_shaped`.
hash(A::AbstractArray, h::UInt) = hash_shaped(A, h ⊻ hash_abstractarray_seed)

0 commit comments

Comments
 (0)