Skip to content

Commit 2426638

Browse files
committed
Implement collision_probability for LpHash when sim == 0. Fixes issue #13.
1 parent b0dbbdb commit 2426638

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

src/hashes/lphash.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,19 +185,25 @@ n_hashes(h::LpHash) = length(h.shift)
185185
hashtype(::LpHash) = Int32
186186

187187
# See Section 3.2 of the reference paper
188-
function single_hash_collision_probability(hashfn::LpHash, sim::Real)
188+
function single_hash_collision_probability(hashfn::LpHash, sim::T) where {T <: Real}
189+
### If sim ≈ 0 then the integral won't be possible to numerically compute,
190+
### however we know that the probability equals one.
191+
if sim ≈ T(0)
192+
return T(1)
193+
end
194+
189195
### Compute the collision probability for a single hash function
190196
distr, scale = hashfn.distr, hashfn.scale
191-
integral, err = quadgk(x -> pdf(distr, x/sim) * (1 - x/scale),
192-
0, scale, rtol=1e-5)
197+
integral, err = quadgk(x -> pdf(distr, x/sim) * (T(1) - x/scale),
198+
T(0), T(scale), rtol=1e-5)
193199
integral = integral ./ sim
194200

195201
# Note that from the reference for the L^p LSH family, we're supposed to
196202
# integrate over the p.d.f. for the _absolute value_ of the underlying
197203
# random variable, rather than the raw p.d.f. Luckily, all of the
198204
# distributions we have to deal with here are symmetric and centered at
199205
# zero, so all we have to do is multiply the integral by two.
200-
single_hash_prob = integral .* 2
206+
single_hash_prob = T(integral .* 2)
201207
end
202208

203209
function similarity(hashfn::LpHash)

test/hashes/test_lphash.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,20 @@ Tests
123123
@test size(hashfn.coeff) == (n_hashes, 16)
124124
end
125125
end
126+
127+
@testset "collision_probability works correctly" begin
128+
hashfn = L1Hash()
129+
130+
# collision_probability should be 1 for two inputs of distance zero
131+
x = rand(4)
132+
@test collision_probability(hashfn, x, x) ≈ 1.0
133+
134+
# collision_probability with n_hashes=N should be the same as
135+
# collision_probability with n_hashes=1, raised to the power N
136+
y = rand(4)
137+
@test collision_probability(hashfn, x, y; n_hashes=10) ≈
138+
collision_probability(hashfn, x, y; n_hashes=1)^10
139+
end
126140
end
127141

128142

0 commit comments

Comments
 (0)