Skip to content

Commit 0cf47ae

Browse files
committed
Change the 'r' parameter of LpHash to 'scale'. Fix for issue #11.
1 parent 65cb03b commit 0cf47ae

File tree

3 files changed

+22
-21
lines changed

3 files changed

+22
-21
lines changed

src/hashes/lphash.jl

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ mutable struct LpHash{T <: Union{Float32,Float64}, D} <: SymmetricLSHFunction
2424
# Coefficient matrix with which we multiply the input to the hash function
2525
coeff :: Matrix{T}
2626

27-
# "Denominator" parameter r. Higher values of r lead to higher collision
28-
# rates. This parameter is user-specified
29-
r :: T
27+
# "Denominator" parameter scale (called "r" in the reference paper). Higher
28+
# values of scale lead to higher collision rates. This parameter is
29+
# user-specified.
30+
scale :: T
3031

3132
# "Shift" parameter (referred to as 'b' in the 'p-stable distributions' paper).
3233
# There's one shift parameter for every hash function; each parameter is
@@ -56,7 +57,7 @@ abstract type L2Hash <: SymmetricLSHFunction end
5657
### External LpHash constructors
5758

5859
function LpHash{T}(n_hashes::Integer = DEFAULT_N_HASHES;
59-
r::Real = T(1.0),
60+
scale::Real = T(1.0),
6061
power::Integer = 2,
6162
resize_pow2::Bool = DEFAULT_RESIZE_POW2) where {T <: Union{Float32,Float64}}
6263

@@ -73,7 +74,7 @@ function LpHash{T}(n_hashes::Integer = DEFAULT_N_HASHES;
7374
end
7475
end
7576

76-
LpHash(coeff, T(r), shift, Int64(power), distr, resize_pow2)
77+
LpHash(coeff, T(scale), shift, Int64(power), distr, resize_pow2)
7778
end
7879

7980
# Convenience constructor: forwards all positional and keyword arguments to
# LpHash with `power = 1` supplied ahead of the caller's keywords. The former
# `where {T}` clause was removed because T appeared nowhere in the signature
# (Julia warns about declared-but-unused type variables).
L1Hash(args...; kws...) = LpHash(args...; power = 1, kws...)
@@ -186,9 +187,9 @@ hashtype(::LpHash) = Int32
186187
# See Section 3.2 of the reference paper
187188
function single_hash_collision_probability(hashfn::LpHash, sim::Real)
188189
### Compute the collision probability for a single hash function
189-
distr, r = hashfn.distr, hashfn.r
190-
integral, err = quadgk(x -> pdf(distr, x/sim) * (1 - x/r),
191-
0, r, rtol=1e-5)
190+
distr, scale = hashfn.distr, hashfn.scale
191+
integral, err = quadgk(x -> pdf(distr, x/sim) * (1 - x/scale),
192+
0, scale, rtol=1e-5)
192193
integral = integral ./ sim
193194

194195
# Note that from the reference for the L^p LSH family, we're supposed to
@@ -223,6 +224,6 @@ function (hashfn::LpHash{T})(x::AbstractArray{T}) where T
223224
end
224225

225226
h = @views hashfn.coeff[1:end,1:n] * x
226-
h = @. h / hashfn.r + hashfn.shift
227+
h = @. h / hashfn.scale + hashfn.shift
227228
floor.(Int32, h)
228229
end

test/hashes/test_lphash.jl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,17 @@ Tests
1010

1111
@testset "Can construct an ℓ^p distance hash function" begin
1212
# Construct a hash for L^1 distance
13-
L1_hash = L1Hash(5; r = 2)
13+
L1_hash = L1Hash(5; scale = 2)
1414
@test n_hashes(L1_hash) == 5
15-
@test L1_hash.r == 2
15+
@test L1_hash.scale == 2
1616
@test L1_hash.power == 1
1717
@test similarity(L1_hash) == ℓ1
1818
@test hashtype(L1_hash) == Int32
1919

2020
# Construct a hash for L^2 distance
21-
L2_hash = L2Hash(12; r = 3.4)
21+
L2_hash = L2Hash(12; scale = 3.4)
2222
@test n_hashes(L2_hash) == 12
23-
@test L2_hash.r == Float32(3.4)
23+
@test L2_hash.scale == Float32(3.4)
2424
@test L2_hash.power == 2
2525
@test similarity(L2_hash) == ℓ2
2626

@@ -31,22 +31,22 @@ Tests
3131

3232
@testset "Hashes are correctly computed" begin
3333
n_hashes = 8
34-
r = 2
34+
scale = 2
3535

36-
hashfn = L2Hash(n_hashes; r = r)
36+
hashfn = L2Hash(n_hashes; scale=scale)
3737

3838
# Test on a single input
3939
x = randn(8)
4040
hashes = hashfn(x)
41-
manual_hashes = floor.(Int32, hashfn.coeff * x ./ r .+ hashfn.shift)
41+
manual_hashes = floor.(Int32, hashfn.coeff * x ./ scale .+ hashfn.shift)
4242

4343
@test isa(hashes, Vector{Int32})
4444
@test hashes == manual_hashes
4545

4646
# Test on many inputs, simultaneously
4747
x = randn(8, 128)
4848
hashes = hashfn(x)
49-
manual_hashes = floor.(Int32, hashfn.coeff * x ./ r .+ hashfn.shift)
49+
manual_hashes = floor.(Int32, hashfn.coeff * x ./ scale .+ hashfn.shift)
5050

5151
@test isa(hashes, Matrix{Int32})
5252
@test hashes == manual_hashes
@@ -66,7 +66,7 @@ Tests
6666
end
6767

6868
@testset "Nearby points experience more frequent collisions" begin
69-
hashfn = L2Hash(1024; dtype=Float64, r=4)
69+
hashfn = L2Hash(1024; dtype=Float64, scale=4)
7070

7171
x1 = randn(128)
7272
x2 = x1 + 0.05 * randn(length(x1))
@@ -78,7 +78,7 @@ Tests
7878
end
7979

8080
@testset "Hash collision frequency matches probability" begin
81-
hashfn = L2Hash(1024; r = 4)
81+
hashfn = L2Hash(1024; scale = 4)
8282

8383
# Dry run
8484
@test test_collision_probability(hashfn, 0.05)

test/hashes/test_lshfunction.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ end
6262
end
6363

6464
@testset "Create L^p distance hash function" begin
65-
hashfn = LSHFunction(ℓ1, 20; r = 4.0)
65+
hashfn = LSHFunction(ℓ1, 20; scale = 4.0)
6666

6767
@test similarity(hashfn) == ℓ1
6868
@test n_hashes(hashfn) == 20
6969
@test isa(hashfn, LSHFunctions.LpHash)
70-
@test hashfn.r == 4.0
70+
@test hashfn.scale == 4.0
7171
end
7272

7373
@testset "Create Jaccard similarity hash function" begin

0 commit comments

Comments
 (0)