Skip to content

Commit f85536b

Browse files
committed
Add documentation for all of the available similarity functions.
1 parent 9b7a765 commit f85536b

File tree

4 files changed

+226
-23
lines changed

4 files changed

+226
-23
lines changed

docs/make.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ makedocs(
1111
sitename = "LSH.jl",
1212
format = Documenter.HTML(),
1313
modules = [LSH],
14-
pages = ["Home" => "index.md"]
14+
pages = ["Home" => "index.md",
15+
"Similarity functions" => "similarities.md"]
1516
)
1617

1718
deploydocs(

docs/src/index.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ LSH.jl is a package that provides definitions of locality-sensitive hash functio
3333
- ``L^2`` (Euclidean) distance (`ℓ2`)
3434
- Inner product (`inner_prod`)
3535
- Function-space hashes (`L1`, `L2`, and `cossim`)
36+
37+
```@contents
38+
```

docs/src/similarities.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Similarity functions
2+
3+
## Cosine similarity
4+
5+
```@docs
6+
cossim
7+
```
8+
9+
## ``\ell^p`` and ``L^p`` distance
10+
11+
```@docs
12+
ℓp
13+
```
14+
15+
```@docs
16+
Lp
17+
```
18+
19+
## Jaccard similarity
20+
21+
```@docs
22+
jaccard
23+
```
24+
25+
## Inner product
26+
27+
```@docs
28+
inner_prod
29+
```

src/similarities.jl

Lines changed: 192 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@ Cosine similarity
1818
@doc raw"""
1919
cossim(x,y)
2020
21-
Computes the cosine similarity between two inputs, `x` and `y`. Cosine similarity is defined as
21+
Computes the cosine similarity between two inputs ``x`` and ``y``. Cosine similarity is defined as
2222
23-
```\math
24-
cossim(x,y) = \frac{\left\langle x,y\right\rangle}{\|x\|\cdot\|y\|}
25-
```
23+
``\text{cossim}(x,y) = \frac{\left\langle x,y\right\rangle}{\|x\|\cdot\|y\|}``
2624
27-
where ``\left\langle\cdot,\cdot\right\rangle`` is an inner product (e.g. dot product) and ``\|\cdot\|`` is its derived norm. This is roughly interpreted as being related to the angle between the inputs `x` and `y`: when `x` and `y` have low angle between them, `cossim(x,y)` is high (close to `1`). Meanwhile, when `x` and `y` have large angle between them, `cossim(x,y)` is low (close to `-1`).
25+
where ``\left\langle\cdot,\cdot\right\rangle`` is an inner product (e.g. dot product) and ``\|\cdot\|`` is its derived norm. This is roughly interpreted as being related to the angle between the inputs ``x`` and ``y``: when ``x`` and ``y`` have a small angle between them, `cossim(x,y)` is high (close to ``1``). When ``x`` and ``y`` have a large angle between them, `cossim(x,y)` is low (close to ``-1``).
2826
2927
# Arguments
3028
- `x` and `y`: two inputs for which `dot(x,y)`, `norm(x)`, and `norm(y)` are defined.
@@ -73,17 +71,30 @@ L^p distance
7371
====================#
7472

7573
@doc raw"""
76-
ℓp(p, x, y)
77-
ℓ1(x, y)
78-
ℓ2(x, y)
74+
ℓp(x::AbstractVector, y::AbstractVector, p::Real)
75+
ℓ1(x::AbstractVector, y::AbstractVector)
76+
ℓ2(x::AbstractVector, y::AbstractVector)
7977
8078
Computes the ``\ell^p`` distance between a pair of vectors, given by
8179
82-
```\math
83-
\ell^p(x,y) \coloneqq \|x - y\|_p = \sum \left|x_i - y_i\right|^p
80+
``\ell^p(x,y) \coloneqq \|x - y\|_p = \left(\sum_i \left|x_i - y_i\right|^p\right)^{1/p}``
81+
82+
`ℓ1(x,y)` is the same as `ℓp(x,y,1)`, and `ℓ2(x,y)` is the same as `ℓp(x,y,2)`.
83+
84+
# Examples
85+
```jldoctest; setup = :(using LSH)
86+
julia> x = [1, 2, 3];
87+
88+
julia> y = [4, 5, 6];
89+
90+
julia> ℓp(x,y,2) == (abs(1-4)^2 + abs(2-5)^2 + abs(3-6)^2)^(1/2)
91+
true
92+
93+
julia> ℓp(x,y,3) == (abs(1-4)^3 + abs(2-5)^3 + abs(3-6)^3)^(1/3)
94+
true
8495
```
8596
86-
Since ``\ell^1`` and ``\ell^2`` are both common cases of ``\ell^p`` distance, they are given unique function names `ℓ1` and `ℓ2` that you can use to call them.
97+
See also: [`ℓp_norm`](@ref)
8798
"""
8899
function ℓp(x::AbstractVector, y::AbstractVector, p::Real=2)
    # Thin alias: the vector ℓ^p distance is delegated to `Lp` unchanged.
    return Lp(x, y, p)
end
89100

@@ -93,6 +104,13 @@ Since ``\ell^1`` and ``\ell^2`` are both common cases of ``\ell^p`` distance, th
93104
@doc (@doc ℓp)
94105
function ℓ2(x::AbstractVector, y::AbstractVector)
    # Thin alias: forwards to the vector method of `L2`.
    return L2(x, y)
end
95106

107+
@doc raw"""
108+
Lp(x::AbstractVector, y::AbstractVector, p::Real)
109+
L1(x::AbstractVector, y::AbstractVector)
110+
L2(x::AbstractVector, y::AbstractVector)
111+
112+
Computes the ``\ell^p`` distance between a pair of vectors ``x`` and ``y``. Identical to `ℓp(x,y,p)`, `ℓ1(x,y)`, and `ℓ2(x,y)`, respectively.
113+
"""
96114
function Lp(x::AbstractVector{T}, y::AbstractVector{T}, p::Real=2) where {T}
97115
if p ≤ 0
98116
"p must be positive" |> ErrorException |> throw
@@ -108,6 +126,7 @@ function Lp(x::AbstractVector{T}, y::AbstractVector{T}, p::Real=2) where {T}
108126
return result^(1/p)
109127
end
110128

129+
@doc (@doc Lp)
111130
function L1(x::AbstractVector{T}, y::AbstractVector{T}) where {T}
112131
if length(x) != length(y)
113132
"length(x) != length(y)" |> DimensionMismatch |> throw
@@ -121,6 +140,7 @@ function L1(x::AbstractVector{T}, y::AbstractVector{T}) where {T}
121140
return result
122141
end
123142

143+
@doc (@doc Lp)
124144
function L2(x::AbstractVector{T}, y::AbstractVector{T}) where {T}
125145
if length(x) != length(y)
126146
"length(x) != length(y)" |> DimensionMismatch |> throw
@@ -135,9 +155,47 @@ function L2(x::AbstractVector{T}, y::AbstractVector{T}) where {T}
135155
end
136156

137157
# Function space L^p distances
158+
159+
@doc raw"""
160+
Lp(f, g, interval::LSH.RealInterval, p)
161+
L1(f, g, interval::LSH.RealInterval)
162+
L2(f, g, interval::LSH.RealInterval)
163+
164+
Computes the ``L^p`` distance between two functions, given by
165+
166+
``L^p(f,g) \coloneqq \|f - g\|_p = \left(\int_a^b \left|f(x) - g(x)\right|^p \hspace{0.15cm} dx\right)^{1/p}``
167+
168+
# Examples
169+
Below we compute the ``L^1``, ``L^2``, and ``L^3`` distances between ``f(x) = x^2 + 1`` and ``g(x) = 2x`` over the interval ``[0,1]``. The distances are computed by evaluating the integral
170+
171+
``\left(\int_0^1 \left|f(x) - g(x)\right|^p \hspace{0.15cm}dx\right)^{1/p} = \left(\int_0^1 \left|x^2 - 2x + 1\right|^p \hspace{0.15cm}dx\right)^{1/p} = \left(\int_0^1 (x - 1)^{2p} \hspace{0.15cm}dx\right)^{1/p}``
172+
173+
for ``p = 1``, ``p = 2``, and ``p = 3``.
174+
175+
```jldoctest; setup = :(using LSH)
176+
julia> f(x) = x^2 + 1; g(x) = 2x;
177+
178+
julia> interval = LSH.@interval(0 ≤ x ≤ 1);
179+
180+
julia> Lp(f, g, interval, 1) ≈ L1(f, g, interval) ≈ 3^(-1)
181+
true
182+
183+
julia> Lp(f, g, interval, 2) ≈ L2(f, g, interval) ≈ 5^(-1/2)
184+
true
185+
186+
julia> Lp(f, g, interval, 3) ≈ 7^(-1/3)
187+
true
188+
```
189+
190+
See also: [`Lp_norm`](@ref), [`ℓp`](@ref)
191+
"""
138192
function Lp(f, g, interval, p::Real=2)
    # The distance between f and g is the norm of their pointwise difference.
    difference(t) = f(t) - g(t)
    return Lp_norm(difference, interval, p)
end
139-
L1(f, g, interval) = L1_norm(x -> f(x) - g(x), interval)
140-
L2(f, g, interval) = L2_norm(x -> f(x) - g(x), interval)
193+
194+
@doc (@doc Lp)
195+
function L1(f, g, interval)
    # L^1 distance: the L^1 norm of the pointwise difference f - g.
    difference(t) = f(t) - g(t)
    return L1_norm(difference, interval)
end
196+
197+
@doc (@doc Lp)
198+
function L2(f, g, interval)
    # L^2 distance: the L^2 norm of the pointwise difference f - g.
    difference(t) = f(t) - g(t)
    return L2_norm(difference, interval)
end
141199

142200
#====================
143201
Jaccard similarity
@@ -146,11 +204,9 @@ Jaccard similarity
146204
@doc raw"""
147205
jaccard(A::Set, B::Set) :: Float64
148206
149-
Computes the Jaccard similarity between sets `A` and `B`, which is defined as
207+
Computes the Jaccard similarity between sets ``A`` and ``B``, which is defined as
150208
151-
```\math
152-
J(A,B) = \frac{\left|A \cap B\right|}{\left|A \cup B\right|}
153-
```
209+
``\text{Jaccard}(A,B) = \frac{\left|A \cap B\right|}{\left|A \cup B\right|}``
154210
155211
# Arguments
156212
- `A::Set`, `B::Set`: the two sets with which to compute Jaccard similarity.
@@ -186,25 +242,136 @@ Inner product and norms
186242

187243
### Inner products
188244
# TODO: docs
245+
246+
@doc raw"""
247+
inner_prod(x::AbstractVector, y::AbstractVector)
248+
249+
Computes the ``\ell^2`` inner product (dot product)
250+
251+
``\left\langle x, y\right\rangle = \sum_i x_iy_i``
252+
253+
# Examples
254+
```jldoctest; setup = :(using LSH)
255+
julia> using LinearAlgebra: dot;
256+
257+
julia> x, y = randn(4), randn(4);
258+
259+
julia> inner_prod(x,y) ≈ dot(x,y)
260+
true
261+
```
262+
"""
189263
function inner_prod(x::AbstractVector, y::AbstractVector)
    # Delegates to `dot` for the inner product of the two vectors.
    return dot(x, y)
end
190264

191265
# 1-dimensional inner product between L^2 functions
266+
@doc raw"""
267+
inner_prod(f, g, interval::LSH.RealInterval)
268+
269+
Computes the ``L^2`` inner product
270+
271+
``\left\langle f, g\right\rangle = \int_a^b f(x)g(x) \hspace{0.15cm} dx``
272+
273+
where the interval we're integrating over is specified by the `interval` argument.
274+
275+
# Examples
276+
```jldoctest; setup = :(using LSH)
277+
julia> f(x) = cos(x); g(x) = sin(x);
278+
279+
julia> inner_prod(f, g, LSH.@interval(0 ≤ x ≤ π/2)) ≈ 1/2
280+
true
281+
```
282+
"""
192283
function inner_prod(f, g, interval::LSH.RealInterval)
    # Integrate the pointwise product of f and g between the interval's bounds.
    integrand(t) = f(t) * g(t)
    # `quadgk` returns a tuple; only its first element is used as the result.
    return quadgk(integrand, interval.lower, interval.upper)[1]
end
194285

195286
### L^p norms
287+
@doc raw"""
288+
Lp_norm(x::AbstractVector, p::Real = 2)
289+
L1_norm(x::AbstractVector)
290+
L2_norm(x::AbstractVector)
291+
292+
Compute the ``\ell^p`` norm of a vector ``x``. Identical to `ℓp_norm(x, p)`, `ℓ1_norm(x)`, and `ℓ2_norm(x)`, respectively.
293+
294+
See also: [`ℓp_norm`](@ref)
295+
"""
196296
function Lp_norm(x::AbstractVector, p::Real = 2)
    # Delegates to `norm` with the requested order p.
    return norm(x, p)
end
297+
298+
@doc (@doc Lp_norm)
197299
function L1_norm(x::AbstractVector)
    # Order-1 norm via `norm`.
    return norm(x, 1)
end
300+
301+
@doc (@doc Lp_norm)
198302
function L2_norm(x::AbstractVector)
    # Uses `norm` with its default order.
    return norm(x)
end
199303

304+
@doc raw"""
305+
ℓp_norm(x::AbstractVector, p::Real = 2)
306+
ℓ1_norm(x::AbstractVector)
307+
ℓ2_norm(x::AbstractVector)
308+
309+
Compute the ``\ell^p`` norm of a point ``x``, defined as
310+
311+
``\|x\|_p = \left(\sum_i \left|x_i\right|^p\right)^{1/p}``
312+
313+
# Examples
314+
315+
```jldoctest; setup = :(using LSH)
316+
julia> x = randn(4);
317+
318+
julia> ℓp_norm(x, 1) ≈ ℓ1_norm(x) ≈ (map(u -> abs(u)^1, x) |> sum)^(1/1)
319+
true
320+
321+
julia> ℓp_norm(x, 2) ≈ ℓ2_norm(x) ≈ (map(u -> abs(u)^2, x) |> sum)^(1/2)
322+
true
323+
324+
julia> ℓp_norm(x, 3) ≈ (map(u -> abs(u)^3, x) |> sum)^(1/3)
325+
true
326+
```
327+
328+
See also: [`ℓp`](@ref), [`Lp_norm`](@ref)
329+
"""
200330
function ℓp_norm(x::AbstractVector, p::Real = 2)
    # Thin alias: forwards to the vector method of `Lp_norm`.
    return Lp_norm(x, p)
end
201-
ℓ1_norm(x::AbstractVector) = L1_norm(x)
202-
ℓ2_norm(x::AbstractVector) = L2_norm(x)
331+
332+
@doc (@doc ℓp_norm)
333+
function ℓ1_norm(x::AbstractVector)
    # Thin alias: forwards to the vector method of `L1_norm`.
    return L1_norm(x)
end
334+
335+
@doc (@doc ℓp_norm)
336+
function ℓ2_norm(x::AbstractVector)
    # Thin alias: forwards to the vector method of `L2_norm`.
    return L2_norm(x)
end
203337

204338
# 1-dimensional L^p norms
339+
340+
@doc raw"""
341+
Lp_norm(f, interval::LSH.RealInterval, p::Real=2)
342+
L1_norm(f, interval::LSH.RealInterval)
343+
L2_norm(f, interval::LSH.RealInterval)
344+
345+
Computes the ``L^p`` function-space norm of a function ``f``, which is given by the equation
346+
347+
``\|f\|_p = \left(\int_a^b \left|f(x)\right|^p \hspace{0.15cm} dx\right)^{1/p}``
348+
349+
`L1_norm(f, interval)` is the same as `Lp_norm(f, interval, 1)`, and `L2_norm(f, interval)` is the same as `Lp_norm(f, interval, 2)`.
350+
351+
# Examples
352+
353+
```jldoctest; setup = :(using LSH)
354+
julia> f(x) = x;
355+
356+
julia> interval = LSH.@interval(0 ≤ x ≤ 1);
357+
358+
julia> Lp_norm(f, interval, 1) ≈ L1_norm(f, interval) ≈ 2^(-1/1)
359+
true
360+
361+
julia> Lp_norm(f, interval, 2) ≈ L2_norm(f, interval) ≈ 3^(-1/2)
362+
true
363+
364+
julia> Lp_norm(f, interval, 3) ≈ 4^(-1/3)
365+
true
366+
```
367+
"""
205368
function Lp_norm(f, interval::LSH.RealInterval, p::Real=2)
    # Integrand |f(t)|^p, integrated between the interval's bounds.
    integrand(t) = abs(f(t)).^p
    # `quadgk` returns a tuple; only its first element is used.
    integral = quadgk(integrand, interval.lower, interval.upper)[1]
    # Take the p-th root to turn the integral into the norm.
    return integral^(1/p)
end
206-
L1_norm(f, interval::LSH.RealInterval) = quadgk(x -> abs(f(x)), interval.lower, interval.upper)[1]
207-
L2_norm(f, interval::LSH.RealInterval) = quadgk(x -> abs2(f(x)), interval.lower, interval.upper)[1]
369+
370+
@doc (@doc Lp_norm)
371+
function L1_norm(f, interval::LSH.RealInterval)
    # Integrate |f(t)| between the interval's bounds; no root is needed for p = 1.
    integrand(t) = abs(f(t))
    return quadgk(integrand, interval.lower, interval.upper)[1]
end
372+
373+
@doc (@doc Lp_norm)
374+
function L2_norm(f, interval::LSH.RealInterval)
    # Integrate |f(t)|^2 between the interval's bounds. `quadgk` returns a tuple; only
    # its first element is used.
    integral = quadgk(x -> abs2(f(x)), interval.lower, interval.upper)[1]
    # The L^2 norm is the square root of that integral. Without the root this returned
    # the squared norm and disagreed with `Lp_norm(f, interval, 2)`.
    return sqrt(integral)
end
208375

209376
#====================
210377
1D Wasserstein distance
@@ -238,9 +405,12 @@ function wasserstein_1d(f, g, p::AbstractFloat)
238405
error("TODO")
239406
end
240407

408+
@doc (@doc wasserstein_1d)
241409
function wasserstein1_1d(f, g)
    # Order-1 special case of `wasserstein_1d`.
    # NOTE(review): the `wasserstein_1d` method visible in this file is declared with
    # `p::AbstractFloat`; the integer literal below only dispatches if another method
    # accepts it — confirm.
    return wasserstein_1d(f, g, 1)
end
410+
# `emd` is an alias for `wasserstein1_1d`. It is declared `const` so the global binding
# has a fixed type; a non-const global is untyped and calls through it cannot be
# specialized by the compiler.
const emd = wasserstein1_1d
411+
412+
@doc (@doc wasserstein_1d)
242413
function wasserstein2_2d(f, g)
    # Order-2 special case of `wasserstein_1d`.
    # NOTE(review): the name ends in `_2d` while it wraps `wasserstein_1d` and its
    # sibling is `wasserstein1_1d` — possibly meant to be `wasserstein2_1d`; confirm
    # before renaming, since callers may already use this name.
    return wasserstein_1d(f, g, 2)
end
243-
emd(f, g) = wasserstein1_1d(f, g)
244414

245415
#====================
246416
Definitions for similarity function-related components of the AbstractLSHFunction

0 commit comments

Comments
 (0)