Skip to content

Commit 517e8a2

Browse files
committed
Add a method for Jaccard similarity between BitArrays.
1 parent 4b05ba3 commit 517e8a2

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

src/similarities.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,37 @@ function jaccard(A::Set, B::Set) :: Float64
239239
end
240240
end
241241

242+
@doc raw"""
    jaccard(x::BitArray{1}, y::BitArray{1})

Computes the Jaccard similarity between a pair of binary vectors. Here, Jaccard similarity is defined as

``J(x, y) = \frac{\sum_{i} \min(x_i, y_i)}{\sum_{i} \max(x_i, y_i)}``

By convention, the similarity is defined to be zero when neither `x` nor `y` has any bit set (this includes the case of two empty vectors).

# Arguments
- `x::BitArray{1}`, `y::BitArray{1}`: two binary vectors of equal length, in the form of `BitArray`s.

# Returns
- `Float64`: the number of positions set in both `x` and `y`, divided by the number of positions set in at least one of them.

# Throws
- `DimensionMismatch`: if `x` and `y` do not have the same length.

# Examples
```jldoctest; setup = :(using LSHFunctions)
julia> x = BitArray([true, false, true, true, false]);

julia> y = BitArray([false, false, true, true, true]);

julia> jaccard(x,y)
0.5
```
"""
function jaccard(x::BitArray{1}, y::BitArray{1}) :: Float64
    # Broadcasting would silently stretch a length-1 vector to match the other
    # operand, so equal lengths are required explicitly.
    if length(x) != length(y)
        throw(DimensionMismatch(
            "x and y must have the same length, got $(length(x)) and $(length(y))"
        ))
    end

    # `count` tallies the set bits. The local names deliberately avoid
    # shadowing `Base.union` and `Base.intersect`.
    n_union = count(x .| y)
    if n_union == 0
        # To avoid corner cases where x and y are both full of zeros
        return 0.0
    end

    n_intersection = count(x .& y)
    return n_intersection / n_union
end
272+
242273
#====================
243274
Inner product and norms
244275
====================#

test/test_similarities.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,19 @@ end
232232
# Convention used in this module
233233
@test jaccard(Set(), Set()) == 0
234234
end
235+
236+
@testset "Compute Jaccard similarity between binary vectors" begin
    # Two 5-bit vectors: positions 3 and 4 are set in both, and positions
    # 1, 3, 4 and 5 are set in at least one of them.
    u = BitArray([true, false, true, true, false])
    v = BitArray([false, false, true, true, true])

    # The result must not depend on argument order, and must equal
    # (bits set in both) / (bits set in either) = 2 / 4.
    @test jaccard(u, v) == jaccard(v, u) == 1 / 2

    # Convention checked here: a pair of all-false vectors has Jaccard
    # similarity zero.
    blank_a = falses(5)
    blank_b = falses(5)
    @test jaccard(blank_a, blank_b) == 0
end
235248
end
236249

237250
@testset "Inner product similarity tests" begin

0 commit comments

Comments
 (0)