|
231 | 231 | See also: [`MinHash`](@ref)
|
232 | 232 | """
|
233 | 233 | function jaccard(A::Set, B::Set) :: Float64
|
234 |
| - # To avoid corner cases where A and B are both empty |
235 | 234 | if isempty(A)
|
| 235 | + # Use the convention that if A = B = ∅, their Jaccard |
| 236 | + # similarity is zero. |
236 | 237 | Float64(0)
|
237 | 238 | else
|
238 | 239 | length(A ∩ B) / length(A ∪ B)
|
@@ -262,20 +263,72 @@ julia> jaccard(x,y)
|
function jaccard(x::BitArray{1}, y::BitArray{1}) :: Float64
    # Number of positions set in at least one of the two bit vectors.
    n_either = count(x .| y)

    # Use the convention that if x and y are full of zeros, their Jaccard
    # similarity is zero.
    n_either == 0 && return 0.0

    # Number of positions set in both bit vectors.
    n_both = count(x .& y)
    return n_both / n_either
end
|
272 | 274 |
|
@doc raw"""
    jaccard(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})

Compute the Jaccard similarity between a pair of vectors of non-negative real
numbers. Here, Jaccard similarity is defined as

``J(x, y) = \frac{\sum_{i} \min{(x_i,y_i)}}{\sum_{i} \max{(x_i,y_i)}}``

By convention, the similarity is zero when every element of `x` and `y` is zero.

# Arguments
- `x::AbstractVector{<:Real}`, `y::AbstractVector{<:Real}`: a pair of equal-length vectors containing non-negative real numbers (subtypes of `Real`).

# Returns
The similarity, as a value of type `eltype(x)`. When `eltype(x)` is an integer
type the result is returned as `float(eltype(x))` instead, since the ratio is
generally not an integer.

# Throws
- `DimensionMismatch` if `x` and `y` have different lengths.
- `ErrorException` if any element of `x` or `y` is negative or `NaN`.

# Examples
```jldoctest; setup = :(using LSHFunctions)
julia> x = [0.8, 0.1, 0.3, 0.4, 0.1];

julia> y = [1.0, 0.6, 0.0, 0.4, 0.5];

julia> jaccard(x,y)
0.5
```
"""
function jaccard(x::AbstractVector{T}, y::AbstractVector{<:Real}) where {T <: Real}
    if length(x) != length(y)
        throw(DimensionMismatch("dimensions must match"))
    end

    # Accumulate in a type wide enough for both element types so the sums keep
    # a single type throughout the loop. Including `Int` in the promotion widens
    # narrow integer types (e.g. `Int8`) so the running sums do not wrap around.
    S = promote_type(T, eltype(y), Int)
    min_sum = zero(S)
    max_sum = zero(S)

    # Iterate the elements pairwise instead of indexing with `1:length(x)`, so
    # vectors with non-1-based axes are handled without unchecked indexing.
    for (xi, yi) in zip(x, y)
        # The comparison is false for negative values and for NaN alike.
        if !(xi ≥ 0 && yi ≥ 0)
            throw(ErrorException("vectors must have non-negative elements"))
        end

        if xi ≤ yi
            min_sum += xi
            max_sum += yi
        else
            min_sum += yi
            max_sum += xi
        end
    end

    # The ratio of two integer sums is rarely an integer, so integer element
    # types report the result in the corresponding floating-point type.
    R = T <: Integer ? float(T) : T

    if iszero(max_sum)
        # Use the convention that if x and y are full of zeros, their Jaccard
        # similarity is zero.
        return zero(R)
    end

    return convert(R, min_sum / max_sum)
end

# The similarity is symmetric in its arguments; swap them so that the result
# type follows the floating-point vector rather than the integer one.
jaccard(x::AbstractVector{<:Integer}, y::AbstractVector{<:AbstractFloat}) =
    jaccard(y, x)
| 326 | + |
273 | 327 | #====================
|
274 | 328 | Inner product and norms
|
275 | 329 | ====================#
|
276 | 330 |
|
277 | 331 | ### Inner products
|
278 |
| -# TODO: docs |
279 | 332 |
|
280 | 333 | @doc raw"""
|
281 | 334 | inner_prod(x::AbstractVector, y::AbstractVector)
|
|
0 commit comments