@@ -212,10 +212,7 @@ Computes the Jaccard similarity between sets ``A`` and ``B``, which is defined a
212
212
``\text{Jaccard}(A,B) = \frac{\left|A \cap B\right|}{\left|A \cup B\right|}``
213
213
214
214
# Arguments
215
- - `A::Set`, `B::Set`: the two sets with which to compute Jaccard similarity.
216
-
217
- # Returns
218
- `Float64`: the Jaccard similarity between sets `A` and `B`, which is between `0` and `1`.
215
+ - `A::Set`, `B::Set`: two sets whose Jaccard similarity we would like to compute.
219
216
220
217
# Examples
221
218
```jldoctest; setup = :(using LSHFunctions)
243
240
@doc raw """
244
241
function jaccard(x::BitArray{1}, y::BitArray{1})
245
242
246
- Computes the Jaccard similarity between a pair of binary vectors. Here, Jaccard similarity is defined as
243
+ Computes the Jaccard similarity between a pair of binary vectors:
247
244
248
- ``J(x, y) = \\frac{\\sum_{i} \\min{(x_i,y_i)}}{\\sum_{i} \\max{(x_i,y_i)}}``
245
+ ``J(x, y) = \frac{\sum_{i} \min{(x_i,y_i)}}{\sum_{i} \max{(x_i,y_i)}}``
249
246
250
247
# Arguments
251
248
- `x::BitArray{1}`, `y::BitArray{1}`: two binary vectors, in the form of `BitArray`s.
275
272
@doc raw """
276
273
function jaccard(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
277
274
278
- Computes the Jaccard similarity between a pair of vectors of real numbers. Here, Jaccard similarity is defined as
275
+ Computes the Jaccard similarity between a pair of vectors of real numbers:
279
276
280
- ``J(x, y) = \\frac{\\sum_{i} \\min{(x_i,y_i)}}{\\sum_{i} \\max{(x_i,y_i)}}``
277
+ ``J(x, y) = \frac{\sum_{i} \min{(x_i,y_i)}}{\sum_{i} \max{(x_i,y_i)}}``
281
278
282
279
# Arguments
283
280
- `x::AbstractVector{<:Real}`, `y::AbstractVector{<:Real}`: a pair of vectors containing real numbers (subtypes of `Real`).
@@ -292,7 +289,8 @@ julia> jaccard(x,y)
292
289
0.5
293
290
```
294
291
"""
295
- function jaccard (x:: AbstractVector{T} , y:: AbstractVector{<:Real} ) where {T <: Real }
292
+ function jaccard (x:: AbstractVector{T} ,
293
+ y:: AbstractVector ) :: Float64 where {T <: Real }
296
294
if length (x) != length (y)
297
295
DimensionMismatch (" dimensions must match" ) |> throw
298
296
end
@@ -315,15 +313,64 @@ function jaccard(x::AbstractVector{T}, y::AbstractVector{<:Real}) where {T <: Re
315
313
if union == T (0 )
316
314
# Use the convention that if x and y are full of zeros, their Jaccard
317
315
# similarity is zero.
318
- T (union )
316
+ Float64 ( 0 )
319
317
else
320
- T (intersection / union)
318
+ Float64 (intersection / union)
321
319
end
322
320
end
323
321
324
322
# Integer-vector / float-vector case: forward to `jaccard` with the two
# arguments swapped, so the float-valued vector is passed first.
# NOTE(review): presumably this lets the general method take its element type
# from the float vector — confirm against that method's signature.
function jaccard(x::AbstractVector{<:Integer}, y::AbstractVector{<:AbstractFloat})
    return jaccard(y, x)
end
326
324
325
@doc raw"""
    function jaccard(A::Set{<:K},
                     B::Set{<:K},
                     weights::Dict{K,V}) where {K,V<:Real}

Computes the weighted Jaccard similarity between two sets:

``J(A, B) = \frac{\sum_{x\in A\cap B} w_x}{\sum_{y\in A\cup B} w_y}``

# Arguments
- `A::Set`, `B::Set`: two sets whose Jaccard similarity we would like to compute.
- `weights::Dict`: a dictionary mapping symbols in the sets `A` and `B` to numerical weights. These weights must be non-negative, and every element of `A ∪ B` must have an entry.

# Returns
`Float64`: the weighted Jaccard similarity between `A` and `B`. By convention the result is `0.0` when the total weight of `A ∪ B` is zero (in particular when both sets are empty), and also when `A` and `B` are disjoint.

# Throws
- `ErrorException`: if any element of `A ∪ B` has a negative weight.
- `KeyError`: if any element of `A ∪ B` is missing from `weights`.

# Examples
```jldoctest; setup = :(using LSHFunctions)
julia> A = Set(["a", "b", "c"]);

julia> B = Set(["b", "c", "d"]);

julia> W = Dict("a" => 0.2, "b" => 2.4, "c" => 0.6, "d" => 1.8);

julia> jaccard(A,B,W)
0.6

julia> jaccard(Set(["a"]), Set(["d"]), W)
0.0
```
"""
function jaccard(A::Set{<:K},
                 B::Set{<:K},
                 weights::Dict{K,V})::Float64 where {K,V<:Real}
    # Total weight of A ∪ B. Every weight in the union is validated on the way,
    # so a negative or missing weight is reported even when the sets are disjoint.
    union_weight = zero(V)
    for el in A ∪ B
        w = weights[el]
        if w < 0
            ErrorException("weights must be non-negative") |> throw
        end
        union_weight += w
    end

    # By convention, if the union carries no weight (e.g. A = B = ∅), the
    # Jaccard similarity is zero.
    if union_weight == zero(V)
        return 0.0
    end

    # `sum` over an empty generator throws, so disjoint sets (empty
    # intersection) are handled explicitly: their similarity is zero.
    shared = A ∩ B
    if isempty(shared)
        return 0.0
    end

    intersection_weight = sum(weights[el] for el in shared)
    return Float64(intersection_weight / union_weight)
end
373
+
327
374
#= ===================
328
375
Inner product and norms
329
376
====================#
0 commit comments