@@ -80,6 +80,17 @@ given the prediction `ŷ` and true values `y`.
             | 0.5 * |ŷ - y|^2,            for |ŷ - y| <= δ
Huber loss = |
             | δ * (|ŷ - y| - 0.5 * δ),  otherwise
+
+ # Example
+ ```jldoctest
+ julia> ŷ = [1.1, 2.1, 3.1];
+
+ julia> Flux.huber_loss(ŷ, 1:3) # default δ = 1 > |ŷ - y|
+ 0.005000000000000009
+
+ julia> Flux.huber_loss(ŷ, 1:3, δ=0.05) # changes behaviour as |ŷ - y| > δ
+ 0.003750000000000005
+ ```
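+
+ With `ŷ .- y = [0.1, 0.1, 0.1]`, every residual is below the default `δ = 1`, so the
+ quadratic branch applies and the loss is `mean(0.5 .* 0.1^2) = 0.005` (up to
+ floating-point error). With `δ = 0.05` the linear branch applies instead:
+ `mean(0.05 .* (0.1 - 0.5 * 0.05)) = 0.00375`.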
"""
function huber_loss(ŷ, y; agg = mean, δ = ofeltype(ŷ, 1))
    _check_sizes(ŷ, y)
@@ -377,12 +388,22 @@ function kldivergence(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ))
end

"""
- poisson_loss(ŷ, y)
+ poisson_loss(ŷ, y; agg = mean)

- Return how much the predicted distribution `ŷ` diverges from the expected Poisson
- distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
+ Return how much the predicted distribution `ŷ` diverges from the expected Poisson
+ distribution `y`; calculated as:
+
+ `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.

[More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
+
+ # Example
+ ```jldoctest
+ julia> y_model = [1, 3, 3]; # data should only take integral values
+
+ julia> Flux.poisson_loss(y_model, 1:3)
+ 0.5023128522198171
+ ```
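+
+ Here `sum(ŷ .- y .* log.(ŷ)) = (1 - log(1)) + (3 - 2log(3)) + (3 - 3log(3)) = 7 - 5log(3)`,
+ so with the default `agg = mean` the loss is `(7 - 5log(3)) / 3 ≈ 0.5023`.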
"""
function poisson_loss(ŷ, y; agg = mean)
    _check_sizes(ŷ, y)
@@ -392,11 +413,32 @@ end
"""
hinge_loss(ŷ, y; agg = mean)

- Return the [hinge_loss loss ](https://en.wikipedia.org/wiki/Hinge_loss) given the
+ Return the [hinge_loss](https://en.wikipedia.org/wiki/Hinge_loss) given the
prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
- `sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)`.

+ `sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)`.
+
+ Usually used with classifiers like Support Vector Machines.

See also: [`squared_hinge_loss`](@ref)
+
+ # Example
+ ```jldoctest
+ julia> y_true = [1, -1, 1, 1];
+
+ julia> y_pred = [0.1, 0.3, 1, 1.5];
+
+ julia> Flux.hinge_loss(y_pred, y_true)
+ 0.55
+
+ julia> Flux.hinge_loss(y_pred[1], y_true[1]) # same sign but |ŷ| < 1
+ 0.9
+
+ julia> Flux.hinge_loss(y_pred[end], y_true[end]) # same sign and |ŷ| >= 1 -> loss = 0
+ 0.0
+
+ julia> Flux.hinge_loss(y_pred[2], y_true[2]) # opposite signs -> loss != 0
+ 1.3
+ ```
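+
+ The elementwise losses are `max.(0, 1 .- y_pred .* y_true) = [0.9, 1.3, 0.0, 0.0]`,
+ so the aggregate is `mean([0.9, 1.3, 0.0, 0.0]) = 0.55`.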
"""
function hinge_loss(ŷ, y; agg = mean)
    _check_sizes(ŷ, y)
squared_hinge_loss(ŷ, y)

Return the squared hinge loss given the prediction `ŷ` and true labels `y`
- (containing 1 or -1); calculated as `sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)`.
+ (containing 1 or -1); calculated as
+
+ `sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)`.
+
+ Usually used with classifiers like Support Vector Machines.

See also: [`hinge_loss`](@ref)
+
+ # Example
+ ```jldoctest
+ julia> y_true = [1, -1, 1, 1];
+
+ julia> y_pred = [0.1, 0.3, 1, 1.5];
+
+ julia> Flux.squared_hinge_loss(y_pred, y_true)
+ 0.625
+
+ julia> Flux.squared_hinge_loss(y_pred[1], y_true[1]) # same sign but |ŷ| < 1
+ 0.81
+
+ julia> Flux.squared_hinge_loss(y_pred[end], y_true[end]) # same sign and |ŷ| >= 1 -> loss = 0
+ 0.0
+
+ julia> Flux.squared_hinge_loss(y_pred[2], y_true[2]) # opposite signs -> loss != 0
+ 1.6900000000000002
+ ```
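+
+ Each term is the square of the corresponding hinge loss:
+ `mean([0.9^2, 1.3^2, 0.0, 0.0]) = 0.625`.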
"""
function squared_hinge_loss(ŷ, y; agg = mean)
    _check_sizes(ŷ, y)
Return a loss based on the dice coefficient.
Used in the [V-Net](https://arxiv.org/abs/1606.04797) image segmentation
architecture.
- Similar to the F1_score. Calculated as:
+ The dice coefficient is similar to the F1_score. Loss calculated as:

1 - (2*sum(|ŷ .* y|) + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)
+
+ # Example
+ ```jldoctest
+ julia> y_pred = [1.1, 2.1, 3.1];
+
+ julia> Flux.dice_coeff_loss(y_pred, 1:3)
+ 0.000992391663909964
+
+ julia> 1 - Flux.dice_coeff_loss(y_pred, 1:3) # ~ F1 score for image segmentation
+ 0.99900760833609
+ ```
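+
+ Here `sum(y_pred .* (1:3)) = 14.6`, `sum(y_pred.^2) = 15.23` and `sum((1:3).^2) = 14`,
+ so with `smooth = 1` the loss is `1 - (2 * 14.6 + 1) / (15.23 + 14 + 1) ≈ 0.000992`.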
"""
function dice_coeff_loss(ŷ, y; smooth = ofeltype(ŷ, 1.0))
    _check_sizes(ŷ, y)
@@ -438,7 +513,23 @@ Return the [Tversky loss](https://arxiv.org/abs/1706.05721).
Used with imbalanced data to give more weight to false negatives.
Larger β weighs recall more than precision (by placing more emphasis on false negatives).
Calculated as:
+
1 - (sum(|y .* ŷ|) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
+
+ # Example
+ ```jldoctest
+ julia> ŷ = [1, 0, 1, 1, 0];
+
+ julia> y = [1, 0, 0, 1, 0]; # one false positive prediction
+
+ julia> Flux.tversky_loss(ŷ, y)
+ 0.18918918918918926
+
+ julia> y = [1, 1, 1, 1, 0]; # no false positives, but one false negative
+
+ julia> Flux.tversky_loss(ŷ, y) # loss is smaller, as here the false negative is weighted by 1 - β
+ 0.06976744186046513
+ ```
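+
+ In the first case `sum(y .* ŷ) = 2` and the false positive enters the denominator with
+ weight `β = 0.7`, so the loss is `1 - (2 + 1) / (2 + 0.7 + 1) = 1 - 3/3.7 ≈ 0.189`.
+ In the second, the false negative enters with weight `1 - β = 0.3`, giving
+ `1 - (3 + 1) / (3 + 0.3 + 1) ≈ 0.0698`.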
"""
function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7))
    _check_sizes(ŷ, y)
@@ -456,6 +547,8 @@ The input, 'ŷ', is expected to be normalized (i.e. [softmax](@ref Softmax) out

For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref).

+ See also: [`Losses.focal_loss`](@ref) for multi-class setting
+
# Example
```jldoctest
julia> y = [0 1 0
@@ -473,9 +566,6 @@ julia> ŷ = [0.268941 0.5 0.268941
julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385
true
```
-
- See also: [`Losses.focal_loss`](@ref) for multi-class setting
-
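+
+ As a quick consistency check (not part of the doctest), `Flux.binary_focal_loss(ŷ, y, γ=0)`
+ should match `Flux.binarycrossentropy(ŷ, y)` up to the `ϵ` stabiliser, per the equivalence
+ noted above.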
"""
function binary_focal_loss(ŷ, y; agg = mean, γ = 2, ϵ = epseltype(ŷ))
    _check_sizes(ŷ, y)
@@ -536,7 +626,17 @@ which can be useful for training Siamese Networks. It is given by
agg(@. (1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2)

Specify `margin` to set the baseline for distance at which pairs are dissimilar.
-
+
+ # Example
+ ```jldoctest
+ julia> ŷ = [0.5, 1.5, 2.5];
+
+ julia> Flux.siamese_contrastive_loss(ŷ, 1:3)
+ -4.833333333333333
+
+ julia> Flux.siamese_contrastive_loss(ŷ, 1:3, margin = 2)
+ -4.0
+ ```
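+
+ Note that `y` is conventionally a binary indicator (0 for similar pairs, 1 for dissimilar
+ ones); the `1:3` above merely exercises the formula, and values of `y` above 1 are what
+ make the result negative here, e.g. `(1 - 3) * 2.5^2 = -12.5` for the last pair.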
"""
function siamese_contrastive_loss(ŷ, y; agg = mean, margin::Real = 1)
    _check_sizes(ŷ, y)