Commit d4f1d81

Merge pull request #1998 from Saransh-cpp/docstrings-for-utils-and-losses
Miscellaneous docstring additions and fixes
2 parents f9b95c4 + f49ec34 commit d4f1d81

7 files changed: +194 -58 lines changed

docs/src/utilities.md

Lines changed: 4 additions & 0 deletions
@@ -42,7 +42,11 @@ Flux.orthogonal
 Flux.sparse_init
 Flux.identity_init
 Flux.ones32
+Flux.zeros32
 Flux.rand32
+Flux.randn32
+Flux.rng_from_array
+Flux.default_rng_value
 ```
 
 ## Changing the type of model parameters
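The functions added to this docs page are small conveniences; a rough usage sketch, assuming a Flux version where all of them are available under the `Flux` module as this PR's docs imply:

```julia
using Flux

W = Flux.randn32(3, 4)   # 3×4 Matrix{Float32}, standard-normal entries
b = Flux.zeros32(3)      # 3-element Vector{Float32} of zeros

rng = Flux.rng_from_array(W)      # an RNG suited to W's array type
rng2 = Flux.default_rng_value()   # version-stable default RNG (used by Dropout below)

x = rand(rng, Float32, 4, 2)      # use the chosen RNG as usual
```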

src/layers/basic.jl

Lines changed: 3 additions & 3 deletions
@@ -155,7 +155,7 @@ struct Dense{F, M<:AbstractMatrix, B}
   bias::B
   σ::F
   function Dense(W::M, bias = true, σ::F = identity) where {M<:AbstractMatrix, F}
-    b = create_bias(W, bias, size(W,1))
+    b = _create_bias(W, bias, size(W,1))
     new{F,M,typeof(b)}(W, b, σ)
   end
 end
@@ -228,7 +228,7 @@ struct Scale{F, A<:AbstractArray, B}
   bias::B
   σ::F
   function Scale(scale::A, bias::B = true, σ::F = identity) where {A<:AbstractArray, B<:Union{Bool, AbstractArray}, F}
-    b = create_bias(scale, bias, size(scale)...)
+    b = _create_bias(scale, bias, size(scale)...)
     new{F, A, typeof(b)}(scale, b, σ)
   end
 end
@@ -403,7 +403,7 @@ struct Bilinear{F,A,B}
   σ::F
   function Bilinear(W::A, bias = true, σ::F = identity) where {A<:AbstractArray, F}
     ndims(A) == 3 || throw(ArgumentError("expected a 3-array of weights"))
-    b = create_bias(W, bias, size(W,1))
+    b = _create_bias(W, bias, size(W,1))
     new{F,A,typeof(b)}(W, b, σ)
   end
 end
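The only change in this file is the rename of the internal bias helper. For orientation, here is a rough sketch of what that helper does (a hypothetical `_create_bias_sketch`, not the actual Flux source):

```julia
# Hypothetical sketch: given the weight array, the user's `bias` argument and the
# expected bias dimensions, return a zero vector, the user's array (size-checked),
# or `false` to signal "no bias".
function _create_bias_sketch(weights::AbstractArray, bias::Bool, dims::Integer...)
  bias ? fill!(similar(weights, dims...), 0) : false
end

function _create_bias_sketch(weights::AbstractArray, bias::AbstractArray, dims::Integer...)
  size(bias) == dims || throw(DimensionMismatch("expected bias of size $dims, got size $(size(bias))"))
  bias
end
```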

src/layers/conv.jl

Lines changed: 3 additions & 3 deletions
@@ -156,7 +156,7 @@ function Conv(w::AbstractArray{T,N}, b = true, σ = identity;
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride)
-  bias = create_bias(w, b, size(w, N))
+  bias = _create_bias(w, b, size(w, N))
   return Conv(σ, w, bias, stride, pad, dilation, groups)
 end
 
@@ -293,7 +293,7 @@ function ConvTranspose(w::AbstractArray{T,N}, bias = true, σ = identity;
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(ConvTranspose, pad, size(w)[1:N-2], dilation, stride)
-  b = create_bias(w, bias, size(w, N-1) * groups)
+  b = _create_bias(w, bias, size(w, N-1) * groups)
   return ConvTranspose(σ, w, b, stride, pad, dilation, groups)
 end
 
@@ -441,7 +441,7 @@ function CrossCor(w::AbstractArray{T,N}, bias = true, σ = identity;
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(CrossCor, pad, size(w)[1:N-2], dilation, stride)
-  b = create_bias(w, bias, size(w, N))
+  b = _create_bias(w, bias, size(w, N))
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
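These are the low-level constructors; the bias handling they delegate to the renamed helper is easiest to see through the public keyword form (shapes below are arbitrary):

```julia
using Flux

c1 = Conv((3, 3), 3 => 8, relu)                         # default bias = true → trainable zeros
c2 = Conv((3, 3), 3 => 8, relu; bias = false)           # no bias term
c3 = Conv((3, 3), 3 => 8, relu; bias = Flux.ones32(8))  # explicit bias, checked against 8 channels

x = rand(Float32, 32, 32, 3, 1)
size(c2(x))   # (30, 30, 8, 1) with the default stride and padding
```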

src/layers/normalise.jl

Lines changed: 6 additions & 6 deletions
@@ -51,7 +51,7 @@ end
 ChainRulesCore.@non_differentiable dropout_mask(::Any, ::Any, ::Any)
 
 """
-    Dropout(p; dims=:, rng = rng_from_array())
+    Dropout(p; dims=:, rng = default_rng_value())
 
 Dropout layer.
 
@@ -96,9 +96,9 @@ mutable struct Dropout{F,D,R<:AbstractRNG}
   active::Union{Bool, Nothing}
   rng::R
 end
-Dropout(p, dims, active) = Dropout(p, dims, active, rng_from_array())
+Dropout(p, dims, active) = Dropout(p, dims, active, default_rng_value())
 
-function Dropout(p; dims=:, rng = rng_from_array())
+function Dropout(p; dims=:, rng = default_rng_value())
   @assert 0 ≤ p ≤ 1
   Dropout(p, dims, nothing, rng)
 end
@@ -121,7 +121,7 @@ function Base.show(io::IO, d::Dropout)
 end
 
 """
-    AlphaDropout(p; rng = rng_from_array())
+    AlphaDropout(p; rng = default_rng_value())
 
 A dropout layer. Used in
 [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515).
@@ -155,8 +155,8 @@ mutable struct AlphaDropout{F,R<:AbstractRNG}
     new{typeof(p), typeof(rng)}(p, active, rng)
   end
 end
-AlphaDropout(p, active) = AlphaDropout(p, active, rng_from_array())
-AlphaDropout(p; rng = rng_from_array()) = AlphaDropout(p, nothing, rng)
+AlphaDropout(p, active) = AlphaDropout(p, active, default_rng_value())
+AlphaDropout(p; rng = default_rng_value()) = AlphaDropout(p, nothing, rng)
 
 @functor AlphaDropout
 trainable(a::AlphaDropout) = (;)
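The default RNG for these layers now comes from `default_rng_value()`; an explicit RNG can still be passed, e.g. (a small sketch, CPU only):

```julia
using Flux, Random

d = Dropout(0.4)                                   # uses default_rng_value() internally
d_repro = Dropout(0.4; rng = MersenneTwister(1))   # explicit RNG for reproducibility

x = ones(Float32, 5, 3)
Flux.trainmode!(d_repro)   # dropout only acts in training mode
y = d_repro(x)             # some entries zeroed, the rest scaled by 1/(1 - 0.4)
```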

src/layers/stateless.jl

Lines changed: 19 additions & 0 deletions
@@ -32,6 +32,25 @@ end
 Normalise `x` to mean 0 and standard deviation 1 across the dimension(s) given by `dims`.
 Per default, `dims` is the last dimension.
 `ϵ` is a small additive factor added to the denominator for numerical stability.
+
+# Examples
+```jldoctest
+julia> using Statistics
+
+julia> x = [9, 10, 20, 60];
+
+julia> y = Flux.normalise(x);
+
+julia> isapprox(std(y), 1, atol=0.2) && std(y) != std(x)
+true
+
+julia> x = rand(1:100, 10, 2);
+
+julia> y = Flux.normalise(x, dims=1);
+
+julia> isapprox(std(y, dims=1), ones(1, 2), atol=0.2) && std(y, dims=1) != std(x, dims=1)
+true
+```
 """
 @inline function normalise(x::AbstractArray; dims=ndims(x), ϵ=ofeltype(x, 1e-5))
   μ = mean(x, dims=dims)
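For reference, the normalisation exercised by these new doctests is (up to the ϵ term) plain standardisation along `dims`; a hand-rolled equivalent, assuming the uncorrected standard deviation:

```julia
using Flux, Statistics

x = Float32[9, 10, 20, 60]
ϵ = 1f-5

μ = mean(x)
σ = std(x, mean = μ, corrected = false)
y_manual = (x .- μ) ./ (σ .+ ϵ)                       # roughly what Flux.normalise(x) returns

isapprox(y_manual, Flux.normalise(x); atol = 1f-4)    # expected true, modulo where ϵ enters
```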

src/losses/functions.jl

Lines changed: 99 additions & 13 deletions
@@ -80,6 +80,17 @@ given the prediction `ŷ` and true values `y`.
                  | 0.5 * |ŷ - y|^2,            for |ŷ - y| <= δ
     Huber loss = |
                  |  δ * (|ŷ - y| - 0.5 * δ), otherwise
+
+# Example
+```jldoctest
+julia> ŷ = [1.1, 2.1, 3.1];
+
+julia> Flux.huber_loss(ŷ, 1:3) # default δ = 1 > |ŷ - y|
+0.005000000000000009
+
+julia> Flux.huber_loss(ŷ, 1:3, δ=0.05) # changes behaviour as |ŷ - y| > δ
+0.003750000000000005
+```
 """
 function huber_loss(ŷ, y; agg = mean, δ = ofeltype(ŷ, 1))
   _check_sizes(ŷ, y)
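The two quoted values can be verified by hand: every element of `ŷ .- y` is 0.1, so with the default mean aggregation each branch of the definition gives:

```julia
d = 0.1                 # the elementwise residual |ŷ - y|

δ = 1.0                 # d ≤ δ → quadratic branch
0.5 * d^2               # 0.005   ≈ Flux.huber_loss(ŷ, 1:3)

δ = 0.05                # d > δ → linear branch
δ * (d - 0.5 * δ)       # 0.00375 ≈ Flux.huber_loss(ŷ, 1:3, δ = 0.05)
```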
@@ -377,12 +388,22 @@ function kldivergence(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ))
 end
 
 """
-    poisson_loss(ŷ, y)
+    poisson_loss(ŷ, y; agg = mean)
 
-# Return how much the predicted distribution `ŷ` diverges from the expected Poisson
-# distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
+Return how much the predicted distribution `ŷ` diverges from the expected Poisson
+distribution `y`; calculated as -
+
+    sum(ŷ .- y .* log.(ŷ)) / size(y, 2)
 
 [More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
+
+# Example
+```jldoctest
+julia> y_model = [1, 3, 3]; # data should only take integral values
+
+julia> Flux.poisson_loss(y_model, 1:3)
+0.5023128522198171
+```
 """
 function poisson_loss(ŷ, y; agg = mean)
   _check_sizes(ŷ, y)
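With `agg = mean` the quoted value is the elementwise average of `ŷ - y*log(ŷ)`; note that for a plain vector this divides by the number of elements, not by `size(y, 2)` as the displayed formula would suggest:

```julia
ŷ = [1, 3, 3]; y = 1:3
sum(ŷ .- y .* log.(ŷ)) / length(y)   # ≈ 0.5023128522198171
```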
@@ -392,11 +413,32 @@ end
 """
     hinge_loss(ŷ, y; agg = mean)
 
-Return the [hinge_loss loss](https://en.wikipedia.org/wiki/Hinge_loss) given the
+Return the [hinge_loss](https://en.wikipedia.org/wiki/Hinge_loss) given the
 prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
-`sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)`.
 
+    sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)
+
+Usually used with classifiers like Support Vector Machines.
 See also: [`squared_hinge_loss`](@ref)
+
+# Example
+```jldoctest
+julia> y_true = [1, -1, 1, 1];
+
+julia> y_pred = [0.1, 0.3, 1, 1.5];
+
+julia> Flux.hinge_loss(y_pred, y_true)
+0.55
+
+julia> Flux.hinge_loss(y_pred[1], y_true[1]) != 0 # same sign but |ŷ| < 1
+true
+
+julia> Flux.hinge_loss(y_pred[end], y_true[end]) == 0 # same sign but |ŷ| >= 1
+true
+
+julia> Flux.hinge_loss(y_pred[2], y_true[2]) != 0 # opposite signs
+true
+```
 """
 function hinge_loss(ŷ, y; agg = mean)
   _check_sizes(ŷ, y)
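The 0.55 in the doctest follows directly from the formula with mean aggregation:

```julia
y_true = [1, -1, 1, 1]
y_pred = [0.1, 0.3, 1, 1.5]
sum(max.(0, 1 .- y_pred .* y_true)) / length(y_true)   # (0.9 + 1.3 + 0 + 0)/4 = 0.55
```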
@@ -407,9 +449,31 @@ end
     squared_hinge_loss(ŷ, y)
 
 Return the squared hinge_loss loss given the prediction `ŷ` and true labels `y`
-(containing 1 or -1); calculated as `sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)`.
+(containing 1 or -1); calculated as
+
+    sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)
 
+Usually used with classifiers like Support Vector Machines.
 See also: [`hinge_loss`](@ref)
+
+# Example
+```jldoctest
+julia> y_true = [1, -1, 1, 1];
+
+julia> y_pred = [0.1, 0.3, 1, 1.5];
+
+julia> Flux.squared_hinge_loss(y_pred, y_true)
+0.625
+
+julia> Flux.squared_hinge_loss(y_pred[1], y_true[1]) != 0
+true
+
+julia> Flux.squared_hinge_loss(y_pred[end], y_true[end]) == 0
+true
+
+julia> Flux.squared_hinge_loss(y_pred[2], y_true[2]) != 0
+true
+```
 """
 function squared_hinge_loss(ŷ, y; agg = mean)
   _check_sizes(ŷ, y)
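Likewise, 0.625 is the same margin computation with squared hinge terms:

```julia
y_true = [1, -1, 1, 1]
y_pred = [0.1, 0.3, 1, 1.5]
sum(max.(0, 1 .- y_pred .* y_true).^2) / length(y_true)   # (0.81 + 1.69 + 0 + 0)/4 = 0.625
```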
@@ -422,9 +486,20 @@ end
 Return a loss based on the dice coefficient.
 Used in the [V-Net](https://arxiv.org/abs/1606.04797) image segmentation
 architecture.
-Similar to the F1_score. Calculated as:
+The dice coefficient is similar to the F1_score. Loss calculated as:
 
     1 - 2*sum(|ŷ .* y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)
+
+# Example
+```jldoctest
+julia> y_pred = [1.1, 2.1, 3.1];
+
+julia> Flux.dice_coeff_loss(y_pred, 1:3)
+0.000992391663909964
+
+julia> 1 - Flux.dice_coeff_loss(y_pred, 1:3) # ~ F1 score for image segmentation
+0.99900760833609
+```
 """
 function dice_coeff_loss(ŷ, y; smooth = ofeltype(ŷ, 1.0))
   _check_sizes(ŷ, y)
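The quoted dice value is reproduced if `smooth` is read as added once to the numerator and once to the denominator (my reading of the displayed formula, whose placement of `smooth` inside the sum is ambiguous):

```julia
ŷ = [1.1, 2.1, 3.1]; y = 1:3; smooth = 1.0
1 - (2 * sum(ŷ .* y) + smooth) / (sum(ŷ .^ 2) + sum(y .^ 2) + smooth)   # ≈ 0.000992391663909964
```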
@@ -436,9 +511,11 @@ end
 
 Return the [Tversky loss](https://arxiv.org/abs/1706.05721).
 Used with imbalanced data to give more weight to false negatives.
-Larger β weigh recall more than precision (by placing more emphasis on false negatives)
+Larger β weigh recall more than precision (by placing more emphasis on false negatives).
 Calculated as:
-    1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
+
+    1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + (1 - β)*(1 .- y) .* ŷ + β*y .* (1 .- ŷ)) + 1)
+
 """
 function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7))
   _check_sizes(ŷ, y)
@@ -456,6 +533,8 @@ The input, 'ŷ', is expected to be normalized (i.e. [softmax](@ref Softmax) out
 
 For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref).
 
+See also: [`Losses.focal_loss`](@ref) for multi-class setting
+
 # Example
 ```jldoctest
 julia> y = [0 1 0
@@ -473,9 +552,6 @@ julia> ŷ = [0.268941 0.5 0.268941
 julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385
 true
 ```
-
-See also: [`Losses.focal_loss`](@ref) for multi-class setting
-
 """
 function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ))
   _check_sizes(ŷ, y)
@@ -536,7 +612,17 @@ which can be useful for training Siamese Networks. It is given by
     agg(@. (1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2)
 
 Specify `margin` to set the baseline for distance at which pairs are dissimilar.
-
+
+# Example
+```jldoctest
+julia> ŷ = [0.5, 1.5, 2.5];
+
+julia> Flux.siamese_contrastive_loss(ŷ, 1:3)
+-4.833333333333333
+
+julia> Flux.siamese_contrastive_loss(ŷ, 1:3, margin = 2)
+-4.0
+```
 """
 function siamese_contrastive_loss(ŷ, y; agg = mean, margin::Real = 1)
   _check_sizes(ŷ, y)
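The two doctest values check out against the displayed formula with mean aggregation; they come out negative only because `1:3` is not a 0/1 label vector, so this is purely a formula check:

```julia
ŷ = [0.5, 1.5, 2.5]; y = 1:3
sum((1 .- y) .* ŷ.^2 .+ y .* max.(0, 1 .- ŷ).^2) / 3   # ≈ -4.8333…  (margin = 1)
sum((1 .- y) .* ŷ.^2 .+ y .* max.(0, 2 .- ŷ).^2) / 3   # -4.0        (margin = 2)
```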
