Commit 97f981b

Merge pull request #1995 from Saransh-cpp/docstring-for-layers
Update docstrings in `upsample.jl`, `recurrent.jl`, and `normalise.jl`
2 parents 952c4a5 + 2ab42cd commit 97f981b

File tree

4 files changed: +237 -28 lines


docs/src/models/layers.md

Lines changed: 1 addition & 1 deletion
@@ -42,6 +42,7 @@ Much like the core layers above, but can be used to process sequence data (as we
 RNN
 LSTM
 GRU
+GRUv3
 Flux.Recur
 Flux.reset!
 ```
@@ -67,7 +68,6 @@ These layers don't affect the structure of the network but may improve training
 ```@docs
 Flux.normalise
 BatchNorm
-Flux.dropout
 Dropout
 AlphaDropout
 LayerNorm

src/layers/normalise.jl

Lines changed: 93 additions & 7 deletions
@@ -55,7 +55,7 @@ ChainRulesCore.@non_differentiable dropout_mask(::Any, ::Any, ::Any)
 """
     Dropout(p; dims=:, rng = rng_from_array())

-Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input.
+Dropout layer. In the forward pass, applies the [`Flux.dropout`](@ref) function on the input.

 To apply dropout along certain dimension(s), specify the `dims` keyword.
 e.g. `Dropout(p; dims = 3)` will randomly zero out entire channels on WHCN input
@@ -65,6 +65,27 @@ Specify `rng` to use a custom RNG instead of the default.
 Custom RNGs are only supported on the CPU.

 Does nothing to the input once [`Flux.testmode!`](@ref) is `true`.
+
+# Examples
+```jldoctest
+julia> m = Chain(Dense(1 => 1), Dropout(1));
+
+julia> Flux.trainmode!(m);
+
+julia> y = m([1]);
+
+julia> y == [0]
+true
+
+julia> m = Chain(Dense(1000 => 1000), Dropout(0.5));
+
+julia> Flux.trainmode!(m);
+
+julia> y = m(ones(1000));
+
+julia> isapprox(count(==(0), y) / length(y), 0.5, atol=0.1)
+true
+```
 """
 mutable struct Dropout{F,D,R<:AbstractRNG}
   p::F
@@ -105,6 +126,22 @@ The AlphaDropout layer ensures that mean and variance of activations
 remain the same as before.

 Does nothing to the input once [`testmode!`](@ref) is true.
+
+# Examples
+```jldoctest
+julia> using Statistics
+
+julia> x = randn(1000,1);
+
+julia> m = Chain(Dense(1000 => 1000, selu), AlphaDropout(0.2));
+
+julia> Flux.trainmode!(m);
+
+julia> y = m(x);
+
+julia> isapprox(std(x), std(y), atol=0.2)
+true
+```
 """
 mutable struct AlphaDropout{F,R<:AbstractRNG}
   p::F
@@ -154,6 +191,20 @@ If `affine=true`, it also applies a learnable shift and rescaling
 using the [`Scale`](@ref) layer.

 See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref).
+
+# Examples
+```jldoctest
+julia> using Statistics
+
+julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels
+
+julia> m = LayerNorm(3);
+
+julia> y = m(xs);
+
+julia> isapprox(std(y, dims=1:3), ones(1, 1, 1, 2), atol=0.1) && std(y, dims=1:3) != std(xs, dims=1:3)
+true
+```
 """
 struct LayerNorm{F,D,T,N}
   λ::F
@@ -256,12 +307,16 @@ Use [`testmode!`](@ref) during inference.

 # Examples
 ```julia
-m = Chain(
-  Dense(28^2 => 64),
-  BatchNorm(64, relu),
-  Dense(64 => 10),
-  BatchNorm(10),
-  softmax)
+julia> using Statistics
+
+julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels
+
+julia> m = BatchNorm(3);
+
+julia> Flux.trainmode!(m);
+
+julia> isapprox(std(m(xs)), 1, atol=0.1) && std(xs) != std(m(xs))
+true
 ```
 """
 mutable struct BatchNorm{F,V,N,W}
@@ -339,6 +394,20 @@ that will be used to renormalize the input in test phase.

 **Warning**: the defaults for `affine` and `track_stats` used to be `true`
 in previous Flux versions (< v0.12).
+
+# Examples
+```jldoctest
+julia> using Statistics
+
+julia> xs = rand(3, 3, 3, 2); # a batch of 2 images, each having 3 channels
+
+julia> m = InstanceNorm(3);
+
+julia> y = m(xs);
+
+julia> isapprox(std(y, dims=1:2), ones(1, 1, 3, 2), atol=0.2) && std(y, dims=1:2) != std(xs, dims=1:2)
+true
+```
 """
 mutable struct InstanceNorm{F,V,N,W}
   λ::F  # activation function
@@ -416,6 +485,23 @@ through to learnable per-channel bias `β` and scale `γ` parameters.

 If `track_stats=true`, accumulates mean and var statistics in training phase
 that will be used to renormalize the input in test phase.
+
+# Examples
+```jldoctest
+julia> using Statistics
+
+julia> xs = rand(3, 3, 4, 2); # a batch of 2 images, each having 4 channels
+
+julia> m = GroupNorm(4, 2);
+
+julia> y = m(xs);
+
+julia> isapprox(std(y[:, :, 1:2, 1]), 1, atol=0.1) && std(xs[:, :, 1:2, 1]) != std(y[:, :, 1:2, 1])
+true
+
+julia> isapprox(std(y[:, :, 3:4, 2]), 1, atol=0.1) && std(xs[:, :, 3:4, 2]) != std(y[:, :, 3:4, 2])
+true
+```
 """
 mutable struct GroupNorm{F,V,N,W}
   G::Int  # number of groups

src/layers/recurrent.jl

Lines changed: 84 additions & 19 deletions
@@ -63,28 +63,67 @@ in the background. `cell` should be a model of the form:

 For example, here's a recurrent network that keeps a running total of its inputs:

-```julia
-accum(h, x) = (h + x, x)
-rnn = Flux.Recur(accum, 0)
-rnn(2) # 2
-rnn(3) # 3
-rnn.state # 5
-rnn.(1:10) # apply to a sequence
-rnn.state # 60
+# Examples
+```jldoctest
+julia> accum(h, x) = (h + x, x)
+accum (generic function with 1 method)
+
+julia> rnn = Flux.Recur(accum, 0)
+Recur(accum)
+
+julia> rnn(2)
+2
+
+julia> rnn(3)
+3
+
+julia> rnn.state
+5
 ```

 Folding over a 3d Array of dimensions `(features, batch, time)` is also supported:

-```julia
-accum(h, x) = (h .+ x, x)
-rnn = Flux.Recur(accum, zeros(Int, 1, 1))
-rnn([2]) # 2
-rnn([3]) # 3
-rnn.state # 5
-rnn(reshape(1:10, 1, 1, :)) # apply to a sequence of (features, batch, time)
-rnn.state # 60
+```jldoctest
+julia> accum(h, x) = (h .+ x, x)
+accum (generic function with 1 method)
+
+julia> rnn = Flux.Recur(accum, zeros(Int, 1, 1))
+Recur(accum)
+
+julia> rnn([2])
+1-element Vector{Int64}:
+ 2
+
+julia> rnn([3])
+1-element Vector{Int64}:
+ 3
+
+julia> rnn.state
+1×1 Matrix{Int64}:
+ 5
+
+julia> out = rnn(reshape(1:10, 1, 1, :)); # apply to a sequence of (features, batch, time)
+
+julia> out |> size
+(1, 1, 10)
+
+julia> vec(out)
+10-element Vector{Int64}:
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+
+julia> rnn.state
+1×1 Matrix{Int64}:
+ 60
 ```
-
 """
 mutable struct Recur{T,S}
   cell::T
@@ -107,8 +146,34 @@ Base.show(io::IO, m::Recur) = print(io, "Recur(", m.cell, ")")
 Reset the hidden state of a recurrent layer back to its original value.

 Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to:
-```julia
-rnn.state = hidden(rnn.cell)
+
+    rnn.state = hidden(rnn.cell)
+
+# Examples
+```jldoctest
+julia> r = Flux.RNNCell(relu, ones(1,1), zeros(1,1), ones(1,1), zeros(1,1)); # users should use the RNN wrapper struct instead
+
+julia> y = Flux.Recur(r, ones(1,1));
+
+julia> y.state
+1×1 Matrix{Float64}:
+ 1.0
+
+julia> y(ones(1,1)) # relu(1*1 + 1)
+1×1 Matrix{Float64}:
+ 2.0
+
+julia> y.state
+1×1 Matrix{Float64}:
+ 2.0
+
+julia> Flux.reset!(y)
+1×1 Matrix{Float64}:
+ 0.0
+
+julia> y.state
+1×1 Matrix{Float64}:
+ 0.0
 ```
 """
 reset!(m::Recur) = (m.state = m.cell.state0)

src/layers/upsample.jl

Lines changed: 59 additions & 1 deletion
@@ -75,9 +75,67 @@ end
 """
     PixelShuffle(r::Int)

-Pixel shuffling layer with upscale factor `r`.
+Pixel shuffling layer with upscale factor `r`. Usually used for generating higher
+resolution images while upscaling them.

 See [`NNlib.pixel_shuffle`](@ref).
+
+# Examples
+```jldoctest
+julia> p = PixelShuffle(2);
+
+julia> xs = [2row + col + channel/10 for row in 1:2, col in 1:2, channel in 1:4, n in 1:1]
+2×2×4×1 Array{Float64, 4}:
+[:, :, 1, 1] =
+ 3.1  4.1
+ 5.1  6.1
+
+[:, :, 2, 1] =
+ 3.2  4.2
+ 5.2  6.2
+
+[:, :, 3, 1] =
+ 3.3  4.3
+ 5.3  6.3
+
+[:, :, 4, 1] =
+ 3.4  4.4
+ 5.4  6.4
+
+julia> p(xs)
+4×4×1×1 Array{Float64, 4}:
+[:, :, 1, 1] =
+ 3.1  3.3  4.1  4.3
+ 3.2  3.4  4.2  4.4
+ 5.1  5.3  6.1  6.3
+ 5.2  5.4  6.2  6.4
+
+julia> xs = [3row + col + channel/10 for row in 1:2, col in 1:3, channel in 1:4, n in 1:1]
+2×3×4×1 Array{Float64, 4}:
+[:, :, 1, 1] =
+ 4.1  5.1  6.1
+ 7.1  8.1  9.1
+
+[:, :, 2, 1] =
+ 4.2  5.2  6.2
+ 7.2  8.2  9.2
+
+[:, :, 3, 1] =
+ 4.3  5.3  6.3
+ 7.3  8.3  9.3
+
+[:, :, 4, 1] =
+ 4.4  5.4  6.4
+ 7.4  8.4  9.4
+
+julia> p(xs)
+4×6×1×1 Array{Float64, 4}:
+[:, :, 1, 1] =
+ 4.1  4.3  5.1  5.3  6.1  6.3
+ 4.2  4.4  5.2  5.4  6.2  6.4
+ 7.1  7.3  8.1  8.3  9.1  9.3
+ 7.2  7.4  8.2  8.4  9.2  9.4
+```
 """
 struct PixelShuffle
   r::Int
