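Fix all doctests: update expected outputs to match layer printing (parameter counts, `bias=false`), suppress unneeded output with `;`, and strip trailing whitespace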

mcabbott · mcabbott · commit 2423c1e51f22 · 2021-07-03T02:09:27.000-04:00
diff --git a/docs/src/utilities.md b/docs/src/utilities.md
@@ -28,7 +28,7 @@ To change the default on an applicable layer, pass the desired function with the
 
 ```jldoctest; setup = :(using Flux)
 julia> conv = Conv((3, 3), 1 => 8, relu; init=Flux.glorot_normal)
-Conv((3, 3), 1=>8, relu)
+Conv((3, 3), 1 => 8, relu)  # 80 parameters
 ```
 
 ```@docs
diff --git a/src/layers/basic.jl b/src/layers/basic.jl
@@ -89,7 +89,7 @@ The weight matrix and/or the bias vector (of length `out`) may also be provided
 # Examples
 ```jldoctest
 julia> d = Dense(5, 2)
-Dense(5, 2)
+Dense(5, 2)         # 12 parameters
 
 julia> d(rand(Float32, 5, 64)) |> size
 (2, 64)
@@ -98,7 +98,7 @@ julia> d(rand(Float32, 5, 1, 1, 64)) |> size  # treated as three batch dimension
 (2, 1, 1, 64)
 
 julia> d1 = Dense(ones(2, 5), false, tanh)  # using provided weight matrix
-Dense(5, 2, tanh; bias=false)
+Dense(5, 2, tanh; bias=false)  # 10 parameters
 
 julia> d1(ones(5))
 2-element Vector{Float64}:
@@ -395,7 +395,11 @@ julia> size(model(rand(3)))
 (17,)
 
 julia> model = Parallel(+, Dense(10, 2), Dense(5, 2))
-Parallel(+, Dense(10, 2), Dense(5, 2))
+Parallel(
+  +,
+  Dense(10, 2),                         # 22 parameters
+  Dense(5, 2),                          # 12 parameters
+)                   # Total: 4 arrays, 34 parameters, 392 bytes
 
 julia> size(model(rand(10), rand(5)))
 (2,)
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -67,7 +67,7 @@ See also [`ConvTranspose`](@ref), [`DepthwiseConv`](@ref), [`CrossCor`](@ref).
 julia> xs = rand(Float32, 100, 100, 3, 50); # a batch of images
 
 julia> lay = Conv((5,5), 3 => 7, relu; bias=false)
-Conv((5, 5), 3 => 7, relu)  # 525 parameters
+Conv((5, 5), 3 => 7, relu, bias=false)  # 525 parameters
 
 julia> lay(xs) |> size
 (96, 96, 7, 50)
@@ -291,7 +291,7 @@ See also [`Conv`](@ref) for more detailed description of keywords.
 julia> xs = rand(Float32, 100, 100, 3, 50);  # a batch of 50 RGB images
 
 julia> lay = DepthwiseConv((5,5), 3 => 6, relu; bias=false)
-DepthwiseConv((5, 5), 3 => 6, relu)  # 150 parameters
+DepthwiseConv((5, 5), 3 => 6, relu, bias=false)  # 150 parameters
 
 julia> lay(xs) |> size
 (96, 96, 6, 50)
@@ -379,7 +379,7 @@ See also [`Conv`](@ref) for more detailed description of keywords.
 julia> xs = rand(Float32, 100, 100, 3, 50);  # a batch of 50 RGB images
 
 julia> lay = CrossCor((5,5), 3 => 6, relu; bias=false)
-CrossCor((5, 5), 3 => 6, relu)  # 450 parameters
+CrossCor((5, 5), 3 => 6, relu, bias=false)  # 450 parameters
 
 julia> lay(xs) |> size
 (96, 96, 6, 50)
@@ -618,7 +618,7 @@ julia> xs = rand(Float32, 100, 100, 3, 50);  # batch of 50 RGB images
 
 julia> m = Chain(Conv((5, 5), 3 => 7, pad=SamePad()), MaxPool((5, 5), pad=SamePad()))
 Chain(
-  Conv((5, 5), 3 => 7, pad=2),            # 532 parameters
+  Conv((5, 5), 3 => 7, pad=2),          # 532 parameters
   MaxPool((5, 5), pad=2),
 )
 
@@ -683,7 +683,7 @@ julia> xs = rand(Float32, 100, 100, 3, 50);
 
 julia> m = Chain(Conv((5,5), 3 => 7), MeanPool((5,5), pad=SamePad()))
 Chain(
-  Conv((5, 5), 3 => 7),                   # 532 parameters
+  Conv((5, 5), 3 => 7),                 # 532 parameters
   MeanPool((5, 5), pad=2),
 )
 
diff --git a/src/utils.jl b/src/utils.jl
@@ -13,14 +13,12 @@ This function is mainly used by weight initializers, e.g., [`kaiming_normal`](@r
 # Examples
 
 ```jldoctest
-julia> layer = Dense(10, 20)
-Dense(10, 20)
+julia> layer = Dense(10, 20);
 
 julia> Flux.nfan(size(layer.W))
 (10, 20)
 
-julia> layer = Conv((3, 3), 2=>10)
-Conv((3, 3), 2=>10)
+julia> layer = Conv((3, 3), 2=>10);
 
 julia> Flux.nfan(size(layer.weight))
 (18, 90)
@@ -506,7 +504,7 @@ julia> Flux.chunk(1:10, 3)
  9:10
 
 julia> Flux.chunk(collect(1:10), 3)
-3-element Vector{SubArray{Int64, 1, Vector{Int64}, Tuple{UnitRange{Int64}}, true}}: 
+3-element Vector{SubArray{Int64, 1, Vector{Int64}, Tuple{UnitRange{Int64}}, true}}:
  [1, 2, 3, 4]
  [5, 6, 7, 8]
  [9, 10]
@@ -720,19 +718,25 @@ over specific modules or subsets of the parameters
 # Examples
 
 ```jldoctest
-julia> m1 = Chain(Dense(28^2, 64), BatchNorm(64, relu))
-Chain(Dense(784, 64), BatchNorm(64, relu))
+julia> m1 = Chain(Dense(28^2, 64), BatchNorm(64, relu));
 
 julia> m2 = Chain(m1, Dense(64, 10))
-Chain(Chain(Dense(784, 64), BatchNorm(64, relu)), Dense(64, 10))
+Chain(
+  Chain(
+    Dense(784, 64),                     # 50_240 parameters
+    BatchNorm(64, relu),                # 128 parameters, plus 128
+  ),
+  Dense(64, 10),                        # 650 parameters
+)         # Total: 6 trainable arrays, with 51_018 parameters
+          # plus 2 non-trainable, 128 parameters, summarysize 200.312 KiB
 
 julia> Flux.modules(m2)
 5-element Vector{Any}:
- Chain(Chain(Dense(784, 64), BatchNorm(64, relu)), Dense(64, 10))
- Chain(Dense(784, 64), BatchNorm(64, relu))
- Dense(784, 64)
- BatchNorm(64, relu)
- Dense(64, 10)
+ Chain(Chain(Dense(784, 64), BatchNorm(64, relu)), Dense(64, 10))  # 51_018 parameters, plus 128 non-trainable
+ Chain(Dense(784, 64), BatchNorm(64, relu))  # 50_368 parameters, plus 128 non-trainable
+ Dense(784, 64)      # 50_240 parameters
+ BatchNorm(64, relu)  # 128 parameters, plus 128 non-trainable
+ Dense(64, 10)       # 650 parameters
 
 julia> L2(m) = sum(sum(abs2, l.weight) for l in Flux.modules(m) if l isa Dense)
 L2 (generic function with 1 method)
@@ -760,6 +764,7 @@ julia> loss() = rand();
 
 julia> trigger = Flux.patience(() -> loss() < 1, 3);
 
+
 julia> Flux.@epochs 10 begin
          trigger() && break
        end
@@ -796,6 +801,7 @@ julia> loss = let l = 0
 
 julia> es = Flux.early_stopping(loss, 3);
 
+
 julia> Flux.@epochs 10 begin
          es() && break
        end
@@ -836,6 +842,7 @@ julia> f = let v = 10
 
 julia> trigger = Flux.plateau(f, 3; init_score=10, min_dist=18);
 
+
 julia> Flux.@epochs 10 begin
          trigger() && break
        end