FluxML
diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl
Lines changed: 17 additions & 17 deletions
diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl
Lines changed: 1 addition & 2 deletions
diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl
Lines changed: 4 additions & 5 deletions
diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl
Lines changed: 1 addition & 4 deletions
diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl
Lines changed: 6 additions & 8 deletions
diff --git a/src/convnets/inception/googlenet.jl b/src/convnets/inception/googlenet.jl
Lines changed: 23 additions & 25 deletions
diff --git a/src/convnets/inception/inceptionresnetv2.jl b/src/convnets/inception/inceptionresnetv2.jl
Lines changed: 17 additions & 18 deletions
diff --git a/src/convnets/inception/inceptionv3.jl b/src/convnets/inception/inceptionv3.jl
Lines changed: 20 additions & 23 deletions
diff --git a/src/convnets/inception/inceptionv4.jl b/src/convnets/inception/inceptionv4.jl
Lines changed: 24 additions & 25 deletions
diff --git a/src/convnets/inception/xception.jl b/src/convnets/inception/xception.jl
Lines changed: 11 additions & 12 deletions
@@ -9,22 +9,21 @@ Create an AlexNet model
   - `nclasses`: the number of output classes
 """
 function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
-    layers = Chain(Chain(Conv((11, 11), inchannels => 64, relu; stride = (4, 4), pad = (2, 2)),
-                         MaxPool((3, 3); stride = (2, 2)),
-                         Conv((5, 5), 64 => 192, relu; pad = (2, 2)),
-                         MaxPool((3, 3); stride = (2, 2)),
-                         Conv((3, 3), 192 => 384, relu; pad = (1, 1)),
-                         Conv((3, 3), 384 => 256, relu; pad = (1, 1)),
-                         Conv((3, 3), 256 => 256, relu; pad = (1, 1)),
-                         MaxPool((3, 3); stride = (2, 2)),
-                         AdaptiveMeanPool((6, 6))),
-                   Chain(MLUtils.flatten,
-                         Dropout(0.5),
-                         Dense(256 * 6 * 6, 4096, relu),
-                         Dropout(0.5),
-                         Dense(4096, 4096, relu),
-                         Dense(4096, nclasses)))
-    return layers
+    backbone = Chain(Conv((11, 11), inchannels => 64, relu; stride = 4, pad = 2),
+                     MaxPool((3, 3); stride = 2),
+                     Conv((5, 5), 64 => 192, relu; pad = 2),
+                     MaxPool((3, 3); stride = 2),
+                     Conv((3, 3), 192 => 384, relu; pad = 1),
+                     Conv((3, 3), 384 => 256, relu; pad = 1),
+                     Conv((3, 3), 256 => 256, relu; pad = 1),
+                     MaxPool((3, 3); stride = 2))
+    classifier = Chain(AdaptiveMeanPool((6, 6)), MLUtils.flatten,
+                       Dropout(0.5),
+                       Dense(256 * 6 * 6, 4096, relu),
+                       Dropout(0.5),
+                       Dense(4096, 4096, relu),
+                       Dense(4096, nclasses))
+    return Chain(backbone, classifier)
 end
 
 """
@@ -47,7 +46,8 @@ struct AlexNet
 end
 @functor AlexNet
 
-function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000)
+function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3,
+                 nclasses::Integer = 1000)
     layers = alexnet(; inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, "AlexNet")
 
@@ -26,8 +26,7 @@ function convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9),
                                                    pad = SamePad())), +),
                     conv_norm((1, 1), planes, planes, activation; preact = true)...)
               for _ in 1:depth]
-    head = Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, Dense(planes, nclasses))
-    return Chain(Chain(stem..., Chain(blocks)), head)
+    return Chain(Chain(stem..., Chain(blocks)), create_classifier(planes, nclasses))
 end
 
 const CONVMIXER_CONFIGS = Dict(:base => Dict(:planes => 1536, :depth => 20,
 
@@ -63,11 +63,10 @@ function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer};
         cur += depths[i]
     end
     backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages))))
-    head = Chain(GlobalMeanPool(),
-                 MLUtils.flatten,
-                 LayerNorm(planes[end]),
-                 Dense(planes[end], nclasses))
-    return Chain(Chain(backbone), head)
+    classifier = Chain(GlobalMeanPool(), MLUtils.flatten,
+                       LayerNorm(planes[end]),
+                       Dense(planes[end], nclasses))
+    return Chain(Chain(backbone...), classifier)
 end
 
 # Configurations for ConvNeXt models
 
@@ -83,10 +83,7 @@ function densenet(inplanes::Integer, growth_rates; reduction = 0.5, inchannels::
         inplanes = floor(Int, outplanes * reduction)
     end
     push!(layers, BatchNorm(outplanes, relu))
-    return Chain(Chain(layers),
-                 Chain(AdaptiveMeanPool((1, 1)),
-                       MLUtils.flatten,
-                       Dense(outplanes, nclasses)))
+    return Chain(Chain(layers...), create_classifier(outplanes, nclasses))
 end
 
 """
 
@@ -28,8 +28,8 @@ function efficientnet(scalings, block_configs; max_width::Integer = 1280,
     scalew(w) = wscale ≈ 1 ? w : ceil(Int64, wscale * w)
     scaled(d) = dscale ≈ 1 ? d : ceil(Int64, dscale * d)
     out_channels = _round_channels(scalew(32), 8)
-    stem = conv_norm((3, 3), inchannels, out_channels, swish;
-                     bias = false, stride = 2, pad = SamePad())
+    stem = conv_norm((3, 3), inchannels, out_channels, swish; bias = false, stride = 2,
+                     pad = SamePad())
     blocks = []
     for (n, k, s, e, i, o) in block_configs
         in_channels = _round_channels(scalew(i), 8)
@@ -44,13 +44,11 @@ function efficientnet(scalings, block_configs; max_width::Integer = 1280,
                                    stride = 1, reduction = 4))
         end
     end
-    blocks = Chain(blocks...)
     head_out_channels = _round_channels(max_width, 8)
-    head = conv_norm((1, 1), out_channels, head_out_channels, swish;
-                     bias = false, pad = SamePad())
-    top = Dense(head_out_channels, nclasses)
-    return Chain(Chain([stem..., blocks, head...]),
-                 Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, top))
+    append!(blocks,
+            conv_norm((1, 1), out_channels, head_out_channels, swish;
+                      bias = false, pad = SamePad()))
+    return Chain(Chain(stem..., blocks...), create_classifier(head_out_channels, nclasses))
 end
 
 # n: # of block repetitions
 
@@ -36,32 +36,29 @@ Create an Inception-v1 model (commonly referred to as GoogLeNet)
 
   - `nclasses`: the number of output classes
 """
-function googlenet(; nclasses::Integer = 1000)
-    layers = Chain(Chain(Conv((7, 7), 3 => 64; stride = 2, pad = 3),
-                         MaxPool((3, 3); stride = 2, pad = 1),
-                         Conv((1, 1), 64 => 64),
-                         Conv((3, 3), 64 => 192; pad = 1),
-                         MaxPool((3, 3); stride = 2, pad = 1),
-                         _inceptionblock(192, 64, 96, 128, 16, 32, 32),
-                         _inceptionblock(256, 128, 128, 192, 32, 96, 64),
-                         MaxPool((3, 3); stride = 2, pad = 1),
-                         _inceptionblock(480, 192, 96, 208, 16, 48, 64),
-                         _inceptionblock(512, 160, 112, 224, 24, 64, 64),
-                         _inceptionblock(512, 128, 128, 256, 24, 64, 64),
-                         _inceptionblock(512, 112, 144, 288, 32, 64, 64),
-                         _inceptionblock(528, 256, 160, 320, 32, 128, 128),
-                         MaxPool((3, 3); stride = 2, pad = 1),
-                         _inceptionblock(832, 256, 160, 320, 32, 128, 128),
-                         _inceptionblock(832, 384, 192, 384, 48, 128, 128)),
-                   Chain(AdaptiveMeanPool((1, 1)),
-                         MLUtils.flatten,
-                         Dropout(0.4),
-                         Dense(1024, nclasses)))
-    return layers
+function googlenet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+    backbone = Chain(Conv((7, 7), inchannels => 64; stride = 2, pad = 3),
+                     MaxPool((3, 3); stride = 2, pad = 1),
+                     Conv((1, 1), 64 => 64),
+                     Conv((3, 3), 64 => 192; pad = 1),
+                     MaxPool((3, 3); stride = 2, pad = 1),
+                     _inceptionblock(192, 64, 96, 128, 16, 32, 32),
+                     _inceptionblock(256, 128, 128, 192, 32, 96, 64),
+                     MaxPool((3, 3); stride = 2, pad = 1),
+                     _inceptionblock(480, 192, 96, 208, 16, 48, 64),
+                     _inceptionblock(512, 160, 112, 224, 24, 64, 64),
+                     _inceptionblock(512, 128, 128, 256, 24, 64, 64),
+                     _inceptionblock(512, 112, 144, 288, 32, 64, 64),
+                     _inceptionblock(528, 256, 160, 320, 32, 128, 128),
+                     MaxPool((3, 3); stride = 2, pad = 1),
+                     _inceptionblock(832, 256, 160, 320, 32, 128, 128),
+                     _inceptionblock(832, 384, 192, 384, 48, 128, 128))
+    classifier = create_classifier(1024, nclasses; dropout_rate = 0.4)
+    return Chain(backbone, classifier)
 end
 
 """
-    GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000)
+    GoogLeNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000)
 
 Create an Inception-v1 model (commonly referred to as `GoogLeNet`)
 ([reference](https://arxiv.org/abs/1409.4842v1)).
@@ -82,8 +79,9 @@ struct GoogLeNet
 end
 @functor GoogLeNet
 
-function GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000)
-    layers = googlenet(; nclasses = nclasses)
+function GoogLeNet(; pretrain::Bool = false, inchannels::Integer = 3,
+                   nclasses::Integer = 1000)
+    layers = googlenet(; inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, "GoogLeNet")
     end
 
@@ -77,24 +77,23 @@ Creates an InceptionResNetv2 model.
 """
 function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0,
                            nclasses::Integer = 1000)
-    body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
-                 conv_norm((3, 3), 32, 32)...,
-                 conv_norm((3, 3), 32, 64; pad = 1)...,
-                 MaxPool((3, 3); stride = 2),
-                 conv_norm((3, 3), 64, 80)...,
-                 conv_norm((3, 3), 80, 192)...,
-                 MaxPool((3, 3); stride = 2),
-                 mixed_5b(),
-                 [block35(0.17f0) for _ in 1:10]...,
-                 mixed_6a(),
-                 [block17(0.10f0) for _ in 1:20]...,
-                 mixed_7a(),
-                 [block8(0.20f0) for _ in 1:9]...,
-                 block8(; activation = relu),
-                 conv_norm((1, 1), 2080, 1536)...)
-    head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate),
-                 Dense(1536, nclasses))
-    return Chain(body, head)
+    backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
+                     conv_norm((3, 3), 32, 32)...,
+                     conv_norm((3, 3), 32, 64; pad = 1)...,
+                     MaxPool((3, 3); stride = 2),
+                     conv_norm((3, 3), 64, 80)...,
+                     conv_norm((3, 3), 80, 192)...,
+                     MaxPool((3, 3); stride = 2),
+                     mixed_5b(),
+                     [block35(0.17f0) for _ in 1:10]...,
+                     mixed_6a(),
+                     [block17(0.10f0) for _ in 1:20]...,
+                     mixed_7a(),
+                     [block8(0.20f0) for _ in 1:9]...,
+                     block8(; activation = relu),
+                     conv_norm((1, 1), 2080, 1536)...)
+    classifier = create_classifier(1536, nclasses; dropout_rate)
+    return Chain(backbone, classifier)
 end
 
 """
 
@@ -136,29 +136,26 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)).
   - `nclasses`: the number of output classes
 """
 function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000)
-    layer = Chain(Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
-                        conv_norm((3, 3), 32, 32)...,
-                        conv_norm((3, 3), 32, 64; pad = 1)...,
-                        MaxPool((3, 3); stride = 2),
-                        conv_norm((1, 1), 64, 80)...,
-                        conv_norm((3, 3), 80, 192)...,
-                        MaxPool((3, 3); stride = 2),
-                        inceptionv3_a(192, 32),
-                        inceptionv3_a(256, 64),
-                        inceptionv3_a(288, 64),
-                        inceptionv3_b(288),
-                        inceptionv3_c(768, 128),
-                        inceptionv3_c(768, 160),
-                        inceptionv3_c(768, 160),
-                        inceptionv3_c(768, 192),
-                        inceptionv3_d(768),
-                        inceptionv3_e(1280),
-                        inceptionv3_e(2048)),
-                  Chain(AdaptiveMeanPool((1, 1)),
-                        Dropout(0.2),
-                        MLUtils.flatten,
-                        Dense(2048, nclasses)))
-    return layer
+    backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
+                     conv_norm((3, 3), 32, 32)...,
+                     conv_norm((3, 3), 32, 64; pad = 1)...,
+                     MaxPool((3, 3); stride = 2),
+                     conv_norm((1, 1), 64, 80)...,
+                     conv_norm((3, 3), 80, 192)...,
+                     MaxPool((3, 3); stride = 2),
+                     inceptionv3_a(192, 32),
+                     inceptionv3_a(256, 64),
+                     inceptionv3_a(288, 64),
+                     inceptionv3_b(288),
+                     inceptionv3_c(768, 128),
+                     inceptionv3_c(768, 160),
+                     inceptionv3_c(768, 160),
+                     inceptionv3_c(768, 192),
+                     inceptionv3_d(768),
+                     inceptionv3_e(1280),
+                     inceptionv3_e(2048))
+    classifier = create_classifier(2048, nclasses; dropout_rate = 0.2)
+    return Chain(backbone, classifier)
 end
 
 """
 
@@ -95,31 +95,30 @@ Create an Inceptionv4 model.
 """
 function inceptionv4(; dropout_rate = 0.0, inchannels::Integer = 3,
                      nclasses::Integer = 1000)
-    body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
-                 conv_norm((3, 3), 32, 32)...,
-                 conv_norm((3, 3), 32, 64; pad = 1)...,
-                 mixed_3a(),
-                 mixed_4a(),
-                 mixed_5a(),
-                 inceptionv4_a(),
-                 inceptionv4_a(),
-                 inceptionv4_a(),
-                 inceptionv4_a(),
-                 reduction_a(),  # mixed_6a
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 inceptionv4_b(),
-                 reduction_b(),  # mixed_7a
-                 inceptionv4_c(),
-                 inceptionv4_c(),
-                 inceptionv4_c())
-    head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate),
-                 Dense(1536, nclasses))
-    return Chain(body, head)
+    backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
+                     conv_norm((3, 3), 32, 32)...,
+                     conv_norm((3, 3), 32, 64; pad = 1)...,
+                     mixed_3a(),
+                     mixed_4a(),
+                     mixed_5a(),
+                     inceptionv4_a(),
+                     inceptionv4_a(),
+                     inceptionv4_a(),
+                     inceptionv4_a(),
+                     reduction_a(),  # mixed_6a
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     inceptionv4_b(),
+                     reduction_b(),  # mixed_7a
+                     inceptionv4_c(),
+                     inceptionv4_c(),
+                     inceptionv4_c())
+    classifier = create_classifier(1536, nclasses; dropout_rate)
+    return Chain(backbone, classifier)
 end
 
 """
 
@@ -57,18 +57,17 @@ Creates an Xception model.
   - `nclasses`: the number of output classes.
 """
 function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000)
-    body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)...,
-                 conv_norm((3, 3), 32, 64; bias = false)...,
-                 xception_block(64, 128, 2; stride = 2, start_with_relu = false),
-                 xception_block(128, 256, 2; stride = 2),
-                 xception_block(256, 728, 2; stride = 2),
-                 [xception_block(728, 728, 3) for _ in 1:8]...,
-                 xception_block(728, 1024, 2; stride = 2, grow_at_start = false),
-                 depthwise_sep_conv_norm((3, 3), 1024, 1536; pad = 1)...,
-                 depthwise_sep_conv_norm((3, 3), 1536, 2048; pad = 1)...)
-    head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate),
-                 Dense(2048, nclasses))
-    return Chain(body, head)
+    backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)...,
+                     conv_norm((3, 3), 32, 64; bias = false)...,
+                     xception_block(64, 128, 2; stride = 2, start_with_relu = false),
+                     xception_block(128, 256, 2; stride = 2),
+                     xception_block(256, 728, 2; stride = 2),
+                     [xception_block(728, 728, 3) for _ in 1:8]...,
+                     xception_block(728, 1024, 2; stride = 2, grow_at_start = false),
+                     depthwise_sep_conv_norm((3, 3), 1024, 1536; pad = 1)...,
+                     depthwise_sep_conv_norm((3, 3), 1536, 2048; pad = 1)...)
+    classifier = create_classifier(2048, nclasses; dropout_rate)
+    return Chain(backbone, classifier)
 end
 
 """