More uniform mid level API

theabhirath · theabhirath · commit 251b323582c8 · 2022-08-25T08:37:01.000+05:30
1. Expose `pretrain` option for all models
2. Make it easier to initialise models with config options at the mid level by providing an additional dispatch
3. Some cleanup + documentation
diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl
@@ -1,15 +1,16 @@
 """
-    alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+    alexnet(; dropout_rate = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000)
 
 Create an AlexNet model
 ([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)).
 
 # Arguments
 
+  - `dropout_rate`: dropout rate for the classifier
   - `inchannels`: The number of input channels.
   - `nclasses`: the number of output classes
 """
-function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+function alexnet(; dropout_rate = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000)
     backbone = Chain(Conv((11, 11), inchannels => 64, relu; stride = 4, pad = 2),
                      MaxPool((3, 3); stride = 2),
                      Conv((5, 5), 64 => 192, relu; pad = 2),
@@ -19,9 +20,9 @@ function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
                      Conv((3, 3), 256 => 256, relu; pad = 1),
                      MaxPool((3, 3); stride = 2))
     classifier = Chain(AdaptiveMeanPool((6, 6)), MLUtils.flatten,
-                       Dropout(0.5),
+                       Dropout(dropout_rate),
                        Dense(256 * 6 * 6, 4096, relu),
-                       Dropout(0.5),
+                       Dropout(dropout_rate),
                        Dense(4096, 4096, relu),
                        Dense(4096, nclasses))
     return Chain(backbone, classifier)
diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl
@@ -68,6 +68,12 @@ function convnext(depths::AbstractVector{<:Integer}, planes::AbstractVector{<:In
     return Chain(Chain(backbone...), classifier)
 end
 
+function convnext(config::Symbol; drop_path_rate = 0.0, layerscale_init = 1.0f-6,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
+    depths, planes = CONVNEXT_CONFIGS[config]  # each entry is a (depths, planes) pair
+    return convnext(depths, planes; drop_path_rate, layerscale_init, inchannels, nclasses)
+end
+
 # Configurations for ConvNeXt models
 const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]),
                               :small => ([3, 3, 27, 3], [96, 192, 384, 768]),
@@ -76,27 +82,37 @@ const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]),
                               :xlarge => ([3, 3, 27, 3], [256, 512, 1024, 2048]))
 
 """
-    ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000)
+    ConvNeXt(config::Symbol; pretrain::Bool = false, inchannels::Integer = 3,
+             nclasses::Integer = 1000)
 
 Creates a ConvNeXt model.
 ([reference](https://arxiv.org/abs/2201.03545))
 
 # Arguments
 
   - `config`: The size of the model, one of `tiny`, `small`, `base`, `large` or `xlarge`.
+  - `pretrain`: set to `true` to load pre-trained weights for ImageNet
   - `inchannels`: number of input channels
   - `nclasses`: number of output classes
 
+!!! warning
+    
+    `ConvNeXt` does not currently support pretrained weights.
+
 See also [`Metalhead.convnext`](#).
 """
 struct ConvNeXt
     layers::Any
 end
 @functor ConvNeXt
 
-function ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000)
+function ConvNeXt(config::Symbol; pretrain::Bool = false, inchannels::Integer = 3,
+                  nclasses::Integer = 1000)
     _checkconfig(config, keys(CONVNEXT_CONFIGS))
-    layers = convnext(CONVNEXT_CONFIGS[config]...; inchannels, nclasses)
+    layers = convnext(config; inchannels, nclasses)
+    if pretrain
+        loadpretrain!(layers, "convnext_$config")  # same loader call as the other models
+    end
     return ConvNeXt(layers)
 end
 
diff --git a/src/convnets/efficientnets/core.jl b/src/convnets/efficientnets/core.jl
@@ -1,8 +1,8 @@
-function efficientnet(block_configs::AbstractVector{<:Tuple}; inplanes::Integer,
-                      scalings::NTuple{2, Real} = (1, 1),
-                      headplanes::Integer = block_configs[end][3] * 4,
-                      norm_layer = BatchNorm, dropout_rate = nothing,
-                      inchannels::Integer = 3, nclasses::Integer = 1000)
+function efficientnet_core(block_configs::AbstractVector{<:Tuple}; inplanes::Integer,
+                           scalings::NTuple{2, Real} = (1, 1),
+                           headplanes::Integer = block_configs[end][3] * 4,
+                           norm_layer = BatchNorm, dropout_rate = nothing,
+                           inchannels::Integer = 3, nclasses::Integer = 1000)
     layers = []
     # stem of the model
     inplanes = _round_channels(inplanes * scalings[1])
diff --git a/src/convnets/efficientnets/efficientnet.jl b/src/convnets/efficientnets/efficientnet.jl
@@ -52,8 +52,8 @@ function EfficientNet(config::Symbol; pretrain::Bool = false, inchannels::Intege
                       nclasses::Integer = 1000)
     _checkconfig(config, keys(EFFICIENTNET_GLOBAL_CONFIGS))
     scalings = EFFICIENTNET_GLOBAL_CONFIGS[config][2]
-    layers = efficientnet(EFFICIENTNET_BLOCK_CONFIGS; inplanes = 32, scalings,
-                          inchannels, nclasses)
+    layers = efficientnet_core(EFFICIENTNET_BLOCK_CONFIGS; inplanes = 32, scalings,
+                               inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, string("efficientnet-", config))
     end
diff --git a/src/convnets/efficientnets/efficientnetv2.jl b/src/convnets/efficientnets/efficientnetv2.jl
@@ -59,8 +59,8 @@ function EfficientNetv2(config::Symbol; pretrain::Bool = false,
                         inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, sort(collect(keys(EFFNETV2_CONFIGS))))
     block_configs = EFFNETV2_CONFIGS[config]
-    layers = efficientnet(block_configs; inplanes = block_configs[1][3],
-                          headplanes = 1280, inchannels, nclasses)
+    layers = efficientnet_core(block_configs; inplanes = block_configs[1][3],
+                               headplanes = 1280, inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, string("efficientnetv2-", config))
     end
diff --git a/src/convnets/inceptions/googlenet.jl b/src/convnets/inceptions/googlenet.jl
@@ -1,5 +1,5 @@
 """
-    _inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_3x3, pool_proj)
+    inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, pool_proj)
 
 Create an inception module for use in GoogLeNet
 ([reference](https://arxiv.org/abs/1409.4842v1)).
@@ -14,7 +14,7 @@ Create an inception module for use in GoogLeNet
   - `out_5x5`: the number of output feature maps for the 5x5 convolution (branch 3)
   - `pool_proj`: the number of output feature maps for the pooling projection (branch 4)
 """
-function _inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, pool_proj)
+function inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, pool_proj)
     branch1 = Chain(Conv((1, 1), inplanes => out_1x1))
     branch2 = Chain(Conv((1, 1), inplanes => red_3x3),
                     Conv((3, 3), red_3x3 => out_3x3; pad = 1))
@@ -42,17 +42,17 @@ function googlenet(; dropout_rate = 0.4, inchannels::Integer = 3, nclasses::Inte
                      Conv((1, 1), 64 => 64),
                      Conv((3, 3), 64 => 192; pad = 1),
                      MaxPool((3, 3); stride = 2, pad = 1),
-                     _inceptionblock(192, 64, 96, 128, 16, 32, 32),
-                     _inceptionblock(256, 128, 128, 192, 32, 96, 64),
+                     inceptionblock(192, 64, 96, 128, 16, 32, 32),
+                     inceptionblock(256, 128, 128, 192, 32, 96, 64),
                      MaxPool((3, 3); stride = 2, pad = 1),
-                     _inceptionblock(480, 192, 96, 208, 16, 48, 64),
-                     _inceptionblock(512, 160, 112, 224, 24, 64, 64),
-                     _inceptionblock(512, 128, 128, 256, 24, 64, 64),
-                     _inceptionblock(512, 112, 144, 288, 32, 64, 64),
-                     _inceptionblock(528, 256, 160, 320, 32, 128, 128),
+                     inceptionblock(480, 192, 96, 208, 16, 48, 64),
+                     inceptionblock(512, 160, 112, 224, 24, 64, 64),
+                     inceptionblock(512, 128, 128, 256, 24, 64, 64),
+                     inceptionblock(512, 112, 144, 288, 32, 64, 64),
+                     inceptionblock(528, 256, 160, 320, 32, 128, 128),
                      MaxPool((3, 3); stride = 2, pad = 1),
-                     _inceptionblock(832, 256, 160, 320, 32, 128, 128),
-                     _inceptionblock(832, 384, 192, 384, 48, 128, 128))
+                     inceptionblock(832, 256, 160, 320, 32, 128, 128),
+                     inceptionblock(832, 384, 192, 384, 48, 128, 128))
     return Chain(backbone, create_classifier(1024, nclasses; dropout_rate))
 end
 
diff --git a/src/convnets/mobilenets/mnasnet.jl b/src/convnets/mobilenets/mnasnet.jl
@@ -41,6 +41,13 @@ function mnasnet(block_configs::AbstractVector{<:Tuple}; width_mult::Real = 1,
     return Chain(Chain(layers...), create_classifier(max_width, nclasses; dropout_rate))
 end
 
+function mnasnet(config::Symbol; width_mult::Real = 1, max_width::Integer = 1280,
+                 dropout_rate = 0.2, inchannels::Integer = 3, nclasses::Integer = 1000)
+    stem_planes, stage_configs = MNASNET_CONFIGS[config]  # entry: (inplanes, block list)
+    return mnasnet(stage_configs; width_mult, max_width, dropout_rate, inchannels,
+                   nclasses, inplanes = stem_planes)
+end
+
 # Layer configurations for MNasNet
 # f: block function - we use `dwsep_conv_bn` for the first block and `mbconv` for the rest
 # k: kernel size
@@ -79,7 +86,8 @@ const MNASNET_CONFIGS = Dict(:B1 => (32,
                                             (mbconv, 5, 32, 6, 2, 4, 4, relu),
                                             (mbconv, 3, 32, 6, 1, 3, 4, relu),
                                             (mbconv, 5, 88, 6, 2, 3, 4, relu),
-                                            (mbconv, 3, 144, 6, 1, 1, nothing, relu)]))
+                                            (mbconv, 3, 144, 6, 1, 1, nothing, relu),
+                                        ]))
 
 """
     MNASNet(width_mult = 1; inchannels::Integer = 3, pretrain::Bool = false,
@@ -111,8 +119,7 @@ end
 function MNASNet(config::Symbol; width_mult::Real = 1, pretrain::Bool = false,
                  inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(MNASNET_CONFIGS))
-    inplanes, block_configs = MNASNET_CONFIGS[config]
-    layers = mnasnet(block_configs; width_mult, inplanes, inchannels, nclasses)
+    layers = mnasnet(config; width_mult, inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, "mnasnet$(width_mult)")
     end
diff --git a/src/convnets/mobilenets/mobilenetv1.jl b/src/convnets/mobilenets/mobilenetv1.jl
@@ -37,6 +37,12 @@ function mobilenetv1(config::AbstractVector{<:Tuple}; width_mult::Real = 1,
     return Chain(Chain(layers...), create_classifier(outplanes, nclasses; dropout_rate))
 end
 
+function mobilenetv1(width_mult::Real = 1; activation = relu, dropout_rate = nothing,
+                     inchannels::Integer = 3, nclasses::Integer = 1000)
+    return mobilenetv1(MOBILENETV1_CONFIGS; width_mult, activation,  # no `config` here
+                       dropout_rate, inchannels, nclasses)
+end
+
 # Layer configurations for MobileNetv1
 # f: block function - we use `dwsep_conv_bn` for all blocks
 # k: kernel size
diff --git a/src/convnets/mobilenets/mobilenetv2.jl b/src/convnets/mobilenets/mobilenetv2.jl
@@ -43,6 +43,14 @@ function mobilenetv2(block_configs::AbstractVector{<:Tuple}; width_mult::Real =
     return Chain(Chain(layers...), create_classifier(headplanes, nclasses; dropout_rate))
 end
 
+function mobilenetv2(width_mult::Real = 1; max_width::Integer = 1280,
+                     divisor::Integer = 8, inplanes::Integer = 32, dropout_rate = 0.2,
+                     inchannels::Integer = 3, nclasses::Integer = 1000)
+    # gather every option once, then forward the bundle to the block-config method
+    opts = (; width_mult, max_width, divisor, inplanes, dropout_rate, inchannels, nclasses)
+    return mobilenetv2(MOBILENETV2_CONFIGS; opts...)
+end
+
 # Layer configurations for MobileNetv2
 # f: block function - we use `mbconv` for all blocks
 # k: kernel size
diff --git a/src/convnets/mobilenets/mobilenetv3.jl b/src/convnets/mobilenets/mobilenetv3.jl
@@ -46,6 +46,13 @@ function mobilenetv3(configs::AbstractVector{<:Tuple}; width_mult::Real = 1,
                                    (hardswish, identity); dropout_rate))
 end
 
+function mobilenetv3(config::Symbol; width_mult::Real = 1, dropout_rate = 0.2,
+                     inchannels::Integer = 3, nclasses::Integer = 1000)
+    head_width = config === :large ? 1280 : 1024  # :large => 1280, anything else => 1024
+    return mobilenetv3(MOBILENETV3_CONFIGS[config]; width_mult, dropout_rate, inchannels,
+                       nclasses, max_width = head_width)
+end
+
 # Layer configurations for small and large models for MobileNetv3
 # f: mbconv block function - we use `mbconv` for all blocks
 # k: kernel size
@@ -110,9 +117,7 @@ end
 function MobileNetv3(config::Symbol; width_mult::Real = 1, pretrain::Bool = false,
                      inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, [:small, :large])
-    max_width = config == :large ? 1280 : 1024
-    layers = mobilenetv3(MOBILENETV3_CONFIGS[config]; width_mult, max_width, inchannels,
-                         nclasses)
+    layers = mobilenetv3(config; width_mult, inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, string("MobileNetv3", config))
     end
diff --git a/src/convnets/resnets/core.jl b/src/convnets/resnets/core.jl
@@ -196,8 +196,8 @@ function resnet_planes(block_repeats::AbstractVector{<:Integer})
                              for (stage_idx, stages) in enumerate(block_repeats))
 end
 
-function resnet(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Integer},
-                connection, classifier_fn)
+function resnet_core(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Integer},
+                     connection, classifier_fn)
     # Build stages of the ResNet
     stage_blocks = cnn_stages(get_layers, block_repeats, connection)
     backbone = Chain(stem, stage_blocks...)
@@ -209,13 +209,14 @@ end
 function resnet(block_type, block_repeats::AbstractVector{<:Integer},
                 downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity);
                 cardinality::Integer = 1, base_width::Integer = 64,
-                inplanes::Integer = 64,
-                reduction_factor::Integer = 1, imsize::Dims{2} = (256, 256),
-                inchannels::Integer = 3, stem_fn = resnet_stem, connection = addact,
-                activation = relu, norm_layer = BatchNorm, revnorm::Bool = false,
+                inplanes::Integer = 64, reduction_factor::Integer = 1,
+                stem_fn = resnet_stem, connection = addact, activation = relu,
+                norm_layer = BatchNorm, revnorm::Bool = false,
                 attn_fn = planes -> identity, pool_layer = AdaptiveMeanPool((1, 1)),
-                use_conv::Bool = false, drop_block_rate = nothing, drop_path_rate = nothing,
-                dropout_rate = nothing, nclasses::Integer = 1000, kwargs...)
+                use_conv::Bool = false, drop_block_rate = nothing,
+                drop_path_rate = nothing, dropout_rate = nothing,
+                imsize::Dims{2} = (256, 256), inchannels::Integer = 3,
+                nclasses::Integer = 1000, kwargs...)
     # Build stem
     stem = stem_fn(; inchannels)
     # Block builder
@@ -253,8 +254,8 @@ function resnet(block_type, block_repeats::AbstractVector{<:Integer},
     end
     classifier_fn = nfeatures -> create_classifier(nfeatures, nclasses; dropout_rate,
                                                    pool_layer, use_conv)
-    return resnet((imsize..., inchannels), stem, get_layers, block_repeats,
-                  connection$activation, classifier_fn)
+    return resnet_core((imsize..., inchannels), stem, get_layers, block_repeats,
+                       connection$activation, classifier_fn)
 end
 function resnet(block_fn, block_repeats, downsample_opt::Symbol = :B; kwargs...)
     return resnet(block_fn, block_repeats, RESNET_SHORTCUTS[downsample_opt]; kwargs...)
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -8,19 +8,19 @@
     conv_norm(kernel_size::Dims{2}, inplanes => outplanes, activation = identity;
               kwargs...)
 
-Create a convolution + batch normalization pair with activation.
+Create a convolution + normalisation layer pair with activation.
 
 # Arguments
 
   - `kernel_size`: size of the convolution kernel (tuple)
   - `inplanes`: number of input feature maps
   - `outplanes`: number of output feature maps
   - `activation`: the activation function for the final layer
-  - `norm_layer`: the normalization layer used
+  - `norm_layer`: the normalisation layer used
   - `revnorm`: set to `true` to place the normalisation layer before the convolution
-  - `preact`: set to `true` to place the activation function before the batch norm
+  - `preact`: set to `true` to place the activation function before the normalisation layer
     (only compatible with `revnorm = false`)
-  - `use_norm`: set to `false` to disable normalization
+  - `use_norm`: set to `false` to disable normalisation
     (only compatible with `revnorm = false` and `preact = false`)
   - `stride`: stride of the convolution kernel
   - `pad`: padding of the convolution kernel
@@ -70,8 +70,22 @@ function conv_norm(kernel_size::Dims{2}, ch::Pair{<:Integer, <:Integer},
     return conv_norm(kernel_size, inplanes, outplanes, activation; kwargs...)
 end
 
-# conv + bn layer combination as used by the inception model family matching
-# the default values used in TensorFlow
+"""
+    basic_conv_bn(kernel_size::Dims{2}, inplanes, outplanes, activation = relu;
+                  kwargs...)
+
+Returns a convolution + batch normalisation pair with activation as used by the
+Inception family of models with default values matching those used in the official
+TensorFlow implementation.
+
+# Arguments
+
+  - `kernel_size`: size of the convolution kernel (tuple)
+  - `inplanes`: number of input feature maps
+  - `outplanes`: number of output feature maps
+  - `activation`: the activation function for the final layer
+  - `kwargs`: keyword arguments passed to [`conv_norm`](@ref)
+"""
 function basic_conv_bn(kernel_size::Dims{2}, inplanes, outplanes, activation = relu;
                        kwargs...)
     return conv_norm(kernel_size, inplanes, outplanes, activation; norm_layer = BatchNorm,
diff --git a/src/mixers/gmlp.jl b/src/mixers/gmlp.jl
@@ -94,10 +94,13 @@ end
 @functor gMLP
 
 function gMLP(config::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16),
-              inchannels::Integer = 3, nclasses::Integer = 1000)
+              pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(MIXER_CONFIGS))
     layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, patch_size,
                       MIXER_CONFIGS[config]..., inchannels, nclasses)
+    # `loadpretrain!` is only invoked on the layers when `pretrain` is set
+    weights_name = "gmlp$(config)"
+    pretrain && loadpretrain!(layers, weights_name)
     return gMLP(layers)
 end
 
diff --git a/src/mixers/mlpmixer.jl b/src/mixers/mlpmixer.jl
@@ -57,11 +57,14 @@ end
 @functor MLPMixer
 
 function MLPMixer(config::Symbol; imsize::Dims{2} = (224, 224),
-                  patch_size::Dims{2} = (16, 16),
+                  patch_size::Dims{2} = (16, 16), pretrain::Bool = false,
                   inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(MIXER_CONFIGS))
     layers = mlpmixer(mixerblock, imsize; patch_size, MIXER_CONFIGS[config]..., inchannels,
                       nclasses)
+    # `loadpretrain!` is only invoked on the layers when `pretrain` is set
+    weights_name = "mlpmixer$(config)"
+    pretrain && loadpretrain!(layers, weights_name)
     return MLPMixer(layers)
 end
 
diff --git a/src/mixers/resmlp.jl b/src/mixers/resmlp.jl
@@ -56,11 +56,14 @@ end
 @functor ResMLP
 
 function ResMLP(config::Symbol; imsize::Dims{2} = (224, 224),
-                patch_size::Dims{2} = (16, 16),
+                patch_size::Dims{2} = (16, 16), pretrain::Bool = false,
                 inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(MIXER_CONFIGS))
     layers = mlpmixer(resmixerblock, imsize; mlp_ratio = 4.0, patch_size,
                       MIXER_CONFIGS[config]..., inchannels, nclasses)
+    if pretrain
+        loadpretrain!(layers, string("resmlp", config))  # prefix must be a string literal
+    end
     return ResMLP(layers)
 end
 
diff --git a/src/vit-based/vit.jl b/src/vit-based/vit.jl