Commit b54f594: Add Res2Net and Res2NeXt

1 parent: 59e1ef4

File tree

6 files changed: +197 additions, -36 deletions


src/Metalhead.jl

Lines changed: 4 additions & 3 deletions

@@ -26,6 +26,7 @@ include("convnets/resnets/core.jl")
 include("convnets/resnets/resnet.jl")
 include("convnets/resnets/resnext.jl")
 include("convnets/resnets/seresnet.jl")
+include("convnets/resnets/res2net.jl")
 ## Inceptions
 include("convnets/inception/googlenet.jl")
 include("convnets/inception/inceptionv3.jl")
@@ -57,16 +58,16 @@ include("pretrain.jl")
 
 export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
        ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152,
-       WideResNet, ResNeXt, SEResNet, SEResNeXt,
+       WideResNet, ResNeXt, SEResNet, SEResNeXt, Res2Net, Res2NeXt,
        DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201,
        GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
        SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3, EfficientNet,
        MLPMixer, ResMLP, gMLP, ViT, ConvMixer, ConvNeXt
 
 # use Flux._big_show to pretty print large models
 for T in (:AlexNet, :VGG, :ResNet, :ResNeXt, :DenseNet, :SEResNet, :SEResNeXt,
-          :GoogLeNet, :Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
-          :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3, :EfficientNet,
+          :Res2Net, :Res2NeXt, :GoogLeNet, :Inceptionv3, :Inceptionv4, :InceptionResNetv2,
+          :Xception, :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3, :EfficientNet,
           :MLPMixer, :ResMLP, :gMLP, :ViT, :ConvMixer, :ConvNeXt)
     @eval Base.show(io::IO, ::MIME"text/plain", model::$T) = _maybe_big_show(io, model)
 end
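
With these exports in place, the two new families are constructed like any other
Metalhead model. A minimal usage sketch (pretrained weights for these
architectures may not be registered yet, so `pretrain = true` could fail):

    using Metalhead

    m1 = Res2Net(50)    # defaults: scale = 4, base_width = 26
    m2 = Res2NeXt(50)   # defaults: cardinality = 8, base_width = 4, scale = 4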

src/convnets/resnets/core.jl

Lines changed: 43 additions & 30 deletions

@@ -1,8 +1,9 @@
 """
-    basicblock(inplanes, planes; stride = 1, reduction_factor = 1, activation = relu,
-               norm_layer = BatchNorm, revnorm = false,
-               drop_block = identity, drop_path = identity,
-               attn_fn = planes -> identity)
+    basicblock(inplanes::Integer, planes::Integer; stride::Integer = 1,
+               reduction_factor::Integer = 1, activation = relu,
+               norm_layer = BatchNorm, revnorm::Bool = false,
+               drop_block = identity, drop_path = identity,
+               attn_fn = planes -> identity)
 
 Creates a basic residual block (see [reference](https://arxiv.org/abs/1512.03385v1)).
 
@@ -11,10 +12,11 @@ Creates a basic residual block (see [reference](https://arxiv.org/abs/1512.03385
 - `inplanes`: number of input feature maps
 - `planes`: number of feature maps for the block
 - `stride`: the stride of the block
-- `reduction_factor`: the factor by which the input feature maps
-  are reduced before the first convolution.
+- `reduction_factor`: the factor by which the input feature maps are reduced before
+  the first convolution.
 - `activation`: the activation function to use.
 - `norm_layer`: the normalization layer to use.
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
 - `drop_block`: the drop block layer
 - `drop_path`: the drop path layer
 - `attn_fn`: the attention function to use. See [`squeeze_excite`](#) for an example.
@@ -36,11 +38,12 @@ function basicblock(inplanes::Integer, planes::Integer; stride::Integer = 1,
 end
 
 """
-    bottleneck(inplanes, planes; stride = 1, cardinality = 1, base_width = 64,
-               reduction_factor = 1, activation = relu,
-               norm_layer = BatchNorm, revnorm = false,
-               drop_block = identity, drop_path = identity,
-               attn_fn = planes -> identity)
+    bottleneck(inplanes::Integer, planes::Integer; stride::Integer,
+               cardinality::Integer = 1, base_width::Integer = 64,
+               reduction_factor::Integer = 1, activation = relu,
+               norm_layer = BatchNorm, revnorm::Bool = false,
+               drop_block = identity, drop_path = identity,
+               attn_fn = planes -> identity)
 
 Creates a bottleneck residual block (see [reference](https://arxiv.org/abs/1512.03385v1)).
 
@@ -55,6 +58,7 @@ Creates a bottleneck residual block (see [reference](https://arxiv.org/abs/1512.
   convolution.
 - `activation`: the activation function to use.
 - `norm_layer`: the normalization layer to use.
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
 - `drop_block`: the drop block layer
 - `drop_path`: the drop path layer
 - `attn_fn`: the attention function to use. See [`squeeze_excite`](#) for an example.
@@ -153,7 +157,8 @@ on how to use this function.
   shows performance improvements over the `:deep` stem in some cases.
 
 - `inchannels`: The number of channels in the input.
-- `replace_pool`: Set to true to replace the max pooling layers with a 3x3 convolution + normalization with a stride of two.
+- `replace_pool`: Set to true to replace the max pooling layers with a 3x3 convolution +
+  normalization with a stride of two.
 - `norm_layer`: The normalisation layer used in the stem.
 - `activation`: The activation function used in the stem.
 """
@@ -253,8 +258,6 @@ function bottleneck_builder(block_repeats::AbstractVector{<:Integer};
         stride = stride_fn(stage_idx, block_idx)
         downsample_fn = (stride != 1 || inplanes != planes * expansion) ?
                         downsample_tuple[1] : downsample_tuple[2]
-        # DropBlock, DropPath both take in rates based on a linear scaling schedule
-        schedule_idx = sum(block_repeats[1:(stage_idx - 1)]) + block_idx
         drop_path = DropPath(pathschedule[schedule_idx])
         drop_block = DropBlock(blockschedule[schedule_idx])
         block = bottleneck(inplanes, planes; stride, cardinality, base_width,
@@ -280,8 +283,7 @@ function resnet_stages(get_layers, block_repeats::AbstractVector{<:Integer}, con
 end
 
 function resnet(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Integer},
-                connection,
-                classifier_fn)
+                connection, classifier_fn)
     # Build stages of the ResNet
     stage_blocks = resnet_stages(get_layers, block_repeats, connection)
     backbone = Chain(stem, stage_blocks)
@@ -291,35 +293,46 @@ function resnet(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Inte
     return Chain(backbone, classifier)
 end
 
-function resnet(block_type::Symbol, block_repeats::AbstractVector{<:Integer};
-                downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity),
+function resnet(block_type::Symbol, block_repeats::AbstractVector{<:Integer},
+                downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity);
                 cardinality::Integer = 1, base_width::Integer = 64, inplanes::Integer = 64,
                 reduction_factor::Integer = 1, imsize::Dims{2} = (256, 256),
-                inchannels::Integer = 3, stem_fn = resnet_stem,
-                connection = addact, activation = relu, norm_layer = BatchNorm,
-                revnorm::Bool = false, attn_fn = planes -> identity,
-                pool_layer = AdaptiveMeanPool((1, 1)), use_conv::Bool = false,
-                drop_block_rate = 0.0, drop_path_rate = 0.0, dropout_rate = 0.0,
-                nclasses::Integer = 1000)
+                inchannels::Integer = 3, stem_fn = resnet_stem, connection = addact,
+                activation = relu, norm_layer = BatchNorm, revnorm::Bool = false,
+                attn_fn = planes -> identity, pool_layer = AdaptiveMeanPool((1, 1)),
+                use_conv::Bool = false, drop_block_rate = 0.0, drop_path_rate = 0.0,
+                dropout_rate = 0.0, nclasses::Integer = 1000, kwargs...)
     # Build stem
     stem = stem_fn(; inchannels)
     # Block builder
-    if block_type == :basicblock
+    if block_type === :basicblock
         @assert cardinality==1 "Cardinality must be 1 for `basicblock`"
         @assert base_width==64 "Base width must be 64 for `basicblock`"
         get_layers = basicblock_builder(block_repeats; inplanes, reduction_factor,
                                         activation, norm_layer, revnorm, attn_fn,
                                         drop_block_rate, drop_path_rate,
                                         stride_fn = resnet_stride,
                                         planes_fn = resnet_planes,
-                                        downsample_tuple = downsample_opt)
-    elseif block_type == :bottleneck
+                                        downsample_tuple = downsample_opt,
+                                        kwargs...)
+    elseif block_type === :bottleneck
         get_layers = bottleneck_builder(block_repeats; inplanes, cardinality, base_width,
-                                        reduction_factor, activation, norm_layer,
-                                        revnorm, attn_fn, drop_block_rate, drop_path_rate,
+                                        reduction_factor, activation, norm_layer, revnorm,
+                                        attn_fn, drop_block_rate, drop_path_rate,
                                         stride_fn = resnet_stride,
                                         planes_fn = resnet_planes,
-                                        downsample_tuple = downsample_opt)
+                                        downsample_tuple = downsample_opt,
+                                        kwargs...)
+    elseif block_type === :bottle2neck
+        @assert drop_block_rate==0.0 "DropBlock not supported for `bottle2neck`"
+        @assert drop_path_rate==0.0 "DropPath not supported for `bottle2neck`"
+        @assert reduction_factor==1 "Reduction factor not supported for `bottle2neck`"
+        get_layers = bottle2neck_builder(block_repeats; inplanes, cardinality, base_width,
+                                         activation, norm_layer, revnorm, attn_fn,
                                         stride_fn = resnet_stride,
                                         planes_fn = resnet_planes,
+                                         downsample_tuple = downsample_opt,
+                                         kwargs...)
     else
         # TODO: write better message when we have link to dev docs for resnet
         throw(ArgumentError("Unknown block type $block_type"))
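
The reworked dispatch moves `downsample_opt` from a keyword to the third
positional argument and forwards any leftover keywords to the block builder.
A sketch of an internal call (not public API; the argument values are
illustrative):

    # `[3, 4, 6, 3]` are the stage repeats of a depth-50 model. `scale` is not
    # a keyword of `resnet` itself; it reaches `bottle2neck_builder` through
    # `kwargs...`.
    layers = resnet(:bottle2neck, [3, 4, 6, 3];
                    base_width = 26, scale = 4, nclasses = 1000)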

src/convnets/resnets/res2net.jl

Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@
+"""
+    bottle2neck(inplanes::Integer, planes::Integer; stride::Integer = 1,
+                cardinality::Integer = 1, base_width::Integer = 26,
+                scale::Integer = 4, activation = relu, norm_layer = BatchNorm,
+                revnorm::Bool = false, attn_fn = planes -> identity)
+
+Creates a bottleneck block as described in the Res2Net paper.
+([reference](https://arxiv.org/abs/1904.01169))
+
+# Arguments
+- `inplanes`: number of input feature maps
+- `planes`: number of feature maps for the block
+- `stride`: the stride of the block
+- `cardinality`: the number of groups in the 3x3 convolutions.
+- `base_width`: the number of output feature maps for each convolutional group.
+- `scale`: the number of feature groups in the block. See the [paper](https://arxiv.org/abs/1904.01169)
+  for more details.
+- `activation`: the activation function to use.
+- `norm_layer`: the normalization layer to use.
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
+- `attn_fn`: the attention function to use. See [`squeeze_excite`](#) for an example.
+"""
+function bottle2neck(inplanes::Integer, planes::Integer; stride::Integer = 1,
+                     cardinality::Integer = 1, base_width::Integer = 26,
+                     scale::Integer = 4, activation = relu, norm_layer = BatchNorm,
+                     revnorm::Bool = false, attn_fn = planes -> identity)
+    width = fld(planes * base_width, 64) * cardinality
+    outplanes = planes * 4
+    is_first = stride > 1
+    pool = is_first && scale > 1 ? MeanPool((3, 3); stride, pad = 1) : identity
+    conv_bns = [Chain(conv_norm((3, 3), width => width, activation; norm_layer, stride,
+                                pad = 1, groups = cardinality, bias = false)...)
+                for _ in 1:(max(1, scale - 1))]
+    reslayer = is_first ? Parallel(cat_channels, pool, conv_bns...) :
+               Parallel(cat_channels, identity, PairwiseFusion(+, conv_bns...))
+    tuplify(x) = is_first ? tuple(x...) : tuple(x[1], tuple(x[2:end]...))
+    return Chain(conv_norm((1, 1), inplanes => width * scale, activation;
+                           norm_layer, revnorm, bias = false)...,
+                 chunk$(; size = width, dims = 3),
+                 tuplify, reslayer,
+                 conv_norm((1, 1), width * scale => outplanes, activation;
+                           norm_layer, revnorm, bias = false)...,
+                 attn_fn(outplanes))
+end
+
+function bottle2neck_builder(block_repeats::AbstractVector{<:Integer};
+                             inplanes::Integer = 64, cardinality::Integer = 1,
+                             base_width::Integer = 26, scale::Integer = 4,
+                             expansion::Integer = 4, norm_layer = BatchNorm,
+                             revnorm::Bool = false, activation = relu,
+                             attn_fn = planes -> identity,
+                             stride_fn = resnet_stride, planes_fn = resnet_planes,
+                             downsample_tuple = (downsample_conv, downsample_identity))
+    planes_vec = collect(planes_fn(block_repeats))
+    # closure over `planes_vec`
+    function get_layers(stage_idx::Integer, block_idx::Integer)
+        # This is needed for block `inplanes` and `planes` calculations
+        schedule_idx = sum(block_repeats[1:(stage_idx - 1)]) + block_idx
+        planes = planes_vec[schedule_idx]
+        inplanes = schedule_idx == 1 ? inplanes : planes_vec[schedule_idx - 1] * expansion
+        # `resnet_stride` is a callback that the user can tweak to change the stride of the
+        # blocks. It defaults to the standard behaviour as in the paper
+        stride = stride_fn(stage_idx, block_idx)
+        downsample_fn = (stride != 1 || inplanes != planes * expansion) ?
+                        downsample_tuple[1] : downsample_tuple[2]
+        block = bottle2neck(inplanes, planes; stride, cardinality, base_width, scale,
+                            activation, norm_layer, revnorm, attn_fn)
+        downsample = downsample_fn(inplanes, planes * expansion; stride, norm_layer,
+                                   revnorm)
+        return block, downsample
+    end
+    return get_layers
+end
+
+"""
+    Res2Net(depth::Integer; pretrain::Bool = false, scale::Integer = 4,
+            base_width::Integer = 26, inchannels::Integer = 3,
+            nclasses::Integer = 1000)
+
+Creates a Res2Net model with the specified depth, scale, and base width.
+([reference](https://arxiv.org/abs/1904.01169))
+
+# Arguments
+- `depth`: one of `[50, 101, 152]`. The depth of the Res2Net model.
+- `pretrain`: set to `true` to load the model with pre-trained weights for ImageNet
+- `scale`: the number of feature groups in the block. See the
+  [paper](https://arxiv.org/abs/1904.01169) for more details.
+- `base_width`: the number of feature maps in each group.
+- `inchannels`: the number of input channels.
+- `nclasses`: the number of output classes
+"""
+struct Res2Net
+    layers::Any
+end
+@functor Res2Net
+(m::Res2Net)(x) = m.layers(x)
+
+function Res2Net(depth::Integer; pretrain::Bool = false, scale::Integer = 4,
+                 base_width::Integer = 26, inchannels::Integer = 3,
+                 nclasses::Integer = 1000)
+    _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end])
+    layers = resnet(:bottle2neck, RESNET_CONFIGS[depth][2], :C; base_width, scale,
+                    inchannels, nclasses)
+    if pretrain
+        loadpretrain!(layers, string("Res2Net", depth, "_", base_width, "x", scale))
+    end
+    return Res2Net(layers)
+end
+
+"""
+    Res2NeXt(depth::Integer; pretrain::Bool = false, scale::Integer = 4,
+             base_width::Integer = 4, cardinality::Integer = 8,
+             inchannels::Integer = 3, nclasses::Integer = 1000)
+
+Creates a Res2NeXt model with the specified depth, scale, base width and cardinality.
+([reference](https://arxiv.org/abs/1904.01169))
+
+# Arguments
+- `depth`: one of `[50, 101, 152]`. The depth of the Res2NeXt model.
+- `pretrain`: set to `true` to load the model with pre-trained weights for ImageNet
+- `scale`: the number of feature groups in the block. See the
+  [paper](https://arxiv.org/abs/1904.01169) for more details.
+- `base_width`: the number of feature maps in each group.
+- `cardinality`: the number of groups in the 3x3 convolutions.
+- `inchannels`: the number of input channels.
+- `nclasses`: the number of output classes
+"""
+struct Res2NeXt
+    layers::Any
+end
+@functor Res2NeXt
+(m::Res2NeXt)(x) = m.layers(x)
+
+function Res2NeXt(depth::Integer; pretrain::Bool = false, scale::Integer = 4,
+                  base_width::Integer = 4, cardinality::Integer = 8,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
+    _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end])
+    layers = resnet(:bottle2neck, RESNET_CONFIGS[depth][2], :C; base_width, scale,
+                    cardinality, inchannels, nclasses)
+    if pretrain
+        loadpretrain!(layers,
+                      string("Res2NeXt", depth, "_", base_width, "x", cardinality,
+                             "x", scale))
+    end
+    return Res2NeXt(layers)
+end
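
A usage sketch for the new constructors, assuming the usual Metalhead
convention of 224×224 images in WHCN layout (illustrative, not tested against
this commit):

    using Metalhead

    model = Res2Net(50)                  # the 26w×4s variant from the paper
    x = rand(Float32, 224, 224, 3, 1)    # one RGB image
    y = model(x)                         # `nclasses`-element scores per image

    model = Res2NeXt(101; scale = 4, base_width = 4, cardinality = 8)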

src/convnets/resnets/resnet.jl

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ end
 
 function WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3,
                     nclasses::Integer = 1000)
-    _checkconfig(depth, [50, 101])
+    _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end])
     layers = resnet(RESNET_CONFIGS[depth]...; base_width = 128, inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, string("WideResNet", depth))
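
Assuming `RESNET_CONFIGS` is keyed by the standard depths `[18, 34, 50, 101, 152]`,
the new check (the same one `Res2Net` and `Res2NeXt` use above) resolves to
`[50, 101, 152]`, so a depth-152 wide model is now accepted:

    WideResNet(152)   # previously rejected by `_checkconfig(depth, [50, 101])`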

src/layers/conv.jl

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ Create a convolution + batch normalization pair with activation.
 - `outplanes`: number of output feature maps
 - `activation`: the activation function for the final layer
 - `norm_layer`: the normalization layer used
-- `revnorm`: set to `true` to place the batch norm before the convolution
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
 - `preact`: set to `true` to place the activation function before the batch norm
   (only compatible with `revnorm = false`)
 - `use_norm`: set to `false` to disable normalization
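
`conv_norm` returns an iterable of layers rather than a single `Chain`, which
is why call sites such as `bottle2neck` above splat it with `...`. A short
sketch of the documented `revnorm` behaviour (`conv_norm` is internal, so the
qualified name below is an assumption):

    using Flux, Metalhead

    layers = Metalhead.conv_norm((3, 3), 16 => 32, relu; revnorm = true, pad = 1)
    block = Chain(layers...)   # the normalisation layer runs before the convolution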

src/mixers/mlpmixer.jl

Lines changed: 2 additions & 1 deletion

@@ -56,7 +56,8 @@ struct MLPMixer
 end
 @functor MLPMixer
 
-function MLPMixer(config::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16),
+function MLPMixer(config::Symbol; imsize::Dims{2} = (224, 224),
+                  patch_size::Dims{2} = (16, 16),
                   inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(MIXER_CONFIGS))
     layers = mlpmixer(mixerblock, imsize; patch_size, MIXER_CONFIGS[config]..., inchannels,
