
Commit 842fa99

Merge pull request #195 from theabhirath/res2net-again
Res2Net and Res2NeXt, again
2 parents 1e4c669 + 76d5b7e

File tree: 15 files changed, +278 −57 lines

.github/workflows/CI.yml

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ jobs:
           - '["EfficientNet"]'
           - 'r"/*/ResNet*"'
           - '[r"ResNeXt", r"SEResNet"]'
+          - '[r"Res2Net", r"Res2NeXt"]'
           - '"Inception"'
           - '"DenseNet"'
           - '["ConvNeXt", "ConvMixer"]'

src/Metalhead.jl

Lines changed: 4 additions & 3 deletions
@@ -26,6 +26,7 @@ include("convnets/resnets/core.jl")
 include("convnets/resnets/resnet.jl")
 include("convnets/resnets/resnext.jl")
 include("convnets/resnets/seresnet.jl")
+include("convnets/resnets/res2net.jl")
 ## Inceptions
 include("convnets/inception/googlenet.jl")
 include("convnets/inception/inceptionv3.jl")
@@ -57,16 +58,16 @@ include("pretrain.jl")
 
 export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
        ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152,
-       WideResNet, ResNeXt, SEResNet, SEResNeXt,
+       WideResNet, ResNeXt, SEResNet, SEResNeXt, Res2Net, Res2NeXt,
        DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201,
        GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
        SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3, EfficientNet,
        MLPMixer, ResMLP, gMLP, ViT, ConvMixer, ConvNeXt
 
 # use Flux._big_show to pretty print large models
 for T in (:AlexNet, :VGG, :ResNet, :ResNeXt, :DenseNet, :SEResNet, :SEResNeXt,
-          :GoogLeNet, :Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
-          :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3, :EfficientNet,
+          :Res2Net, :Res2NeXt, :GoogLeNet, :Inceptionv3, :Inceptionv4,
+          :Xception, :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3, :EfficientNet,
           :MLPMixer, :ResMLP, :gMLP, :ViT, :ConvMixer, :ConvNeXt)
     @eval Base.show(io::IO, ::MIME"text/plain", model::$T) = _maybe_big_show(io, model)
 end

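With `res2net.jl` included and the new names exported, Res2Net and Res2NeXt can be used like the other ResNet-family wrappers. A minimal sketch, assuming the constructors follow the same depth-based pattern as `ResNet` and `ResNeXt` (the contents of `res2net.jl` are not shown in this excerpt, so the exact keywords are assumptions):

using Metalhead

# Assumed constructor pattern, mirroring the other ResNet-family wrappers;
# keywords such as `pretrain` are not confirmed by this diff.
m1 = Res2Net(50)    # a 50-layer Res2Net configuration
m2 = Res2NeXt(50)   # the grouped-convolution (cardinality > 1) variant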
src/convnets/inception/googlenet.jl

Lines changed: 2 additions & 2 deletions
@@ -36,7 +36,7 @@ Create an Inception-v1 model (commonly referred to as GoogLeNet)
 
 - `nclasses`: the number of output classes
 """
-function googlenet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+function googlenet(; dropout_rate = 0.4, inchannels::Integer = 3, nclasses::Integer = 1000)
     backbone = Chain(Conv((7, 7), inchannels => 64; stride = 2, pad = 3),
                      MaxPool((3, 3); stride = 2, pad = 1),
                      Conv((1, 1), 64 => 64),
@@ -53,7 +53,7 @@ function googlenet(; inchannels::Integer = 3, nclasses::Integer = 1000)
                      MaxPool((3, 3); stride = 2, pad = 1),
                      _inceptionblock(832, 256, 160, 320, 32, 128, 128),
                      _inceptionblock(832, 384, 192, 384, 48, 128, 128))
-    return Chain(backbone, create_classifier(1024, nclasses; dropout_rate = 0.4))
+    return Chain(backbone, create_classifier(1024, nclasses; dropout_rate))
 end
 
 """

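This change surfaces the classifier dropout as a `dropout_rate` keyword instead of a hard-coded value, keeping the old 0.4 as the default; the `inceptionv3` change below does the same with a default of 0.2. A minimal sketch of calling the lowercase, non-exported builder directly, using only the signature shown above:

# 0.4 remains the default rate, so existing behaviour is unchanged.
m_default = googlenet()                      # dropout_rate = 0.4, as before
m_light   = googlenet(; dropout_rate = 0.1)  # weaker regularisation
m_nodrop  = googlenet(; dropout_rate = 0.0)  # disable dropout entirely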
src/convnets/inception/inceptionresnetv2.jl

Lines changed: 3 additions & 3 deletions
@@ -96,7 +96,8 @@ function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0,
 end
 
 """
-    InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000)
+    InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3,
+                      nclasses::Integer = 1000)
 
 Creates an InceptionResNetv2 model.
 ([reference](https://arxiv.org/abs/1602.07261))
@@ -118,9 +119,8 @@ end
 @functor InceptionResNetv2
 
 function InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3,
-                           dropout_rate = 0.0,
                            nclasses::Integer = 1000)
-    layers = inceptionresnetv2(; inchannels, dropout_rate, nclasses)
+    layers = inceptionresnetv2(; inchannels, nclasses)
     if pretrain
         loadpretrain!(layers, "InceptionResNetv2")
     end

src/convnets/inception/inceptionv3.jl

Lines changed: 2 additions & 2 deletions
@@ -133,7 +133,7 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)).
 
 - `nclasses`: the number of output classes
 """
-function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000)
+function inceptionv3(; dropout_rate = 0.2, inchannels::Integer = 3, nclasses::Integer = 1000)
     backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)...,
                      conv_norm((3, 3), 32, 32)...,
                      conv_norm((3, 3), 32, 64; pad = 1)...,
@@ -152,7 +152,7 @@ function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000)
                      inceptionv3_d(768),
                      inceptionv3_e(1280),
                      inceptionv3_e(2048))
-    return Chain(backbone, create_classifier(2048, nclasses; dropout_rate = 0.2))
+    return Chain(backbone, create_classifier(2048, nclasses; dropout_rate))
 end
 
 """

src/convnets/inception/xception.jl

Lines changed: 1 addition & 2 deletions
@@ -66,8 +66,7 @@ function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integ
                      xception_block(728, 1024, 2; stride = 2, grow_at_start = false),
                      depthwise_sep_conv_norm((3, 3), 1024, 1536; pad = 1)...,
                      depthwise_sep_conv_norm((3, 3), 1536, 2048; pad = 1)...)
-    classifier = create_classifier(2048, nclasses; dropout_rate)
-    return Chain(backbone, classifier)
+    return Chain(backbone, create_classifier(2048, nclasses; dropout_rate))
 end
 
 """

src/convnets/resnets/core.jl

Lines changed: 53 additions & 35 deletions
@@ -1,8 +1,9 @@
 """
-    basicblock(inplanes, planes; stride = 1, reduction_factor = 1, activation = relu,
-               norm_layer = BatchNorm, revnorm = false,
-               drop_block = identity, drop_path = identity,
-               attn_fn = planes -> identity)
+    basicblock(inplanes::Integer, planes::Integer; stride::Integer = 1,
+               reduction_factor::Integer = 1, activation = relu,
+               norm_layer = BatchNorm, revnorm::Bool = false,
+               drop_block = identity, drop_path = identity,
+               attn_fn = planes -> identity)
 
 Creates a basic residual block (see [reference](https://arxiv.org/abs/1512.03385v1)).
 
@@ -11,10 +12,11 @@ Creates a basic residual block (see [reference](https://arxiv.org/abs/1512.03385
 - `inplanes`: number of input feature maps
 - `planes`: number of feature maps for the block
 - `stride`: the stride of the block
-- `reduction_factor`: the factor by which the input feature maps
-  are reduced before the first convolution.
+- `reduction_factor`: the factor by which the input feature maps are reduced before
+  the first convolution.
 - `activation`: the activation function to use.
 - `norm_layer`: the normalization layer to use.
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
 - `drop_block`: the drop block layer
 - `drop_path`: the drop path layer
 - `attn_fn`: the attention function to use. See [`squeeze_excite`](#) for an example.
@@ -36,11 +38,12 @@ function basicblock(inplanes::Integer, planes::Integer; stride::Integer = 1,
 end
 
 """
-    bottleneck(inplanes, planes; stride = 1, cardinality = 1, base_width = 64,
-               reduction_factor = 1, activation = relu,
-               norm_layer = BatchNorm, revnorm = false,
-               drop_block = identity, drop_path = identity,
-               attn_fn = planes -> identity)
+    bottleneck(inplanes::Integer, planes::Integer; stride::Integer,
+               cardinality::Integer = 1, base_width::Integer = 64,
+               reduction_factor::Integer = 1, activation = relu,
+               norm_layer = BatchNorm, revnorm::Bool = false,
+               drop_block = identity, drop_path = identity,
+               attn_fn = planes -> identity)
 
 Creates a bottleneck residual block (see [reference](https://arxiv.org/abs/1512.03385v1)).
 
@@ -55,6 +58,7 @@ Creates a bottleneck residual block (see [reference](https://arxiv.org/abs/1512.
   convolution.
 - `activation`: the activation function to use.
 - `norm_layer`: the normalization layer to use.
+- `revnorm`: set to `true` to place the normalisation layer before the convolution
 - `drop_block`: the drop block layer
 - `drop_path`: the drop path layer
 - `attn_fn`: the attention function to use. See [`squeeze_excite`](#) for an example.
@@ -112,7 +116,7 @@ function downsample_identity(inplanes::Integer, outplanes::Integer; kwargs...)
 end
 
 # Shortcut configurations for the ResNet models
-const shortcut_dict = Dict(:A => (downsample_identity, downsample_identity),
+const RESNET_SHORTCUTS = Dict(:A => (downsample_identity, downsample_identity),
                            :B => (downsample_conv, downsample_identity),
                            :C => (downsample_conv, downsample_conv),
                            :D => (downsample_pool, downsample_identity))
@@ -153,7 +157,8 @@ on how to use this function.
   shows peformance improvements over the `:deep` stem in some cases.
 
 - `inchannels`: The number of channels in the input.
-- `replace_pool`: Set to true to replace the max pooling layers with a 3x3 convolution + normalization with a stride of two.
+- `replace_pool`: Set to true to replace the max pooling layers with a 3x3 convolution +
+  normalization with a stride of two.
 - `norm_layer`: The normalisation layer used in the stem.
 - `activation`: The activation function used in the stem.
 """
@@ -253,8 +258,6 @@ function bottleneck_builder(block_repeats::AbstractVector{<:Integer};
         stride = stride_fn(stage_idx, block_idx)
         downsample_fn = (stride != 1 || inplanes != planes * expansion) ?
                         downsample_tuple[1] : downsample_tuple[2]
-        # DropBlock, DropPath both take in rates based on a linear scaling schedule
-        schedule_idx = sum(block_repeats[1:(stage_idx - 1)]) + block_idx
         drop_path = DropPath(pathschedule[schedule_idx])
         drop_block = DropBlock(blockschedule[schedule_idx])
         block = bottleneck(inplanes, planes; stride, cardinality, base_width,
@@ -289,35 +292,46 @@ function resnet(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Inte
     return Chain(backbone, classifier_fn(nfeaturemaps))
 end
 
-function resnet(block_type::Symbol, block_repeats::AbstractVector{<:Integer};
-                downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity),
+function resnet(block_type, block_repeats::AbstractVector{<:Integer},
+                downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity);
                 cardinality::Integer = 1, base_width::Integer = 64, inplanes::Integer = 64,
                 reduction_factor::Integer = 1, imsize::Dims{2} = (256, 256),
-                inchannels::Integer = 3, stem_fn = resnet_stem,
-                connection = addact, activation = relu, norm_layer = BatchNorm,
-                revnorm::Bool = false, attn_fn = planes -> identity,
-                pool_layer = AdaptiveMeanPool((1, 1)), use_conv::Bool = false,
-                drop_block_rate = 0.0, drop_path_rate = 0.0, dropout_rate = 0.0,
-                nclasses::Integer = 1000)
+                inchannels::Integer = 3, stem_fn = resnet_stem, connection = addact,
+                activation = relu, norm_layer = BatchNorm, revnorm::Bool = false,
+                attn_fn = planes -> identity, pool_layer = AdaptiveMeanPool((1, 1)),
+                use_conv::Bool = false, drop_block_rate = 0.0, drop_path_rate = 0.0,
+                dropout_rate = 0.0, nclasses::Integer = 1000, kwargs...)
     # Build stem
     stem = stem_fn(; inchannels)
     # Block builder
-    if block_type == :basicblock
+    if block_type == basicblock
        @assert cardinality==1 "Cardinality must be 1 for `basicblock`"
        @assert base_width==64 "Base width must be 64 for `basicblock`"
        get_layers = basicblock_builder(block_repeats; inplanes, reduction_factor,
                                        activation, norm_layer, revnorm, attn_fn,
                                        drop_block_rate, drop_path_rate,
                                        stride_fn = resnet_stride,
                                        planes_fn = resnet_planes,
-                                       downsample_tuple = downsample_opt)
-    elseif block_type == :bottleneck
+                                       downsample_tuple = downsample_opt,
+                                       kwargs...)
+    elseif block_type == bottleneck
        get_layers = bottleneck_builder(block_repeats; inplanes, cardinality, base_width,
-                                       reduction_factor, activation, norm_layer,
-                                       revnorm, attn_fn, drop_block_rate, drop_path_rate,
+                                       reduction_factor, activation, norm_layer, revnorm,
+                                       attn_fn, drop_block_rate, drop_path_rate,
                                        stride_fn = resnet_stride,
                                        planes_fn = resnet_planes,
-                                       downsample_tuple = downsample_opt)
+                                       downsample_tuple = downsample_opt,
+                                       kwargs...)
+    elseif block_type == bottle2neck
+        @assert drop_block_rate==0.0 "DropBlock not supported for `bottle2neck`"
+        @assert drop_path_rate==0.0 "DropPath not supported for `bottle2neck`"
+        @assert reduction_factor==1 "Reduction factor not supported for `bottle2neck`"
+        get_layers = bottle2neck_builder(block_repeats; inplanes, cardinality, base_width,
+                                         activation, norm_layer, revnorm, attn_fn,
+                                         stride_fn = resnet_stride,
+                                         planes_fn = resnet_planes,
+                                         downsample_tuple = downsample_opt,
+                                         kwargs...)
    else
        # TODO: write better message when we have link to dev docs for resnet
        throw(ArgumentError("Unknown block type $block_type"))
@@ -328,12 +342,16 @@ function resnet(block_type::Symbol, block_repeats::AbstractVector{<:Integer};
                   connection$activation, classifier_fn)
 end
 function resnet(block_fn, block_repeats, downsample_opt::Symbol = :B; kwargs...)
-    return resnet(block_fn, block_repeats, shortcut_dict[downsample_opt]; kwargs...)
+    return resnet(block_fn, block_repeats, RESNET_SHORTCUTS[downsample_opt]; kwargs...)
 end
 
 # block-layer configurations for ResNet-like models
-const RESNET_CONFIGS = Dict(18 => (:basicblock, [2, 2, 2, 2]),
-                            34 => (:basicblock, [3, 4, 6, 3]),
-                            50 => (:bottleneck, [3, 4, 6, 3]),
-                            101 => (:bottleneck, [3, 4, 23, 3]),
-                            152 => (:bottleneck, [3, 8, 36, 3]))
+const RESNET_CONFIGS = Dict(18 => (basicblock, [2, 2, 2, 2]),
+                            34 => (basicblock, [3, 4, 6, 3]),
+                            50 => (bottleneck, [3, 4, 6, 3]),
+                            101 => (bottleneck, [3, 4, 23, 3]),
+                            152 => (bottleneck, [3, 8, 36, 3]))
+
+const LRESNET_CONFIGS = Dict(50 => (bottleneck, [3, 4, 6, 3]),
+                             101 => (bottleneck, [3, 4, 23, 3]),
+                             152 => (bottleneck, [3, 8, 36, 3]))

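The net effect of these changes is that block types are now first-class functions rather than `Symbol`s: `resnet` compares `block_type` against `basicblock`, `bottleneck`, and the new `bottle2neck`, and the trailing `kwargs...` forwards block-specific options to the matching builder. A minimal sketch of this internal, non-exported API based only on what the diff shows; the `scale` keyword is a hypothetical example of a `bottle2neck`-specific option:

# Look up a standard configuration and build a ResNet-50-shaped model.
block_fn, block_repeats = RESNET_CONFIGS[50]   # (bottleneck, [3, 4, 6, 3])
layers = resnet(block_fn, block_repeats, :B)   # :B resolves via RESNET_SHORTCUTS

# The `kwargs...` forwarding is what lets a Res2Net wrapper pass options that
# only `bottle2neck_builder` understands, e.g. a hypothetical `scale` keyword:
layers = resnet(bottle2neck, [3, 4, 6, 3], :B; base_width = 26, scale = 4)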