
Commit 55565d8

Xception
1 parent 40a8351 commit 55565d8

File tree

5 files changed: +138 additions, −25 deletions

- src/Metalhead.jl
- src/convnets/inception.jl
- src/convnets/mobilenet.jl
- src/layers/conv.jl
- src/utilities.jl

src/Metalhead.jl

Lines changed: 7 additions & 10 deletions

@@ -37,22 +37,19 @@ include("vit-based/vit.jl")
 
 include("pretrain.jl")
 
-export AlexNet,
-       VGG, VGG11, VGG13, VGG16, VGG19,
-       GoogLeNet,
-       ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152,
-       Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2,
-       SqueezeNet,
-       ResNeXt,
+export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
+       ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNeXt,
        DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201,
-       MobileNetv1, MobileNetv2, MobileNetv3,
+       GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
+       SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3,
        MLPMixer, ResMLP, gMLP,
        ViT,
        ConvMixer, ConvNeXt
 
 # use Flux._big_show to pretty print large models
-for T in (:AlexNet, :VGG, :GoogLeNet, :ResNet, :ResNeXt, :Inceptionv3,
-          :SqueezeNet, :DenseNet, :MobileNetv1, :MobileNetv2, :MobileNetv3,
+for T in (:AlexNet, :VGG, :ResNet, :ResNeXt, :DenseNet,
+          :GoogLeNet, :Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
+          :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3,
          :MLPMixer, :ResMLP, :gMLP, :ViT, :ConvMixer, :ConvNeXt)
     @eval Base.show(io::IO, ::MIME"text/plain", model::$T) = _maybe_big_show(io, model)
 end
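With this change the new wrapper type joins the exported models and the pretty-printing hook. A minimal usage sketch, assuming the package is loaded at this commit:

using Metalhead

# Xception is now exported alongside the other Inception-family models.
model = Xception()                  # defaults to 1000 ImageNet classes
small = Xception(; nclasses = 10)   # custom classifier head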

src/convnets/inception.jl

Lines changed: 100 additions & 0 deletions

@@ -478,3 +478,103 @@ end
 
 backbone(m::InceptionResNetv2) = m.layers[1]
 classifier(m::InceptionResNetv2) = m.layers[2]
+
+## Xception
+
+"""
+    xception_block(inchannels, outchannels, nrepeats; stride = 1, start_with_relu = true,
+                   grow_first = true)
+
+Create an Xception block.
+([reference](https://arxiv.org/abs/1610.02357))
+
+# Arguments
+
+- inchannels: number of input channels.
+- outchannels: number of output channels.
+- nrepeats: number of repeats of depthwise separable convolution layers.
+- stride: stride by which to downsample the input.
+- start_with_relu: if true, start the block with a ReLU activation.
+- grow_first: if true, increase the number of channels at the first convolution.
+"""
+function xception_block(inchannels, outchannels, nrepeats; stride = 1,
+                        start_with_relu = true,
+                        grow_first = true)
+    if outchannels != inchannels || stride != 1
+        skip = conv_bn((1, 1), inchannels, outchannels, identity; stride = stride,
+                       bias = false)
+    else
+        skip = [identity]
+    end
+    layers = []
+    for i in 1:nrepeats
+        if grow_first
+            inc = i == 1 ? inchannels : outchannels
+            outc = outchannels
+        else
+            inc = inchannels
+            outc = i == nrepeats ? outchannels : inchannels
+        end
+        push!(layers, Base.Fix1(applyactivation, relu))
+        append!(layers,
+                depthwise_sep_conv_bn((3, 3), inc, outc; pad = 1, bias = false,
+                                      use_bn1 = false, use_bn2 = false))
+        push!(layers, BatchNorm(outc))
+    end
+    layers = start_with_relu ? layers : layers[2:end]
+    push!(layers, MaxPool((3, 3); stride = stride, pad = 1))
+    return Parallel(+, Chain(skip...), Chain(layers...))
+end
+
+"""
+    xception(; inchannels = 3, dropout = 0.0, nclasses = 1000)
+
+Creates an Xception model.
+([reference](https://arxiv.org/abs/1610.02357))
+
+# Arguments
+
+- inchannels: number of input channels.
+- dropout: rate of dropout in classifier head.
+- nclasses: the number of output classes.
+"""
+function xception(; inchannels = 3, dropout = 0.0, nclasses = 1000)
+    body = Chain(conv_bn((3, 3), inchannels, 32; stride = 2, bias = false)...,
+                 conv_bn((3, 3), 32, 64; bias = false)...,
+                 xception_block(64, 128, 2; stride = 2, start_with_relu = false),
+                 xception_block(128, 256, 2; stride = 2),
+                 xception_block(256, 728, 2; stride = 2),
+                 [xception_block(728, 728, 3) for _ in 1:8]...,
+                 xception_block(728, 1024, 2; stride = 2, grow_first = false),
+                 depthwise_sep_conv_bn((3, 3), 1024, 1536; pad = 1)...,
+                 depthwise_sep_conv_bn((3, 3), 1536, 2048; pad = 1)...)
+    head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout), Dense(2048, nclasses))
+    return Chain(body, head)
+end
+
+struct Xception
+    layers::Any
+end
+
+"""
+    Xception(; inchannels = 3, dropout = 0.0, nclasses = 1000)
+
+Creates an Xception model.
+([reference](https://arxiv.org/abs/1610.02357))
+
+# Arguments
+- inchannels: number of input channels.
+- dropout: rate of dropout in classifier head.
+- nclasses: the number of output classes.
+"""
+function Xception(; inchannels = 3, dropout = 0.0, nclasses = 1000)
+    layers = xception(; inchannels, dropout, nclasses)
+    return Xception(layers)
+end
+
+@functor Xception
+
+(m::Xception)(x) = m.layers(x)
+
+backbone(m::Xception) = m.layers[1]
+classifier(m::Xception) = m.layers[2]
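As a sanity check on the new architecture, a hedged forward-pass sketch (the 299×299 resolution follows the Xception paper; shapes use Flux's WHCN layout):

using Metalhead, Flux

model = Xception(; nclasses = 10)
x = rand(Float32, 299, 299, 3, 1)   # width × height × channels × batch
y = model(x)
size(y)                             # (10, 1): one logit column per image

# The accessors above split the wrapper into its two stages:
feats_fn = backbone(model)          # stem + Xception blocks
head_fn = classifier(model)         # global pool, flatten, dropout, dense

Design note: each `xception_block` returns a `Parallel(+, skip, main)` residual pair, and the strided 1×1 `conv_bn` skip path is only built when the block changes channel count or spatial resolution; otherwise the skip is `identity`.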

src/convnets/mobilenet.jl

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ function mobilenetv1(width_mult, config;
        for _ in 1:nrepeats
            layer = dw ?
                    depthwise_sep_conv_bn((3, 3), inchannels, outch, activation;
-                                          stride = stride, pad = 1) :
+                                          stride = stride, pad = 1, bias = false) :
                    conv_bn((3, 3), inchannels, outch, activation; stride = stride, pad = 1)
            append!(layers, layer)
            inchannels = outch
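The new `bias = false` is a parameter-count fix rather than a behavioral change: a convolution feeding straight into a batch norm has its bias cancelled by the normalization, with the per-channel offset re-learned as BatchNorm's shift β. A small illustration of the pattern in plain Flux (hypothetical layer sizes):

using Flux

# The conv bias is redundant before BatchNorm, so it is dropped;
# the β parameter of BatchNorm(8, relu) supplies the per-channel offset.
block = Chain(Conv((3, 3), 3 => 8; pad = 1, bias = false), BatchNorm(8, relu))
x = rand(Float32, 32, 32, 3, 1)
size(block(x))  # (32, 32, 8, 1)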

src/layers/conv.jl

Lines changed: 22 additions & 14 deletions

@@ -1,8 +1,8 @@
 """
     conv_bn(kernelsize, inplanes, outplanes, activation = relu;
-            rev = false, preact = true,
-            stride = 1, pad = 0, dilation = 1, groups = 1, [bias, weight, init],
-            initβ = Flux.zeros32, initγ = Flux.ones32, ϵ = 1f-5, momentum = 1f-1)
+            rev = false, preact = false, use_bn = true,
+            initβ = Flux.zeros32, initγ = Flux.ones32, ϵ = 1.0f-5, momentum = 1.0f-1,
+            kwargs...)
 
 Create a convolution + batch normalization pair with activation.
 
@@ -15,6 +15,8 @@ Create a convolution + batch normalization pair with activation.
 - `rev`: set to `true` to place the batch norm before the convolution
 - `preact`: set to `true` to place the activation function before the batch norm
   (only compatible with `rev = false`)
+- `use_bn`: set to `false` to disable batch normalization
+  (only compatible with `rev = false` and `preact = false`)
 - `stride`: stride of the convolution kernel
 - `pad`: padding of the convolution kernel
 - `dilation`: dilation of the convolution kernel
@@ -24,9 +26,13 @@ Create a convolution + batch normalization pair with activation.
 - `ϵ`, `momentum`: batch norm parameters (see [`Flux.BatchNorm`](#))
 """
 function conv_bn(kernelsize, inplanes, outplanes, activation = relu;
-                 rev = false, preact = false,
+                 rev = false, preact = false, use_bn = true,
                  initβ = Flux.zeros32, initγ = Flux.ones32, ϵ = 1.0f-5, momentum = 1.0f-1,
                  kwargs...)
+    if !use_bn
+        (preact || rev) ? throw("preact only supported with `use_bn = true`") :
+        return [Conv(kernelsize, inplanes => outplanes, activation; kwargs...)]
+    end
     layers = []
     if rev
         activations = (conv = activation, bn = identity)
@@ -49,18 +55,18 @@
 
 """
     depthwise_sep_conv_bn(kernelsize, inplanes, outplanes, activation = relu;
-                          rev = false,
-                          stride = 1, pad = 0, dilation = 1, [bias, weight, init],
-                          initβ = Flux.zeros32, initγ = Flux.ones32,
-                          ϵ = 1f-5, momentum = 1f-1)
+                          rev = false, use_bn1 = true, use_bn2 = true,
+                          initβ = Flux.zeros32, initγ = Flux.ones32,
+                          ϵ = 1.0f-5, momentum = 1.0f-1,
+                          stride = 1, kwargs...)
 
-Create a depthwise separable convolution chain as used in MobileNet v1.
+Create a depthwise separable convolution chain as used in MobileNetv1.
 This is sequence of layers:
 
 - a `kernelsize` depthwise convolution from `inplanes => inplanes`
-- a batch norm layer + `activation`
+- a batch norm layer + `activation` (if `use_bn1`; otherwise `activation` is applied to the convolution output)
 - a `kernelsize` convolution from `inplanes => outplanes`
-- a batch norm layer + `activation`
+- a batch norm layer + `activation` (if `use_bn2`; otherwise `activation` is applied to the convolution output)
 
 See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1).
 
@@ -71,6 +77,8 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1).
 - `outplanes`: number of output feature maps
 - `activation`: the activation function for the final layer
 - `rev`: set to `true` to place the batch norm before the convolution
+- `use_bn1`: set to `true` to use a batch norm after the depthwise convolution
+- `use_bn2`: set to `true` to use a batch norm after the pointwise convolution
 - `stride`: stride of the first convolution kernel
 - `pad`: padding of the first convolution kernel
 - `dilation`: dilation of the first convolution kernel
@@ -79,16 +87,16 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1).
 - `ϵ`, `momentum`: batch norm parameters (see [`Flux.BatchNorm`](#))
 """
 function depthwise_sep_conv_bn(kernelsize, inplanes, outplanes, activation = relu;
-                               rev = false,
+                               rev = false, use_bn1 = true, use_bn2 = true,
                                initβ = Flux.zeros32, initγ = Flux.ones32,
                                ϵ = 1.0f-5, momentum = 1.0f-1,
                                stride = 1, kwargs...)
     return vcat(conv_bn(kernelsize, inplanes, inplanes, activation;
                         rev = rev, initβ = initβ, initγ = initγ,
-                        ϵ = ϵ, momentum = momentum,
+                        ϵ = ϵ, momentum = momentum, use_bn = use_bn1,
                         stride = stride, groups = Int(inplanes), kwargs...),
                 conv_bn((1, 1), inplanes, outplanes, activation;
-                        rev = rev, initβ = initβ, initγ = initγ,
+                        rev = rev, initβ = initβ, initγ = initγ, use_bn = use_bn2,
                         ϵ = ϵ, momentum = momentum))
 end
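A sketch of the new switches, assuming the helpers are brought into scope from the package (they are internal, so a qualified import is used): with `use_bn = false`, `conv_bn` collapses to a single `Conv` with the activation fused in, which is what `xception_block` relies on to interleave its own pre-activations and batch norms.

using Metalhead: conv_bn, depthwise_sep_conv_bn
using Flux

normed = conv_bn((3, 3), 16, 32, relu; pad = 1)                  # Conv followed by BatchNorm
plain  = conv_bn((3, 3), 16, 32, relu; pad = 1, use_bn = false)  # a single fused Conv

# In the depthwise separable helper each half toggles independently:
sep = depthwise_sep_conv_bn((3, 3), 16, 32; pad = 1, bias = false,
                            use_bn1 = false, use_bn2 = false)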

src/utilities.jl

Lines changed: 8 additions & 0 deletions

@@ -55,6 +55,14 @@ Equivalent to `permutedims(x, perm)`.
 """
 swapdims(perm) = Base.Fix2(permutedims, perm)
 
+"""
+    applyactivation(activation, x)
+
+Apply an activation function to a given input.
+Equivalent to `activation.(x)`.
+"""
+applyactivation(activation, x) = activation.(x)
+
 # Utility function for pretty printing large models
 function _maybe_big_show(io, model)
     if isdefined(Flux, :_big_show)
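`applyactivation` lets a broadcast activation stand alone as a layer; partially applying it with `Base.Fix1` is how `xception_block` above inserts its pre-activation ReLUs into a `Chain`. A minimal sketch of the same pattern:

using Flux

applyactivation(activation, x) = activation.(x)

# Base.Fix1 pins the first argument, yielding a callable equivalent to x -> relu.(x)
prerelu = Base.Fix1(applyactivation, relu)
prerelu(Float32[-1.0, 2.0])  # Float32[0.0, 2.0]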
