
Commit a4d3f12

Initial commit for new ResNet API
1 parent 4565b2d commit a4d3f12

8 files changed, +202 -275 lines changed

docs/make.jl

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 using Pkg

-Pkg.develop(path = "..")
+Pkg.develop(; path = "..")

 using Publish
 using Artifacts, LazyArtifacts
@@ -13,5 +13,5 @@ p = Publish.Project(Metalhead)

 function build_and_deploy(label)
     rm(label; recursive = true, force = true)
-    deploy(Metalhead; root = "/Metalhead.jl", label = label)
+    return deploy(Metalhead; root = "/Metalhead.jl", label = label)
 end

docs/serve.jl

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 using Pkg

-Pkg.develop(path = "..")
+Pkg.develop(; path = "..")

 using Revise
 using Publish
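Both documentation scripts switch to the explicit-keyword call form of `Pkg.develop`. In Julia the semicolon only marks where keyword arguments begin, so the two spellings are equivalent; a minimal illustration:

    Pkg.develop(path = "..")    # keyword inferred from the name = value syntax
    Pkg.develop(; path = "..")  # keyword made explicit after the semicolon (style used in this commit)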

src/Metalhead.jl

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ include("vit-based/vit.jl")
 include("pretrain.jl")

 export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
-       ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNeXt,
+       # ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNeXt,
        DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201,
        GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
        SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3,
@@ -47,7 +47,7 @@ export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
        ConvMixer, ConvNeXt

 # use Flux._big_show to pretty print large models
-for T in (:AlexNet, :VGG, :ResNet, :ResNeXt, :DenseNet,
+for T in (:AlexNet, :VGG, :ResNeXt, :DenseNet, # :ResNet,
           :GoogLeNet, :Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
           :SqueezeNet, :MobileNetv1, :MobileNetv2, :MobileNetv3,
           :MLPMixer, :ResMLP, :gMLP, :ViT, :ConvMixer, :ConvNeXt)
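The commented-out entries temporarily pull the ResNet family out of the public exports and out of the pretty-printing loop while the new ResNet API lands. The loop's comment says it uses `Flux._big_show` for large models; a hedged sketch of what one iteration plausibly generates (the actual method body lives outside this diff and may differ):

    for T in (:AlexNet, :VGG, :ResNeXt)   # abbreviated list, illustration only
        @eval Base.show(io::IO, ::MIME"text/plain", model::$T) = Flux._big_show(io, model)
    end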

src/convnets/densenet.jl

Lines changed: 2 additions & 1 deletion
@@ -100,7 +100,8 @@ Create a DenseNet model
   - `reduction`: the factor by which the number of feature maps is scaled across each transition
   - `nclasses`: the number of output classes
 """
-function densenet(nblocks::NTuple{N, <:Integer}; growth_rate = 32, reduction = 0.5, nclasses = 1000)
+function densenet(nblocks::NTuple{N, <:Integer}; growth_rate = 32, reduction = 0.5,
+                  nclasses = 1000) where {N}
     return densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks];
                     reduction = reduction, nclasses = nclasses)
 end
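The reworked signature wraps the long line and adds the `where {N}` clause so the tuple length from the `NTuple{N, <:Integer}` annotation is bound. A minimal sketch of a call that would hit this method, using the standard DenseNet-121 block counts purely as an illustration (the lowercase `densenet` builder is internal and not exported):

    using Metalhead

    # (6, 12, 24, 16) dense blocks, growth rate 32, 1000 output classes
    model = Metalhead.densenet((6, 12, 24, 16); growth_rate = 32, nclasses = 1000)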

src/convnets/resnet.jl

Lines changed: 167 additions & 245 deletions
Large diffs are not rendered by default.

src/layers/Layers.jl

Lines changed: 2 additions & 1 deletion
@@ -24,5 +24,6 @@ export MHAttention,
        ChannelLayerNorm, prenorm,
        skip_identity, skip_projection,
        conv_bn, depthwise_sep_conv_bn,
-       invertedresidual, squeeze_excite
+       invertedresidual, squeeze_excite,
+       DropBlock
 end
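With `DropBlock` now in the export list of the internal `Layers` module, it can be brought in by name; a minimal sketch, assuming the submodule path `Metalhead.Layers` as laid out in this tree:

    using Metalhead.Layers: DropBlock

    db = DropBlock(0.1, 7, 1.0)   # drop_prob, block_size, gamma_scale (new field in this commit)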

src/layers/drop.jl

Lines changed: 24 additions & 21 deletions
@@ -7,45 +7,48 @@ Implements DropBlock, a regularization method for convolutional networks.
 struct DropBlock{F}
     drop_prob::F
     block_size::Integer
+    gamma_scale::F
 end
 @functor DropBlock

-(m::DropBlock)(x) = dropblock(x, m.drop_prob, m.block_size)
+(m::DropBlock)(x) = dropblock(x, m.drop_prob, m.block_size, m.gamma_scale)

-DropBlock(drop_prob = 0.1, block_size = 7) = DropBlock(drop_prob, block_size)
+function DropBlock(drop_prob = 0.1, block_size = 7, gamma_scale = 1.0)
+    return DropBlock(drop_prob, block_size, gamma_scale)
+end

-function _dropblock_checks(x, drop_prob, T)
+function _dropblock_checks(x, drop_prob, gamma_scale, T)
     if !(T <: AbstractArray)
         throw(ArgumentError("x must be an `AbstractArray`"))
     end
     if ndims(x) != 4
         throw(ArgumentError("x must have 4 dimensions (H, W, C, N) for `DropBlock`"))
     end
-    @assert drop_prob < 0 || drop_prob > 1 "drop_prob must be between 0 and 1, got $drop_prob"
+    @assert drop_prob < 0||drop_prob > 1 "drop_prob must be between 0 and 1, got $drop_prob"
+    @assert gamma_scale < 0||gamma_scale > 1 "gamma_scale must be between 0 and 1, got $gamma_scale"
 end
-ChainRulesCore.@non_differentiable _dropblock_checks(x, drop_prob, T)
+ChainRulesCore.@non_differentiable _dropblock_checks(x, drop_prob, gamma_scale, T)

-function dropblock(x::T, drop_prob, block_size::Integer) where {T}
-    _dropblock_checks(x, drop_prob, T)
+function dropblock(x::T, drop_prob, block_size::Integer, gamma_scale) where {T}
+    _dropblock_checks(x, drop_prob, gamma_scale, T)
     if drop_prob == 0
         return x
     end
-    return _dropblock(x, drop_prob, block_size)
+    return _dropblock(x, drop_prob, block_size, gamma_scale)
 end

-function _dropblock(x::AbstractArray{T, 4}, drop_prob, block_size) where {T}
-    gamma = drop_prob / (block_size ^ 2)
-    mask = rand_like(x, Float32, (size(x, 1), size(x, 2), size(x, 3)))
-    mask .<= gamma
-    block_mask = maxpool(reshape(mask, (size(mask)[1:3]..., 1)), (block_size, block_size);
-                         pad = block_size ÷ 2, stride = (1, 1))
-    if block_size % 2 == 0
-        block_mask = block_mask[1:(end - 1), 1:(end - 1), :, :]
-    end
-    block_mask = 1 .- dropdims(block_mask; dims = 4)
-    out = (x .* reshape(block_mask, (size(block_mask)[1:3]..., 1))) * length(block_mask) /
-          sum(block_mask)
-    return out
+function _dropblock(x::AbstractArray{T, 4}, drop_prob, block_size, gamma_scale) where {T}
+    H, W, _, _ = size(x)
+    total_size = H * W
+    clipped_block_size = min(block_size, min(H, W))
+    gamma = gamma_scale * drop_prob * total_size / clipped_block_size^2 /
+            ((W - block_size + 1) * (H - block_size + 1))
+    block_mask = rand_like(x) .< gamma
+    block_mask = maxpool(convert(T, block_mask), (clipped_block_size, clipped_block_size);
+                         stride = 1, padding = clipped_block_size ÷ 2)
+    block_mask = 1 .- block_mask
+    normalize_scale = convert(T, (length(block_mask) / sum(block_mask) .+ 1e-6))
+    return x * block_mask * normalize_scale
 end

 """

src/layers/normalise.jl

Lines changed: 2 additions & 2 deletions
@@ -19,9 +19,9 @@ end

 @functor ChannelLayerNorm

-(m::ChannelLayerNorm)(x) = m.diag(MLUtils.normalise(x; dims = ndims(x) - 1, ϵ = m.ϵ))
-
 function ChannelLayerNorm(sz::Integer, λ = identity; ϵ = 1.0f-5)
     diag = Flux.Scale(1, 1, sz, λ)
     return ChannelLayerNorm(diag, ϵ)
 end
+
+(m::ChannelLayerNorm)(x) = m.diag(MLUtils.normalise(x; dims = ndims(x) - 1, ϵ = m.ϵ))
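This hunk only moves the call overload below the constructor; the behaviour is unchanged: `ChannelLayerNorm` normalises over the channel axis (`ndims(x) - 1`, the third axis of a WHCN array) and then applies the learned `Flux.Scale`. A minimal usage sketch, assuming `ChannelLayerNorm` is pulled in from the internal `Layers` module:

    using Metalhead.Layers: ChannelLayerNorm

    ln = ChannelLayerNorm(64)           # 64 channels, identity activation
    x  = rand(Float32, 28, 28, 64, 8)   # W × H × C × N
    y  = ln(x)                          # same shape, normalised over the channel axis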
