
Commit 541fabd

Merge branch 'master' into resnet-plus

2 parents 54ea529 + 2b1fbd1

File tree

2 files changed: +328 −1 lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 name = "Metalhead"
 uuid = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
-version = "0.7.3"
+version = "0.8.0-DEV"

 [deps]
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

src/convnets/mobilenet.jl

Lines changed: 327 additions & 0 deletions
@@ -0,0 +1,327 @@
# MobileNetv1

"""
    mobilenetv1(width_mult, config;
                activation = relu,
                inchannels = 3,
                nclasses = 1000)

Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)).

# Arguments

  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper)

  - `config`: A "list of tuples" configuration for each layer that details:

      + `dw`: Set true to use a depthwise separable convolution or false for a regular convolution
      + `o`: The number of output feature maps
      + `s`: The stride of the convolutional kernel
      + `r`: The number of times this configuration block is repeated
  - `activation`: The activation function to use throughout the network
  - `inchannels`: The number of input channels. The default value is 3.
  - `nclasses`: The number of output classes
"""
function mobilenetv1(width_mult, config;
                     activation = relu,
                     inchannels = 3,
                     nclasses = 1000)
    layers = []
    for (dw, outch, stride, nrepeats) in config
        outch = Int(outch * width_mult)
        for _ in 1:nrepeats
            layer = dw ?
                    depthwise_sep_conv_bn((3, 3), inchannels, outch, activation;
                                          stride = stride, pad = 1, bias = false) :
                    conv_bn((3, 3), inchannels, outch, activation; stride = stride,
                            pad = 1, bias = false)
            append!(layers, layer)
            inchannels = outch
        end
    end

    return Chain(Chain(layers),
                 Chain(GlobalMeanPool(),
                       MLUtils.flatten,
                       Dense(inchannels, nclasses)))
end
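
# For intuition, a depthwise separable block like the one `depthwise_sep_conv_bn`
# presumably assembles can be sketched in plain Flux as a grouped 3x3 depthwise
# convolution followed by a 1x1 pointwise convolution (a sketch only; the name
# `_dw_sep_sketch` is illustrative, assuming Flux's `Conv` with its `groups` keyword):
_dw_sep_sketch(inch, outch, activation; stride = 1) =
    Chain(Conv((3, 3), inch => inch; stride, pad = 1, groups = inch, bias = false),
          BatchNorm(inch, activation),
          Conv((1, 1), inch => outch; bias = false),
          BatchNorm(outch, activation))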

const mobilenetv1_configs = [
    # dw, c, s, r
    (false, 32, 2, 1),
    (true, 64, 1, 1),
    (true, 128, 2, 1),
    (true, 128, 1, 1),
    (true, 256, 2, 1),
    (true, 256, 1, 1),
    (true, 512, 2, 1),
    (true, 512, 1, 5),
    (true, 1024, 2, 1),
    (true, 1024, 1, 1),
]
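
# Note how `width_mult` scales the channel counts above: each `c` is multiplied by
# it and converted with `Int`, which throws an `InexactError` on inexact products,
# so the multiplier must scale every width to a whole number.
@assert Int(512 * 0.5) == 256    # 0.5 halves every width cleanly
# Int(512 * 0.3)                 # would throw an InexactError (153.6 is not an Int)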

"""
    MobileNetv1(width_mult = 1; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv1 model with the baseline configuration
([reference](https://arxiv.org/abs/1704.04861v1)).
Set `pretrain` to `true` to load the pretrained weights for ImageNet.

# Arguments

  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper;
    this is usually a value between 0.1 and 1.4)
  - `inchannels`: The number of input channels. The default value is 3.
  - `pretrain`: Whether to load the pre-trained weights for ImageNet
  - `nclasses`: The number of output classes

See also [`Metalhead.mobilenetv1`](#).
"""
struct MobileNetv1
    layers::Any
end

function MobileNetv1(width_mult::Number = 1; inchannels = 3, pretrain = false,
                     nclasses = 1000)
    layers = mobilenetv1(width_mult, mobilenetv1_configs; inchannels, nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv1"))
    return MobileNetv1(layers)
end

@functor MobileNetv1

(m::MobileNetv1)(x) = m.layers(x)

backbone(m::MobileNetv1) = m.layers[1]
classifier(m::MobileNetv1) = m.layers[2]
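
# A minimal usage sketch (assuming Flux and Metalhead are loaded): the wrapper
# type is callable, and `backbone`/`classifier` expose its two halves.
model = MobileNetv1(0.75)             # width multiplier of 0.75
x = rand(Float32, 224, 224, 3, 1)     # one 224x224 RGB image in WHCN layout
y = model(x)                          # class scores of size (1000, 1)
feats = backbone(model)(x)            # convolutional features only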

# MobileNetv2

"""
    mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000)

Create a MobileNetv2 model
([reference](https://arxiv.org/abs/1801.04381)).

# Arguments

  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper)

  - `configs`: A "list of tuples" configuration for each layer that details:

      + `t`: The expansion factor that controls the number of feature maps in the bottleneck layer
      + `c`: The number of output feature maps
      + `n`: The number of times a block is repeated
      + `s`: The stride of the convolutional kernel
      + `a`: The activation function used in the bottleneck layer
  - `inchannels`: The number of input channels. The default value is 3.
  - `max_width`: The maximum number of feature maps in any layer of the network
  - `nclasses`: The number of output classes
"""
function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000)
    # building first layer
    inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8)
    layers = []
    append!(layers, conv_bn((3, 3), inchannels, inplanes; pad = 1, stride = 2))
    # building inverted residual blocks
    for (t, c, n, s, a) in configs
        outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8)
        for i in 1:n
            push!(layers,
                  invertedresidual(3, inplanes, inplanes * t, outplanes, a;
                                   stride = i == 1 ? s : 1))
            inplanes = outplanes
        end
    end
    # building last several layers
    outplanes = (width_mult > 1) ?
                _round_channels(max_width * width_mult, width_mult == 0.1 ? 4 : 8) :
                max_width
    return Chain(Chain(Chain(layers),
                       conv_bn((1, 1), inplanes, outplanes, relu6; bias = false)...),
                 Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten,
                       Dense(outplanes, nclasses)))
end
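
# `_round_channels` is the usual "make divisible" helper from the MobileNet papers.
# A sketch of the semantics this code relies on (the real helper lives elsewhere in
# Metalhead; the name `_round_channels_sketch` and the 10% guard are assumptions):
function _round_channels_sketch(channels, divisor, min_value = divisor)
    new_channels = max(min_value, (floor(Int, channels + divisor / 2) ÷ divisor) * divisor)
    # never round a layer down by more than 10% of its requested width
    return new_channels < 0.9 * channels ? new_channels + divisor : new_channels
end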

# Layer configurations for MobileNetv2
const mobilenetv2_configs = [
    # t, c, n, s, a
    (1, 16, 1, 1, relu6),
    (6, 24, 2, 2, relu6),
    (6, 32, 3, 2, relu6),
    (6, 64, 4, 2, relu6),
    (6, 96, 3, 1, relu6),
    (6, 160, 3, 2, relu6),
    (6, 320, 1, 1, relu6),
]
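
# Reading one row of this table: `(6, 24, 2, 2, relu6)` means an expansion factor
# t = 6, 24 output maps, 2 repeats, stride 2 on the first repeat, and relu6 inside
# the bottleneck. With 16 maps coming in from the previous row, the hidden width is:
@assert 16 * 6 == 96    # inplanes * t feature maps inside the inverted residual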

# Model definition for MobileNetv2
struct MobileNetv2
    layers::Any
end

"""
    MobileNetv2(width_mult = 1.0; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv2 model with the specified configuration
([reference](https://arxiv.org/abs/1801.04381)).
Set `pretrain` to `true` to load the pretrained weights for ImageNet.

# Arguments

  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper;
    this is usually a value between 0.1 and 1.4)
  - `inchannels`: The number of input channels. The default value is 3.
  - `pretrain`: Whether to load the pre-trained weights for ImageNet
  - `nclasses`: The number of output classes

See also [`Metalhead.mobilenetv2`](#).
"""
function MobileNetv2(width_mult::Number = 1; inchannels = 3, pretrain = false,
                     nclasses = 1000)
    layers = mobilenetv2(width_mult, mobilenetv2_configs; inchannels, nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv2"))
    return MobileNetv2(layers)
end

@functor MobileNetv2

(m::MobileNetv2)(x) = m.layers(x)

backbone(m::MobileNetv2) = m.layers[1]
classifier(m::MobileNetv2) = m.layers[2]
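
# A usage sketch mirroring the v1 example above: the two `Chain`s can be used
# separately for feature extraction versus classification.
m2 = MobileNetv2()
x2 = rand(Float32, 224, 224, 3, 1)
feats2 = backbone(m2)(x2)             # spatial feature maps for a 224x224 input
scores = classifier(m2)(feats2)       # global pool + flatten + Dense head
@assert size(scores) == (1000, 1)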

# MobileNetv3

"""
    mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000)

Create a MobileNetv3 model
([reference](https://arxiv.org/abs/1905.02244)).

# Arguments

  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper;
    this is usually a value between 0.1 and 1.4)

  - `configs`: A "list of tuples" configuration for each layer that details:

      + `k::Integer` - The size of the convolutional kernel
      + `t::Real` - The expansion factor that controls the number of feature maps in the hidden layer
      + `c::Integer` - The number of output feature maps for a given block
      + `r::Integer` - The reduction factor (`>= 1` or `nothing` to skip) for squeeze and excite layers
      + `a` - The activation function used in the bottleneck (typically `hardswish` or `relu`)
      + `s::Integer` - The stride of the convolutional kernel
  - `inchannels`: The number of input channels. The default value is 3.
  - `max_width`: The maximum number of feature maps in any layer of the network
  - `nclasses`: The number of output classes
"""
function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000)
    # building first layer
    inplanes = _round_channels(16 * width_mult, 8)
    layers = []
    append!(layers,
            conv_bn((3, 3), inchannels, inplanes, hardswish; pad = 1, stride = 2,
                    bias = false))
    explanes = 0
    # building inverted residual blocks
    for (k, t, c, r, a, s) in configs
        # inverted residual layers
        outplanes = _round_channels(c * width_mult, 8)
        explanes = _round_channels(inplanes * t, 8)
        push!(layers,
              invertedresidual(k, inplanes, explanes, outplanes, a;
                               stride = s, reduction = r))
        inplanes = outplanes
    end
    # building last several layers
    output_channel = max_width
    output_channel = width_mult > 1.0 ? _round_channels(output_channel * width_mult, 8) :
                     output_channel
    classifier = Chain(Dense(explanes, output_channel, hardswish),
                       Dropout(0.2),
                       Dense(output_channel, nclasses))
    return Chain(Chain(Chain(layers),
                       conv_bn((1, 1), inplanes, explanes, hardswish; bias = false)...),
                 Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, classifier))
end

# Configurations for the small and large modes of MobileNetv3
const mobilenetv3_configs = Dict(
    :small => [
        # k, t, c, SE, a, s
        (3, 1, 16, 4, relu, 2),
        (3, 4.5, 24, nothing, relu, 2),
        (3, 3.67, 24, nothing, relu, 1),
        (5, 4, 40, 4, hardswish, 2),
        (5, 6, 40, 4, hardswish, 1),
        (5, 6, 40, 4, hardswish, 1),
        (5, 3, 48, 4, hardswish, 1),
        (5, 3, 48, 4, hardswish, 1),
        (5, 6, 96, 4, hardswish, 2),
        (5, 6, 96, 4, hardswish, 1),
        (5, 6, 96, 4, hardswish, 1),
    ],
    :large => [
        # k, t, c, SE, a, s
        (3, 1, 16, nothing, relu, 1),
        (3, 4, 24, nothing, relu, 2),
        (3, 3, 24, nothing, relu, 1),
        (5, 3, 40, 4, relu, 2),
        (5, 3, 40, 4, relu, 1),
        (5, 3, 40, 4, relu, 1),
        (3, 6, 80, nothing, hardswish, 2),
        (3, 2.5, 80, nothing, hardswish, 1),
        (3, 2.3, 80, nothing, hardswish, 1),
        (3, 2.3, 80, nothing, hardswish, 1),
        (3, 6, 112, 4, hardswish, 1),
        (3, 6, 112, 4, hardswish, 1),
        (5, 6, 160, 4, hardswish, 2),
        (5, 6, 160, 4, hardswish, 1),
        (5, 6, 160, 4, hardswish, 1),
    ])

# Model definition for MobileNetv3
struct MobileNetv3
    layers::Any
end

"""
    MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv3 model with the specified configuration
([reference](https://arxiv.org/abs/1905.02244)).
Set `pretrain = true` to load the model with pre-trained weights for ImageNet.

# Arguments

  - `mode`: Either `:small` or `:large` for the size of the model (see the paper).
  - `width_mult`: Controls the number of output feature maps in each block
    (with 1.0 being the default in the paper;
    this is usually a value between 0.1 and 1.4)
  - `inchannels`: The number of input channels. The default value is 3.
  - `pretrain`: Whether to load the pre-trained weights for ImageNet
  - `nclasses`: The number of output classes

See also [`Metalhead.mobilenetv3`](#).
"""
function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3,
                     pretrain = false, nclasses = 1000)
    @assert mode in [:large, :small] "`mode` has to be either :large or :small"
    max_width = (mode == :large) ? 1280 : 1024
    layers = mobilenetv3(width_mult, mobilenetv3_configs[mode]; inchannels, max_width,
                         nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv3", mode))
    return MobileNetv3(layers)
end

@functor MobileNetv3

(m::MobileNetv3)(x) = m.layers(x)

backbone(m::MobileNetv3) = m.layers[1]
classifier(m::MobileNetv3) = m.layers[2]
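
# A final usage sketch: the positional `mode` argument selects the variant, and an
# unsupported symbol trips the assertion in the constructor above.
m3 = MobileNetv3(:large)
x3 = rand(Float32, 224, 224, 3, 1)
@assert size(m3(x3)) == (1000, 1)
# MobileNetv3(:medium)    # would fail: "`mode` has to be either :large or :small"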
