    mobilenetv1(width_mult, config;
                activation = relu,
                inchannels = 3,
-               nclasses = 1000,
-               fcsize = 1024)
+               nclasses = 1000)

Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)).

@@ -21,23 +20,24 @@ Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)).
  + `s`: The stride of the convolutional kernel
  + `r`: The number of times this configuration block is repeated
- `activation`: The activation function to use throughout the network
-- `inchannels`: The number of input feature maps
+- `inchannels`: The number of input channels. The default value is 3.
- `fcsize`: The intermediate fully-connected size between the convolution and final layers
- `nclasses`: The number of output classes
"""
function mobilenetv1(width_mult, config;
                     activation = relu,
                     inchannels = 3,
-                    nclasses = 1000,
-                    fcsize = 1024)
+                    fcsize = 1024,
+                    nclasses = 1000)
    layers = []
    for (dw, outch, stride, nrepeats) in config
        outch = Int(outch * width_mult)
        for _ in 1:nrepeats
            layer = dw ?
                    depthwise_sep_conv_bn((3, 3), inchannels, outch, activation;
                                          stride = stride, pad = 1, bias = false) :
-                   conv_bn((3, 3), inchannels, outch, activation; stride = stride, pad = 1)
+                   conv_bn((3, 3), inchannels, outch, activation; stride = stride, pad = 1,
+                           bias = false)
            append!(layers, layer)
            inchannels = outch
        end
@@ -51,7 +51,7 @@ function mobilenetv1(width_mult, config;
end

const mobilenetv1_configs = [
-    # dw, c, s, r
+    # dw, c, s, r
    (false, 32, 2, 1),
    (true, 64, 1, 1),
    (true, 128, 2, 1),
@@ -65,7 +65,7 @@ const mobilenetv1_configs = [
]

"""
-    MobileNetv1(width_mult = 1; pretrain = false, nclasses = 1000)
+    MobileNetv1(width_mult = 1; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv1 model with the baseline configuration
([reference](https://arxiv.org/abs/1704.04861v1)).
@@ -76,6 +76,7 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet.
- `width_mult`: Controls the number of output feature maps in each block
  (with 1.0 being the default in the paper;
  this is usually a value between 0.1 and 1.4)
+- `inchannels`: The number of input channels. The default value is 3.
- `pretrain`: Whether to load the pre-trained weights for ImageNet
- `nclasses`: The number of output classes

@@ -85,10 +86,10 @@ struct MobileNetv1
    layers::Any
end

-function MobileNetv1(width_mult::Number = 1; pretrain = false, nclasses = 1000)
-    layers = mobilenetv1(width_mult, mobilenetv1_configs; nclasses = nclasses)
+function MobileNetv1(width_mult::Number = 1; inchannels = 3, pretrain = false,
+                     nclasses = 1000)
+    layers = mobilenetv1(width_mult, mobilenetv1_configs; inchannels, nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv1"))
-
    return MobileNetv1(layers)
end
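Usage sketch for the new keyword (illustrative, not part of the patch; the grayscale input size, width multiplier, and class count are assumptions, and the model is assumed to take Flux-style WHCN arrays):

using Metalhead

# Build a narrower MobileNetv1 that accepts single-channel images.
model = MobileNetv1(0.75; inchannels = 1, nclasses = 10)

# Forward pass on a dummy 224×224 grayscale batch of one image.
y = model(rand(Float32, 224, 224, 1, 1))  # expected size: (10, 1)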
@@ -102,7 +103,7 @@ classifier(m::MobileNetv1) = m.layers[2]
# MobileNetv2

"""
-    mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000)
+    mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000)

Create a MobileNetv2 model.
([reference](https://arxiv.org/abs/1801.04381)).
@@ -119,14 +120,15 @@ Create a MobileNetv2 model.
  + `n`: The number of times a block is repeated
  + `s`: The stride of the convolutional kernel
  + `a`: The activation function used in the bottleneck layer
+- `inchannels`: The number of input channels. The default value is 3.
- `max_width`: The maximum number of feature maps in any layer of the network
- `nclasses`: The number of output classes
"""
-function mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000)
+function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000)
    # building first layer
    inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8)
    layers = []
-    append!(layers, conv_bn((3, 3), 3, inplanes; stride = 2))
+    append!(layers, conv_bn((3, 3), inchannels, inplanes; pad = 1, stride = 2))
    # building inverted residual blocks
    for (t, c, n, s, a) in configs
        outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8)
@@ -165,7 +167,7 @@ struct MobileNetv2
end

"""
-    MobileNetv2(width_mult = 1.0; pretrain = false, nclasses = 1000)
+    MobileNetv2(width_mult = 1.0; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv2 model with the specified configuration.
([reference](https://arxiv.org/abs/1801.04381)).
@@ -176,13 +178,15 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet.
- `width_mult`: Controls the number of output feature maps in each block
  (with 1.0 being the default in the paper;
  this is usually a value between 0.1 and 1.4)
+- `inchannels`: The number of input channels. The default value is 3.
- `pretrain`: Whether to load the pre-trained weights for ImageNet
- `nclasses`: The number of output classes

See also [`Metalhead.mobilenetv2`](#).
"""
-function MobileNetv2(width_mult::Number = 1; pretrain = false, nclasses = 1000)
-    layers = mobilenetv2(width_mult, mobilenetv2_configs; nclasses = nclasses)
+function MobileNetv2(width_mult::Number = 1; inchannels = 3, pretrain = false,
+                     nclasses = 1000)
+    layers = mobilenetv2(width_mult, mobilenetv2_configs; inchannels, nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv2"))
    return MobileNetv2(layers)
end
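Analogous sketch for the v2 constructor (again illustrative; the four-channel input and class count are arbitrary choices, not taken from the diff):

using Metalhead

# Four-channel input (e.g. RGB plus an extra band), 100 output classes.
model = MobileNetv2(1.0; inchannels = 4, nclasses = 100)
y = model(rand(Float32, 224, 224, 4, 1))  # expected size: (100, 1)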
@@ -197,7 +201,7 @@ classifier(m::MobileNetv2) = m.layers[2]
# MobileNetv3

"""
-    mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000)
+    mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000)

Create a MobileNetv3 model.
([reference](https://arxiv.org/abs/1905.02244)).
@@ -216,14 +220,17 @@ Create a MobileNetv3 model.
  + `r::Integer` - The reduction factor (`>= 1` or `nothing` to skip) for squeeze and excite layers
  + `s::Integer` - The stride of the convolutional kernel
  + `a` - The activation function used in the bottleneck (typically `hardswish` or `relu`)
+- `inchannels`: The number of input channels. The default value is 3.
- `max_width`: The maximum number of feature maps in any layer of the network
- `nclasses`: the number of output classes
"""
-function mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000)
+function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000)
    # building first layer
    inplanes = _round_channels(16 * width_mult, 8)
    layers = []
-    append!(layers, conv_bn((3, 3), 3, inplanes, hardswish; stride = 2))
+    append!(layers,
+            conv_bn((3, 3), inchannels, inplanes, hardswish; pad = 1, stride = 2,
+                    bias = false))
    explanes = 0
    # building inverted residual blocks
    for (k, t, c, r, a, s) in configs

# Configurations for small and large mode for MobileNetv3
mobilenetv3_configs = Dict(:small => [
-    # k, t, c, SE, a, s
+    # k, t, c, SE, a, s
    (3, 1, 16, 4, relu, 2),
    (3, 4.5, 24, nothing, relu, 2),
    (3, 3.67, 24, nothing, relu, 1),
@@ -263,7 +270,7 @@ mobilenetv3_configs = Dict(:small => [
    (5, 6, 96, 4, hardswish, 1),
],
:large => [
-    # k, t, c, SE, a, s
+    # k, t, c, SE, a, s
    (3, 1, 16, nothing, relu, 1),
    (3, 4, 24, nothing, relu, 2),
    (3, 3, 24, nothing, relu, 1),
@@ -287,7 +294,7 @@ struct MobileNetv3
end

"""
-    MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain = false, nclasses = 1000)
+    MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, pretrain = false, nclasses = 1000)

Create a MobileNetv3 model with the specified configuration.
([reference](https://arxiv.org/abs/1905.02244)).
@@ -299,17 +306,18 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet.
- `width_mult`: Controls the number of output feature maps in each block
  (with 1.0 being the default in the paper;
  this is usually a value between 0.1 and 1.4)
+- `inchannels`: The number of channels in the input. The default value is 3.
- `pretrain`: whether to load the pre-trained weights for ImageNet
- `nclasses`: the number of output classes

See also [`Metalhead.mobilenetv3`](#).
"""
-function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain = false,
-                     nclasses = 1000)
+function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3,
+                     pretrain = false, nclasses = 1000)
    @assert mode in [:large, :small] "`mode` has to be either :large or :small"
    max_width = (mode == :large) ? 1280 : 1024
-    layers = mobilenetv3(width_mult, mobilenetv3_configs[mode]; max_width = max_width,
-                         nclasses = nclasses)
+    layers = mobilenetv3(width_mult, mobilenetv3_configs[mode]; inchannels, max_width,
+                         nclasses)
    pretrain && loadpretrain!(layers, string("MobileNetv3", mode))
    return MobileNetv3(layers)
end
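Sketch for the v3 constructor, which additionally takes the positional `mode` argument (`:small` or `:large`, per the `@assert` above); the input size and class count are illustrative:

using Metalhead

model = MobileNetv3(:large, 1.0; inchannels = 3, nclasses = 1000)
y = model(rand(Float32, 224, 224, 3, 1))  # expected size: (1000, 1)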