
Commit 43225d1 (parent: f1d5f8a)

Unify drop connect vs drop path under 'drop path' name, switch all EfficientNet/MobilenetV3 refs to 'drop_path'. Update factory to handle new drop args.
6 files changed: +74 −67 lines
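The rename is clarifying rather than functional: what these blocks apply is stochastic depth (zeroing entire residual paths per sample), which the EfficientNet code called "drop connect"; 'drop path' is the more common name and avoids confusion with the unrelated DropConnect weight regularizer. A minimal usage sketch of the renamed argument, assuming the updated factory (changed in another file of this commit) forwards drop_path_rate the way it previously forwarded drop_connect_rate:

    import timm

    # drop_rate is classifier dropout; drop_path_rate is per-block stochastic depth
    # (formerly drop_connect_rate). Values follow the NOTEs in efficientnet.py below.
    model = timm.create_model('efficientnet_b0', drop_rate=0.2, drop_path_rate=0.2)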

timm/models/efficientnet.py
22 additions, 22 deletions
@@ -253,7 +253,7 @@ class EfficientNet(nn.Module):
 
     def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32,
                  channel_multiplier=1.0, channel_divisor=8, channel_min=None,
-                 output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
+                 output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0.,
                  se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'):
         super(EfficientNet, self).__init__()
         norm_kwargs = norm_kwargs or {}
@@ -273,7 +273,7 @@ def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3,
         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
             channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
-            norm_layer, norm_kwargs, drop_connect_rate, verbose=_DEBUG)
+            norm_layer, norm_kwargs, drop_path_rate, verbose=_DEBUG)
         self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
         self.feature_info = builder.features
         self._in_chs = builder.in_chs
@@ -333,7 +333,7 @@ class EfficientNetFeatures(nn.Module):
 
     def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl',
                  in_chans=3, stem_size=32, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
-                 output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
+                 output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0.,
                  se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None):
         super(EfficientNetFeatures, self).__init__()
         norm_kwargs = norm_kwargs or {}
@@ -355,7 +355,7 @@ def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pr
         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
             channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
-            norm_layer, norm_kwargs, drop_connect_rate, feature_location=feature_location, verbose=_DEBUG)
+            norm_layer, norm_kwargs, drop_path_rate, feature_location=feature_location, verbose=_DEBUG)
         self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
         self.feature_info = builder.features  # builder provides info about feature channels for each block
         self._in_chs = builder.in_chs
@@ -875,7 +875,7 @@ def spnasnet_100(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b0(pretrained=False, **kwargs):
     """ EfficientNet-B0 """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
     return model
@@ -884,7 +884,7 @@ def efficientnet_b0(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b1(pretrained=False, **kwargs):
     """ EfficientNet-B1 """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
     return model
@@ -893,7 +893,7 @@ def efficientnet_b1(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b2(pretrained=False, **kwargs):
     """ EfficientNet-B2 """
-    # NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
     return model
@@ -902,7 +902,7 @@ def efficientnet_b2(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b2a(pretrained=False, **kwargs):
     """ EfficientNet-B2 @ 288x288 w/ 1.0 test crop"""
-    # NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b2a', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
     return model
@@ -911,7 +911,7 @@ def efficientnet_b2a(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b3(pretrained=False, **kwargs):
     """ EfficientNet-B3 """
-    # NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
     return model
@@ -920,7 +920,7 @@ def efficientnet_b3(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b3a(pretrained=False, **kwargs):
     """ EfficientNet-B3 @ 320x320 w/ 1.0 test crop-pct """
-    # NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b3a', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
     return model
@@ -929,7 +929,7 @@ def efficientnet_b3a(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b4(pretrained=False, **kwargs):
     """ EfficientNet-B4 """
-    # NOTE for train, drop_rate should be 0.4, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.4, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
     return model
@@ -938,7 +938,7 @@ def efficientnet_b4(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b5(pretrained=False, **kwargs):
     """ EfficientNet-B5 """
-    # NOTE for train, drop_rate should be 0.4, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.4, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
     return model
@@ -947,7 +947,7 @@ def efficientnet_b5(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b6(pretrained=False, **kwargs):
     """ EfficientNet-B6 """
-    # NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
     return model
@@ -956,7 +956,7 @@ def efficientnet_b6(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b7(pretrained=False, **kwargs):
     """ EfficientNet-B7 """
-    # NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
     return model
@@ -965,7 +965,7 @@ def efficientnet_b7(pretrained=False, **kwargs):
 @register_model
 def efficientnet_b8(pretrained=False, **kwargs):
     """ EfficientNet-B8 """
-    # NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs)
     return model
@@ -974,7 +974,7 @@ def efficientnet_b8(pretrained=False, **kwargs):
 @register_model
 def efficientnet_l2(pretrained=False, **kwargs):
     """ EfficientNet-L2."""
-    # NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
     model = _gen_efficientnet(
         'efficientnet_l2', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs)
     return model
@@ -1007,7 +1007,7 @@ def efficientnet_el(pretrained=False, **kwargs):
 @register_model
 def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B0 w/ 8 Experts """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     model = _gen_efficientnet_condconv(
         'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
     return model
@@ -1016,7 +1016,7 @@ def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
 @register_model
 def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B0 w/ 8 Experts """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     model = _gen_efficientnet_condconv(
         'efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2,
         pretrained=pretrained, **kwargs)
@@ -1025,7 +1025,7 @@ def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
 @register_model
 def efficientnet_cc_b1_8e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B1 w/ 8 Experts """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     model = _gen_efficientnet_condconv(
         'efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2,
         pretrained=pretrained, **kwargs)
@@ -1355,7 +1355,7 @@ def tf_efficientnet_el(pretrained=False, **kwargs):
 @register_model
 def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B0 w/ 4 Experts. Tensorflow compatible variant """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
     kwargs['pad_type'] = 'same'
     model = _gen_efficientnet_condconv(
@@ -1366,7 +1366,7 @@ def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
 @register_model
 def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B0 w/ 8 Experts. Tensorflow compatible variant """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
     kwargs['pad_type'] = 'same'
     model = _gen_efficientnet_condconv(
@@ -1377,7 +1377,7 @@ def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
 @register_model
 def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs):
     """ EfficientNet-CondConv-B1 w/ 8 Experts. Tensorflow compatible variant """
-    # NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2
+    # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
     kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
     kwargs['pad_type'] = 'same'
     model = _gen_efficientnet_condconv(
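Note the pattern in the NOTE comments above: drop_rate grows with model size (0.2 for B0/B1 up to 0.5 for B6 and beyond) while drop_path_rate stays at 0.2 across variants, matching the original training recipes. Below is a quick sanity-check sketch that drop path preserves activations in expectation (survivors are rescaled by 1/(1 − p)); the import works because efficientnet_blocks.py now pulls drop_path from .layers, as shown in the next file:

    import torch
    from timm.models.layers import drop_path  # exported from timm/models/layers as of this commit

    x = torch.ones(10000, 8, 1, 1)
    out = drop_path(x, 0.2, True)   # arguments: input, drop probability, training flag
    print(out.mean().item())        # ~1.0: roughly 20% of samples zeroed, the rest scaled by 1/0.8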

timm/models/efficientnet_blocks.py
17 additions, 30 deletions
@@ -2,7 +2,7 @@
 import torch.nn as nn
 from torch.nn import functional as F
 from .layers.activations import sigmoid
-from .layers import create_conv2d
+from .layers import create_conv2d, drop_path
 
 
 # Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per
@@ -69,19 +69,6 @@ def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None):
     return make_divisible(channels, divisor, channel_min)
 
 
-def drop_connect(inputs, training: bool = False, drop_connect_rate: float = 0.):
-    """Apply drop connect."""
-    if not training:
-        return inputs
-
-    keep_prob = 1 - drop_connect_rate
-    random_tensor = keep_prob + torch.rand(
-        (inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device)
-    random_tensor.floor_()  # binarize
-    output = inputs.div(keep_prob) * random_tensor
-    return output
-
-
 class ChannelShuffle(nn.Module):
     # FIXME haven't used yet
     def __init__(self, groups):
@@ -154,13 +141,13 @@ class DepthwiseSeparableConv(nn.Module):
     def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
                  pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None,
-                 norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
+                 norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_path_rate=0.):
         super(DepthwiseSeparableConv, self).__init__()
         norm_kwargs = norm_kwargs or {}
        has_se = se_ratio is not None and se_ratio > 0.
         self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip
         self.has_pw_act = pw_act  # activation after point-wise conv
-        self.drop_connect_rate = drop_connect_rate
+        self.drop_path_rate = drop_path_rate
 
         self.conv_dw = create_conv2d(
             in_chs, in_chs, dw_kernel_size, stride=stride, dilation=dilation, padding=pad_type, depthwise=True)
@@ -200,8 +187,8 @@ def forward(self, x):
         x = self.act2(x)
 
         if self.has_residual:
-            if self.drop_connect_rate > 0.:
-                x = drop_connect(x, self.training, self.drop_connect_rate)
+            if self.drop_path_rate > 0.:
+                x = drop_path(x, self.drop_path_rate, self.training)
             x += residual
         return x
 
@@ -213,14 +200,14 @@ def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
                  exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
                  se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
-                 conv_kwargs=None, drop_connect_rate=0.):
+                 conv_kwargs=None, drop_path_rate=0.):
         super(InvertedResidual, self).__init__()
         norm_kwargs = norm_kwargs or {}
         conv_kwargs = conv_kwargs or {}
         mid_chs = make_divisible(in_chs * exp_ratio)
         has_se = se_ratio is not None and se_ratio > 0.
         self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
-        self.drop_connect_rate = drop_connect_rate
+        self.drop_path_rate = drop_path_rate
 
         # Point-wise expansion
         self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
@@ -278,8 +265,8 @@ def forward(self, x):
         x = self.bn3(x)
 
         if self.has_residual:
-            if self.drop_connect_rate > 0.:
-                x = drop_connect(x, self.training, self.drop_connect_rate)
+            if self.drop_path_rate > 0.:
+                x = drop_path(x, self.drop_path_rate, self.training)
             x += residual
 
         return x
@@ -292,7 +279,7 @@ def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
                  exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
                  se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
-                 num_experts=0, drop_connect_rate=0.):
+                 num_experts=0, drop_path_rate=0.):
 
         self.num_experts = num_experts
         conv_kwargs = dict(num_experts=self.num_experts)
@@ -302,7 +289,7 @@ def __init__(self, in_chs, out_chs, dw_kernel_size=3,
             act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size,
             pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs,
             norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs,
-            drop_connect_rate=drop_connect_rate)
+            drop_path_rate=drop_path_rate)
 
         self.routing_fn = nn.Linear(in_chs, self.num_experts)
 
@@ -332,8 +319,8 @@ def forward(self, x):
         x = self.bn3(x)
 
         if self.has_residual:
-            if self.drop_connect_rate > 0.:
-                x = drop_connect(x, self.training, self.drop_connect_rate)
+            if self.drop_path_rate > 0.:
+                x = drop_path(x, self.drop_path_rate, self.training)
             x += residual
         return x
 
@@ -344,7 +331,7 @@ class EdgeResidual(nn.Module):
     def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
                  stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
                  se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
-                 drop_connect_rate=0.):
+                 drop_path_rate=0.):
         super(EdgeResidual, self).__init__()
         norm_kwargs = norm_kwargs or {}
         if fake_in_chs > 0:
@@ -353,7 +340,7 @@ def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_ch
         mid_chs = make_divisible(in_chs * exp_ratio)
         has_se = se_ratio is not None and se_ratio > 0.
         self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
-        self.drop_connect_rate = drop_connect_rate
+        self.drop_path_rate = drop_path_rate
 
         # Expansion convolution
         self.conv_exp = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type)
@@ -400,8 +387,8 @@ def forward(self, x):
         x = self.bn2(x)
 
         if self.has_residual:
-            if self.drop_connect_rate > 0.:
-                x = drop_connect(x, self.training, self.drop_connect_rate)
+            if self.drop_path_rate > 0.:
+                x = drop_path(x, self.drop_path_rate, self.training)
             x += residual
 
         return x
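The removed drop_connect above maps onto the shared drop_path helper: same math, new name, and a reordered signature matching the new call sites, drop_path(x, drop_prob, training). For reference, a functionally equivalent sketch; the canonical implementation lives in timm/models/layers and may differ in minor details:

    import torch

    def drop_path(x, drop_prob: float = 0., training: bool = False):
        """Zero whole residual paths per sample (stochastic depth)."""
        if drop_prob == 0. or not training:
            return x
        keep_prob = 1 - drop_prob
        # one Bernoulli draw per sample, broadcast over C, H, W
        random_tensor = keep_prob + torch.rand((x.shape[0], 1, 1, 1), dtype=x.dtype, device=x.device)
        random_tensor.floor_()  # binarize to 0/1
        return x.div(keep_prob) * random_tensor  # rescale survivors to preserve expectation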
