PyTorch implementations of DropBlock and DropPath (Stochastic Depth) regularization layers.

+Papers:
+DropBlock: A regularization method for convolutional networks (https://arxiv.org/abs/1810.12890)
+
+Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382)
+
+Code:
+DropBlock impl inspired by two Tensorflow impl that I liked:
+ - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74
+ - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py
+
Hacked together by Ross Wightman
"""
import torch
import torch.nn as nn
import math


-def drop_block_2d(x, drop_prob=0.1, block_size=7, gamma_scale=1.0, drop_with_noise=False):
+def drop_block_2d(x, drop_prob=0.1, training=False, block_size=7, gamma_scale=1.0, drop_with_noise=False):
    """ DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
+
+    DropBlock with an experimental gaussian noise option. This layer has been tested on a few training
+    runs with success, but needs further validation and possibly optimization for lower runtime impact.
+
    """
+    if drop_prob == 0. or not training:
+        return x
    _, _, height, width = x.shape
    total_size = width * height
    clipped_block_size = min(block_size, min(width, height))
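The rest of this hunk (not shown in the diff) turns drop_prob into a per-position seed probability gamma. As a reference for what gamma_scale, block_size and the sizes computed above feed into, here is a minimal sketch of the gamma expression from the DropBlock paper; the helper name is hypothetical and this is not necessarily the exact code in the file:

# Hypothetical helper, shown for illustration only (not taken from this file):
# the per-position seed probability gamma from the DropBlock paper, expressed
# with the variables computed above. Each position becomes a block seed with
# probability gamma so that roughly drop_prob of all activations fall inside a
# dropped block.
def _drop_block_gamma(drop_prob, gamma_scale, total_size, clipped_block_size, width, height):
    # region where a full block can be placed without running off the feature map
    valid_region = (width - clipped_block_size + 1) * (height - clipped_block_size + 1)
    return gamma_scale * drop_prob * total_size / (clipped_block_size ** 2 * valid_region)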
@@ -60,14 +76,21 @@ def __init__(self,
        self.with_noise = with_noise

    def forward(self, x):
-        if not self.training or not self.drop_prob:
-            return x
-        return drop_block_2d(x, self.drop_prob, self.block_size, self.gamma_scale, self.with_noise)
+        return drop_block_2d(x, self.drop_prob, self.training, self.block_size, self.gamma_scale, self.with_noise)
+
+
+def drop_path(x, drop_prob=0., training=False):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.

-def drop_path(x, drop_prob=0.):
-    """Drop paths (Stochastic Depth) per sample (when applied in residual blocks).
    """
+    if drop_prob == 0. or not training:
+        return x
    keep_prob = 1 - drop_prob
    random_tensor = keep_prob + torch.rand((x.size()[0], 1, 1, 1), dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
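The tail of drop_path falls outside this hunk, but the idea is simple enough to sketch end to end: one Bernoulli keep/drop decision per sample, with kept samples rescaled by 1/keep_prob so the expected activation is unchanged. A self-contained illustration of that behavior (an assumption about the intent, not the exact remaining lines of the diff):

# Illustrative sketch of per-sample stochastic depth; assumes x is (N, C, H, W).
def drop_path_sketch(x, drop_prob=0., training=False):
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # one keep/drop decision per sample, broadcast across C, H, W
    mask = torch.empty((x.shape[0], 1, 1, 1), dtype=x.dtype, device=x.device).bernoulli_(keep_prob)
    # rescale kept samples so the expected output matches the input
    return x / keep_prob * mask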
@@ -76,13 +99,11 @@ def drop_path(x, drop_prob=0.):


class DropPath(nn.ModuleDict):
-    """Drop paths (Stochastic Depth) per sample (when applied in residual blocks).
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
-        if not self.training or not self.drop_prob:
-            return x
-        return drop_path(x, self.drop_prob)
+        return drop_path(x, self.drop_prob, self.training)
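As the updated docstring says, DropPath belongs in the main path of a residual block, so the skip connection always survives. A hedged usage sketch; the block class below is hypothetical and not part of this file:

# Hypothetical residual block showing where DropPath sits; not part of this file.
class ToyResidualBlock(nn.Module):
    def __init__(self, dim, drop_prob=0.1):
        super(ToyResidualBlock, self).__init__()
        self.conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1)
        self.drop_path = DropPath(drop_prob)

    def forward(self, x):
        # during training, whole samples randomly skip the conv branch;
        # during eval, drop_path is an identity so the full block is always applied
        return x + self.drop_path(self.conv(x))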