@@ -10,7 +10,7 @@ _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(s
 _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1/q) : T(0)
 
 """
-    dropout(x, p; dims=:, active=true)
+    dropout([rng = rng_from_array(x)], x, p; dims=:, active=true)
 
 The dropout function. If `active` is `true`,
 for each input, either sets that input to `0` (with probability
@@ -20,6 +20,9 @@ This is used as a regularisation, i.e. it reduces overfitting during training.
 
 If `active` is `false`, it just returns the input `x`.
 
+Specify `rng` for custom RNGs instead of the default RNG.
+Note that custom RNGs are only supported on the CPU.
+
 Warning: when using this function, you have to manually manage the activation
 state. Usually in fact, dropout is used while training
 but is deactivated in the inference phase. This can be
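As a usage sketch (not part of this diff): calling the functional form with the new leading `rng` argument. The array size, the seed, and the choice of `MersenneTwister` below are illustrative assumptions; any `AbstractRNG` should work on the CPU.

    using Flux, Random

    x = randn(Float32, 4, 8)
    rng = MersenneTwister(0)                  # custom CPU RNG (assumed choice)

    y  = Flux.dropout(rng, x, 0.5)            # explicit RNG, dropout applied
    y2 = Flux.dropout(x, 0.5)                 # falls back to rng_from_array(x)
    y3 = Flux.dropout(rng, x, 0.5; dims=1)    # mask varies along dim 1, shared along dim 2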
@@ -28,49 +31,63 @@ automatically managed using the [`Dropout`](@ref) layer instead of the
 
 The [`Dropout`](@ref) layer is what you should use in most scenarios.
 """
-function dropout(x, p; dims=:, active::Bool=true)
+function dropout(rng, x, p; dims=:, active::Bool=true)
   active || return x
-  y = dropout_mask(x, p, dims=dims)
+  y = dropout_mask(rng, x, p, dims=dims)
   return x .* y
 end
+dropout(x, p; kwargs...) = dropout(rng_from_array(x), x, p; kwargs...)
 
-@adjoint function dropout(x, p; dims=:, active::Bool=true)
+@adjoint function dropout(rng, x, p; dims=:, active::Bool=true)
   active || return x, Δ -> (Δ, nothing)
-  y = dropout_mask(x, p, dims=dims)
-  return x .* y, Δ -> (Δ .* y, nothing)
+  y = dropout_mask(rng, x, p, dims=dims)
+  return x .* y, Δ -> (nothing, Δ .* y, nothing)
 end
 
-function dropout_mask(x, p; dims=:)
-  y = rand!(similar(x, _dropout_shape(x, dims)))
+dropout_mask(rng::CUDA.RNG, x::CuArray, p; kwargs...) = _dropout_mask(rng, x, p; kwargs...)
+dropout_mask(rng, x::CuArray, p; kwargs...) =
+  throw(ArgumentError("x isa CuArray, but rng isa $(typeof(rng)). dropout_mask only support CUDA.RNG for CuArrays."))
+dropout_mask(rng, x, p; kwargs...) = _dropout_mask(rng, x, p; kwargs...)
+function _dropout_mask(rng, x, p; dims=:)
+  y = rand!(rng, similar(x, _dropout_shape(x, dims)))
   y .= _dropout_kernel.(y, p, 1 - p)
   return y
 end
 
 """
-    Dropout(p; dims=:)
+    Dropout(p; dims=:, rng = rng_from_array())
 
 Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input.
 
 To apply dropout along certain dimension(s), specify the `dims` keyword.
 e.g. `Dropout(p; dims = 3)` will randomly zero out entire channels on WHCN input
 (also called 2D dropout).
 
+Specify `rng` to use a custom RNG instead of the default.
+Custom RNGs are only supported on the CPU.
+
 Does nothing to the input once [`Flux.testmode!`](@ref) is `true`.
 """
-mutable struct Dropout{F,D}
+mutable struct Dropout{F,D,R<:AbstractRNG}
   p::F
   dims::D
   active::Union{Bool, Nothing}
+  rng::R
 end
+Dropout(p, dims, active) = Dropout(p, dims, active, rng_from_array())
 
-function Dropout(p; dims=:)
+function Dropout(p; dims=:, rng = rng_from_array())
   @assert 0 ≤ p ≤ 1
-  Dropout(p, dims, nothing)
+  Dropout(p, dims, nothing, rng)
 end
 
+@functor Dropout
+
+trainable(a::Dropout) = ()
+
 function (a::Dropout)(x)
   _isactive(a) || return x
-  return dropout(x, a.p; dims=a.dims, active=true)
+  return dropout(a.rng, x, a.p; dims=a.dims, active=true)
 end
 
 testmode!(m::Dropout, mode=true) =
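A similar sketch for the `Dropout` layer's new `rng` keyword (illustrative only; the layer sizes and seed are assumptions, and another `AbstractRNG` would work equally well on the CPU):

    using Flux
    using Random: MersenneTwister

    m = Chain(Dense(10, 5, relu), Dropout(0.5; rng = MersenneTwister(123)))
    x = randn(Float32, 10, 8)

    Flux.trainmode!(m)      # activate dropout; the mask is drawn from the layer's rng
    y_train = m(x)

    Flux.testmode!(m)       # Dropout becomes a no-op
    y_test = m(x)

The `@functor Dropout` plus `trainable(a::Dropout) = ()` pair appears intended to let structural traversals (such as `gpu`) see the new `rng` field while keeping the layer out of the trainable parameters. Note also that, per the `dropout_mask` dispatch above, a `CuArray` input must be paired with a `CUDA.RNG`; any other custom RNG hits the `ArgumentError` added in this diff.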
@@ -83,7 +100,7 @@ function Base.show(io::IO, d::Dropout)
 end
 
 """
-    AlphaDropout(p)
+    AlphaDropout(p; rng = rng_from_array())
 
 A dropout layer. Used in
 [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515).
@@ -92,14 +109,21 @@ remain the same as before.
 
 Does nothing to the input once [`testmode!`](@ref) is true.
 """
-mutable struct AlphaDropout{F}
+mutable struct AlphaDropout{F,R<:AbstractRNG}
   p::F
   active::Union{Bool, Nothing}
-  function AlphaDropout(p, active = nothing)
+  rng::R
+  function AlphaDropout(p, active, rng)
     @assert 0 ≤ p ≤ 1
-    new{typeof(p)}(p, active)
+    new{typeof(p), typeof(rng)}(p, active, rng)
   end
 end
+AlphaDropout(p, active) = AlphaDropout(p, active, rng_from_array())
+AlphaDropout(p; rng = rng_from_array()) = AlphaDropout(p, nothing, rng)
+
+@functor AlphaDropout
+
+trainable(a::AlphaDropout) = ()
 
 function (a::AlphaDropout)(x::AbstractArray{T}) where T
   _isactive(a) || return x
@@ -111,7 +135,7 @@ function (a::AlphaDropout)(x::AbstractArray{T}) where T
   A = T(inv(sqrt((1 - p) * (1 + p * α′^2))))
   B = T(-A * α′ * p)
 
-  noise = rand!(similar(x))
+  noise = rand!(a.rng, similar(x))
   return A .* ifelse.(noise .> p, x, α′) .+ B
 end
 
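Finally, a sketch of `AlphaDropout` with an explicit RNG (the seed and sizes are again assumptions, not taken from this diff):

    using Flux
    using Random: MersenneTwister

    ad = AlphaDropout(0.2; rng = MersenneTwister(1))
    x  = randn(Float32, 100, 16)

    Flux.trainmode!(ad)
    y = ad(x)               # mean and variance stay approximately the same as the input's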