@functor Dense
function (a::Dense)(x::AbstractVecOrMat)
- W, b = a.weight, a.bias
σ = NNlib.fast_act(a.σ, x)  # replaces tanh => tanh_fast, etc
- return σ.(W * x .+ b)
+ return σ.(a.weight * x .+ a.bias)
end
(a::Dense)(x::AbstractArray) =
  reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...)
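# Editorial usage sketch (not part of the diff): exercising the rewritten
# Dense forward pass above. Layer sizes and data here are illustrative assumptions.
using Flux
d = Dense(3, 2, tanh)      # 2×3 weight, length-2 bias, tanh activation
x = rand(Float32, 3, 5)    # batch of five length-3 column vectors
y = d(x)                   # dispatches to (a::Dense)(x::AbstractVecOrMat)
# fast_act swaps tanh for NNlib.tanh_fast here, so y ≈ tanh.(d.weight * x .+ d.bias)
size(y) == (2, 5)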
@@ -172,35 +171,37 @@ function Base.show(io::IO, l::Dense)
end
"""
-    Diagonal(size::Integer...; bias=true, init=ones32)
-    Diagonal(scale::AbstractArray, [bias])
+    Diagonal(size::Integer...; σ = identity, bias=true, init=ones32)
+    Diagonal(scale::AbstractArray, [bias, activation])

Create an element-wise linear layer, which performs

-    y = scale .* x .+ bias
+    y = σ.(scale .* x .+ bias)

- with no activation function.
-
The learnable scale & bias are initialised `init(size...)` and `zeros32(size...)`,
with `init=ones32` by default. You may specify the function `init`,
turn off trainable bias with `bias=false`, or provide the array(s) explicitly.
Used by [`LayerNorm`](@ref).
"""
- struct Diagonal{A<:AbstractArray, B}
+ struct Diagonal{A<:AbstractArray, B, F}
scale::A
bias::B
- function Diagonal(W::M, bias = true) where M<:AbstractArray
+ σ::F
+ function Diagonal(W::M, bias = true, σ::F = identity) where {M<:AbstractArray, F}
b = create_bias(W, bias, size(W)...)
- new{M, typeof(b)}(W, b)
+ new{M, typeof(b), F}(W, b, σ)
end
end
- Diagonal(sz::Integer...; bias = true, init = ones32) = Diagonal(init(sz...), bias)
+ Diagonal(sz::Integer...; σ = identity, bias = true, init = ones32) = Diagonal(init(sz...), bias, σ)
@functor Diagonal
- (a::Diagonal)(x) = a.scale .* x .+ a.bias
+ function (a::Diagonal)(x::AbstractArray)
+ σ = NNlib.fast_act(a.σ, x)  # replaces tanh => tanh_fast, etc
+ return σ === identity ? a.scale .* x .+ a.bias : σ.(a.scale .* x .+ a.bias)
+ end
function Base.show(io::IO, l::Diagonal)
206
207
print(io, "Diagonal(", join(size(l.scale), ", "))
end

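# Editorial usage sketch (not part of the diff): the Diagonal layer with the
# new activation argument; constructors and values here are illustrative.
using Flux
d1 = Flux.Diagonal(4)                             # scale = ones32(4), zero bias, σ = identity
d2 = Flux.Diagonal(rand(Float32, 4), true, relu)  # explicit scale, trainable bias, relu
x = rand(Float32, 4)
d1(x) == x                                        # identity path: 1 .* x .+ 0
d2(x) == relu.(d2.scale .* x .+ d2.bias)          # activated elementwise affine map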
"""
    Maxout(layers...)
    Maxout(f, n_alts)

- This contains a number of internal layes, each of which receives the same input.
+ This contains a number of internal layers, each of which receives the same input.
Its output is the elementwise maximum of the internal layers' outputs.
Instead of defining layers individually, you can provide a zero-argument function
which constructs them, and the number of alternatives `n_alts`.
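# Editorial usage sketch (not part of the diff): the two Maxout constructors
# named in the docstring; layer sizes are illustrative.
using Flux
m1 = Maxout(Dense(3, 2), Dense(3, 2))    # explicit internal layers
m2 = Maxout(() -> Dense(3, 2, relu), 4)  # n_alts = 4 layers built from a thunk
x = rand(Float32, 3)
size(m2(x)) == (2,)                      # elementwise max of the four outputs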