@@ -138,13 +138,13 @@ julia> Flux.params(d1) # no trainable bias
Params([[1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0]])
```
"""
- struct Dense{F, M<:AbstractMatrix, B}
+ struct Dense{M<:AbstractMatrix, B, F}
  weight::M
  bias::B
  σ::F
  function Dense(W::M, bias = true, σ::F = identity) where {M<:AbstractMatrix, F}
    b = create_bias(W, bias, size(W,1))
-     new{F,M, typeof(b)}(W, b, σ)
+     new{M, typeof(b), F}(W, b, σ)
  end
end
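Not part of the diff — a minimal usage sketch of the struct defined above, assuming a Flux build that carries this change. It only illustrates how the reordered type parameters `{M, B, F}` surface in the concrete layer type; the weight values are arbitrary:

```julia
using Flux

W = Float32[1 2 3; 4 5 6]   # arbitrary 2×3 weight matrix
d = Dense(W, true, tanh)    # positional inner constructor: weight, bias, σ

# With the new parameter order the concrete type reads roughly
# Dense{Matrix{Float32}, Vector{Float32}, typeof(tanh)},
# i.e. weight type, then bias type, then activation type.
typeof(d)
```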

@@ -158,7 +158,7 @@
function (a::Dense)(x::AbstractVecOrMat)
  W, b = a.weight, a.bias
  σ = NNlib.fast_act(a.σ, x)  # replaces tanh => tanh_fast, etc
-   return σ.(W*x .+ b)
+   return σ.(W * x .+ b)
end

(a::Dense)(x::AbstractArray) =
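Again outside the diff, a short sketch of what the vector/matrix method above computes: an affine map `W*x .+ b` followed by a broadcast of the activation, with `NNlib.fast_act` only swapping in a faster variant such as `tanh_fast`. Sizes and values are arbitrary:

```julia
using Flux

d = Dense(rand(Float32, 4, 3), true, tanh)
x = rand(Float32, 3, 16)           # 3 features, a batch of 16 columns

y = d(x)                           # hits (a::Dense)(x::AbstractVecOrMat)
size(y)                            # (4, 16)

# The same computation written out by hand; ≈ because tanh_fast
# may differ from tanh in the last bits.
y ≈ tanh.(d.weight * x .+ d.bias)
```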
@@ -172,35 +172,37 @@ function Base.show(io::IO, l::Dense)
end

"""
-     Diagonal(size::Integer...; bias=true, init=ones32)
-     Diagonal(scale::AbstractArray, [bias])
+     Diagonal(size::Integer...; σ = identity, bias=true, init=ones32)
+     Diagonal(scale::AbstractArray, [bias, activation])

Create an element-wise linear layer, which performs

-     y = scale .* x .+ bias
+     y = σ.(scale .* x .+ bias)

- with no activation function.
-
The learnable scale & bias are initialised `init(size...)` and `zeros32(size...)`,
with `init=ones32` by default. You may specify the function `init`,
turn off trainable bias with `bias=false`, or provide the array(s) explicitly.
Used by [`LayerNorm`](@ref).
"""
- struct Diagonal{A<:AbstractArray, B}
+ struct Diagonal{A<:AbstractArray, B, F}
  scale::A
  bias::B
-   function Diagonal(W::M, bias = true) where M<:AbstractArray
+   σ::F
+   function Diagonal(W::M, bias = true, σ::F = identity) where {M<:AbstractArray, F}
    b = create_bias(W, bias, size(W)...)
-     new{M, typeof(b)}(W, b)
+     new{M, typeof(b), F}(W, b, σ)
  end
end

- Diagonal(sz::Integer...; bias = true, init = ones32) = Diagonal(init(sz...), bias)
+ Diagonal(sz::Integer...; σ = identity, bias = true, init = ones32) = Diagonal(init(sz...), bias, σ)
@functor Diagonal

- (a::Diagonal)(x) = a.scale .* x .+ a.bias
+ function (a::Diagonal)(x::AbstractArray)
+   σ = NNlib.fast_act(a.σ, x)  # replaces tanh => tanh_fast, etc
+   return σ === identity ? a.scale .* x .+ a.bias : σ.(a.scale .* x .+ a.bias)
+ end

function Base.show(io::IO, l::Diagonal)
  print(io, "Diagonal(", join(size(l.scale), ", "))
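Not from the commit itself — a small sketch of the layer after this change, to make `y = σ.(scale .* x .+ bias)` and the new activation argument concrete. `abs2` is an arbitrary choice of activation, and `Flux.Diagonal` is written qualified only to avoid the clash with `LinearAlgebra.Diagonal`:

```julia
using Flux

d = Flux.Diagonal(3; σ = abs2)             # scale = ones32(3), trainable zero bias
x = Float32[1, -2, 3]
d(x) == abs2.(d.scale .* x .+ d.bias)      # true: elementwise scale, shift, activate

# Explicit arrays, default activation (identity): falls back to scale .* x .+ bias
d2 = Flux.Diagonal(Float32[2, 2, 2], Float32[0, 0, 1])
d2(x)                                       # Float32[2.0, -4.0, 7.0]
```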
@@ -212,7 +214,7 @@
    Maxout(layers...)
    Maxout(f, n_alts)

- This contains a number of internal layes, each of which receives the same input.
+ This contains a number of internal layers, each of which receives the same input.
Its output is the elementwise maximum of the internal layers' outputs.
Instead of defining layers individually, you can provide a zero-argument function