@@ -110,6 +110,7 @@ struct RMSProp{T} <: AbstractRule
  epsilon::T
  centred::Bool
end
+
RMSProp(η = 1f-3, ρ = 9f-1, ϵ = eps(typeof(η)); centred::Bool = false, centered::Bool = false) =
  RMSProp{typeof(η)}(η, ρ, ϵ, centred | centered)

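Outside the diff, a quick sketch of the merged keyword in use: a hypothetical snippet, assuming this constructor is reachable as `Optimisers.RMSProp`. Either spelling sets the same field, since the constructor ORs the two flags together.

using Optimisers

# Both keyword spellings construct an identical rule.
r1 = RMSProp(1f-3; centred = true)
r2 = RMSProp(1f-3; centered = true)
r1.centred == r2.centred   # true
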
@@ -135,6 +136,47 @@ function Base.show(io::IO, o::RMSProp)
  print(io, "; centred = ", o.centred, ")")
end

+
+"""
+    Rprop(η = 1f-3, ℓ = (5f-1, 1.2f0), Γ = (1f-6, 50f0))
+
+Optimizer using the [Rprop](https://ieeexplore.ieee.org/document/298623) algorithm,
+a full-batch learning algorithm that depends only on the sign of the gradient.
+
+# Parameters
+- Learning rate (`η`): Amount by which gradients are discounted before updating
+  the weights.
+
+- Scaling factors (`ℓ::Tuple`): Multiplicative increase and decrease factors.
+
+- Step sizes (`Γ::Tuple`): Minimal and maximal allowed step sizes.
+"""
+struct Rprop{T} <: AbstractRule
+  eta::T
+  ell::Tuple{T,T}
+  gamma::Tuple{T,T}
+end
+
+Rprop(η = 1f-3, ℓ = (5f-1, 1.2f0), Γ = (1f-6, 50f0)) = Rprop{typeof(η)}(η, ℓ, Γ)
+
+init(o::Rprop, x::AbstractArray) = (zero(x), onevalue(o.eta, x))
+
+function apply!(o::Rprop, state, x, dx)
+  ℓ, Γ = o.ell, o.gamma
+  g, η = state
+
+  η = broadcast(g, η, dx) do g, η, dx
+    g * dx > 0 ? min(η * ℓ[2], Γ[2]) : g * dx < 0 ? max(η * ℓ[1], Γ[1]) : η
+  end
+  g = broadcast(g, dx) do g, dx
+    g * dx < 0 ? zero(dx) : dx
+  end
+  dx′ = @lazy η * sign(g)
+
+  return (g, η), dx′
+end
+

"""
    Adam(η = 1f-3, β = (9f-1, 9.99f-1), ϵ = eps(typeof(η)))

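As a usage illustration for the new rule: a minimal sketch assuming the package's usual setup/update API, with a made-up parameter array and gradient standing in for a real model.

using Optimisers

x  = randn(Float32, 3)   # toy parameters
dx = randn(Float32, 3)   # toy gradient

st = Optimisers.setup(Rprop(), x)      # state holds (sign memory g = 0, per-element step η)
st, x = Optimisers.update(st, x, dx)   # step grows where sign(dx) repeats, shrinks where it flips
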
@@ -584,4 +626,4 @@ function Base.show(io::IO, c::OptimiserChain)
  print(io, "OptimiserChain(")
  join(io, c.opts, ", ")
  print(io, ")")
-end
+end
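For reference, a sketch of what this show method produces; `ClipGrad` and `Adam` are other rules from the same package, and the exact printed fields may differ.

using Optimisers

chain = OptimiserChain(ClipGrad(1f0), Adam(1f-3))
show(stdout, chain)   # prints something like: OptimiserChain(ClipGrad(1.0), Adam(...))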