@@ -359,9 +359,9 @@ function apply!(o::AdaGrad, x, Δ)
 end
 
 """
-    ADADelta(ρ = 0.9, ϵ = $EPS)
+    AdaDelta(ρ = 0.9, ϵ = $EPS)
 
-[ADADelta](https://arxiv.org/abs/1212.5701) is a version of AdaGrad adapting its learning
+[AdaDelta](https://arxiv.org/abs/1212.5701) is a version of AdaGrad adapting its learning
 rate based on a window of past gradient updates.
 Parameters don't need tuning.
 
@@ -370,20 +370,20 @@ Parameters don't need tuning.
 
 # Examples
 ```julia
-opt = ADADelta()
+opt = AdaDelta()
 
-opt = ADADelta(0.89)
+opt = AdaDelta(0.89)
 ```
 """
-mutable struct ADADelta <: AbstractOptimiser
+mutable struct AdaDelta <: AbstractOptimiser
   rho::Float64
   epsilon::Float64
   state::IdDict{Any, Any}
 end
-ADADelta(ρ::Real = 0.9, ϵ::Real = EPS) = ADADelta(ρ, ϵ, IdDict())
-ADADelta(ρ::Real, state::IdDict) = ADADelta(ρ, EPS, state)
+AdaDelta(ρ::Real = 0.9, ϵ::Real = EPS) = AdaDelta(ρ, ϵ, IdDict())
+AdaDelta(ρ::Real, state::IdDict) = AdaDelta(ρ, EPS, state)
 
-function apply!(o::ADADelta, x, Δ)
+function apply!(o::AdaDelta, x, Δ)
   ρ = o.rho
   acc, Δacc = get!(() -> (zero(x), zero(x)), o.state, x)::NTuple{2,typeof(x)}
   @. acc = ρ * acc + (1 - ρ) * Δ * conj(Δ)
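The second hunk ends mid-`apply!`, after only the first accumulator update; the gradient rescaling and the second (update) accumulator follow below the fold. For reference, here is a minimal self-contained sketch of the AdaDelta rule that `apply!` implements, assuming real-valued gradients (the `conj(Δ)` above generalises `Δ^2` to complex parameters); `adadelta_step!` and its `ϵ` default are illustrative stand-ins, not Flux API:

```julia
# Standalone sketch of the AdaDelta update (Zeiler, 2012), not the Flux
# method itself. ϵ = 1e-8 is a hypothetical stand-in for the EPS constant.
function adadelta_step!(x, Δ, acc, Δacc; ρ = 0.9, ϵ = 1e-8)
    @. acc = ρ * acc + (1 - ρ) * Δ^2         # decaying average of squared gradients
    @. Δ *= sqrt(Δacc + ϵ) / sqrt(acc + ϵ)   # rescale by RMS(update) / RMS(gradient)
    @. Δacc = ρ * Δacc + (1 - ρ) * Δ^2       # decaying average of squared updates
    @. x -= Δ                                # apply the step
    return x
end

# Usage: minimise sum(abs2, x); note there is no learning rate to tune.
x = randn(5)
acc, Δacc = zero(x), zero(x)
for _ in 1:1000
    adadelta_step!(x, 2 .* x, acc, Δacc)     # 2x is the gradient of sum(abs2, x)
end
```

Keeping `ϵ` inside both square roots is deliberate: `Δacc` starts at zero, so factoring `ϵ` out of the numerator would zero out the very first step (and every one after it).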