@@ -594,7 +594,7 @@ function apply!(o::InvDecay, x, Δ)
594
594
end
595
595
596
596
"""
597
- ExpDecay(η = 0.001, decay = 0.1, decay_step = 1000, clip = 1e-4)
597
+ ExpDecay(η = 0.001, decay = 0.1, decay_step = 1000, clip = 1e-4, start = 0)
598
598
599
599
Discount the learning rate `η` by the factor `decay` every `decay_step` steps till
600
600
a minimum of `clip`.
@@ -606,6 +606,7 @@ a minimum of `clip`.
606
606
- `decay_step`: Schedule decay operations by setting the number of steps between
607
607
two decay operations.
608
608
- `clip`: Minimum value of learning rate.
609
+ - `start`: Step count after which decay operations begin; no decay is applied until the step count exceeds `start`.
609
610
610
611
611
612
See also the [Scheduling Optimisers](@ref) section of the docs
@@ -624,16 +625,17 @@ mutable struct ExpDecay <: AbstractOptimiser
624
625
decay:: Float64
625
626
step:: Int64
626
627
clip:: Float64
628
+ start:: Int64
627
629
current:: IdDict
628
630
end
629
631
630
- ExpDecay (opt = 0.001 , decay = 0.1 , decay_step = 1000 , clip = 1e-4 ) =
631
- ExpDecay (opt, decay, decay_step, clip, IdDict ())
632
+ ExpDecay (opt = 0.001 , decay = 0.1 , decay_step = 1000 , clip = 1e-4 , start = 0 ) =
633
+ ExpDecay (opt, decay, decay_step, clip, start, IdDict ())
632
634
633
635
function apply! (o:: ExpDecay , x, Δ)
634
- η, s, decay = o. eta, o. step, o. decay
636
+ η, s, decay, start = o. eta, o. step, o. decay, o . start
635
637
n = o. current[x] = get (o. current, x, 0 ) + 1
636
- if o . current[x] % s == 0 && count (x -> x% s == 0 , values (o. current)) == 1
638
+ if n > start && n % s == 0 && count (x -> x > start && x % s == 0 , values (o. current)) == 1
637
639
η = max (η * decay, o. clip)
638
640
o. eta = η
639
641
end
0 commit comments