@@ -80,7 +80,7 @@ Conjugate gradient line search implementation from:
80
80
conjugate gradient method with guaranteed descent. ACM
81
81
Transactions on Mathematical Software 32: 113–137.
82
82
"""
83
- @with_kw struct HagerZhang{T}
83
+ @with_kw struct HagerZhang{T, Tm }
84
84
delta:: T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
85
85
sigma:: T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
86
86
alphamax:: T = Inf
@@ -90,34 +90,27 @@ Conjugate gradient line search implementation from:
90
90
linesearchmax:: Int = 50
91
91
psi3:: T = 0.1
92
92
display:: Int = 0
93
+ mayterminate:: Tm = Ref {Bool} (false )
93
94
end
94
95
95
# Calling a `HagerZhang` object forwards the caller's positional arguments to
# `_hagerzhang!`, followed by the tuning parameters stored on the object
# (in the order `_hagerzhang!` declares them).
function (ls::HagerZhang)(args...)
    return _hagerzhang!(args...,
                        ls.delta, ls.sigma, ls.alphamax,
                        ls.rho, ls.epsilon, ls.gamma,
                        ls.linesearchmax, ls.psi3, ls.display)
end
98
-
96
# Objective-based entry point: obtains the pair of callables produced by
# `make_ϕ_ϕdϕ(df, x_new, x, s)` and hands them, together with the remaining
# arguments, to the `(ls::HagerZhang)(ϕ, ϕdϕ, x, s, c, phi_0, dphi_0)` method.
# Whatever that method returns is returned unchanged.
function (ls::HagerZhang)(df::AbstractObjective,
                          x::AbstractArray{T},
                          s::AbstractArray{T},
                          α::Real,
                          x_new::AbstractArray{T},
                          phi_0::Real,
                          dphi_0::Real) where T
    value_at, value_and_slope_at = make_ϕ_ϕdϕ(df, x_new, x, s)
    return ls(value_at, value_and_slope_at, x, s, α, phi_0, dphi_0)
end
99
102
100
- function _hagerzhang! (df ,
103
+ function (ls :: HagerZhang )(ϕ, ϕdϕ ,
101
104
x:: AbstractArray{T} ,
102
105
s:: AbstractArray{T} ,
103
- x_new:: AbstractArray{T} ,
104
- phi_0,
105
- dphi_0,
106
106
c:: Real ,
107
- mayterminate:: Bool ,
108
- delta:: Real = DEFAULTDELTA,
109
- sigma:: Real = DEFAULTSIGMA,
110
- alphamax:: Real = convert (T,Inf ),
111
- rho:: Real = convert (T,5 ),
112
- epsilon:: Real = convert (T,1e-6 ),
113
- gamma:: Real = convert (T,0.66 ),
114
- linesearchmax:: Integer = 50 ,
115
- psi3:: Real = convert (T,0.1 ),
116
- display:: Integer = 0 ) where T
117
-
118
- ϕ, dϕ, ϕdϕ = make_ϕ_dϕ_ϕdϕ (df, x_new, x, s)
119
-
120
- # Prevent values of `x_new` that are likely to make
107
+ phi_0:: Real ,
108
+ dphi_0:: Real ) where T
109
+
110
+ @unpack delta, sigma, alphamax, rho, epsilon, gamma,
111
+ linesearchmax, psi3, display, mayterminate = ls
112
+
113
+ # Prevent values of x_new = x+αs that are likely to make
121
114
# ϕ(x_new) infinite
122
115
iterfinitemax:: Int = ceil (Int, - log2 (eps (T)))
123
116
alphas = [T (0.0 )] # for bisection
@@ -134,27 +127,29 @@ function _hagerzhang!(df,
134
127
phi_c, dphi_c = ϕdϕ (c)
135
128
iterfinite = 1
136
129
while ! (isfinite (phi_c) && isfinite (dphi_c)) && iterfinite < iterfinitemax
137
- mayterminate = false
130
+ mayterminate[] = false
138
131
iterfinite += 1
139
132
c *= psi3
140
133
phi_c, dphi_c = ϕdϕ (c)
141
134
end
142
135
if ! (isfinite (phi_c) && isfinite (dphi_c))
143
136
warn (" Failed to achieve finite new evaluation point, using alpha=0" )
144
- return zero (T) # phi_0
137
+ mayterminate[] = false # reset in case another initial guess is used next
138
+ return T (0.0 ), ϕ (T (0.0 )) # phi_0
145
139
end
146
140
push! (alphas, c)
147
141
push! (values, phi_c)
148
142
push! (slopes, dphi_c)
149
143
150
144
# If c was generated by quadratic interpolation, check whether it
151
145
# satisfies the Wolfe conditions
152
- if mayterminate &&
146
+ if mayterminate[] &&
153
147
satisfies_wolfe (c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
154
148
if display & LINESEARCH > 0
155
149
println (" Wolfe condition satisfied on point alpha = " , c)
156
150
end
157
- return c # phi_c
151
+ mayterminate[] = false # reset in case another initial guess is used next
152
+ return c, phi_c # phi_c
158
153
end
159
154
# Initial bracketing step (HZ, stages B0-B3)
160
155
isbracketed = false
@@ -204,7 +199,8 @@ function _hagerzhang!(df,
204
199
" , cold = " , cold, " , new c = " , c)
205
200
end
206
201
if c == cold || nextfloat (c) >= alphamax
207
- return cold
202
+ mayterminate[] = false # reset in case another initial guess is used next
203
+ return cold, dphi_c
208
204
end
209
205
end
210
206
phi_c, dphi_c = ϕdϕ (c)
@@ -219,7 +215,8 @@ function _hagerzhang!(df,
219
215
phi_c, dphi_c = ϕdϕ (c)
220
216
end
221
217
if ! (isfinite (phi_c) && isfinite (dphi_c))
222
- return cold
218
+ mayterminate[] = false # reset in case another initial guess is used next
219
+ return cold, ϕ (cold)
223
220
elseif dphi_c < 0 && c == alphamax
224
221
# We're on the edge of the allowed region, and the
225
222
# value is still decreasing. This can be due to
@@ -234,7 +231,8 @@ function _hagerzhang!(df,
234
231
" , phi_c = " , phi_c,
235
232
" , dphi_c = " , dphi_c)
236
233
end
237
- return c
234
+ mayterminate[] = false # reset in case another initial guess is used next
235
+ return c, phi_c
238
236
end
239
237
push! (alphas, c)
240
238
push! (values, phi_c)
@@ -255,11 +253,13 @@ function _hagerzhang!(df,
255
253
" , phi(b) = " , values[ib])
256
254
end
257
255
if b - a <= eps (b)
258
- return a # lsr.value[ia]
256
+ mayterminate[] = false # reset in case another initial guess is used next
257
+ return a, values[ia] # lsr.value[ia]
259
258
end
260
259
iswolfe, iA, iB = secant2! (ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
261
260
if iswolfe
262
- return alphas[iA] # lsr.value[iA]
261
+ mayterminate[] = false # reset in case another initial guess is used next
262
+ return alphas[iA], values[iA] # lsr.value[iA]
263
263
end
264
264
A = alphas[iA]
265
265
B = alphas[iB]
@@ -273,7 +273,8 @@ function _hagerzhang!(df,
273
273
if display & LINESEARCH > 0
274
274
println (" Linesearch: secant suggests it's flat" )
275
275
end
276
- return A
276
+ mayterminate[] = false # reset in case another initial guess is used next
277
+ return A, values[iA]
277
278
end
278
279
ia = iA
279
280
ib = iB
@@ -504,136 +505,3 @@ function bisect!(ϕdϕ,
504
505
end
505
506
return ia, ib
506
507
end
507
-
508
"""
Initial step size selection based on

    W. W. Hager and H. Zhang (2006) Algorithm 851: CG_DESCENT, a
    conjugate gradient method with guaranteed descent. ACM
    Transactions on Mathematical Software 32: 113–137.

When the object is called on the first iteration and `α0` is `NaN`,
procedure I0 chooses the step size; in every other case procedure I1-2
is used.
"""
@with_kw struct InitialHagerZhang{T}
    ψ0::T = 0.01          # scaling used by procedure I0
    ψ1::T = 0.2           # passed to procedure I1-2 as `psi1`
    ψ2::T = 2.0           # passed to procedure I1-2 as `psi2`
    ψ3::T = 0.1           # passed to procedure I1-2 as `psi3`
    αmax::T = Inf         # upper bound on the step handed to procedure I1-2
    α0::T = 1.0           # Initial alpha guess. NaN => algorithm calculates
    verbose::Bool = false # print diagnostics from procedure I1-2
end
526
-
527
# Choose the initial step size for the next line search and store it on `state`.
#
# Writes `state.alpha` and `state.mayterminate`, and returns `state.alpha`.
# Procedure I0 is used only when `state.f_x_previous` is NaN (first iteration)
# and no initial step was supplied (`is.α0` is NaN); procedure I1-2 otherwise.
function (is::InitialHagerZhang)(state, phi_0, dphi_0, df)
    needs_I0 = isnan(state.f_x_previous) && isnan(is.α0)

    if needs_I0
        # HZ #I0. The conversion works around the type mismatch that can
        # exist between is{T} and eltype(state.x).
        state.alpha = _hzI0(state.x,
                            NLSolversBase.gradient(df),
                            NLSolversBase.value(df),
                            convert(eltype(state.x), is.ψ0))
        state.mayterminate = false
    else
        # HZ #I1-2
        step, mayterminate = _hzI12(state.alpha, df, state.x, state.s, state.x_ls,
                                    phi_0, dphi_0,
                                    is.ψ1, is.ψ2, is.ψ3, is.αmax, is.verbose)
        state.alpha = step
        state.mayterminate = mayterminate
    end

    return state.alpha
end
547
-
548
# Pick the initial step size (HZ #I1-I2).
#
# A trial step `psi1 * alpha` (capped at `alphamax`) is evaluated with
# ϕ = make_ϕ(df, x_new, x, s) and shrunk by `psi3` while ϕ is not finite.
# A quadratic is then fitted through ϕ(0) = phi_0, ϕ'(0) = dphi_0 and the
# trial value. If the fit is convex and the trial did not increase ϕ, the
# quadratic's minimiser becomes the guess; otherwise the guess is either the
# trial step (ϕ increased) or `alpha * psi2`.
#
# Returns `(alpha, mayterminate)`. `mayterminate` is `true` when the guess is
# the quadratic minimiser and does not exceed `alphamax`, and also on the
# early exit taken when no finite trial value was found (with `alpha` zero).
function _hzI12(alpha::T,
                df,
                x::AbstractArray{T},
                s::AbstractArray{T},
                x_new::AbstractArray{T},
                phi_0::T,
                dphi_0::T,
                psi1::Real = convert(T, 0.2),
                psi2::Real = convert(T, 2.0),
                psi3::Real = convert(T, 0.1),
                alphamax::Real = convert(T, Inf),
                verbose::Bool = false) where T
    ϕ = make_ϕ(df, x_new, x, s)

    # Upper limit on the number of shrink attempts while ϕ is non-finite
    iterfinitemax::Int = ceil(Int, -log2(eps(T)))

    alphatest = min(psi1 * alpha, alphamax)
    phitest = ϕ(alphatest)

    iterfinite = 1
    while !isfinite(phitest)
        alphatest = psi3 * alphatest
        phitest = ϕ(alphatest)
        iterfinite += 1
        if iterfinite >= iterfinitemax
            return zero(T), true
        end
    end

    # Leading coefficient of q(t) = phi_0 + dphi_0*t + a*t^2 with q(alphatest) = phitest
    a = ((phitest - phi_0) / alphatest - dphi_0) / alphatest
    verbose && println(" quadfit: alphatest = ", alphatest,
                       " , phi_0 = ", phi_0,
                       " , phitest = ", phitest,
                       " , quadcoef = ", a)

    mayterminate = false
    convexfit = isfinite(a) && a > 0 && phitest <= phi_0
    if convexfit
        alpha = -dphi_0 / 2 / a # minimiser of the fitted quadratic
        alpha == 0 && error(" alpha is zero. dphi_0 = ", dphi_0, " , phi_0 = ", phi_0, " , phitest = ", phitest, " , alphatest = ", alphatest, " , a = ", a)
        mayterminate = alpha <= alphamax
        if !mayterminate
            alpha = alphamax
        end
        verbose && println(" alpha guess (quadratic): ", alpha,
                           " ,(mayterminate = ", mayterminate, " )")
    elseif phitest > phi_0
        alpha = alphatest
    else
        alpha *= psi2 # not convex: expand the interval
    end

    alpha = min(alphamax, alpha)
    verbose && println(" alpha guess (expand): ", alpha)
    return alpha, mayterminate
end
622
-
623
# Generate initial guess for step size (HZ, stage I0).
#
# Arguments:
#   x    - current iterate
#   gr   - gradient at `x`
#   f_x  - objective value at `x`
#   psi0 - scaling factor (defaults to 0.01)
#
# Returns
#   * `psi0 * ‖x‖∞ / ‖gr‖∞`    when both `x` and `gr` are nonzero,
#   * `psi0 * |f_x| / ‖gr‖₂²`  when `x` is zero but `f_x` and `gr` are not,
#   * `one(T)`                 otherwise.
function _hzI0(x::AbstractArray{T},
               gr::AbstractArray{T},
               f_x::T,
               psi0::T = convert(T, 0.01)) where T
    alpha = one(T)
    gr_max = maximum(abs, gr)
    if gr_max != 0.0
        x_max = maximum(abs, x)
        if x_max != 0.0
            alpha = psi0 * x_max / gr_max
        elseif f_x != 0.0
            # HZ I0(b) divides by the *squared* 2-norm of the gradient.
            # `sum(abs2, gr)` computes it directly and does not depend on
            # `vecnorm`, which newer Julia versions no longer provide.
            alpha = psi0 * abs(f_x) / sum(abs2, gr)
        end
    end
    return alpha
end
0 commit comments