Skip to content

Commit deb45fb

Browse files
authored
Fixes to MoreThuente (#75)
* Fix bug using s = max(abs(theta),...) instead of s = max(theta, ...) * Move finite-evaluation check outside the loop, essentially before the MT algorithm starts * Update NEWS
1 parent 5cd79f4 commit deb45fb

File tree

2 files changed

+51
-17
lines changed

2 files changed

+51
-17
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# LineSearches v3.2.2 release notes
2+
- Ensure finite initial values and fix typos in original MoreThuente translation from MATLAB/FORTRAN implementations. See [#75](https://github.com/anriseth/LineSearches.jl/pull/75)
3+
14
# LineSearches v3.2.0 release notes
25
- Introduce initial step length procedures. See [#70](https://github.com/anriseth/LineSearches.jl/pull/70)
36

src/morethuente.jl

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,32 @@ function _morethuente!(df,
216216
fy = finit
217217
dgy = dginit
218218

219+
# START: Ensure that the initial step provides finite function values
220+
# This is not part of the original FORTRAN code
221+
if !isfinite(stp)
222+
stp = one(T)
223+
end
224+
stmin = stx
225+
stmax = stp + 4 * (stp - stx) # Why 4?
226+
stp = max(stp, stpmin)
227+
stp = min(stp, stpmax)
228+
229+
@. x_new = x + stp*s
230+
231+
f = NLSolversBase.value_gradient!(df, x_new)
232+
gdf = NLSolversBase.gradient(df)
233+
iterfinite = 0
234+
while (!isfinite(f) || any(.!isfinite.(gdf))) && iterfinite < iterfinitemax
235+
iterfinite += 1
236+
stp = 0.5*stp
237+
@. x_new = x + stp*s
238+
f = NLSolversBase.value_gradient!(df, x_new)
239+
gdf = NLSolversBase.gradient(df)
240+
# Make stpmax = (3/2)*stp < 2stp in the first iteration below
241+
stx = (7/8)*stp
242+
end
243+
# END: Ensure that the initial step provides finite function values
244+
219245
while true
220246
#
221247
# Set the minimum and maximum steps to correspond
@@ -230,6 +256,13 @@ function _morethuente!(df,
230256
stmax = stp + 4 * (stp - stx) # Why 4?
231257
end
232258

259+
#
260+
# Ensure stmin and stmax (used in cstep) don't violate stpmin and stpmax
261+
# Not part of original FORTRAN translation
262+
#
263+
stmin = max(stpmin,stmin)
264+
stmax = min(stpmax,stmax)
265+
233266
#
234267
# Force the step to be within the bounds stpmax and stpmin
235268
#
@@ -257,16 +290,6 @@ function _morethuente!(df,
257290
f = NLSolversBase.value_gradient!(df, x_new)
258291
gdf = NLSolversBase.gradient(df)
259292

260-
# Ensure that the step provides finite function values
261-
# This is not part of the original FORTRAN code
262-
iterfinite = 0
263-
while (!isfinite(f) || any(.!isfinite.(gdf))) && iterfinite < iterfinitemax
264-
stp = 0.5*stp
265-
@. x_new = x + stp*s
266-
f = NLSolversBase.value_gradient!(df, x_new)
267-
gdf = NLSolversBase.gradient(df)
268-
end
269-
270293
if isapprox(norm(gdf), 0.0) # TODO: this should be tested vs Optim's g_tol
271294
return stp
272295
end
@@ -472,7 +495,8 @@ function cstep(stx::Real, fx::Real, dgx::Real,
472495
info = 1
473496
bound = true
474497
theta = 3 * (fx - f) / (stp - stx) + dgx + dg
475-
s = max(theta, dgx, dg)
498+
# Use s to prevent overflow/underflow of theta^2 and dgx * dg
499+
s = max(abs(theta), abs(dgx), abs(dg))
476500
gamma = s * sqrt((theta / s)^2 - (dgx / s) * (dg / s))
477501
if stp < stx
478502
gamma = -gamma
@@ -481,11 +505,11 @@ function cstep(stx::Real, fx::Real, dgx::Real,
481505
q = gamma - dgx + gamma + dg
482506
r = p / q
483507
stpc = stx + r * (stp - stx)
484-
stpq = stx + ((dgx / ((fx - f) / (stp - stx) + dgx)) / 2) * (stp - stx)
508+
stpq = stx + 0.5 * (dgx / ((fx - f) / (stp - stx) + dgx)) * (stp - stx)
485509
if abs(stpc - stx) < abs(stpq - stx)
486510
stpf = stpc
487511
else
488-
stpf = stpc + (stpq - stpc) / 2
512+
stpf = 0.5*(stpc + stpq)
489513
end
490514
bracketed = true
491515

@@ -500,8 +524,10 @@ function cstep(stx::Real, fx::Real, dgx::Real,
500524
info = 2
501525
bound = false
502526
theta = 3 * (fx - f) / (stp - stx) + dgx + dg
503-
s = max(theta, dgx, dg)
527+
# Use s to prevent overflow/underflow of theta^2 and dgx * dg
528+
s = max(abs(theta), abs(dgx), abs(dg))
504529
gamma = s * sqrt((theta / s)^2 - (dgx / s) * (dg / s))
530+
505531
if stp > stx
506532
gamma = -gamma
507533
end
@@ -532,12 +558,15 @@ function cstep(stx::Real, fx::Real, dgx::Real,
532558
info = 3
533559
bound = true
534560
theta = 3 * (fx - f) / (stp - stx) + dgx + dg
535-
s = max(theta, dgx, dg)
561+
# Use s to prevent overflow/underflow of theta^2 and dgx * dg
562+
s = max(abs(theta), abs(dgx), abs(dg))
536563
#
537564
# The case gamma = 0 only arises if the cubic does not tend
538565
# to infinity in the direction of the step
539566
#
540-
gamma = s * sqrt(max(0.0, (theta / s)^2 - (dgx / s) * (dg / s)))
567+
#
568+
gamma = (s > zero(s)) ? s * sqrt((theta / s)^2 - (dgx / s) * (dg / s)) : zero(s)
569+
541570
if stp > stx
542571
gamma = -gamma
543572
end
@@ -578,8 +607,10 @@ function cstep(stx::Real, fx::Real, dgx::Real,
578607
bound = false
579608
if bracketed
580609
theta = 3 * (f - fy) / (sty - stp) + dgy + dg
581-
s = max(theta, dgy, dg)
610+
# Use s to prevent overflow/underflow of theta^2 and dgy * dg
611+
s = max(abs(theta), abs(dgy), abs(dg))
582612
gamma = s * sqrt((theta / s)^2 - (dgy / s) * (dg / s))
613+
583614
if stp > sty
584615
gamma = -gamma
585616
end

0 commit comments

Comments
 (0)