Skip to content

Commit b58f286

Browse files
authored
Univariatize even more. (#86)
* Univariatize even more.
1 parent 27dd9b6 commit b58f286

File tree

10 files changed

+311
-260
lines changed

10 files changed

+311
-260
lines changed

src/LineSearches.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ module LineSearches
55
using Parameters, NaNMath
66

77
import NLSolversBase
8+
import NLSolversBase: AbstractObjective
89
import Base.clear!
910

1011
export LineSearchResults, LineSearchException
@@ -26,6 +27,19 @@ function make_ϕ(df, x_new, x, s)
2627
end
2728
ϕ
2829
end
30+
function make_ϕdϕ(df, x_new, x, s)
31+
function ϕdϕ(α)
32+
# Move a distance of alpha in the direction of s
33+
x_new .= x .+ α.*s
34+
35+
# Evaluate ∇f(x+α*s)
36+
NLSolversBase.value_gradient!(df, x_new)
37+
38+
# Calculate ϕ(a_i), ϕ'(a_i)
39+
NLSolversBase.value(df), vecdot(NLSolversBase.gradient(df), s)
40+
end
41+
ϕdϕ
42+
end
2943
function make_ϕ_dϕ(df, x_new, x, s)
3044
function (α)
3145
# Move a distance of alpha in the direction of s
@@ -62,6 +76,19 @@ function make_ϕ_dϕ_ϕdϕ(df, x_new, x, s)
6276
end
6377
make_ϕ(df, x_new, x, s), dϕ, ϕdϕ
6478
end
79+
function make_ϕ_ϕdϕ(df, x_new, x, s)
80+
function ϕdϕ(α)
81+
# Move a distance of alpha in the direction of s
82+
x_new .= x .+ α.*s
83+
84+
# Evaluate ∇f(x+α*s)
85+
NLSolversBase.value_gradient!(df, x_new)
86+
87+
# Calculate ϕ(a_i), ϕ'(a_i)
88+
NLSolversBase.value(df), vecdot(NLSolversBase.gradient(df), s)
89+
end
90+
make_ϕ(df, x_new, x, s), ϕdϕ
91+
end
6592

6693
include("types.jl")
6794

src/backtracking.jl

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,23 @@ This is a modification of the algorithm described in Nocedal Wright (2nd ed), Se
1717
maxstep::TF = Inf
1818
end
1919

20-
function (ls::BackTracking)(df, x::AbstractArray{T}, s::AbstractArray{T},
21-
x_new::AbstractArray{T},
22-
ϕ_0, dϕ_0, α_0::Tα = 1.0, alphamax = convert(T, Inf)) where {T, Tα}
20+
function (ls::BackTracking)(df::AbstractObjective, x::AbstractArray{T}, s::AbstractArray{T},
21+
α_0::Tα = 1.0, x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = convert(T, Inf)) where {T, Tα}
22+
ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)
2323

24-
@unpack c_1, ρ_hi, ρ_lo, iterations, order, maxstep = ls
24+
if ϕ_0 == nothing
25+
ϕ_0 = ϕ(α_0)
26+
end
27+
if dϕ_0 == nothing
28+
dϕ_0 = ϕ(α_0)
29+
end
2530

26-
ϕ = make_ϕ(df, x_new, x, s)
31+
ls(ϕ, x, s, α_0, ϕ_0, dϕ_0, alphamax)
32+
end
33+
function (ls::BackTracking)(ϕ, x::AbstractArray{T}, s::AbstractArray{T}, α_0::Tα,
34+
ϕ_0, dϕ_0, alphamax = convert(T, Inf)) where {T, Tα}
35+
36+
@unpack c_1, ρ_hi, ρ_lo, iterations, order, maxstep = ls
2737

2838
iterfinitemax = -log2(eps(T))
2939

@@ -102,5 +112,5 @@ function (ls::BackTracking)(df, x::AbstractArray{T}, s::AbstractArray{T},
102112
ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
103113
end
104114

105-
return α_2
115+
return α_2, ϕx_1
106116
end

src/hagerzhang.jl

Lines changed: 34 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ Conjugate gradient line search implementation from:
8080
conjugate gradient method with guaranteed descent. ACM
8181
Transactions on Mathematical Software 32: 113–137.
8282
"""
83-
@with_kw struct HagerZhang{T}
83+
@with_kw struct HagerZhang{T, Tm}
8484
delta::T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
8585
sigma::T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
8686
alphamax::T = Inf
@@ -90,34 +90,27 @@ Conjugate gradient line search implementation from:
9090
linesearchmax::Int = 50
9191
psi3::T = 0.1
9292
display::Int = 0
93+
mayterminate::Tm = Ref{Bool}(false)
9394
end
9495

95-
(ls::HagerZhang)(args...) = _hagerzhang!(args...,
96-
ls.delta, ls.sigma, ls.alphamax, ls.rho, ls.epsilon, ls.gamma,
97-
ls.linesearchmax, ls.psi3, ls.display)
98-
96+
function (ls::HagerZhang)(df::AbstractObjective, x::AbstractArray{T},
97+
s::AbstractArray{T}, α::Real,
98+
x_new::AbstractArray{T}, phi_0::Real, dphi_0::Real) where T
99+
ϕ, ϕdϕ = make_ϕ_ϕdϕ(df, x_new, x, s)
100+
ls(ϕ, ϕdϕ, x, s, α::Real, phi_0, dphi_0)
101+
end
99102

100-
function _hagerzhang!(df,
103+
function (ls::HagerZhang)(ϕ, ϕdϕ,
101104
x::AbstractArray{T},
102105
s::AbstractArray{T},
103-
x_new::AbstractArray{T},
104-
phi_0,
105-
dphi_0,
106106
c::Real,
107-
mayterminate::Bool,
108-
delta::Real = DEFAULTDELTA,
109-
sigma::Real = DEFAULTSIGMA,
110-
alphamax::Real = convert(T,Inf),
111-
rho::Real = convert(T,5),
112-
epsilon::Real = convert(T,1e-6),
113-
gamma::Real = convert(T,0.66),
114-
linesearchmax::Integer = 50,
115-
psi3::Real = convert(T,0.1),
116-
display::Integer = 0) where T
117-
118-
ϕ, dϕ, ϕdϕ = make_ϕ_dϕ_ϕdϕ(df, x_new, x, s)
119-
120-
# Prevent values of `x_new` that are likely to make
107+
phi_0::Real,
108+
dphi_0::Real) where T
109+
110+
@unpack delta, sigma, alphamax, rho, epsilon, gamma,
111+
linesearchmax, psi3, display, mayterminate = ls
112+
113+
# Prevent values of x_new = x+αs that are likely to make
121114
# ϕ(x_new) infinite
122115
iterfinitemax::Int = ceil(Int, -log2(eps(T)))
123116
alphas = [T(0.0)] # for bisection
@@ -134,27 +127,29 @@ function _hagerzhang!(df,
134127
phi_c, dphi_c = ϕdϕ(c)
135128
iterfinite = 1
136129
while !(isfinite(phi_c) && isfinite(dphi_c)) && iterfinite < iterfinitemax
137-
mayterminate = false
130+
mayterminate[] = false
138131
iterfinite += 1
139132
c *= psi3
140133
phi_c, dphi_c = ϕdϕ(c)
141134
end
142135
if !(isfinite(phi_c) && isfinite(dphi_c))
143136
warn("Failed to achieve finite new evaluation point, using alpha=0")
144-
return zero(T) # phi_0
137+
mayterminate[] = false # reset in case another initial guess is used next
138+
return T(0.0), ϕ(T(0.0)) # phi_0
145139
end
146140
push!(alphas, c)
147141
push!(values, phi_c)
148142
push!(slopes, dphi_c)
149143

150144
# If c was generated by quadratic interpolation, check whether it
151145
# satisfies the Wolfe conditions
152-
if mayterminate &&
146+
if mayterminate[] &&
153147
satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
154148
if display & LINESEARCH > 0
155149
println("Wolfe condition satisfied on point alpha = ", c)
156150
end
157-
return c # phi_c
151+
mayterminate[] = false # reset in case another initial guess is used next
152+
return c, phi_c # phi_c
158153
end
159154
# Initial bracketing step (HZ, stages B0-B3)
160155
isbracketed = false
@@ -204,7 +199,8 @@ function _hagerzhang!(df,
204199
", cold = ", cold, ", new c = ", c)
205200
end
206201
if c == cold || nextfloat(c) >= alphamax
207-
return cold
202+
mayterminate[] = false # reset in case another initial guess is used next
203+
return cold, dphi_c
208204
end
209205
end
210206
phi_c, dphi_c = ϕdϕ(c)
@@ -219,7 +215,8 @@ function _hagerzhang!(df,
219215
phi_c, dphi_c = ϕdϕ(c)
220216
end
221217
if !(isfinite(phi_c) && isfinite(dphi_c))
222-
return cold
218+
mayterminate[] = false # reset in case another initial guess is used next
219+
return cold, ϕ(cold)
223220
elseif dphi_c < 0 && c == alphamax
224221
# We're on the edge of the allowed region, and the
225222
# value is still decreasing. This can be due to
@@ -234,7 +231,8 @@ function _hagerzhang!(df,
234231
", phi_c = ", phi_c,
235232
", dphi_c = ", dphi_c)
236233
end
237-
return c
234+
mayterminate[] = false # reset in case another initial guess is used next
235+
return c, phi_c
238236
end
239237
push!(alphas, c)
240238
push!(values, phi_c)
@@ -255,11 +253,13 @@ function _hagerzhang!(df,
255253
", phi(b) = ", values[ib])
256254
end
257255
if b - a <= eps(b)
258-
return a # lsr.value[ia]
256+
mayterminate[] = false # reset in case another initial guess is used next
257+
return a, values[ia] # lsr.value[ia]
259258
end
260259
iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
261260
if iswolfe
262-
return alphas[iA] # lsr.value[iA]
261+
mayterminate[] = false # reset in case another initial guess is used next
262+
return alphas[iA], values[iA] # lsr.value[iA]
263263
end
264264
A = alphas[iA]
265265
B = alphas[iB]
@@ -273,7 +273,8 @@ function _hagerzhang!(df,
273273
if display & LINESEARCH > 0
274274
println("Linesearch: secant suggests it's flat")
275275
end
276-
return A
276+
mayterminate[] = false # reset in case another initial guess is used next
277+
return A, values[iA]
277278
end
278279
ia = iA
279280
ib = iB
@@ -504,136 +505,3 @@ function bisect!(ϕdϕ,
504505
end
505506
return ia, ib
506507
end
507-
508-
"""
509-
Initial step size algorithm from
510-
W. W. Hager and H. Zhang (2006) Algorithm 851: CG_DESCENT, a
511-
conjugate gradient method with guaranteed descent. ACM
512-
Transactions on Mathematical Software 32: 113–137.
513-
514-
If α0 is NaN, then procedure I0 is called at the first iteration,
515-
otherwise, we select according to procedure I1-2, with starting value α0.
516-
"""
517-
@with_kw struct InitialHagerZhang{T}
518-
ψ0::T = 0.01
519-
ψ1::T = 0.2
520-
ψ2::T = 2.0
521-
ψ3::T = 0.1
522-
αmax::T = Inf
523-
α0::T = 1.0 # Initial alpha guess. NaN => algorithm calculates
524-
verbose::Bool = false
525-
end
526-
527-
function (is::InitialHagerZhang)(state, phi_0, dphi_0, df)
528-
529-
530-
if isnan(state.f_x_previous) && isnan(is.α0)
531-
# If we're at the first iteration (f_x_previous is NaN)
532-
# and the user has not provided an initial step size (is.α0 is NaN),
533-
# then we
534-
# pick the initial step size according to HZ #I0
535-
state.alpha = _hzI0(state.x, NLSolversBase.gradient(df),
536-
NLSolversBase.value(df),
537-
convert(eltype(state.x), is.ψ0)) # Hack to deal with type instability between is{T} and state.x
538-
state.mayterminate = false
539-
else
540-
# Pick the initial step size according to HZ #I1-2
541-
state.alpha, state.mayterminate =
542-
_hzI12(state.alpha, df, state.x, state.s, state.x_ls, phi_0, dphi_0,
543-
is.ψ1, is.ψ2, is.ψ3, is.αmax, is.verbose)
544-
end
545-
return state.alpha
546-
end
547-
548-
# Pick the initial step size (HZ #I1-I2)
549-
function _hzI12(alpha::T,
550-
df,
551-
x::AbstractArray{T},
552-
s::AbstractArray{T},
553-
x_new::AbstractArray{T},
554-
phi_0::T,
555-
dphi_0::T,
556-
psi1::Real = convert(T,0.2),
557-
psi2::Real = convert(T,2.0),
558-
psi3::Real = convert(T,0.1),
559-
alphamax::Real = convert(T, Inf),
560-
verbose::Bool = false) where T
561-
562-
563-
ϕ = make_ϕ(df, x_new, x, s)
564-
565-
# Prevent values of `x_new` that are likely to make
566-
# ϕ(x_new) infinite
567-
iterfinitemax::Int = ceil(Int, -log2(eps(T)))
568-
569-
alphatest = psi1 * alpha
570-
alphatest = min(alphatest, alphamax)
571-
572-
phitest = ϕ(alphatest)
573-
574-
iterfinite = 1
575-
while !isfinite(phitest)
576-
alphatest = psi3 * alphatest
577-
578-
phitest = ϕ(alphatest)
579-
580-
iterfinite += 1
581-
if iterfinite >= iterfinitemax
582-
return zero(T), true
583-
# error("Failed to achieve finite test value; alphatest = ", alphatest)
584-
end
585-
end
586-
a = ((phitest-phi_0)/alphatest - dphi_0)/alphatest # quadratic fit
587-
if verbose == true
588-
println("quadfit: alphatest = ", alphatest,
589-
", phi_0 = ", phi_0,
590-
", phitest = ", phitest,
591-
", quadcoef = ", a)
592-
end
593-
mayterminate = false
594-
if isfinite(a) && a > 0 && phitest <= phi_0
595-
alpha = -dphi_0 / 2 / a # if convex, choose minimum of quadratic
596-
if alpha == 0
597-
error("alpha is zero. dphi_0 = ", dphi_0, ", phi_0 = ", phi_0, ", phitest = ", phitest, ", alphatest = ", alphatest, ", a = ", a)
598-
end
599-
if alpha <= alphamax
600-
mayterminate = true
601-
else
602-
alpha = alphamax
603-
mayterminate = false
604-
end
605-
if verbose == true
606-
println("alpha guess (quadratic): ", alpha,
607-
",(mayterminate = ", mayterminate, ")")
608-
end
609-
else
610-
if phitest > phi_0
611-
alpha = alphatest
612-
else
613-
alpha *= psi2 # if not convex, expand the interval
614-
end
615-
end
616-
alpha = min(alphamax, alpha)
617-
if verbose == true
618-
println("alpha guess (expand): ", alpha)
619-
end
620-
return alpha, mayterminate
621-
end
622-
623-
# Generate initial guess for step size (HZ, stage I0)
624-
function _hzI0(x::AbstractArray{T},
625-
gr::AbstractArray{T},
626-
f_x::T,
627-
psi0::T = convert(T,0.01)) where T
628-
alpha = one(T)
629-
gr_max = maximum(abs, gr)
630-
if gr_max != 0.0
631-
x_max = maximum(abs, x)
632-
if x_max != 0.0
633-
alpha = psi0 * x_max / gr_max
634-
elseif f_x != 0.0
635-
alpha = psi0 * abs(f_x) / vecnorm(gr)
636-
end
637-
end
638-
return alpha
639-
end

0 commit comments

Comments
 (0)