Skip to content

Improvements for Broyden and Klement #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/NonlinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c
get_u(cache), p, get_fu(cache), Val(iip))
end

hasfield(typeof(cache), :uf) && (cache.uf.p = p)
hasfield(typeof(cache), :uf) && cache.uf !== nothing && (cache.uf.p = p)

cache.abstol = abstol
cache.reltol = reltol
Expand Down Expand Up @@ -117,6 +117,7 @@ function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm)
push!(modifiers, "linesearch = $(nameof(typeof(alg.linesearch)))()")
end
end
append!(modifiers, __alg_print_modifiers(alg))
if __getproperty(alg, Val(:radius_update_scheme)) !== nothing
push!(modifiers, "radius_update_scheme = $(alg.radius_update_scheme)")
end
Expand All @@ -125,6 +126,8 @@ function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm)
return nothing
end

__alg_print_modifiers(_) = String[]

function SciMLBase.__solve(prob::Union{NonlinearProblem, NonlinearLeastSquaresProblem},
alg::AbstractNonlinearSolveAlgorithm, args...; kwargs...)
cache = init(prob, alg, args...; kwargs...)
Expand Down
175 changes: 146 additions & 29 deletions src/broyden.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,78 @@
# Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
"""
GeneralBroyden(; max_resets = 3, linesearch = nothing, reset_tolerance = nothing)
GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing,
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing)

An implementation of `Broyden` with resetting and line search.

## Arguments

- `max_resets`: the maximum number of resets to perform. Defaults to `3`.
- `max_resets`: the maximum number of resets to perform. Defaults to `100`.

- `reset_tolerance`: the tolerance for the reset check. Defaults to
`sqrt(eps(real(eltype(u))))`.
- `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
which means that no line search is performed. Algorithms from `LineSearches.jl` can be
used here directly, and they will be converted to the correct `LineSearch`. It is
recommended to use [LiFukushimaLineSearch](@ref) -- a derivative free linesearch
specifically designed for Broyden's method.
- `alpha`: If `init_jacobian` is set to `Val(:identity)`, then the initial Jacobian
inverse is set to be `(αI)⁻¹`. Defaults to `nothing` which implies
`α = max(norm(u), 1) / (2 * norm(fu))`.
- `init_jacobian`: the method to use for initializing the jacobian. Defaults to
`Val(:identity)`. Choices include:

+ `Val(:identity)`: Identity Matrix.
+ `Val(:true_jacobian)`: True Jacobian. This is a good choice for differentiable
problems.
- `autodiff`: determines the backend used for the Jacobian. Note that this argument is
ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
`nothing` which means that a default is selected according to the problem specification!
Valid choices are types from ADTypes.jl. (Used if `init_jacobian = Val(:true_jacobian)`)
- `update_rule`: Update Rule for the Jacobian. Choices are:

+ `Val(:good_broyden)`: Good Broyden's Update Rule
+ `Val(:bad_broyden)`: Bad Broyden's Update Rule
+ `Val(:diagonal)`: Only update the diagonal of the Jacobian. This algorithm may be
useful for specific problems, but whether it will work may depend strongly on the
problem.
"""
@concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
@concrete struct GeneralBroyden{IJ, UR, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
ad::AD
max_resets::Int
reset_tolerance
linesearch
alpha
end

function __alg_print_modifiers(alg::GeneralBroyden{IJ, UR}) where {IJ, UR}
modifiers = String[]
IJ !== :identity && push!(modifiers, "init_jacobian = Val(:$(IJ))")
UR !== :good_broyden && push!(modifiers, "update_rule = Val(:$(UR))")
alg.alpha !== nothing && push!(modifiers, "alpha = $(alg.alpha)")
return modifiers
end

function set_ad(alg::GeneralBroyden{IJ, UR, CJ}, ad) where {IJ, UR, CJ}
return GeneralBroyden{IJ, UR, CJ}(ad, alg.max_resets, alg.reset_tolerance,
alg.linesearch, alg.alpha)
end

function GeneralBroyden(; max_resets = 3, linesearch = nothing,
reset_tolerance = nothing)
function GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing,
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing,
update_rule = Val(:good_broyden))
UR = _unwrap_val(update_rule)
@assert UR ∈ (:good_broyden, :bad_broyden, :diagonal)
IJ = _unwrap_val(init_jacobian)
@assert IJ ∈ (:identity, :true_jacobian)
linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
return GeneralBroyden(max_resets, reset_tolerance, linesearch)
CJ = IJ === :true_jacobian
return GeneralBroyden{IJ, UR, CJ}(autodiff, max_resets, reset_tolerance, linesearch,
alpha)
end

@concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
@concrete mutable struct GeneralBroydenCache{iip, IJ, UR} <:
AbstractNonlinearSolveCache{iip}
f
alg
u
Expand All @@ -37,8 +82,12 @@ end
fu_cache
dfu
p
uf
J⁻¹
J⁻¹_cache
J⁻¹dfu
inv_alpha
alpha_initial
force_stop::Bool
resets::Int
max_resets::Int
Expand All @@ -49,46 +98,85 @@ end
reltol
reset_tolerance
reset_check
jac_cache
prob
stats::NLStats
ls_cache
tc_cache
trace
end

function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyden, args...;
alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyden{IJ, UR},
args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
kwargs...) where {uType, iip, F}
kwargs...) where {uType, iip, F, IJ, UR}
@unpack f, u0, p = prob
u = __maybe_unaliased(u0, alias_u0)
fu = evaluate_f(prob, u)
@bb du = copy(u)
J⁻¹ = __init_identity_jacobian(u, fu)

inv_alpha = __initial_inv_alpha(alg_.alpha, u, fu, internalnorm)

if IJ === :true_jacobian
alg = get_concrete_algorithm(alg_, prob)
uf, _, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
lininit = Val(false))
if UR === :diagonal
J⁻¹_cache = J
J⁻¹ = __diag(J)
else
J⁻¹_cache = nothing
J⁻¹ = J
end
elseif IJ === :identity
alg = alg_
@bb du = similar(u)
uf, fu_cache, jac_cache, J⁻¹_cache = nothing, nothing, nothing, nothing
if UR === :diagonal
J⁻¹ = one.(fu)
@bb J⁻¹ .*= inv_alpha
else
J⁻¹ = __init_identity_jacobian(u, fu, inv_alpha)
end
end

reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(real(eltype(u)))) :
alg.reset_tolerance
reset_check = x -> abs(x) ≤ reset_tolerance

@bb u_cache = copy(u)
@bb fu_cache = copy(fu)
@bb dfu = similar(fu)
@bb dfu = copy(fu)
@bb J⁻¹dfu = similar(u)

abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u,
termination_condition)
trace = init_nonlinearsolve_trace(alg, u, fu, J⁻¹, du; uses_jac_inverse = Val(true),
kwargs...)
trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J⁻¹), du;
uses_jac_inverse = Val(true), kwargs...)

return GeneralBroydenCache{iip}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p,
J⁻¹, J⁻¹dfu, false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default,
abstol, reltol, reset_tolerance, reset_check, prob, NLStats(1, 0, 0, 0, 0),
return GeneralBroydenCache{iip, IJ, UR}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p,
uf, J⁻¹, J⁻¹_cache, J⁻¹dfu, inv_alpha, alg.alpha, false, 0, alg.max_resets,
maxiters, internalnorm, ReturnCode.Default, abstol, reltol, reset_tolerance,
reset_check, jac_cache, prob, NLStats(1, 0, 0, 0, 0),
init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace)
end

function perform_step!(cache::GeneralBroydenCache{iip}) where {iip}
function perform_step!(cache::GeneralBroydenCache{iip, IJ, UR}) where {iip, IJ, UR}
T = eltype(cache.u)

@bb cache.du = cache.J⁻¹ × vec(cache.fu)
if IJ === :true_jacobian && cache.stats.nsteps == 0
if UR === :diagonal
cache.J⁻¹_cache = __safe_inv(jacobian!!(cache.J⁻¹_cache, cache))
cache.J⁻¹ = __get_diagonal!!(cache.J⁻¹, cache.J⁻¹_cache)
else
cache.J⁻¹ = __safe_inv(jacobian!!(cache.J⁻¹, cache))
end
end

if UR === :diagonal
@bb @. cache.du = cache.J⁻¹ * cache.fu
else
@bb cache.du = cache.J⁻¹ × vec(cache.fu)
end
α = perform_linesearch!(cache.ls_cache, cache.u, cache.du)
@bb axpy!(-α, cache.du, cache.u)

Expand All @@ -100,33 +188,62 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip}
cache.force_stop && return nothing

# Update the inverse jacobian
@bb @. cache.dfu = cache.fu - cache.fu_cache
@bb @. cache.dfu = cache.fu - cache.dfu

if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu)
if cache.resets ≥ cache.max_resets
cache.retcode = ReturnCode.ConvergenceFailure
cache.force_stop = true
return nothing
end
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹)
if IJ === :true_jacobian
if UR === :diagonal
cache.J⁻¹_cache = __safe_inv(jacobian!!(cache.J⁻¹_cache, cache))
cache.J⁻¹ = __get_diagonal!!(cache.J⁻¹, cache.J⁻¹_cache)
else
cache.J⁻¹ = __safe_inv(jacobian!!(cache.J⁻¹, cache))
end
else
cache.inv_alpha = __initial_inv_alpha(cache.inv_alpha, cache.alpha_initial,
cache.u, cache.fu, cache.internalnorm)
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.inv_alpha)
end
cache.resets += 1
else
@bb cache.du .*= -1
@bb cache.J⁻¹dfu = cache.J⁻¹ × vec(cache.dfu)
@bb cache.u_cache = transpose(cache.J⁻¹) × vec(cache.du)
denom = dot(cache.du, cache.J⁻¹dfu)
@bb @. cache.du = (cache.du - cache.J⁻¹dfu) / ifelse(iszero(denom), T(1e-5), denom)
@bb cache.J⁻¹ += vec(cache.du) × transpose(_vec(cache.u_cache))
if UR === :good_broyden
@bb cache.J⁻¹dfu = cache.J⁻¹ × vec(cache.dfu)
@bb cache.u_cache = transpose(cache.J⁻¹) × vec(cache.du)
denom = dot(cache.du, cache.J⁻¹dfu)
@bb @. cache.du = (cache.du - cache.J⁻¹dfu) /
ifelse(iszero(denom), T(1e-5), denom)
@bb cache.J⁻¹ += vec(cache.du) × transpose(_vec(cache.u_cache))
elseif UR === :bad_broyden
@bb cache.J⁻¹dfu = cache.J⁻¹ × vec(cache.dfu)
dfu_norm = cache.internalnorm(cache.dfu)^2
@bb @. cache.du = (cache.du - cache.J⁻¹dfu) /
ifelse(iszero(dfu_norm), T(1e-5), dfu_norm)
@bb cache.J⁻¹ += vec(cache.du) × transpose(_vec(cache.dfu))
elseif UR === :diagonal
@bb @. cache.J⁻¹dfu = cache.du * cache.J⁻¹ * cache.dfu
denom = sum(cache.J⁻¹dfu)
@bb @. cache.J⁻¹ += (cache.du - cache.J⁻¹ * cache.dfu) * cache.du * cache.J⁻¹ /
ifelse(iszero(denom), T(1e-5), denom)
else
error("update_rule = Val(:$(UR)) is not implemented for Broyden.")
end
end

@bb copyto!(cache.fu_cache, cache.fu)
@bb copyto!(cache.dfu, cache.fu)
@bb copyto!(cache.u_cache, cache.u)

return nothing
end

function __reinit_internal!(cache::GeneralBroydenCache; kwargs...)
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹)
cache.inv_alpha = __initial_inv_alpha(cache.inv_alpha, cache.alpha_initial, cache.u,
cache.fu, cache.internalnorm)
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.inv_alpha)
cache.resets = 0
return nothing
end
5 changes: 3 additions & 2 deletions src/default.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothi
TrustRegion(; concrete_jac, linsolve, precs,
radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...))
else
algs = (GeneralKlement(; linsolve, precs),
GeneralBroyden(),
algs = (GeneralBroyden(),
GeneralBroyden(; init_jacobian = Val(:true_jacobian)),
GeneralKlement(; linsolve, precs),
NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...),
NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(),
adkwargs...),
Expand Down
1 change: 1 addition & 0 deletions src/jacobian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ function jacobian!!(::Number, cache)
cache.stats.njacs += 1
return last(value_derivative(cache.uf, cache.u))
end

# Build Jacobian Caches
function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val{iip};
linsolve_kwargs = (;), lininit::Val{linsolve_init} = Val(true),
Expand Down
Loading