Skip to content

Commit 0571034

Browse files
committed
More improvements to Klement
1 parent 50daa20 commit 0571034

File tree

7 files changed

+187
-71
lines changed

7 files changed

+187
-71
lines changed

src/NonlinearSolve.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c
8181
get_u(cache), p, get_fu(cache), Val(iip))
8282
end
8383

84-
hasfield(typeof(cache), :uf) && (cache.uf.p = p)
84+
hasfield(typeof(cache), :uf) && cache.uf !== nothing && (cache.uf.p = p)
8585

8686
cache.abstol = abstol
8787
cache.reltol = reltol

src/broyden.jl

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
# Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
22
"""
3-
GeneralBroyden(; max_resets = 3, linesearch = nothing, reset_tolerance = nothing,
4-
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = 1.0)
3+
GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing,
4+
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing)
55
66
An implementation of `Broyden` with resetting and line search.
77
88
## Arguments
99
10-
- `max_resets`: the maximum number of resets to perform. Defaults to `3`.
10+
- `max_resets`: the maximum number of resets to perform. Defaults to `100`.
1111
1212
- `reset_tolerance`: the tolerance for the reset check. Defaults to
1313
`sqrt(eps(real(eltype(u))))`.
@@ -16,11 +16,15 @@ An implementation of `Broyden` with resetting and line search.
1616
used here directly, and they will be converted to the correct `LineSearch`. It is
1717
recommended to use [LiFukushimaLineSearch](@ref) -- a derivative free linesearch
1818
specifically designed for Broyden's method.
19-
- `alpha_initial`: If `init_jacobian` is set to `Val(:identity)`, then the initial
20-
Jacobian inverse is set to be `(αI)⁻¹`. Defaults to `1.0`.
21-
- `init_jacobian`: the method to use for initializing the jacobian. Defaults to using the
22-
identity matrix (`Val(:identitiy)`). Alternatively, can be set to `Val(:true_jacobian)`
23-
to use the true jacobian as initialization.
19+
- `alpha`: If `init_jacobian` is set to `Val(:identity)`, then the initial Jacobian
20+
inverse is set to be `(αI)⁻¹`. Defaults to `nothing` which implies
21+
`α = max(norm(u), 1) / (2 * norm(fu))`.
22+
- `init_jacobian`: the method to use for initializing the jacobian. Defaults to
23+
`Val(:identity)`. Choices include:
24+
25+
+ `Val(:identity)`: Identity Matrix.
26+
+ `Val(:true_jacobian)`: True Jacobian. This is a good choice for differentiable
27+
problems.
2428
- `autodiff`: determines the backend used for the Jacobian. Note that this argument is
2529
ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
2630
`nothing` which means that a default is selected according to the problem specification!
@@ -29,39 +33,45 @@ An implementation of `Broyden` with resetting and line search.
2933
3034
+ `Val(:good_broyden)`: Good Broyden's Update Rule
3135
+ `Val(:bad_broyden)`: Bad Broyden's Update Rule
36+
+ `Val(:diagonal)`: Only update the diagonal of the Jacobian. This algorithm may be
37+
useful for specific problems, but whether it will work may depend strongly on the
38+
problem.
39+
+ `Val(:exciting_mixing)`: Update the diagonal of the Jacobian. This is based off
40+
SciPy's implementation of Broyden's method. This algorithm may be useful for
41+
specific problems, but whether it will work may depend strongly on the problem.
3242
"""
3343
@concrete struct GeneralBroyden{IJ, UR, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
3444
ad::AD
3545
max_resets::Int
3646
reset_tolerance
3747
linesearch
38-
inv_alpha
48+
alpha
3949
end
4050

4151
function __alg_print_modifiers(alg::GeneralBroyden{IJ, UR}) where {IJ, UR}
4252
modifiers = String[]
4353
IJ !== :identity && push!(modifiers, "init_jacobian = :$(IJ)")
4454
UR !== :good_broyden && push!(modifiers, "update_rule = :$(UR)")
45-
alg.inv_alpha != 1 && push!(modifiers, "alpha = $(1 / alg.inv_alpha)")
55+
alg.alpha !== nothing && push!(modifiers, "alpha = $(alg.alpha)")
4656
return modifiers
4757
end
4858

4959
function set_ad(alg::GeneralBroyden{IJ, UR, CJ}, ad) where {IJ, UR, CJ}
5060
return GeneralBroyden{IJ, UR, CJ}(ad, alg.max_resets, alg.reset_tolerance,
51-
alg.linesearch, alg.inv_alpha)
61+
alg.linesearch, alg.alpha)
5262
end
5363

54-
function GeneralBroyden(; max_resets = 3, linesearch = nothing, reset_tolerance = nothing,
55-
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = 1.0,
64+
function GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing,
65+
init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing,
5666
update_rule = Val(:good_broyden))
5767
UR = _unwrap_val(update_rule)
58-
@assert UR (:good_broyden, :bad_broyden)
68+
@assert UR (:good_broyden, :bad_broyden, :diagonal, :exciting_mixing)
5969
IJ = _unwrap_val(init_jacobian)
6070
@assert IJ (:identity, :true_jacobian)
6171
linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
6272
CJ = IJ === :true_jacobian
6373
return GeneralBroyden{IJ, UR, CJ}(autodiff, max_resets, reset_tolerance, linesearch,
64-
1 / alpha)
74+
alpha)
6575
end
6676

6777
@concrete mutable struct GeneralBroydenCache{iip, IJ, UR} <:
@@ -78,6 +88,8 @@ end
7888
uf
7989
J⁻¹
8090
J⁻¹dfu
91+
inv_alpha
92+
alpha_initial
8193
force_stop::Bool
8294
resets::Int
8395
max_resets::Int
@@ -105,6 +117,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyd
105117
fu = evaluate_f(prob, u)
106118
@bb du = copy(u)
107119

120+
inv_alpha = __initial_inv_alpha(alg_.alpha, u, fu, internalnorm)
121+
108122
if IJ === :true_jacobian
109123
alg = get_concrete_algorithm(alg_, prob)
110124
uf, _, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
@@ -114,7 +128,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyd
114128
alg = alg_
115129
@bb du = similar(u)
116130
uf, fu_cache, jac_cache = nothing, nothing, nothing
117-
J⁻¹ = __init_identity_jacobian(u, fu, alg.inv_alpha)
131+
J⁻¹ = __init_identity_jacobian(u, fu, inv_alpha)
118132
end
119133

120134
reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(real(eltype(u)))) :
@@ -131,9 +145,9 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyd
131145
kwargs...)
132146

133147
return GeneralBroydenCache{iip, IJ, UR}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p,
134-
uf, J⁻¹, J⁻¹dfu, false, 0, alg.max_resets, maxiters, internalnorm,
135-
ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check, jac_cache, prob,
136-
NLStats(1, 0, 0, 0, 0),
148+
uf, J⁻¹, J⁻¹dfu, inv_alpha, alg.alpha, false, 0, alg.max_resets, maxiters,
149+
internalnorm, ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check,
150+
jac_cache, prob, NLStats(1, 0, 0, 0, 0),
137151
init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace)
138152
end
139153

@@ -167,7 +181,9 @@ function perform_step!(cache::GeneralBroydenCache{iip, IJ, UR}) where {iip, IJ,
167181
if IJ === :true_jacobian
168182
cache.J⁻¹ = inv(jacobian!!(cache.J⁻¹, cache))
169183
else
170-
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.alg.inv_alpha)
184+
cache.inv_alpha = __initial_inv_alpha(cache.inv_alpha, cache.alpha_initial,
185+
cache.u, cache.fu, cache.internalnorm)
186+
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.inv_alpha)
171187
end
172188
cache.resets += 1
173189
else
@@ -194,7 +210,9 @@ function perform_step!(cache::GeneralBroydenCache{iip, IJ, UR}) where {iip, IJ,
194210
end
195211

196212
function __reinit_internal!(cache::GeneralBroydenCache; kwargs...)
197-
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.alg.inv_alpha)
213+
cache.inv_alpha = __initial_inv_alpha(cache.inv_alpha, cache.alpha_initial, cache.u,
214+
cache.fu, cache.internalnorm)
215+
cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.inv_alpha)
198216
cache.resets = 0
199217
return nothing
200218
end

src/default.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,9 @@ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothi
248248
TrustRegion(; concrete_jac, linsolve, precs,
249249
radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...))
250250
else
251-
algs = (GeneralKlement(; linsolve, precs),
252-
GeneralBroyden(),
251+
algs = (GeneralBroyden(),
252+
GeneralBroyden(; init_jacobian = Val(:true_jacobian)),
253+
GeneralKlement(; linsolve, precs),
253254
NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...),
254255
NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(),
255256
adkwargs...),

src/klement.jl

Lines changed: 65 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
"""
2-
GeneralKlement(; max_resets = 5, linsolve = nothing, linesearch = nothing,
3-
precs = DEFAULT_PRECS)
2+
GeneralKlement(; max_resets = 100, linsolve = nothing, linesearch = nothing,
3+
precs = DEFAULT_PRECS, alpha = true, init_jacobian::Val = Val(:identity),
4+
autodiff = nothing)
45
56
An implementation of `Klement` with line search, preconditioning and customizable linear
6-
solves.
7+
solves. It is recommended to use `Broyden` for most problems over this.
78
89
## Keyword Arguments
910
10-
- `max_resets`: the maximum number of resets to perform. Defaults to `5`.
11+
- `max_resets`: the maximum number of resets to perform. Defaults to `100`.
12+
1113
- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
1214
linear solves within the Newton method. Defaults to `nothing`, which means it uses the
1315
LinearSolve.jl default algorithm choice. For more information on available algorithm
@@ -19,10 +21,18 @@ solves.
1921
- `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
2022
which means that no line search is performed. Algorithms from `LineSearches.jl` can be
2123
used here directly, and they will be converted to the correct `LineSearch`.
22-
- `init_jacobian`: the method to use for initializing the jacobian. Defaults to using the
23-
identity matrix (`Val(:identitiy)`). Alternatively, can be set to `Val(:true_jacobian)`
24-
to use the true jacobian as initialization. (Our tests suggest it is a good idea to
25-
to initialize with an identity matrix)
24+
- `alpha`: If `init_jacobian` is set to `Val(:identity)`, then the initial Jacobian
25+
inverse is set to be `αI`. Defaults to `1`. Can be set to `nothing` which implies
26+
`α = max(norm(u), 1) / (2 * norm(fu))`.
27+
- `init_jacobian`: the method to use for initializing the jacobian. Defaults to
28+
`Val(:identity)`. Choices include:
29+
30+
+ `Val(:identity)`: Identity Matrix.
31+
+ `Val(:true_jacobian)`: True Jacobian. Our tests suggest that this is not very
32+
stable. Instead using `Broyden` with `Val(:true_jacobian)` gives faster and more
33+
reliable convergence.
34+
+ `Val(:true_jacobian_diagonal)`: Diagonal of True Jacobian. This is a good choice for
35+
differentiable problems.
2636
- `autodiff`: determines the backend used for the Jacobian. Note that this argument is
2737
ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
2838
`nothing` which means that a default is selected according to the problem specification!
@@ -34,32 +44,30 @@ solves.
3444
linsolve
3545
precs
3646
linesearch
47+
alpha
3748
end
3849

39-
function __alg_print_modifiers(::GeneralKlement{IJ}) where {IJ}
50+
function __alg_print_modifiers(alg::GeneralKlement{IJ}) where {IJ}
4051
modifiers = String[]
4152
IJ !== :identity && push!(modifiers, "init_jacobian = :$(IJ)")
53+
alg.alpha !== nothing && push!(modifiers, "alpha = $(alg.alpha)")
4254
return modifiers
4355
end
4456

45-
function set_linsolve(alg::GeneralKlement{IJ, CS}, linsolve) where {IJ, CS}
46-
return GeneralKlement{IJ, CS}(alg.ad, alg.max_resets, linsolve, alg.precs,
47-
alg.linesearch)
48-
end
49-
50-
function set_ad(alg::GeneralKlement{IJ, CS}, ad) where {IJ, CS}
51-
return GeneralKlement{IJ, CS}(ad, alg.max_resets, alg.linsolve, alg.precs,
52-
alg.linesearch)
57+
function set_ad(alg::GeneralKlement{IJ, CJ}, ad) where {IJ, CJ}
58+
return GeneralKlement{IJ, CJ}(ad, alg.max_resets, alg.linsolve, alg.precs,
59+
alg.linesearch, alg.alpha)
5360
end
5461

55-
function GeneralKlement(; max_resets::Int = 5, linsolve = nothing,
62+
function GeneralKlement(; max_resets::Int = 100, linsolve = nothing, alpha = true,
5663
linesearch = nothing, precs = DEFAULT_PRECS, init_jacobian::Val = Val(:identity),
5764
autodiff = nothing)
5865
IJ = _unwrap_val(init_jacobian)
59-
@assert IJ (:identity, :true_jacobian)
66+
@assert IJ (:identity, :true_jacobian, :true_jacobian_diagonal)
6067
linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
61-
CJ = IJ === :true_jacobian
62-
return GeneralKlement{IJ, CJ}(autodiff, max_resets, linsolve, precs, linesearch)
68+
CJ = IJ !== :identity
69+
return GeneralKlement{IJ, CJ}(autodiff, max_resets, linsolve, precs, linesearch,
70+
alpha)
6371
end
6472

6573
@concrete mutable struct GeneralKlementCache{iip, IJ} <: AbstractNonlinearSolveCache{iip}
@@ -79,6 +87,8 @@ end
7987
J_cache_2
8088
Jdu
8189
Jdu_cache
90+
alpha
91+
alpha_initial
8292
resets
8393
force_stop
8494
maxiters::Int
@@ -102,15 +112,23 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme
102112
u = __maybe_unaliased(u0, alias_u0)
103113
fu = evaluate_f(prob, u)
104114

115+
alpha = __initial_alpha(alg_.alpha, u, fu, internalnorm)
116+
105117
if IJ === :true_jacobian
106118
alg = get_concrete_algorithm(alg_, prob)
107119
uf, _, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
108120
lininit = Val(false))
121+
elseif IJ === :true_jacobian_diagonal
122+
alg = get_concrete_algorithm(alg_, prob)
123+
uf, _, J_cache, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
124+
lininit = Val(false))
125+
J = __diag(J_cache)
109126
elseif IJ === :identity
110127
alg = alg_
111128
@bb du = similar(u)
112129
uf, fu_cache, jac_cache = nothing, nothing, nothing
113130
J = one.(u) # Identity Init Jacobian for Klement maintains a Diagonal Structure
131+
@bb J .*= alpha
114132
else
115133
error("Invalid `init_jacobian` value")
116134
end
@@ -133,13 +151,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme
133151
@bb J_cache_2 = similar(J)
134152
@bb Jdu_cache = similar(fu)
135153
else
136-
J_cache, J_cache_2, Jdu_cache = nothing, nothing, nothing
154+
IJ === :identity && (J_cache = nothing)
155+
J_cache_2, Jdu_cache = nothing, nothing
137156
end
138157

139158
return GeneralKlementCache{iip, IJ}(f, alg, u, u_cache, fu, fu_cache, fu_cache_2, du, p,
140-
uf, linsolve, J, J_cache, J_cache_2, Jdu, Jdu_cache, 0, false, maxiters,
141-
internalnorm,
142-
ReturnCode.Default, abstol, reltol, prob, jac_cache, NLStats(1, 0, 0, 0, 0),
159+
uf, linsolve, J, J_cache, J_cache_2, Jdu, Jdu_cache, alpha, alg.alpha, 0, false,
160+
maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, jac_cache,
161+
NLStats(1, 0, 0, 0, 0),
143162
init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace)
144163
end
145164

@@ -150,6 +169,12 @@ function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ}
150169
if IJ === :true_jacobian
151170
cache.stats.nsteps == 0 && (cache.J = jacobian!!(cache.J, cache))
152171
ill_conditioned = __is_ill_conditioned(cache.J)
172+
elseif IJ === :true_jacobian_diagonal
173+
if cache.stats.nsteps == 0
174+
cache.J_cache = jacobian!!(cache.J_cache, cache)
175+
cache.J = __get_diagonal!!(cache.J, cache.J_cache)
176+
end
177+
ill_conditioned = __is_ill_conditioned(cache.J)
153178
elseif IJ === :identity
154179
ill_conditioned = __is_ill_conditioned(cache.J)
155180
end
@@ -162,13 +187,18 @@ function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ}
162187
end
163188
if IJ === :true_jacobian && cache.stats.nsteps != 0
164189
cache.J = jacobian!!(cache.J, cache)
165-
else
166-
cache.J = __reinit_identity_jacobian!!(cache.J)
190+
elseif IJ === :true_jacobian_diagonal && cache.stats.nsteps != 0
191+
cache.J_cache = jacobian!!(cache.J_cache, cache)
192+
cache.J = __get_diagonal!!(cache.J, cache.J_cache)
193+
elseif IJ === :identity
194+
cache.alpha = __initial_alpha(cache.alpha, cache.alpha_initial, cache.u,
195+
cache.fu, cache.internalnorm)
196+
cache.J = __reinit_identity_jacobian!!(cache.J, cache.alpha)
167197
end
168198
cache.resets += 1
169199
end
170200

171-
if IJ === :identity
201+
if cache.J isa AbstractVector || cache.J isa Number
172202
@bb @. cache.du = cache.fu / cache.J
173203
else
174204
# u = u - J \ fu
@@ -193,13 +223,15 @@ function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ}
193223

194224
# Update the Jacobian
195225
@bb cache.du .*= -1
196-
if IJ === :identity
226+
if cache.J isa AbstractVector || cache.J isa Number
197227
@bb @. cache.Jdu = (cache.J^2) * (cache.du^2)
198228
@bb @. cache.J += ((cache.fu - cache.fu_cache_2 - cache.J * cache.du) /
199229
ifelse(iszero(cache.Jdu), T(1e-5), cache.Jdu)) * cache.du *
200230
(cache.J^2)
201231
else
202-
@bb cache.J_cache .= cache.J' .^ 2
232+
# Klement Updates to the Full Jacobian don't work for most problems, we should
233+
# probably be using the Broyden Update Rule here
234+
@bb @. cache.J_cache = cache.J' ^ 2
203235
@bb @. cache.Jdu = cache.du^2
204236
@bb cache.Jdu_cache = cache.J_cache × vec(cache.Jdu)
205237
@bb cache.Jdu = cache.J × vec(cache.du)
@@ -217,7 +249,9 @@ function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ}
217249
end
218250

219251
function __reinit_internal!(cache::GeneralKlementCache; kwargs...)
220-
cache.J = __reinit_identity_jacobian!!(cache.J)
252+
cache.alpha = __initial_alpha(cache.alpha, cache.alpha_initial, cache.u, cache.fu,
253+
cache.internalnorm)
254+
cache.J = __reinit_identity_jacobian!!(cache.J, cache.alpha)
221255
cache.resets = 0
222256
return nothing
223257
end

0 commit comments

Comments
 (0)