From d4f354d3ce70432f07c958870f5f7d891a8a1d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 28 Apr 2025 10:23:26 +0200 Subject: [PATCH 1/8] Remove dynamic dispatch in Hessian evaluation --- .../ReverseAD/forward_over_reverse.jl | 52 +++++++++++-------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index 571a1a3027..60fc7221ee 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -6,35 +6,41 @@ const TAG = :ReverseAD -""" - _eval_hessian( +function _generate_eval_hessian() + exprs = map(1:10) do chunk + return :(return _eval_hessian_inner(d, f, H, λ, offset, Val($chunk))) + end + return Nonlinear._create_binary_switch(1:10, exprs) +end + +@eval begin + """ + _eval_hessian( + d::NLPEvaluator, + f::_FunctionStorage, + H::AbstractVector{Float64}, + λ::Float64, + offset::Int, + )::Int + + Evaluate the hessian matrix of the function `f` and store the result, scaled by + `λ`, in `H`, beginning at element `offset+1`. This function assumes that + `_reverse_mode(d, x)` has already been called. + + Returns the number of non-zeros in the computed Hessian, which will be used to + update the offset for the next call. + """ + function _eval_hessian( d::NLPEvaluator, f::_FunctionStorage, H::AbstractVector{Float64}, λ::Float64, offset::Int, )::Int - -Evaluate the hessian matrix of the function `f` and store the result, scaled by -`λ`, in `H`, beginning at element `offset+1`. This function assumes that -`_reverse_mode(d, x)` has already been called. - -Returns the number of non-zeros in the computed Hessian, which will be used to -update the offset for the next call. -""" -function _eval_hessian( - d::NLPEvaluator, - f::_FunctionStorage, - H::AbstractVector{Float64}, - λ::Float64, - offset::Int, -)::Int - chunk = min(size(f.seed_matrix, 2), d.max_chunk) - # As a performance optimization, skip dynamic dispatch if the chunk is 1. - if chunk == 1 - return _eval_hessian_inner(d, f, H, λ, offset, Val(1)) - else - return _eval_hessian_inner(d, f, H, λ, offset, Val(chunk)) + id = min(size(f.seed_matrix, 2), d.max_chunk) + # As a performance optimization, skip dynamic dispatch if the chunk is 1. + $(_generate_eval_hessian()) + error("Invalid chunk `$id`. Please report this.") end end From 9fa53c2ec2071af1f57619a655671a24627663a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 28 Apr 2025 11:09:49 +0200 Subject: [PATCH 2/8] Test no allocations in tests --- src/Nonlinear/ReverseAD/forward_over_reverse.jl | 6 ++++-- src/Nonlinear/ReverseAD/mathoptinterface_api.jl | 3 +-- test/Nonlinear/ReverseAD.jl | 11 +---------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index 60fc7221ee..2ec522dbc8 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -6,11 +6,13 @@ const TAG = :ReverseAD +const MAX_CHUNK = 10 + function _generate_eval_hessian() - exprs = map(1:10) do chunk + exprs = map(1:MAX_CHUNK) do chunk return :(return _eval_hessian_inner(d, f, H, λ, offset, Val($chunk))) end - return Nonlinear._create_binary_switch(1:10, exprs) + return Nonlinear._create_binary_switch(1:MAX_CHUNK, exprs) end @eval begin diff --git a/src/Nonlinear/ReverseAD/mathoptinterface_api.jl b/src/Nonlinear/ReverseAD/mathoptinterface_api.jl index 48c8af50a3..1766c25e73 100644 --- a/src/Nonlinear/ReverseAD/mathoptinterface_api.jl +++ b/src/Nonlinear/ReverseAD/mathoptinterface_api.jl @@ -140,8 +140,7 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol}) max_expr_length = max(max_expr_length, length(d.constraints[end].nodes)) max_chunk = max(max_chunk, size(d.constraints[end].seed_matrix, 2)) end - # 10 is hardcoded upper bound to avoid excess memory allocation - max_chunk = min(max_chunk, 10) + max_chunk = min(max_chunk, MAX_CHUNK) max_expr_with_sub_length = max(max_expr_with_sub_length, max_expr_length) if d.want_hess || want_hess_storage d.input_ϵ = zeros(max_chunk * N) diff --git a/test/Nonlinear/ReverseAD.jl b/test/Nonlinear/ReverseAD.jl index 71bc94e59c..c0f61b4d78 100644 --- a/test/Nonlinear/ReverseAD.jl +++ b/test/Nonlinear/ReverseAD.jl @@ -150,16 +150,7 @@ function test_objective_quadratic_multivariate_subexpressions() MOI.eval_hessian_objective(evaluator, H, val) @test H == [2.0, 2.0, 1.0] @test evaluator.backend.max_chunk == 2 - # The call of `_eval_hessian_inner` from `_eval_hessian` needs dynamic dispatch for `Val(chunk)` so it allocates. - # We call directly `_eval_hessian_inner` to check that the rest does not allocates. - @test 0 == @allocated MOI.Nonlinear.ReverseAD._eval_hessian_inner( - evaluator.backend, - evaluator.backend.objective, - H, - 1.0, - 0, - Val(2), - ) + @test 0 == @allocated MOI.eval_hessian_objective(evaluator, H, val) @test MOI.hessian_lagrangian_structure(evaluator) == [(1, 1), (2, 2), (2, 1)] H = [NaN, NaN, NaN] From d1880b8e8a33a91a0947b9720aaf2dc4c6c4407e Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 29 Apr 2025 12:48:47 +1200 Subject: [PATCH 3/8] Update --- .../ReverseAD/forward_over_reverse.jl | 101 +++++++++--------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index 2ec522dbc8..ffa9cf1a54 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -8,69 +8,47 @@ const TAG = :ReverseAD const MAX_CHUNK = 10 -function _generate_eval_hessian() - exprs = map(1:MAX_CHUNK) do chunk - return :(return _eval_hessian_inner(d, f, H, λ, offset, Val($chunk))) - end - return Nonlinear._create_binary_switch(1:MAX_CHUNK, exprs) -end - -@eval begin - """ - _eval_hessian( - d::NLPEvaluator, - f::_FunctionStorage, - H::AbstractVector{Float64}, - λ::Float64, - offset::Int, - )::Int - - Evaluate the hessian matrix of the function `f` and store the result, scaled by - `λ`, in `H`, beginning at element `offset+1`. This function assumes that - `_reverse_mode(d, x)` has already been called. - - Returns the number of non-zeros in the computed Hessian, which will be used to - update the offset for the next call. - """ - function _eval_hessian( +""" + _eval_hessian( d::NLPEvaluator, f::_FunctionStorage, H::AbstractVector{Float64}, λ::Float64, offset::Int, )::Int - id = min(size(f.seed_matrix, 2), d.max_chunk) - # As a performance optimization, skip dynamic dispatch if the chunk is 1. - $(_generate_eval_hessian()) - error("Invalid chunk `$id`. Please report this.") - end -end -function _eval_hessian_inner( +Evaluate the hessian matrix of the function `f` and store the result, scaled by +`λ`, in `H`, beginning at element `offset+1`. This function assumes that +`_reverse_mode(d, x)` has already been called. + +Returns the number of non-zeros in the computed Hessian, which will be used to +update the offset for the next call. +""" +function _eval_hessian( d::NLPEvaluator, ex::_FunctionStorage, H::AbstractVector{Float64}, scale::Float64, nzcount::Int, - ::Val{CHUNK}, -) where {CHUNK} +)::Int if ex.linearity == LINEAR @assert length(ex.hess_I) == 0 return 0 end + chunk = min(size(ex.seed_matrix, 2), d.max_chunk) Coloring.prepare_seed_matrix!(ex.seed_matrix, ex.rinfo) # Compute hessian-vector products num_products = size(ex.seed_matrix, 2) # number of hessian-vector products - num_chunks = div(num_products, CHUNK) + num_chunks = div(num_products, chunk) @assert size(ex.seed_matrix, 1) == length(ex.rinfo.local_indices) - for offset in 1:CHUNK:(CHUNK*num_chunks) - _eval_hessian_chunk(d, ex, offset, CHUNK, Val(CHUNK)) + for offset in 1:chunk:(chunk*num_chunks) + _eval_hessian_chunk(d, ex, offset, chunk, chunk) end # leftover chunk - remaining = num_products - CHUNK * num_chunks + remaining = num_products - chunk * num_chunks if remaining > 0 - offset = CHUNK * num_chunks + 1 - _eval_hessian_chunk(d, ex, offset, remaining, Val(CHUNK)) + offset = chunk * num_chunks + 1 + _eval_hessian_chunk(d, ex, offset, remaining, chunk) end want, got = nzcount + length(ex.hess_I), length(H) if want > got @@ -98,32 +76,59 @@ function _eval_hessian_chunk( ex::_FunctionStorage, offset::Int, chunk::Int, - ::Val{CHUNK}, -) where {CHUNK} + chunk_size::Int, +) for r in eachindex(ex.rinfo.local_indices) # set up directional derivatives @inbounds idx = ex.rinfo.local_indices[r] # load up ex.seed_matrix[r,k,k+1,...,k+remaining-1] into input_ϵ for s in 1:chunk - # If `chunk < CHUNK`, leaves junk in the unused components - d.input_ϵ[(idx-1)*CHUNK+s] = ex.seed_matrix[r, offset+s-1] + # If `chunk < chunk_size`, leaves junk in the unused components + d.input_ϵ[(idx-1)*chunk_size+s] = ex.seed_matrix[r, offset+s-1] end end - _hessian_slice_inner(d, ex, Val(CHUNK)) + _hessian_slice_inner(d, ex, chunk_size) fill!(d.input_ϵ, 0.0) # collect directional derivatives for r in eachindex(ex.rinfo.local_indices) @inbounds idx = ex.rinfo.local_indices[r] # load output_ϵ into ex.seed_matrix[r,k,k+1,...,k+remaining-1] for s in 1:chunk - ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*CHUNK+s] + ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*chunk_size+s] end end return end -function _hessian_slice_inner(d, ex, ::Val{CHUNK}) where {CHUNK} - T = ForwardDiff.Partials{CHUNK,Float64} # This is our element type. +# A wrapper function to avoid dynamic dispatch. +function _hessian_slice_inner(d, ex, chunk::Int) + @assert 1 <= chunk <= MAX_CHUNK + @assert MAX_CHUNK == 10 + if chunk == 1 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{1,Float64}) + elseif chunk == 2 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{2,Float64}) + elseif chunk == 3 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{3,Float64}) + elseif chunk == 4 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{4,Float64}) + elseif chunk == 5 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{5,Float64}) + elseif chunk == 6 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{6,Float64}) + elseif chunk == 7 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{7,Float64}) + elseif chunk == 8 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{8,Float64}) + elseif chunk == 9 + _hessian_slice_inner(d, ex, ForwardDiff.Partials{9,Float64}) + else + _hessian_slice_inner(d, ex, ForwardDiff.Partials{10,Float64}) + end + return +end + +function _hessian_slice_inner(d, ex, ::Type{T}) where {T} fill!(d.output_ϵ, 0.0) output_ϵ = _reinterpret_unsafe(T, d.output_ϵ) subexpr_forward_values_ϵ = From 894806306bb0f14a1632c341b35a95d9e4854437 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 29 Apr 2025 14:35:57 +1200 Subject: [PATCH 4/8] Update --- src/Nonlinear/ReverseAD/forward_over_reverse.jl | 7 +++++++ test/Nonlinear/ReverseAD.jl | 1 + 2 files changed, 8 insertions(+) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index ffa9cf1a54..1c6b2cefff 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -6,6 +6,13 @@ const TAG = :ReverseAD +""" + const MAX_CHUNK::Int = 10 + +An upper bound on the chunk sie for forward-over-reverse. Increasing this could +improve performance at the cost of extra memory allocation. It has been 10 for a +long time, and nobody seems to have complained. +""" const MAX_CHUNK = 10 """ diff --git a/test/Nonlinear/ReverseAD.jl b/test/Nonlinear/ReverseAD.jl index c0f61b4d78..55bf555ba8 100644 --- a/test/Nonlinear/ReverseAD.jl +++ b/test/Nonlinear/ReverseAD.jl @@ -156,6 +156,7 @@ function test_objective_quadratic_multivariate_subexpressions() H = [NaN, NaN, NaN] μ = Float64[] MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ) + @test 0 == @allocated MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ) @test H == 1.5 .* [2.0, 2.0, 1.0] v = [0.3, 0.4] hv = [NaN, NaN] From eb6186c105fe90e4068d6a11a99b7b1b8b160773 Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 30 Apr 2025 09:38:29 +1200 Subject: [PATCH 5/8] Update --- .../ReverseAD/forward_over_reverse.jl | 33 +++++-------------- test/Nonlinear/ReverseAD.jl | 14 ++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index 1c6b2cefff..a57083f5ad 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -108,31 +108,16 @@ function _eval_hessian_chunk( end # A wrapper function to avoid dynamic dispatch. -function _hessian_slice_inner(d, ex, chunk::Int) - @assert 1 <= chunk <= MAX_CHUNK - @assert MAX_CHUNK == 10 - if chunk == 1 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{1,Float64}) - elseif chunk == 2 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{2,Float64}) - elseif chunk == 3 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{3,Float64}) - elseif chunk == 4 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{4,Float64}) - elseif chunk == 5 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{5,Float64}) - elseif chunk == 6 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{6,Float64}) - elseif chunk == 7 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{7,Float64}) - elseif chunk == 8 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{8,Float64}) - elseif chunk == 9 - _hessian_slice_inner(d, ex, ForwardDiff.Partials{9,Float64}) - else - _hessian_slice_inner(d, ex, ForwardDiff.Partials{10,Float64}) +function _generate_hessian_slice_inner() + exprs = map(1:MAX_CHUNK) do id + return :(_hessian_slice_inner(d, ex, ForwardDiff.Partials{$id,Float64})) end - return + return _create_binary_switch(1:MAX_CHUNK, exprs) +end + +@eval function _hessian_slice_inner(d, ex, id::Int) + $(_generate_hessian_slice_inner()) + return error("Invalid chunk size: $id") end function _hessian_slice_inner(d, ex, ::Type{T}) where {T} diff --git a/test/Nonlinear/ReverseAD.jl b/test/Nonlinear/ReverseAD.jl index 55bf555ba8..4394dc9618 100644 --- a/test/Nonlinear/ReverseAD.jl +++ b/test/Nonlinear/ReverseAD.jl @@ -1385,6 +1385,20 @@ function test_eval_user_defined_operator_type_mismatch() return end +function test_generate_hessian_slice_inner() + # Test that it evaluates without error. The code contents are tested + # elsewhere. + MOI.Nonlinear.ReverseAD._generate_hessian_slice_inner() + d = ex = nothing # These arguments are untyped and not needed for this test + for id in [0, MAX_CHUNK + 1] + @test_throws( + ErrorException("Invalid chunk size: $id"), + MOI.Nonlinear.ReverseAD._hessian_slice_inner(d, ex, id), + ) + end + return +end + end # module TestReverseAD.runtests() From e353d45c1b381ebb58c11464bd80d10fd70c650e Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 30 Apr 2025 09:40:35 +1200 Subject: [PATCH 6/8] Udpate --- src/Nonlinear/ReverseAD/forward_over_reverse.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index a57083f5ad..e5f79ba050 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -112,7 +112,7 @@ function _generate_hessian_slice_inner() exprs = map(1:MAX_CHUNK) do id return :(_hessian_slice_inner(d, ex, ForwardDiff.Partials{$id,Float64})) end - return _create_binary_switch(1:MAX_CHUNK, exprs) + return MOI.Nonlinear._create_binary_switch(1:MAX_CHUNK, exprs) end @eval function _hessian_slice_inner(d, ex, id::Int) From d9923d61e18597065bd711c4dc0e97826dca038d Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 30 Apr 2025 09:58:33 +1200 Subject: [PATCH 7/8] Update --- src/Nonlinear/ReverseAD/forward_over_reverse.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index e5f79ba050..0bd65e2aea 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -110,7 +110,8 @@ end # A wrapper function to avoid dynamic dispatch. function _generate_hessian_slice_inner() exprs = map(1:MAX_CHUNK) do id - return :(_hessian_slice_inner(d, ex, ForwardDiff.Partials{$id,Float64})) + T = ForwardDiff.Partials{id,Float64} + return :(return _hessian_slice_inner(d, ex, $T)) end return MOI.Nonlinear._create_binary_switch(1:MAX_CHUNK, exprs) end From b2f45296db79905fd5fa780bbe6d0ccb54063fd3 Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 30 Apr 2025 10:33:55 +1200 Subject: [PATCH 8/8] update --- test/Nonlinear/ReverseAD.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Nonlinear/ReverseAD.jl b/test/Nonlinear/ReverseAD.jl index 4394dc9618..b91f89376a 100644 --- a/test/Nonlinear/ReverseAD.jl +++ b/test/Nonlinear/ReverseAD.jl @@ -1390,7 +1390,7 @@ function test_generate_hessian_slice_inner() # elsewhere. MOI.Nonlinear.ReverseAD._generate_hessian_slice_inner() d = ex = nothing # These arguments are untyped and not needed for this test - for id in [0, MAX_CHUNK + 1] + for id in [0, MOI.Nonlinear.ReverseAD.MAX_CHUNK + 1] @test_throws( ErrorException("Invalid chunk size: $id"), MOI.Nonlinear.ReverseAD._hessian_slice_inner(d, ex, id),