diff --git a/src/Nonlinear/ReverseAD/forward_over_reverse.jl b/src/Nonlinear/ReverseAD/forward_over_reverse.jl index 571a1a3027..0bd65e2aea 100644 --- a/src/Nonlinear/ReverseAD/forward_over_reverse.jl +++ b/src/Nonlinear/ReverseAD/forward_over_reverse.jl @@ -6,6 +6,15 @@ const TAG = :ReverseAD +""" + const MAX_CHUNK::Int = 10 + +An upper bound on the chunk sie for forward-over-reverse. Increasing this could +improve performance at the cost of extra memory allocation. It has been 10 for a +long time, and nobody seems to have complained. +""" +const MAX_CHUNK = 10 + """ _eval_hessian( d::NLPEvaluator, @@ -23,46 +32,30 @@ Returns the number of non-zeros in the computed Hessian, which will be used to update the offset for the next call. """ function _eval_hessian( - d::NLPEvaluator, - f::_FunctionStorage, - H::AbstractVector{Float64}, - λ::Float64, - offset::Int, -)::Int - chunk = min(size(f.seed_matrix, 2), d.max_chunk) - # As a performance optimization, skip dynamic dispatch if the chunk is 1. - if chunk == 1 - return _eval_hessian_inner(d, f, H, λ, offset, Val(1)) - else - return _eval_hessian_inner(d, f, H, λ, offset, Val(chunk)) - end -end - -function _eval_hessian_inner( d::NLPEvaluator, ex::_FunctionStorage, H::AbstractVector{Float64}, scale::Float64, nzcount::Int, - ::Val{CHUNK}, -) where {CHUNK} +)::Int if ex.linearity == LINEAR @assert length(ex.hess_I) == 0 return 0 end + chunk = min(size(ex.seed_matrix, 2), d.max_chunk) Coloring.prepare_seed_matrix!(ex.seed_matrix, ex.rinfo) # Compute hessian-vector products num_products = size(ex.seed_matrix, 2) # number of hessian-vector products - num_chunks = div(num_products, CHUNK) + num_chunks = div(num_products, chunk) @assert size(ex.seed_matrix, 1) == length(ex.rinfo.local_indices) - for offset in 1:CHUNK:(CHUNK*num_chunks) - _eval_hessian_chunk(d, ex, offset, CHUNK, Val(CHUNK)) + for offset in 1:chunk:(chunk*num_chunks) + _eval_hessian_chunk(d, ex, offset, chunk, chunk) end # leftover chunk - remaining = num_products - CHUNK * num_chunks + remaining = num_products - chunk * num_chunks if remaining > 0 - offset = CHUNK * num_chunks + 1 - _eval_hessian_chunk(d, ex, offset, remaining, Val(CHUNK)) + offset = chunk * num_chunks + 1 + _eval_hessian_chunk(d, ex, offset, remaining, chunk) end want, got = nzcount + length(ex.hess_I), length(H) if want > got @@ -90,32 +83,45 @@ function _eval_hessian_chunk( ex::_FunctionStorage, offset::Int, chunk::Int, - ::Val{CHUNK}, -) where {CHUNK} + chunk_size::Int, +) for r in eachindex(ex.rinfo.local_indices) # set up directional derivatives @inbounds idx = ex.rinfo.local_indices[r] # load up ex.seed_matrix[r,k,k+1,...,k+remaining-1] into input_ϵ for s in 1:chunk - # If `chunk < CHUNK`, leaves junk in the unused components - d.input_ϵ[(idx-1)*CHUNK+s] = ex.seed_matrix[r, offset+s-1] + # If `chunk < chunk_size`, leaves junk in the unused components + d.input_ϵ[(idx-1)*chunk_size+s] = ex.seed_matrix[r, offset+s-1] end end - _hessian_slice_inner(d, ex, Val(CHUNK)) + _hessian_slice_inner(d, ex, chunk_size) fill!(d.input_ϵ, 0.0) # collect directional derivatives for r in eachindex(ex.rinfo.local_indices) @inbounds idx = ex.rinfo.local_indices[r] # load output_ϵ into ex.seed_matrix[r,k,k+1,...,k+remaining-1] for s in 1:chunk - ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*CHUNK+s] + ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*chunk_size+s] end end return end -function _hessian_slice_inner(d, ex, ::Val{CHUNK}) where {CHUNK} - T = ForwardDiff.Partials{CHUNK,Float64} # This is our element type. +# A wrapper function to avoid dynamic dispatch. +function _generate_hessian_slice_inner() + exprs = map(1:MAX_CHUNK) do id + T = ForwardDiff.Partials{id,Float64} + return :(return _hessian_slice_inner(d, ex, $T)) + end + return MOI.Nonlinear._create_binary_switch(1:MAX_CHUNK, exprs) +end + +@eval function _hessian_slice_inner(d, ex, id::Int) + $(_generate_hessian_slice_inner()) + return error("Invalid chunk size: $id") +end + +function _hessian_slice_inner(d, ex, ::Type{T}) where {T} fill!(d.output_ϵ, 0.0) output_ϵ = _reinterpret_unsafe(T, d.output_ϵ) subexpr_forward_values_ϵ = diff --git a/src/Nonlinear/ReverseAD/mathoptinterface_api.jl b/src/Nonlinear/ReverseAD/mathoptinterface_api.jl index 48c8af50a3..1766c25e73 100644 --- a/src/Nonlinear/ReverseAD/mathoptinterface_api.jl +++ b/src/Nonlinear/ReverseAD/mathoptinterface_api.jl @@ -140,8 +140,7 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol}) max_expr_length = max(max_expr_length, length(d.constraints[end].nodes)) max_chunk = max(max_chunk, size(d.constraints[end].seed_matrix, 2)) end - # 10 is hardcoded upper bound to avoid excess memory allocation - max_chunk = min(max_chunk, 10) + max_chunk = min(max_chunk, MAX_CHUNK) max_expr_with_sub_length = max(max_expr_with_sub_length, max_expr_length) if d.want_hess || want_hess_storage d.input_ϵ = zeros(max_chunk * N) diff --git a/test/Nonlinear/ReverseAD.jl b/test/Nonlinear/ReverseAD.jl index 71bc94e59c..b91f89376a 100644 --- a/test/Nonlinear/ReverseAD.jl +++ b/test/Nonlinear/ReverseAD.jl @@ -150,21 +150,13 @@ function test_objective_quadratic_multivariate_subexpressions() MOI.eval_hessian_objective(evaluator, H, val) @test H == [2.0, 2.0, 1.0] @test evaluator.backend.max_chunk == 2 - # The call of `_eval_hessian_inner` from `_eval_hessian` needs dynamic dispatch for `Val(chunk)` so it allocates. - # We call directly `_eval_hessian_inner` to check that the rest does not allocates. - @test 0 == @allocated MOI.Nonlinear.ReverseAD._eval_hessian_inner( - evaluator.backend, - evaluator.backend.objective, - H, - 1.0, - 0, - Val(2), - ) + @test 0 == @allocated MOI.eval_hessian_objective(evaluator, H, val) @test MOI.hessian_lagrangian_structure(evaluator) == [(1, 1), (2, 2), (2, 1)] H = [NaN, NaN, NaN] μ = Float64[] MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ) + @test 0 == @allocated MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ) @test H == 1.5 .* [2.0, 2.0, 1.0] v = [0.3, 0.4] hv = [NaN, NaN] @@ -1393,6 +1385,20 @@ function test_eval_user_defined_operator_type_mismatch() return end +function test_generate_hessian_slice_inner() + # Test that it evaluates without error. The code contents are tested + # elsewhere. + MOI.Nonlinear.ReverseAD._generate_hessian_slice_inner() + d = ex = nothing # These arguments are untyped and not needed for this test + for id in [0, MOI.Nonlinear.ReverseAD.MAX_CHUNK + 1] + @test_throws( + ErrorException("Invalid chunk size: $id"), + MOI.Nonlinear.ReverseAD._hessian_slice_inner(d, ex, id), + ) + end + return +end + end # module TestReverseAD.runtests()