From c9347b2134d36e1b88735ef7f289a6ae0720b87b Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Mon, 30 Jun 2025 10:51:50 +0100
Subject: [PATCH 1/7] Rework API for AD testing

---
 HISTORY.md           |   6 +++
 docs/src/api.md      |  15 ++++++
 src/test_utils/ad.jl | 123 ++++++++++++++++++++++++++++---------------
 test/ad.jl           |  10 ++--
 4 files changed, 108 insertions(+), 46 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index d559e6373..f7b950713 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -8,6 +8,12 @@

 The `@submodel` macro is fully removed; please use `to_submodel` instead.

+### `DynamicPPL.TestUtils.AD.run_ad`
+
+The three keyword arguments `test`, `reference_backend`, and `expected_value_and_grad` have been merged into a single `test` keyword argument.
+Please see the API documentation for more details.
+(The old `test=true` and `test=false` values are still valid, and you only need to adjust the invocation if you were explicitly passing the `reference_backend` or `expected_value_and_grad` arguments.)
+
 ### Accumulators

 This release overhauls how VarInfo objects track variables such as the log joint probability. The new approach is to use what we call accumulators: Objects that the VarInfo carries on it that may change their state at each `tilde_assume!!` and `tilde_observe!!` call based on the value of the variable in question. They replace both variables that were previously hard-coded in the `VarInfo` object (`logp` and `num_produce`) and some contexts. This brings with it a number of breaking changes:

diff --git a/docs/src/api.md b/docs/src/api.md
index 886d34a2f..1419ff044 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -211,6 +211,21 @@ To test and/or benchmark the performance of an AD backend on a model, DynamicPPL

 ```@docs
 DynamicPPL.TestUtils.AD.run_ad
+```
+
+The default test setting is to compare against ForwardDiff.
+You can have more fine-grained control over how to test the AD backend using the following types:
+
+```@docs
+DynamicPPL.TestUtils.AD.AbstractADCorrectnessTestSetting
+DynamicPPL.TestUtils.AD.WithBackend
+DynamicPPL.TestUtils.AD.WithExpectedResult
+DynamicPPL.TestUtils.AD.NoTest
+```
+
+These are returned or thrown by the `run_ad` function:
+
+```@docs
 DynamicPPL.TestUtils.AD.ADResult
 DynamicPPL.TestUtils.AD.ADIncorrectException
 ```

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 5285391b1..d01e85110 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -4,14 +4,7 @@ using ADTypes: AbstractADType, AutoForwardDiff
 using Chairmarks: @be
 import DifferentiationInterface as DI
 using DocStringExtensions
-using DynamicPPL:
-    Model,
-    LogDensityFunction,
-    VarInfo,
-    AbstractVarInfo,
-    link,
-    DefaultContext,
-    AbstractContext
+using DynamicPPL: Model, LogDensityFunction, VarInfo, AbstractVarInfo, link
 using LogDensityProblems: logdensity, logdensity_and_gradient
 using Random: Random, Xoshiro
 using Statistics: median
@@ -20,12 +13,48 @@ using Test: @test
 export ADResult, run_ad, ADIncorrectException

 """
-    REFERENCE_ADTYPE
+    AbstractADCorrectnessTestSetting

-Reference AD backend to use for comparison. In this case, ForwardDiff.jl, since
-it's the default AD backend used in Turing.jl.
+Different ways of testing the correctness of an AD backend.
 """
-const REFERENCE_ADTYPE = AutoForwardDiff()
+abstract type AbstractADCorrectnessTestSetting end
+
+"""
+    WithBackend(adtype::AbstractADType=AutoForwardDiff()) <: AbstractADCorrectnessTestSetting
+
+Test correctness by comparing the result against the one obtained with `adtype`.
+
+`adtype` defaults to ForwardDiff.jl, since it's the default AD backend used in
+Turing.jl.
+"""
+struct WithBackend{AD<:AbstractADType} <: AbstractADCorrectnessTestSetting
+    adtype::AD
+end
+WithBackend() = WithBackend(AutoForwardDiff())
+
+"""
+    WithExpectedResult(
+        value::T,
+        grad::AbstractVector{T}
+    ) where {T <: AbstractFloat}
+    <: AbstractADCorrectnessTestSetting
+
+Test correctness by comparing against a known result (e.g. one obtained
+analytically, or one obtained previously with a different backend). Both the
+value of the primal (i.e. the log-density) and its gradient must be supplied.
+"""
+struct WithExpectedResult{T<:AbstractFloat} <: AbstractADCorrectnessTestSetting
+    value::T
+    grad::AbstractVector{T}
+end
+
+"""
+    NoTest() <: AbstractADCorrectnessTestSetting
+
+Disable correctness testing.
+"""
+struct NoTest <: AbstractADCorrectnessTestSetting end

 """
     ADIncorrectException{T<:AbstractFloat}
@@ -84,14 +113,12 @@ end
     run_ad(
         model::Model,
         adtype::ADTypes.AbstractADType;
-        test=true,
+        test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
         benchmark=false,
         value_atol=1e-6,
         grad_atol=1e-6,
         varinfo::AbstractVarInfo=link(VarInfo(model), model),
         params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
-        reference_adtype::ADTypes.AbstractADType=REFERENCE_ADTYPE,
-        expected_value_and_grad::Union{Nothing,Tuple{AbstractFloat,Vector{<:AbstractFloat}}}=nothing,
         verbose=true,
     )::ADResult

Everything else is optional, and can be categorised into several groups:

   prep_params)`. You could then evaluate the gradient at a different set of
   parameters using the `params` keyword argument.

-3. _How to specify the results to compare against._ (Only if `test=true`.)
+3. _How to specify the results to compare against._

   Once logp and its gradient have been calculated with the specified `adtype`,
-   it must be tested for correctness.
+   they can optionally be tested for correctness. The exact way this is tested
+   is specified via the `test` keyword argument.

-   This can be done either by specifying `reference_adtype`, in which case logp
-   and its gradient will also be calculated with this reference in order to
-   obtain the ground truth; or by using `expected_value_and_grad`, which is a
-   tuple of `(logp, gradient)` that the calculated values must match. The
-   latter is useful if you are testing multiple AD backends and want to avoid
-   recalculating the ground truth multiple times.
+   There are several options for this:

-   The default reference backend is ForwardDiff. If none of these parameters are
-   specified, ForwardDiff will be used to calculate the ground truth.
+   - You can explicitly specify the correct value using
+     [`WithExpectedResult()`](@ref).
+   - You can compare against the result obtained with a different AD backend
+     using [`WithBackend(adtype)`](@ref).
+   - You can disable testing by passing [`NoTest()`](@ref).
+   - The default is to compare against the result obtained with ForwardDiff,
+     i.e. `WithBackend(AutoForwardDiff())`.
+   - `test=false` and `test=true` are synonyms for
+     `NoTest()` and `WithBackend(AutoForwardDiff())`, respectively.

-4. _How to specify the tolerances._ (Only if `test=true`.)
+4. _How to specify the tolerances._ (Only if testing is enabled.)

   The tolerances for the value and gradient can be set using `value_atol` and
   `grad_atol`. These default to 1e-6.

@@ -180,48 +210,57 @@ thrown as-is.
function run_ad(
    model::Model,
    adtype::AbstractADType;
-    test::Bool=true,
+    test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
    benchmark::Bool=false,
    value_atol::AbstractFloat=1e-6,
    grad_atol::AbstractFloat=1e-6,
    varinfo::AbstractVarInfo=link(VarInfo(model), model),
    params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
-    reference_adtype::AbstractADType=REFERENCE_ADTYPE,
-    expected_value_and_grad::Union{Nothing,Tuple{AbstractFloat,Vector{<:AbstractFloat}}}=nothing,
    verbose=true,
)::ADResult
+    # Convert Boolean `test` to an AbstractADCorrectnessTestSetting
+    if test isa Bool
+        test = test ? WithBackend() : NoTest()
+    end
+
+    # Extract parameters
    if isnothing(params)
        params = varinfo[:]
    end
    params = map(identity, params) # Concretise

+    # Calculate log-density and gradient with the backend of interest
    verbose && @info "Running AD on $(model.f) with $(adtype)\n"
    verbose && println(" params : $(params)")
    ldf = LogDensityFunction(model, varinfo; adtype=adtype)

    value, grad = logdensity_and_gradient(ldf, params)
+    # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754
    grad = collect(grad)
    verbose && println(" actual : $((value, grad))")

-    if test
-        # Calculate ground truth to compare against
-        value_true, grad_true = if expected_value_and_grad === nothing
-            ldf_reference = LogDensityFunction(model, varinfo; adtype=reference_adtype)
-            logdensity_and_gradient(ldf_reference, params)
-        else
-            expected_value_and_grad
+    # Test correctness
+    if test isa NoTest
+        value_true = nothing
+        grad_true = nothing
+    else
+        # Get the correct result
+        if test isa WithExpectedResult
+            value_true = test.value
+            grad_true = test.grad
+        elseif test isa WithBackend
+            ldf_reference = LogDensityFunction(model, varinfo; adtype=test.adtype)
+            value_true, grad_true = logdensity_and_gradient(ldf_reference, params)
+            # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754
+            grad_true = collect(grad_true)
        end
+        # Perform testing
        verbose && println(" expected : $((value_true, grad_true))")
-        grad_true = collect(grad_true)
-
        exc() = throw(ADIncorrectException(value, value_true, grad, grad_true))
        isapprox(value, value_true; atol=value_atol) || exc()
        isapprox(grad, grad_true; atol=grad_atol) || exc()
-    else
-        value_true = nothing
-        grad_true = nothing
    end

+    # Benchmark
    time_vs_primal = if benchmark
        primal_benchmark = @be (ldf, params) logdensity(_[1], _[2])
        grad_benchmark = @be (ldf, params) logdensity_and_gradient(_[1], _[2])

diff --git a/test/ad.jl b/test/ad.jl
index 0947c017a..7f7bc8739 100644
--- a/test/ad.jl
+++ b/test/ad.jl
@@ -1,4 +1,5 @@
 using DynamicPPL: LogDensityFunction
+using DynamicPPL.TestUtils.AD: run_ad, WithExpectedResult, NoTest

 @testset "Automatic differentiation" begin
     # Used as the ground truth that others are compared against.
@@ -31,9 +32,10 @@ using DynamicPPL: LogDensityFunction
        linked_varinfo = DynamicPPL.link(varinfo, m)
        f = LogDensityFunction(m, linked_varinfo)
        x = DynamicPPL.getparams(f)
+
        # Calculate reference logp + gradient of logp using ForwardDiff
-        ref_ldf = LogDensityFunction(m, linked_varinfo; adtype=ref_adtype)
-        ref_logp, ref_grad = LogDensityProblems.logdensity_and_gradient(ref_ldf, x)
+        ref_ad_result = run_ad(m, ref_adtype; varinfo=linked_varinfo, test=NoTest())
+        ref_logp, ref_grad = ref_ad_result.value_actual, ref_ad_result.grad_actual

        @testset "$adtype" for adtype in test_adtypes
            @info "Testing AD on: $(m.f) - $(short_varinfo_name(linked_varinfo)) - $adtype"
@@ -63,11 +65,11 @@ using DynamicPPL: LogDensityFunction
                        ref_ldf, adtype
                    )
                else
-                    @test DynamicPPL.TestUtils.AD.run_ad(
+                    @test run_ad(
                        m,
                        adtype;
                        varinfo=linked_varinfo,
-                        expected_value_and_grad=(ref_logp, ref_grad),
+                        test=WithExpectedResult(ref_logp, ref_grad),
                    ) isa Any
                end
            end

From 2f8574ea166d8b387908b378f79069f5cfa33370 Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Mon, 30 Jun 2025 11:34:44 +0100
Subject: [PATCH 2/7] Fix test

---
 test/ad.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/ad.jl b/test/ad.jl
index 7f7bc8739..48dffeadb 100644
--- a/test/ad.jl
+++ b/test/ad.jl
@@ -52,17 +52,17 @@ using DynamicPPL.TestUtils.AD: run_ad, WithExpectedResult, NoTest
                if is_mooncake && is_1_11 && is_svi_vnv
                    # https://github.com/compintell/Mooncake.jl/issues/470
                    @test_throws ArgumentError DynamicPPL.LogDensityFunction(
-                        ref_ldf, adtype
+                        m, linked_varinfo; adtype=adtype
                    )
                elseif is_mooncake && is_1_10 && is_svi_vnv
                    # TODO: report upstream
                    @test_throws UndefRefError DynamicPPL.LogDensityFunction(
-                        ref_ldf, adtype
+                        m, linked_varinfo; adtype=adtype
                    )
                elseif is_mooncake && is_1_10 && is_svi_od
                    # TODO: report upstream
                    @test_throws Mooncake.MooncakeRuleCompilationError DynamicPPL.LogDensityFunction(
-                        ref_ldf, adtype
+                        m, linked_varinfo; adtype=adtype
                    )
                else
                    @test run_ad(

From 48464f3f27c2686c0fe42bc353d9fda09e038ca9 Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Mon, 30 Jun 2025 11:42:44 +0100
Subject: [PATCH 3/7] Add `rng` keyword argument

---
 src/test_utils/ad.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index d01e85110..db19d25d2 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -6,7 +6,7 @@ import DifferentiationInterface as DI
 using DocStringExtensions
 using DynamicPPL: Model, LogDensityFunction, VarInfo, AbstractVarInfo, link
 using LogDensityProblems: logdensity, logdensity_and_gradient
-using Random: Random, Xoshiro
+using Random: AbstractRNG, default_rng
 using Statistics: median
 using Test: @test
@@ -160,8 +160,8 @@ Everything else is optional, and can be categorised into several groups:

   Note that if the VarInfo is not specified (and thus automatically generated),
   the parameters in it will have been sampled from the prior of the model. If
-   you want to seed the parameter generation, the easiest way is to pass a
-   `rng` argument to the VarInfo constructor (i.e. do `VarInfo(rng, model)`).
+   you want to seed the parameter generation for the VarInfo, you can pass the
+   `rng` keyword argument, which will then be used to create the VarInfo.

   Finally, note that these only reflect the parameters used for _evaluating_ the gradient.
If you also want to control the parameters used for
@@ -214,7 +214,8 @@ function run_ad(
    benchmark::Bool=false,
    value_atol::AbstractFloat=1e-6,
    grad_atol::AbstractFloat=1e-6,
-    varinfo::AbstractVarInfo=link(VarInfo(model), model),
+    rng::AbstractRNG=default_rng(),
+    varinfo::AbstractVarInfo=link(VarInfo(rng, model), model),
    params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
    verbose=true,
)::ADResult

From 6da8d57eecb8a0498ada5a4f75841d811c837424 Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Mon, 30 Jun 2025 11:52:41 +0100
Subject: [PATCH 4/7] Use atol and rtol

---
 HISTORY.md           |  6 ++++++
 src/test_utils/ad.jl | 44 ++++++++++++++++++++++++++------------------
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index f7b950713..eb2236e15 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -14,6 +14,12 @@ The three keyword arguments `test`, `reference_backend`, and `expected_value_an
 Please see the API documentation for more details.
 (The old `test=true` and `test=false` values are still valid, and you only need to adjust the invocation if you were explicitly passing the `reference_backend` or `expected_value_and_grad` arguments.)

+There is now also an `rng` keyword argument to help seed parameter generation.
+
+Finally, instead of specifying `value_atol` and `grad_atol`, you can now specify `atol` and `rtol`, which are used for both the value and the gradient.
+Their semantics are the same as in Julia's `isapprox`; two values are considered equal if they satisfy either `atol` or `rtol`.
+Note that gradients are always compared elementwise (instead of using the norm, which is what `isapprox` does).
+
 ### Accumulators

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index db19d25d2..1732fe833 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -10,7 +10,7 @@ using Random: AbstractRNG, default_rng
 using Statistics: median
 using Test: @test

-export ADResult, run_ad, ADIncorrectException
+export ADResult, run_ad, ADIncorrectException, WithBackend, WithExpectedResult, NoTest

 """
     AbstractADCorrectnessTestSetting
@@ -74,17 +74,18 @@ struct ADIncorrectException{T<:AbstractFloat}
 end

 """
-    ADResult{Tparams<:AbstractFloat,Tresult<:AbstractFloat}
+    ADResult{Tparams<:AbstractFloat,Tresult<:AbstractFloat,Ttol<:AbstractFloat}

 Data structure to store the results of the AD correctness test.

 The type parameter `Tparams` is the numeric type of the parameters passed in;
-`Tresult` is the type of the value and the gradient.
+`Tresult` is the type of the value and the gradient; and `Ttol` is the type of the
+absolute and relative tolerances used for correctness testing.
# Fields

$(TYPEDFIELDS)
"""
struct ADResult{Tparams<:AbstractFloat,Tresult<:AbstractFloat,Ttol<:AbstractFloa
    "The DynamicPPL model that was tested"
    model::Model
    "The VarInfo that was used"
    varinfo::AbstractVarInfo
    "The values at which the model was evaluated"
    params::Vector{Tparams}
    "The AD backend that was tested"
    adtype::AbstractADType
-    "The absolute tolerance for the value of logp"
-    value_atol::Tresult
-    "The absolute tolerance for the gradient of logp"
-    grad_atol::Tresult
+    "Absolute tolerance used for correctness test"
+    atol::Ttol
+    "Relative tolerance used for correctness test"
+    rtol::Ttol
    "The expected value of logp"
    value_expected::Union{Nothing,Tresult}
    "The expected gradient of logp"
@@ -115,8 +116,8 @@ end
        adtype::ADTypes.AbstractADType;
        test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
        benchmark=false,
-        value_atol=1e-6,
-        grad_atol=1e-6,
+        atol::AbstractFloat=1e-8,
+        rtol::AbstractFloat=sqrt(eps()),
        varinfo::AbstractVarInfo=link(VarInfo(model), model),
        params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
        verbose=true,
    )::ADResult

@@ -190,8 +191,13 @@ Everything else is optional, and can be categorised into several groups:
 4. _How to specify the tolerances._ (Only if testing is enabled.)

-   The tolerances for the value and gradient can be set using `value_atol` and
-   `grad_atol`. These default to 1e-6.
+   Both absolute and relative tolerances can be specified using the `atol` and
+   `rtol` keyword arguments respectively. The behaviour of these is similar to
+   `isapprox()`, i.e. the value and gradient are considered correct if either
+   `atol` or `rtol` is satisfied. The default values are `1e-8` for `atol` and
+   `sqrt(eps())` for `rtol`.
+
+   Note that gradients are always compared elementwise.

 5. _Whether to output extra logging information._

@@ -218,7 +220,7 @@ function run_ad(
    adtype::AbstractADType;
    test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
    benchmark::Bool=false,
-    value_atol::AbstractFloat=1e-6,
-    grad_atol::AbstractFloat=1e-6,
+    atol::AbstractFloat=1e-8,
+    rtol::AbstractFloat=sqrt(eps()),
    rng::AbstractRNG=default_rng(),
    varinfo::AbstractVarInfo=link(VarInfo(rng, model), model),
    params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
    verbose=true,
)::ADResult

@@ -257,8 +263,10 @@ function run_ad(
        # Perform testing
        verbose && println(" expected : $((value_true, grad_true))")
        exc() = throw(ADIncorrectException(value, value_true, grad, grad_true))
-        isapprox(value, value_true; atol=value_atol) || exc()
-        isapprox(grad, grad_true; atol=grad_atol) || exc()
+        isapprox(value, value_true; atol=atol, rtol=rtol) || exc()
+        for (g, g_true) in zip(grad, grad_true)
+            isapprox(g, g_true; atol=atol, rtol=rtol) || exc()
+        end
    end

    # Benchmark
@@ -277,8 +285,8 @@ function run_ad(
        varinfo,
        params,
        adtype,
-        value_atol,
-        grad_atol,
+        atol,
+        rtol,
        value_true,
        grad_true,
        value,

From 3587ce5472fe8c7a207863c740579eadfa1cf2e5 Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Mon, 30 Jun 2025 13:03:29 +0100
Subject: [PATCH 5/7] remove unbound type parameter (?)
---
 src/test_utils/ad.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 1732fe833..49158ed3c 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -103,9 +103,9 @@ struct ADResult{Tparams<:AbstractFloat,Tresult<:AbstractFloat,Ttol<:AbstractFloa
    "The expected gradient of logp"
    grad_expected::Union{Nothing,Vector{Tresult}}
    "The value of logp (calculated using `adtype`)"
-    value_actual::Union{Nothing,Tresult}
+    value_actual::Tresult
    "The gradient of logp (calculated using `adtype`)"
-    grad_actual::Union{Nothing,Vector{Tresult}}
+    grad_actual::Vector{Tresult}
    "If benchmarking was requested, the time taken by the AD backend to calculate the gradient of logp, divided by the time taken to evaluate logp itself"
    time_vs_primal::Union{Nothing,Tresult}
end

From e1043aeeaf13818bc67cd1a808a76b6f2dbd8454 Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Thu, 3 Jul 2025 17:29:44 +0100
Subject: [PATCH 6/7] Don't need to do elementwise check

---
 src/test_utils/ad.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 49158ed3c..155f3b68d 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -194,10 +194,12 @@ Everything else is optional, and can be categorised into several groups:
   Both absolute and relative tolerances can be specified using the `atol` and
   `rtol` keyword arguments respectively. The behaviour of these is similar to
   `isapprox()`, i.e. the value and gradient are considered correct if either
-   `atol` or `rtol` is satisfied. The default values are `1e-8` for `atol` and
+   `atol` or `rtol` is satisfied. The default values are `100*eps()` for `atol` and
   `sqrt(eps())` for `rtol`.

-   Note that gradients are always compared elementwise.
+   For the most part, it is the `rtol` check that is more meaningful, because
+   we cannot know the magnitude of logp and its gradient a priori. The `atol`
+   value is supplied to handle the case where gradients are equal to zero.

 5. _Whether to output extra logging information._

@@ -218,7 +220,7 @@ function run_ad(
    adtype::AbstractADType;
    test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
    benchmark::Bool=false,
-    atol::AbstractFloat=1e-8,
+    atol::AbstractFloat=100 * eps(),
    rtol::AbstractFloat=sqrt(eps()),
    rng::AbstractRNG=default_rng(),
    varinfo::AbstractVarInfo=link(VarInfo(rng, model), model),
    params::Union{Nothing,Vector{<:AbstractFloat}}=nothing,
    verbose=true,
)::ADResult

@@ -264,9 +266,7 @@ function run_ad(
        verbose && println(" expected : $((value_true, grad_true))")
        exc() = throw(ADIncorrectException(value, value_true, grad, grad_true))
        isapprox(value, value_true; atol=atol, rtol=rtol) || exc()
-        for (g, g_true) in zip(grad, grad_true)
-            isapprox(g, g_true; atol=atol, rtol=rtol) || exc()
-        end
+        isapprox(grad, grad_true; atol=atol, rtol=rtol) || exc()
    end

    # Benchmark

From be36626725a9e2aa60bbb1d83b5f024befb9e88e Mon Sep 17 00:00:00 2001
From: Penelope Yong
Date: Thu, 3 Jul 2025 17:31:15 +0100
Subject: [PATCH 7/7] Update changelog

---
 HISTORY.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/HISTORY.md b/HISTORY.md
index eb2236e15..96c4465ba 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -18,7 +18,6 @@ There is now also an `rng` keyword argument to help seed parameter generation.

 Finally, instead of specifying `value_atol` and `grad_atol`, you can now specify `atol` and `rtol`, which are used for both the value and the gradient.
 Their semantics are the same as in Julia's `isapprox`; two values are considered equal if they satisfy either `atol` or `rtol`.
-Note that gradients are always compared elementwise (instead of using the norm, which is what `isapprox` does).

 ### Accumulators
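For reference, a minimal sketch of how the reworked API can be exercised once this series is applied. The `demo` model, the seed, and the choice of ReverseDiff as the backend under test are illustrative assumptions, not part of the patches above:

```julia
using DynamicPPL, Distributions
using DynamicPPL.TestUtils.AD: run_ad, WithBackend, WithExpectedResult, NoTest
using ADTypes: AutoForwardDiff, AutoReverseDiff
using Random: Xoshiro
import ForwardDiff, ReverseDiff  # backend packages must be loaded for DifferentiationInterface

# A hypothetical model to test against.
@model function demo()
    x ~ Normal()
    1.5 ~ Normal(x)
end

m = demo()

# Default behaviour: correctness is tested against ForwardDiff,
# i.e. test=WithBackend(AutoForwardDiff()).
run_ad(m, AutoReverseDiff())

# Compute the reference value and gradient once (no test), then reuse
# them for another backend via WithExpectedResult.
ref = run_ad(m, AutoForwardDiff(); test=NoTest(), rng=Xoshiro(468))
run_ad(
    m,
    AutoReverseDiff();
    test=WithExpectedResult(ref.value_actual, ref.grad_actual),
    rng=Xoshiro(468),
)

# Tolerances follow `isapprox` semantics: a comparison passes if either
# `atol` or `rtol` is satisfied.
run_ad(m, AutoReverseDiff(); atol=1e-10, rtol=1e-6)
```

Passing the same `rng` seed to both calls keeps the evaluation parameters identical between the reference run and the tested backend; alternatively, pass the same `varinfo` to both calls explicitly, as the updated `test/ad.jl` does.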