Skip to content

Commit 4606bdd

Browse files
committed
Add unit tests for HMC InferenceAlgorithm interface
1 parent 6923a72 commit 4606bdd

File tree

4 files changed

+253
-8
lines changed

4 files changed

+253
-8
lines changed

src/mcmc/hmc.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ function update_sample_kwargs(alg::AdaptiveHamiltonian, N::Integer, kwargs)
             _discard_initial = discard_initial
         end
 
-        (nadapts=_nadapts, discard_initial=_discard_initial, kwargs...)
+        # Have to put kwargs first so that the later keyword arguments
+        # override anything that's already inside it.
+        (kwargs..., nadapts=_nadapts, discard_initial=_discard_initial)
     else
-        (nadapts=0, discard_adapt=false, discard_initial=0, kwargs...)
+        (kwargs..., nadapts=0, discard_adapt=false, discard_initial=0)
     end
 end

test/ext/dynamichmc.jl

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,37 @@ using StableRNGs: StableRNG
 using Turing
 
 @testset "TuringDynamicHMCExt" begin
-    @test DynamicPPL.alg_str(Sampler(externalsampler(DynamicHMC.NUTS()))) == "DynamicNUTS"
-
-    rng = StableRNG(468)
     spl = externalsampler(DynamicHMC.NUTS())
-    chn = sample(rng, gdemo_default, spl, 10_000)
-    check_gdemo(chn)
+
+    @testset "alg_str" begin
+        @test DynamicPPL.alg_str(Sampler(spl)) == "DynamicNUTS"
+    end
+
+    @testset "sample() interface" begin
+        @model function demo_normal(x)
+            a ~ Normal()
+            return x ~ Normal(a)
+        end
+        model = demo_normal(2.0)
+        # note: passing LDF to a Hamiltonian sampler requires explicit adtype
+        ldf = LogDensityFunction(model; adtype=AutoForwardDiff())
+        sampling_objects = Dict("DynamicPPL.Model" => model, "LogDensityFunction" => ldf)
+        seed = 468
+        @testset "sampling with $name" for (name, model_or_ldf) in sampling_objects
+            # check sampling works without rng
+            @test sample(model_or_ldf, spl, 5) isa Chains
+            # check reproducibility with rng
+            chn1 = sample(Random.Xoshiro(seed), model_or_ldf, spl, 5)
+            chn2 = sample(Random.Xoshiro(seed), model_or_ldf, spl, 5)
+            @test mean(chn1[:a]) == mean(chn2[:a])
+        end
+    end
+
+    @testset "numerical accuracy" begin
+        rng = StableRNG(468)
+        chn = sample(rng, gdemo_default, spl, 10_000)
+        check_gdemo(chn)
+    end
 end
 
 end

test/mcmc/hmc.jl

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,162 @@ using Turing
 @info "Starting HMC tests"
 seed = 123
 
+@testset "InferenceAlgorithm interface" begin
+    # Check that the various Hamiltonian samplers implement the
+    # Turing.Inference.InferenceAlgorithm interface correctly.
+    algs = [HMC(0.1, 3), HMCDA(0.8, 0.75), NUTS(0.5), NUTS(0, 0.5)]
+
+    @testset "get_adtype" begin
+        # Default
+        for alg in algs
+            @test Turing.Inference.get_adtype(alg) == Turing.DEFAULT_ADTYPE
+        end
+        # Manual
+        for adtype in (AutoReverseDiff(), AutoMooncake(; config=nothing))
+            alg1 = HMC(0.1, 3; adtype=adtype)
+            alg2 = HMCDA(0.8, 0.75; adtype=adtype)
+            alg3 = NUTS(0.5; adtype=adtype)
+            @test Turing.Inference.get_adtype(alg1) == adtype
+            @test Turing.Inference.get_adtype(alg2) == adtype
+            @test Turing.Inference.get_adtype(alg3) == adtype
+        end
+    end
+
+    @testset "requires_unconstrained_space" begin
+        # Hamiltonian samplers always need it
+        for alg in algs
+            @test Turing.Inference.requires_unconstrained_space(alg)
+        end
+    end
+
+    @testset "update_sample_kwargs" begin
+        # Static Hamiltonian
+        static_alg = HMC(0.1, 3)
+        # Adaptive Hamiltonian, where the number of adaptations is
+        # explicitly specified (here 200)
+        adaptive_alg_explicit_nadapts = HMCDA(200, 0.8, 0.75)
+        # Adaptive Hamiltonian, where the number of adaptations is
+        # implicit
+        adaptive_alg_implicit_nadapts = NUTS(0.5)
+
+        # chain length
+        N = 1000
+
+        # convenience function to check NamedTuple equality up to ordering, i.e.
+        # we want (a=1, b=2) to be equal to (b=2, a=1)
+        nt_eq(nt1, nt2) = Dict(pairs(nt1)) == Dict(pairs(nt2))
+
+        # We don't test every single possibility of keyword arguments here,
+        # just some typical cases that reflect common usage.
+
+        # Case 1: no relevant kwargs. The adaptive algorithms need to add
+        # in the number of adaptations and set discard_initial equal to
+        # that. The static algorithm does not need to do anything.
+        kwargs = (; _foo="bar")
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(static_alg, N, kwargs), kwargs
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_explicit_nadapts, N, kwargs
+            ),
+            (nadapts=200, discard_initial=200, _foo="bar"),
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_implicit_nadapts, N, kwargs
+            ),
+            # by default the adaptive algorithm takes N / 2 adaptations, or
+            # 1000, whichever is smaller. In this case since N = 1000, we
+            # expect the number of adaptations to be 500.
+            (nadapts=500, discard_initial=500, _foo="bar"),
+        )
+
+        # Case 2: When resuming from an earlier chain. In this case, no
+        # adaptation is needed.
+        chn = Chains([1.0], [:a])
+        kwargs = (; resume_from=chn)
+        kwargs_without_adapts = (
+            nadapts=0, discard_initial=0, discard_adapt=false, resume_from=chn
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(static_alg, N, kwargs), kwargs
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_explicit_nadapts, N, kwargs
+            ),
+            kwargs_without_adapts,
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_implicit_nadapts, N, kwargs
+            ),
+            kwargs_without_adapts,
+        )
+
+        # Case 3: user manually specifies number of adaptations.
+        kwargs = (; nadapts=500)
+        kwargs_with_adapts = (nadapts=500, discard_initial=500)
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(static_alg, N, kwargs), kwargs
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_explicit_nadapts, N, kwargs
+            ),
+            kwargs_with_adapts,
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_implicit_nadapts, N, kwargs
+            ),
+            kwargs_with_adapts,
+        )
+
+        # Case 4: user wants to keep the adaptations
+        kwargs = (; discard_adapt=false)
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(static_alg, N, kwargs), kwargs
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_explicit_nadapts, N, kwargs
+            ),
+            (nadapts=200, discard_initial=0, discard_adapt=false),
+        )
+        @test nt_eq(
+            Turing.Inference.update_sample_kwargs(
+                adaptive_alg_implicit_nadapts, N, kwargs
+            ),
+            (nadapts=500, discard_initial=0, discard_adapt=false),
+        )
+    end
+end
+
+@testset "sample() interface" begin
+    @model function demo_normal(x)
+        a ~ Normal()
+        return x ~ Normal(a)
+    end
+    model = demo_normal(2.0)
+    # note: passing LDF to a Hamiltonian sampler requires explicit adtype
+    ldf = LogDensityFunction(model; adtype=AutoForwardDiff())
+    sampling_objects = Dict("DynamicPPL.Model" => model, "LogDensityFunction" => ldf)
+    algs = [HMC(0.1, 3), HMCDA(0.8, 0.75), NUTS(0.5)]
+    seed = 468
+    @testset "sampling with $name" for (name, model_or_ldf) in sampling_objects
+        @testset "$alg" for alg in algs
+            # check sampling works without rng
+            @test sample(model_or_ldf, alg, 5) isa Chains
+            # check reproducibility with rng
+            chn1 = sample(Random.Xoshiro(seed), model_or_ldf, alg, 5)
+            chn2 = sample(Random.Xoshiro(seed), model_or_ldf, alg, 5)
+            @test mean(chn1[:a]) == mean(chn2[:a])
+        end
+    end
+end
+
 @testset "constrained bounded" begin
     obs = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]
 
test/mcmc/sghmc.jl

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,73 @@ using DynamicPPL: DynamicPPL
 using Distributions: sample
 import ForwardDiff
 using LinearAlgebra: dot
-import ReverseDiff
+using Random: Xoshiro
 using StableRNGs: StableRNG
 using Test: @test, @testset
 using Turing
 
+@testset "SGHMC + SGLD: InferenceAlgorithm interface" begin
+    algs = [
+        SGHMC(; learning_rate=0.01, momentum_decay=0.1),
+        SGLD(; stepsize=PolynomialStepsize(0.25)),
+    ]
+
+    @testset "get_adtype" begin
+        # Default
+        for alg in algs
+            @test Turing.Inference.get_adtype(alg) == Turing.DEFAULT_ADTYPE
+        end
+        # Manual
+        for adtype in (AutoReverseDiff(), AutoMooncake(; config=nothing))
+            alg1 = SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=adtype)
+            alg2 = SGLD(; stepsize=PolynomialStepsize(0.25), adtype=adtype)
+            @test Turing.Inference.get_adtype(alg1) == adtype
+            @test Turing.Inference.get_adtype(alg2) == adtype
+        end
+    end
+
+    @testset "requires_unconstrained_space" begin
+        # Hamiltonian samplers always need it
+        for alg in algs
+            @test Turing.Inference.requires_unconstrained_space(alg)
+        end
+    end
+
+    @testset "update_sample_kwargs" begin
+        # These don't update kwargs
+        for alg in algs
+            kwargs = (a=1, b=2)
+            @test Turing.Inference.update_sample_kwargs(alg, 1000, kwargs) == kwargs
+        end
+    end
+end
+
+@testset verbose = true "SGHMC + SGLD: sample() interface" begin
+    @model function demo_normal(x)
+        a ~ Normal()
+        return x ~ Normal(a)
+    end
+    model = demo_normal(2.0)
+    # note: passing LDF to a Hamiltonian sampler requires explicit adtype
+    ldf = LogDensityFunction(model; adtype=AutoForwardDiff())
+    sampling_objects = Dict("DynamicPPL.Model" => model, "LogDensityFunction" => ldf)
+    algs = [
+        SGHMC(; learning_rate=0.01, momentum_decay=0.1),
+        SGLD(; stepsize=PolynomialStepsize(0.25)),
+    ]
+    seed = 468
+    @testset "sampling with $name" for (name, model_or_ldf) in sampling_objects
+        @testset "$alg" for alg in algs
+            # check sampling works without rng
+            @test sample(model_or_ldf, alg, 5) isa Chains
+            # check reproducibility with rng
+            chn1 = sample(Xoshiro(seed), model_or_ldf, alg, 5)
+            chn2 = sample(Xoshiro(seed), model_or_ldf, alg, 5)
+            @test mean(chn1[:a]) == mean(chn2[:a])
+        end
+    end
+end
+
 @testset verbose = true "Testing sghmc.jl" begin
     @testset "sghmc constructor" begin
         alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1)

0 commit comments

Comments
 (0)