TuringLang · yebai · Jun 3, 2025 · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025
diff --git a/Project.toml b/Project.toml
@@ -54,7 +54,7 @@ Accessors = "0.1"
 AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6"
 AdvancedMH = "0.8"
 AdvancedPS = "0.6.0"
-AdvancedVI = "0.2"
+AdvancedVI = "0.3.1"
 BangBang = "0.4.2"
 Bijectors = "0.14, 0.15"
 Compat = "4.15.0"

diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl
@@ -1,50 +1,171 @@
+
 module Variational
 
-using DistributionsAD: DistributionsAD
-using DynamicPPL: DynamicPPL
-using StatsBase: StatsBase
-using StatsFuns: StatsFuns
-using LogDensityProblems: LogDensityProblems
+using DynamicPPL
+using ADTypes
 using Distributions
+using LinearAlgebra
+using LogDensityProblems
+using Random
+using UnicodePlots
 
-using Random: Random
+import ..Turing: DEFAULT_ADTYPE, PROGRESS
 
 import AdvancedVI
 import Bijectors
 
 # Reexports
-using AdvancedVI: vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
-export vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
-
-"""
-    make_logjoint(model::Model; weight = 1.0)
-Constructs the logjoint as a function of latent variables, i.e. the map z → p(x ∣ z) p(z).
-The weight used to scale the likelihood, e.g. when doing stochastic gradient descent one needs to
-use `DynamicPPL.MiniBatch` context to run the `Model` with a weight `num_total_obs / batch_size`.
-## Notes
-- For sake of efficiency, the returned function is closes over an instance of `VarInfo`. This means that you *might* run into some weird behaviour if you call this method sequentially using different types; if that's the case, just generate a new one for each type using `make_logjoint`.
-"""
-function make_logjoint(model::DynamicPPL.Model; weight=1.0)
-    # setup
+using AdvancedVI: RepGradELBO, ScoreGradELBO, DoG, DoWG
+export vi, RepGradELBO, ScoreGradELBO, DoG, DoWG
+
+export meanfield_gaussian, fullrank_gaussian
+
+include("bijectors.jl")
+
+"""
+    make_logdensity(model::DynamicPPL.Model)
+
+Wrap `model` in a `DynamicPPL.LogDensityFunction`.
+
+The log-density function is built from a fresh `DynamicPPL.VarInfo(model)` and a
+`DynamicPPL.MiniBatchContext` around `DynamicPPL.DefaultContext()` whose weight is
+fixed to `1.0`.
+"""
+function make_logdensity(model::DynamicPPL.Model)
+    weight = 1.0
     ctx = DynamicPPL.MiniBatchContext(DynamicPPL.DefaultContext(), weight)
-    f = DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx)
-    return Base.Fix1(LogDensityProblems.logdensity, f)
+    return DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx)
+end
+
+"""
+    initialize_gaussian_scale(
+        rng, model, location, scale;
+        num_samples=10, num_max_trials=10, reduce_factor=one(eltype(scale)) / 2,
+    )
+
+Shrink `scale` until a location-scale distribution built from `location` and `scale`
+yields a finite average log-density under `model`.
+
+On each trial, `num_samples` draws are taken from
+`AdvancedVI.MvLocationScale(location, scale, Normal())` transformed by the inverse of
+`Bijectors.bijector(model; varinfo)`, and the log-density of `make_logdensity(model)` is
+averaged over them. If the average is finite, the current `scale` is returned; otherwise
+`scale` is multiplied by `reduce_factor` and the trial is repeated.
+
+# Keywords
+- `num_samples::Int`: number of draws used to estimate the average log-density.
+- `num_max_trials::Int`: number of times `scale` may be reduced before giving up.
+- `reduce_factor`: multiplier applied to `scale` after each failed trial.
+
+# Throws
+- `ErrorException` if no trial produced a finite average after `num_max_trials`
+  reductions of `scale`.
+"""
+function initialize_gaussian_scale(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    location::AbstractVector,
+    scale::AbstractMatrix;
+    num_samples::Int=10,
+    num_max_trials::Int=10,
+    reduce_factor=one(eltype(scale)) / 2,
+)
+    prob = make_logdensity(model)
+    ℓπ = Base.Fix1(LogDensityProblems.logdensity, prob)
+    varinfo = DynamicPPL.VarInfo(model)
+    # The bijector only depends on `model` and `varinfo`, neither of which changes
+    # between trials, so it is built once instead of on every iteration.
+    binv = Bijectors.inverse(Bijectors.bijector(model; varinfo=varinfo))
+
+    n_trial = 0
+    while true
+        q = AdvancedVI.MvLocationScale(location, scale, Normal())
+        q_trans = Bijectors.transformed(q, binv)
+        energy = mean(ℓπ, eachcol(rand(rng, q_trans, num_samples)))
+
+        if isfinite(energy)
+            return scale
+        elseif n_trial == num_max_trials
+            error(
+                "Could not find an initial scale for which the average log-density " *
+                "over $(num_samples) samples is finite after $(num_max_trials) " *
+                "scale reductions.",
+            )
+        end
+
+        scale = reduce_factor * scale
+        n_trial += 1
+    end
+end
+
+"""
+    meanfield_gaussian(rng, model, location=nothing, scale=nothing; kwargs...)
+
+Build an `AdvancedVI.MeanFieldGaussian` for `model` and transform it with the inverse of
+`Bijectors.bijector(model; varinfo)`.
+
+The number of parameters is taken from the length of the linked `VarInfo` of `model`.
+
+# Arguments
+- `rng::Random.AbstractRNG`: random number generator used when the scale is initialised.
+- `model::DynamicPPL.Model`: the target model.
+- `location`: location vector; `zeros(num_params)` when `nothing`.
+- `scale`: `Diagonal` scale matrix; when `nothing`, it is obtained from
+  `initialize_gaussian_scale` starting at `Diagonal(ones(num_params))`.
+
+# Keywords
+- `kwargs...`: forwarded to `initialize_gaussian_scale`; only used when `scale` is
+  `nothing`.
+
+# Throws
+- `AssertionError` if `location` or `scale` does not match the number of parameters.
+"""
+function meanfield_gaussian(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    location::Union{Nothing,<:AbstractVector}=nothing,
+    scale::Union{Nothing,<:Diagonal}=nothing;
+    kwargs...,
+)
+    varinfo = DynamicPPL.VarInfo(model)
+    # Use linked `varinfo` to determine the correct number of parameters.
+    # TODO: Replace with `length` once this is implemented for `VarInfo`.
+    varinfo_linked = DynamicPPL.link(varinfo, model)
+    num_params = length(varinfo_linked[:])
+
+    μ = if isnothing(location)
+        zeros(num_params)
+    else
+        @assert length(location) == num_params "Length of the provided location vector, $(length(location)), does not match dimension of the target distribution, $(num_params)."
+        location
+    end
+
+    L = if isnothing(scale)
+        initialize_gaussian_scale(rng, model, μ, Diagonal(ones(num_params)); kwargs...)
+    else
+        @assert size(scale) == (num_params, num_params) "Dimensions of the provided scale matrix, $(size(scale)), does not match the dimension of the target distribution, $(num_params)."
+        # The branch value is what gets bound to `L`; no inner assignment is needed.
+        scale
+    end
+
+    q = AdvancedVI.MeanFieldGaussian(μ, L)
+    b = Bijectors.bijector(model; varinfo=varinfo)
+    return Bijectors.transformed(q, Bijectors.inverse(b))
 end
 
-# objectives
-function (elbo::ELBO)(
+"""
+    meanfield_gaussian(model, location=nothing, scale=nothing; kwargs...)
+
+Forward to `meanfield_gaussian(rng, model, location, scale; kwargs...)` with
+`rng = Random.default_rng()`.
+"""
+function meanfield_gaussian(
+    model::DynamicPPL.Model,
+    location::Union{Nothing,<:AbstractVector}=nothing,
+    scale::Union{Nothing,<:Diagonal}=nothing;
+    kwargs...,
+)
+    rng = Random.default_rng()
+    return meanfield_gaussian(rng, model, location, scale; kwargs...)
+end
+
+"""
+    fullrank_gaussian(rng, model, location=nothing, scale=nothing; kwargs...)
+
+Build an `AdvancedVI.FullRankGaussian` for `model` and transform it with the inverse of
+`Bijectors.bijector(model; varinfo)`.
+
+`location` defaults to `zeros(num_params)`. When `scale` is `nothing`, it is obtained
+from `initialize_gaussian_scale`, starting at a `LowerTriangular` identity matrix and
+receiving `kwargs...`. `num_params` is the length of the linked `VarInfo` of `model`.
+
+Throws an `AssertionError` if `location` or `scale` does not match `num_params`.
+"""
+function fullrank_gaussian(
     rng::Random.AbstractRNG,
-    alg::AdvancedVI.VariationalInference,
-    q,
     model::DynamicPPL.Model,
-    num_samples;
-    weight=1.0,
+    location::Union{Nothing,<:AbstractVector}=nothing,
+    scale::Union{Nothing,<:LowerTriangular}=nothing;
+    kwargs...,
+)
+    varinfo = DynamicPPL.VarInfo(model)
+    # The parameter count is read off the linked `varinfo`.
+    # TODO: Replace with `length` once this is implemented for `VarInfo`.
+    num_params = length(DynamicPPL.link(varinfo, model)[:])
+
+    if isnothing(location)
+        μ = zeros(num_params)
+    else
+        @assert length(location) == num_params "Length of the provided location vector, $(length(location)), does not match dimension of the target distribution, $(num_params)."
+        μ = location
+    end
+
+    if isnothing(scale)
+        identity_scale = LowerTriangular(Matrix{Float64}(I, num_params, num_params))
+        L = initialize_gaussian_scale(rng, model, μ, identity_scale; kwargs...)
+    else
+        @assert size(scale) == (num_params, num_params) "Dimensions of the provided scale matrix, $(size(scale)), does not match the dimension of the target distribution, $(num_params)."
+        L = scale
+    end
+
+    base = AdvancedVI.FullRankGaussian(μ, L)
+    binv = Bijectors.inverse(Bijectors.bijector(model; varinfo=varinfo))
+    return Bijectors.transformed(base, binv)
+end
+
+"""
+    fullrank_gaussian(model, location=nothing, scale=nothing; kwargs...)
+
+Forward to `fullrank_gaussian(rng, model, location, scale; kwargs...)` with
+`rng = Random.default_rng()`.
+
+`scale` must be `nothing` or a `LowerTriangular` matrix, matching the method this call
+forwards to.
+"""
+function fullrank_gaussian(
+    model::DynamicPPL.Model,
+    location::Union{Nothing,<:AbstractVector}=nothing,
+    scale::Union{Nothing,<:LowerTriangular}=nothing;
     kwargs...,
 )
-    return elbo(rng, alg, q, make_logjoint(model; weight=weight), num_samples; kwargs...)
+    return fullrank_gaussian(Random.default_rng(), model, location, scale; kwargs...)
 end
 
-# VI algorithms
-include("advi.jl")
+"""
+    vi(model, q, n_iterations; kwargs...)
+
+Call `AdvancedVI.optimize` on the log-density problem `make_logdensity(model)`, with
+`q` as the variational distribution argument and `n_iterations` as the iteration count.
+
+# Keywords
+- `objective`: defaults to
+  `RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient())`.
+- `show_progress::Bool`: defaults to `PROGRESS[]`.
+- `optimizer`: defaults to `AdvancedVI.DoWG()`.
+- `averager`: defaults to `AdvancedVI.PolynomialAveraging()`.
+- `operator`: defaults to `AdvancedVI.ProximalLocationScaleEntropy()`.
+- `adtype::ADTypes.AbstractADType`: defaults to `DEFAULT_ADTYPE`.
+- `kwargs...`: passed through to `AdvancedVI.optimize` unchanged.
+
+Returns whatever `AdvancedVI.optimize` returns.
+"""
+function vi(
+    model::DynamicPPL.Model,
+    q::Bijectors.TransformedDistribution,
+    n_iterations::Int;
+    objective=RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()),
+    show_progress::Bool=PROGRESS[],
+    optimizer=AdvancedVI.DoWG(),
+    averager=AdvancedVI.PolynomialAveraging(),
+    operator=AdvancedVI.ProximalLocationScaleEntropy(),
+    adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE,
+    kwargs...,
+)
+    prob = make_logdensity(model)
+    return AdvancedVI.optimize(
+        prob,
+        objective,
+        q,
+        n_iterations;
+        show_progress,
+        adtype,
+        optimizer,
+        averager,
+        operator,
+        kwargs...,
+    )
+end
 
 end
diff --git a/src/variational/advi.jl b/src/variational/advi.jl