TuringLang · yebai · Jun 3, 2025 · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025
diff --git a/Project.toml b/Project.toml
@@ -53,7 +53,7 @@ Accessors = "0.1"
 AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6, 0.7"
 AdvancedMH = "0.8"
 AdvancedPS = "0.6.0"
-AdvancedVI = "0.2"
+AdvancedVI = "0.3.1"
 BangBang = "0.4.2"
 Bijectors = "0.14, 0.15"
 Compat = "4.15.0"

diff --git a/src/Turing.jl b/src/Turing.jl
@@ -39,8 +39,6 @@ function setprogress!(progress::Bool)
     @info "[Turing]: progress logging is $(progress ? "enabled" : "disabled") globally"
     PROGRESS[] = progress
     AbstractMCMC.setprogress!(progress; silent=true)
-    # TODO: `AdvancedVI.turnprogress` is removed in AdvancedVI v0.3
-    AdvancedVI.turnprogress(progress)
     return progress
 end
 

diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl
@@ -1,50 +1,149 @@
+
 module Variational
 
-using DistributionsAD: DistributionsAD
-using DynamicPPL: DynamicPPL
-using StatsBase: StatsBase
-using StatsFuns: StatsFuns
-using LogDensityProblems: LogDensityProblems
+using DynamicPPL
+using ADTypes
 using Distributions
+using LinearAlgebra
+using LogDensityProblems
+using Random
 
-using Random: Random
+import ..Turing: DEFAULT_ADTYPE, PROGRESS
 
 import AdvancedVI
 import Bijectors
 
 # Reexports
-using AdvancedVI: vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
-export vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
-
-"""
-    make_logjoint(model::Model; weight = 1.0)
-Constructs the logjoint as a function of latent variables, i.e. the map z → p(x ∣ z) p(z).
-The weight used to scale the likelihood, e.g. when doing stochastic gradient descent one needs to
-use `DynamicPPL.MiniBatch` context to run the `Model` with a weight `num_total_obs / batch_size`.
-## Notes
-- For sake of efficiency, the returned function is closes over an instance of `VarInfo`. This means that you *might* run into some weird behaviour if you call this method sequentially using different types; if that's the case, just generate a new one for each type using `make_logjoint`.
-"""
-function make_logjoint(model::DynamicPPL.Model; weight=1.0)
-    # setup
+using AdvancedVI: RepGradELBO, ScoreGradELBO, DoG, DoWG
+export RepGradELBO, ScoreGradELBO, DoG, DoWG
+
+export vi, q_init, q_meanfield_gaussian, q_fullrank_gaussian
+
+include("bijectors.jl")
+
+function make_logdensity(model::DynamicPPL.Model)  # wrap `model` in a `LogDensityFunction`
+    weight = 1.0  # weight handed to `MiniBatchContext`; hard-coded to 1.0 here
     ctx = DynamicPPL.MiniBatchContext(DynamicPPL.DefaultContext(), weight)
-    f = DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx)
-    return Base.Fix1(LogDensityProblems.logdensity, f)
+    return DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx)
 end
 
-# objectives
-function (elbo::ELBO)(
+function initialize_gaussian_scale(
     rng::Random.AbstractRNG,
-    alg::AdvancedVI.VariationalInference,
-    q,
     model::DynamicPPL.Model,
-    num_samples;
-    weight=1.0,
+    location::AbstractVector,
+    scale::AbstractMatrix;
+    num_samples::Int=10,  # draws used for each Monte Carlo energy estimate
+    num_max_trials::Int=10,  # maximum number of times `scale` may be shrunk
+    reduce_factor=one(eltype(scale)) / 2,  # multiplier applied to `scale` per shrink
+)
+    # Shrink `scale` until the mean log density over draws from the candidate is finite.
+    ℓπ = Base.Fix1(LogDensityProblems.logdensity, make_logdensity(model))
+    varinfo = DynamicPPL.VarInfo(model)
+    b_inv = Bijectors.inverse(Bijectors.bijector(model; varinfo=varinfo))  # loop-invariant
+
+    n_trial = 0
+    while true
+        q = AdvancedVI.MvLocationScale(location, scale, Normal())
+        q_trans = Bijectors.transformed(q, b_inv)
+        energy = mean(ℓπ, eachcol(rand(rng, q_trans, num_samples)))
+
+        if isfinite(energy)
+            return scale
+        elseif n_trial == num_max_trials
+            error("Could not find an initial scale with finite energy after $(num_max_trials) scale reductions.")
+        end
+
+        scale = reduce_factor * scale  # energy not finite: tighten the candidate and retry
+        n_trial += 1
+    end
+end
+
+function q_init(  # build a location-scale family for `model`, mapped through the inverse bijector
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model;
+    location::Union{Nothing,<:AbstractVector}=nothing,  # `nothing` selects a zero vector
+    scale::Union{Nothing,<:Diagonal,<:LowerTriangular}=nothing,  # `nothing` triggers a search
+    meanfield::Bool=true,  # `true` yields a `Diagonal` scale, `false` a full one
+    basedist::Distributions.UnivariateDistribution=Normal(),
     kwargs...,
 )
-    return elbo(rng, alg, q, make_logjoint(model; weight=weight), num_samples; kwargs...)
+    varinfo = DynamicPPL.VarInfo(model)
+    # Use linked `varinfo` to determine the correct number of parameters.
+    # TODO: Replace with `length` once this is implemented for `VarInfo`.
+    varinfo_linked = DynamicPPL.link(varinfo, model)
+    num_params = length(varinfo_linked[:])
+    # Validate user input with explicit throws: `@assert` may be disabled at runtime.
+    μ = if isnothing(location)
+        zeros(num_params)
+    else
+        length(location) == num_params || throw(DimensionMismatch("Length of the provided location vector, $(length(location)), does not match dimension of the target distribution, $(num_params)."))
+        location
+    end
+    # `kwargs` are forwarded to `initialize_gaussian_scale` only when `scale` is `nothing`.
+    L = if isnothing(scale)
+        if meanfield
+            initialize_gaussian_scale(rng, model, μ, Diagonal(ones(num_params)); kwargs...)
+        else
+            L0 = LowerTriangular(Matrix{Float64}(I, num_params, num_params))
+            initialize_gaussian_scale(rng, model, μ, L0; kwargs...)
+        end
+    else
+        size(scale) == (num_params, num_params) || throw(DimensionMismatch("Dimensions of the provided scale matrix, $(size(scale)), does not match the dimension of the target distribution, $(num_params)."))
+        if meanfield
+            Diagonal(diag(scale))
+        else
+            scale
+        end
+    end
+    q = AdvancedVI.MvLocationScale(μ, L, basedist)
+    b = Bijectors.bijector(model; varinfo=varinfo)
+    return Bijectors.transformed(q, Bijectors.inverse(b))
 end
 
-# VI algorithms
-include("advi.jl")
+function q_meanfield_gaussian(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model;
+    location::Union{Nothing,<:AbstractVector}=nothing,  # forwarded to `q_init` unchanged
+    scale::Union{Nothing,<:Diagonal}=nothing,  # forwarded to `q_init` unchanged
+    kwargs...,
+)  # delegates to `q_init` with `meanfield=true` and a `Normal()` base distribution
+    return q_init(rng, model; location=location, scale=scale, meanfield=true, basedist=Normal(), kwargs...)
+end
+
+function q_fullrank_gaussian(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model;
+    location::Union{Nothing,<:AbstractVector}=nothing,  # forwarded to `q_init` unchanged
+    scale::Union{Nothing,<:LowerTriangular}=nothing,  # forwarded to `q_init` unchanged
+    kwargs...,
+)  # delegates to `q_init` with `meanfield=false` and a `Normal()` base distribution
+    return q_init(rng, model; location=location, scale=scale, meanfield=false, basedist=Normal(), kwargs...)
+end
+
+"""
+    vi(model, q, n_iterations; objective, show_progress, optimizer, averager, operator,
+       adtype, kwargs...)
+
+Run `AdvancedVI.optimize` on the log density built from `model` by `make_logdensity`,
+starting from the variational approximation `q`, for `n_iterations` iterations.
+All keyword arguments, including any extra `kwargs`, are forwarded to that call, and
+its result is returned unchanged.
+"""
+function vi(
+    model::DynamicPPL.Model,
+    q::Bijectors.TransformedDistribution,
+    n_iterations::Int;
+    objective=RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()),
+    show_progress::Bool=PROGRESS[],
+    optimizer=AdvancedVI.DoWG(),
+    averager=AdvancedVI.PolynomialAveraging(),
+    operator=AdvancedVI.ProximalLocationScaleEntropy(),
+    adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE,
+    kwargs...,
+)
+    target = make_logdensity(model)
+    settings = (; show_progress, adtype, optimizer, averager, operator)
+    return AdvancedVI.optimize(target, objective, q, n_iterations; settings..., kwargs...)
+end
 
 end
diff --git a/src/variational/advi.jl b/src/variational/advi.jl