-
Notifications
You must be signed in to change notification settings - Fork 199
Open
Labels
Labels: bug — Something isn't working; improving-tests — Everything to do with the sbi test suite
Description
🐛 Bug Description
I was working on PR #1461 (Issue: #1428). I wanted to run all tests quickly (even if they fail) just to test the fixtures, so I set `num_samples=10` and `steps=5` for posterior sampling in `test_npse_iid_inference`. Everything seems to work fine for the `None` and `gaussian` priors, but something weird happens for `uniform`.
🔄 Steps to Reproduce
Run the tests:
pytest tests/linearGaussian_npse_test.py::test_npse_iid_inference
Full tests code:
# Fast (sde_type, prior_type) combinations exercised on every test run.
DEFAULT_USE = [
    pytest.param(("ve", None), id="ve-None"),
    pytest.param(("vp", "gaussian"), id="vp-gaussian"),
]

# Remaining combinations, only exercised when slow tests are requested.
# The id mirrors the tuple contents, e.g. ("vp", None) -> "vp-None".
SLOW_COMBINATIONS = [
    pytest.param(combo, id=f"{combo[0]}-{combo[1]}", marks=pytest.mark.slow)
    for combo in [
        ("vp", "uniform"),
        ("vp", None),
        ("ve", "gaussian"),
        ("ve", "uniform"),
        ("subvp", "gaussian"),
        ("subvp", "uniform"),
        ("subvp", None),
    ]
]
@pytest.fixture(scope="module")
def npse_trained_model(request):
    """Module-scoped fixture that trains a score estimator for NPSE tests.

    Parametrized indirectly with a ``(sde_type, prior_type)`` tuple, where
    ``prior_type`` is one of ``"gaussian"``, ``"uniform"`` or ``None``.
    Trains once per parameter combination and returns a dict with the trained
    estimator plus the ground-truth task description used by the tests.

    Raises:
        ValueError: If ``prior_type`` is not one of the supported values.
    """
    sde_type, prior_type = request.param
    num_dim = 2
    num_simulations = 5

    # likelihood_mean will be likelihood_shift + theta
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    # Gaussian prior parameters are only defined for the gaussian/None cases;
    # they stay None for the uniform prior.
    prior_mean = None
    prior_cov = None
    if prior_type == "gaussian" or prior_type is None:
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        # Passing `None` as the prior to NPSE checks that our handling of
        # "general" priors works, i.e. when the user does not pass a proper
        # prior.
        prior_npse = prior if prior_type is None else None
    elif prior_type == "uniform":
        prior = BoxUniform(-2 * ones(num_dim), 2 * ones(num_dim))
        prior_npse = prior
    else:
        # Fail loudly instead of raising a confusing NameError below.
        raise ValueError(f"Unsupported prior_type: {prior_type!r}")

    inference = NPSE(prior_npse, show_progress_bars=True, sde_type=sde_type)
    theta = prior.sample((num_simulations,))
    x = linear_gaussian(theta, likelihood_shift, likelihood_cov)
    score_estimator = inference.append_simulations(theta, x).train(
        stop_after_epochs=200
    )
    return {
        "score_estimator": score_estimator,
        "inference": inference,
        "prior": prior,
        "likelihood_shift": likelihood_shift,
        "likelihood_cov": likelihood_cov,
        "prior_mean": prior_mean,
        "prior_cov": prior_cov,
        "num_dim": num_dim,
        "x_o": zeros(num_dim),
        "sde_type": sde_type,
        "prior_type": prior_type,
    }
@pytest.mark.parametrize(
    "npse_trained_model",
    DEFAULT_USE + SLOW_COMBINATIONS,
    indirect=True,  # So pytest passes the params to the fixture.
)
@pytest.mark.parametrize(
    "iid_method, num_trial",
    [
        # NOTE(review): ids fixed to match the actual trial counts — they
        # previously read "fnpe-2trials" and "gauss-6trials" while num_trial
        # was 3 in both cases.
        pytest.param("fnpe", 3, id="fnpe-3trials", marks=pytest.mark.slow),
        pytest.param("gauss", 3, id="gauss-3trials", marks=pytest.mark.slow),
        pytest.param("auto_gauss", 8, id="auto_gauss-8trials"),
        pytest.param(
            "auto_gauss", 16, id="auto_gauss-16trials", marks=pytest.mark.slow
        ),
        pytest.param("jac_gauss", 8, id="jac_gauss-8trials", marks=pytest.mark.slow),
    ],
)
def test_npse_iid_inference(npse_trained_model, iid_method, num_trial):
    """Test whether NPSE infers well a simple example with available ground truth."""
    # NOTE(review): 10 samples and steps=5 are debug-speed settings; likely
    # too small for a meaningful c2st — confirm intended values.
    num_samples = 10

    # Extract data from the module-scoped fixture.
    score_estimator = npse_trained_model["score_estimator"]
    inference = npse_trained_model["inference"]
    prior = npse_trained_model["prior"]
    likelihood_shift = npse_trained_model["likelihood_shift"]
    likelihood_cov = npse_trained_model["likelihood_cov"]
    prior_mean = npse_trained_model["prior_mean"]
    prior_cov = npse_trained_model["prior_cov"]
    num_dim = npse_trained_model["num_dim"]
    sde_type = npse_trained_model["sde_type"]
    prior_type = npse_trained_model["prior_type"]

    x_o = zeros(num_trial, num_dim)
    posterior = inference.build_posterior(score_estimator)
    posterior.set_default_x(x_o)
    samples = posterior.sample((num_samples,), iid_method=iid_method, steps=5)

    if prior_type == "gaussian" or prior_type is None:
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    elif prior_type == "uniform":
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o,
            likelihood_shift,
            likelihood_cov,
            prior,  # type: ignore
        )
    else:
        # Guard against a confusing NameError on `target_samples` below.
        raise ValueError(f"Unsupported prior_type: {prior_type!r}")

    # Compute the c2st and assert it is near chance level of 0.5.
    # Some degradation is expected, also because the posterior gets tighter,
    # which usually makes the c2st worse.
    check_c2st(
        samples,
        target_samples,
        alg=f"npse-{sde_type}-{prior_type}-{num_dim}-{iid_method}-{num_trial}iid-trials",
        tol=0.05 * min(num_trial, 8),
    )
janfb
Metadata
Metadata
Assignees
Labels
Labels: bug — Something isn't working; improving-tests — Everything to do with the sbi test suite