Skip to content

Improve formatting of model coefficients in summary method in pymc-based experiments #359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 31 additions & 23 deletions causalpy/pymc_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,29 +102,37 @@ def print_coefficients(self, round_to=None) -> None:
... "progressbar": False
... }),
... )
>>> result.print_coefficients(round_to=1) # doctest: +NUMBER
>>> result.print_coefficients(round_to=1)
Model coefficients:
Intercept 1, 94% HDI [1, 1]
post_treatment[T.True] 1, 94% HDI [0.9, 1]
group 0.2, 94% HDI [0.09, 0.2]
group:post_treatment[T.True] 0.5, 94% HDI [0.4, 0.6]
sigma 0.08, 94% HDI [0.07, 0.1]
Intercept 1, 94% HDI [1, 1]
post_treatment[T.True] 1, 94% HDI [0.9, 1]
group 0.2, 94% HDI [0.09, 0.2]
group:post_treatment[T.True] 0.5, 94% HDI [0.4, 0.6]
sigma 0.08, 94% HDI [0.07, 0.1]
"""

def print_row(
max_label_length: int, name: str, coeff_samples: xr.DataArray, round_to: int
) -> None:
"""Print one row of the coefficient table.

The label is left-aligned and space-padded to ``max_label_length`` so that
the statistics line up across rows. It is followed by the mean of
``coeff_samples`` and its 0.03 and 0.97 quantiles, each passed through
``round_num``.

Parameters
----------
max_label_length : int
    Width to which ``name`` is padded.
name : str
    Label printed at the start of the row.
coeff_samples : xr.DataArray
    Samples to summarise for this row.
round_to : int
    Forwarded unchanged to ``round_num``. The enclosing
    ``print_coefficients`` defaults this to ``None``, so ``None`` can reach
    this helper. NOTE(review): confirm ``round_num`` accepts ``None``.
"""
# Pad the label to a common width so the value column is aligned.
formatted_name = f" {name: <{max_label_length}}"
# NOTE(review): the interval is labelled "94% HDI" but is computed from the
# 0.03 and 0.97 quantiles, i.e. an equal-tailed interval rather than a
# highest-density interval. Confirm the label is intended.
formatted_val = f"{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]" # noqa: E501
print(f" {formatted_name} {formatted_val}")

print("Model coefficients:")
coeffs = az.extract(self.idata.posterior, var_names="beta")
# Note: f"{name: <30}" pads the name with spaces so that we have alignment of
# the stats despite variable names of different lengths

# Determine the width of the longest label
max_label_length = max(len(name) for name in self.labels + ["sigma"])

for name in self.labels:
coeff_samples = coeffs.sel(coeffs=name)
print(
f"{name: <30}{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]" # noqa: E501
)
# add coeff for measurement std
print_row(max_label_length, name, coeff_samples, round_to)

# Add coefficient for measurement std
coeff_samples = az.extract(self.model.idata.posterior, var_names="sigma")
name = "sigma"
print(
f"{name: <30}{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]" # noqa: E501
)
print_row(max_label_length, name, coeff_samples, round_to)


class PrePostFit(ExperimentalDesign, PrePostFitDataValidator):
Expand Down Expand Up @@ -160,13 +168,13 @@ class PrePostFit(ExperimentalDesign, PrePostFitDataValidator):
... }
... ),
... )
>>> result.summary(round_to=1) # doctest: +NUMBER
>>> result.summary(round_to=1)
==================================Pre-Post Fit==================================
Formula: actual ~ 0 + a + g
Model coefficients:
a 0.6, 94% HDI [0.6, 0.6]
g 0.4, 94% HDI [0.4, 0.4]
sigma 0.8, 94% HDI [0.6, 0.9]
a 0.6, 94% HDI [0.6, 0.6]
g 0.4, 94% HDI [0.4, 0.4]
sigma 0.8, 94% HDI [0.6, 0.9]
"""

def __init__(
Expand Down Expand Up @@ -1181,10 +1189,10 @@ class PrePostNEGD(ExperimentalDesign, PrePostNEGDDataValidator):
Results:
Causal impact = 2, $CI_{94%}$[2, 2]
Model coefficients:
Intercept -0.5, 94% HDI [-1, 0.2]
C(group)[T.1] 2, 94% HDI [2, 2]
pre 1, 94% HDI [1, 1]
sigma 0.5, 94% HDI [0.5, 0.6]
Intercept -0.5, 94% HDI [-1, 0.2]
C(group)[T.1] 2, 94% HDI [2, 2]
pre 1, 94% HDI [1, 1]
sigma 0.5, 94% HDI [0.5, 0.6]
"""

def __init__(
Expand Down
14 changes: 14 additions & 0 deletions causalpy/tests/test_integration_pymc_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def test_did():
assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


# TODO: set up fixture for the banks dataset
Expand Down Expand Up @@ -98,6 +99,7 @@ def test_did_banks_simple():
assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -149,6 +151,7 @@ def test_did_banks_multi():
assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand All @@ -174,6 +177,7 @@ def test_rd():
assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand All @@ -200,6 +204,7 @@ def test_rd_bandwidth():
assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -229,6 +234,7 @@ def test_rd_drinking():
assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


def setup_regression_kink_data(kink):
Expand Down Expand Up @@ -281,6 +287,7 @@ def test_rkink():
assert isinstance(result, cp.pymc_experiments.RegressionKink)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand All @@ -307,6 +314,7 @@ def test_rkink_bandwidth():
assert isinstance(result, cp.pymc_experiments.RegressionKink)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -336,6 +344,7 @@ def test_its():
assert isinstance(result, cp.pymc_experiments.SyntheticControl)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -366,6 +375,7 @@ def test_its_covid():
assert isinstance(result, cp.pymc_experiments.InterruptedTimeSeries)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand All @@ -392,6 +402,7 @@ def test_sc():
assert isinstance(result, cp.pymc_experiments.SyntheticControl)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -430,6 +441,7 @@ def test_sc_brexit():
assert isinstance(result, cp.pymc_experiments.SyntheticControl)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand All @@ -455,6 +467,7 @@ def test_ancova():
assert isinstance(result, cp.pymc_experiments.PrePostNEGD)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down Expand Up @@ -485,6 +498,7 @@ def test_geolift1():
assert isinstance(result, cp.pymc_experiments.SyntheticControl)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
result.summary()


@pytest.mark.integration
Expand Down
79 changes: 41 additions & 38 deletions docs/source/notebooks/ancova_pymc.ipynb

Large diffs are not rendered by default.

61 changes: 27 additions & 34 deletions docs/source/notebooks/did_pymc.ipynb

Large diffs are not rendered by default.

130 changes: 58 additions & 72 deletions docs/source/notebooks/did_pymc_banks.ipynb

Large diffs are not rendered by default.

109 changes: 51 additions & 58 deletions docs/source/notebooks/its_pymc.ipynb

Large diffs are not rendered by default.

214 changes: 93 additions & 121 deletions docs/source/notebooks/rd_pymc.ipynb

Large diffs are not rendered by default.

197 changes: 73 additions & 124 deletions docs/source/notebooks/rd_pymc_drinking.ipynb

Large diffs are not rendered by default.

219 changes: 95 additions & 124 deletions docs/source/notebooks/rkink_pymc.ipynb

Large diffs are not rendered by default.

109 changes: 47 additions & 62 deletions docs/source/notebooks/sc_pymc.ipynb

Large diffs are not rendered by default.

101 changes: 42 additions & 59 deletions docs/source/notebooks/sc_pymc_brexit.ipynb

Large diffs are not rendered by default.

Loading