Skip to content

Calculate goodness of fit (r2_score) based on posterior expectation rather than posterior prediction #429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions causalpy/pymc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,18 @@ class PyMCModel(pm.Model):
... "chains": 2,
... "draws": 2000,
... "progressbar": False,
... "random_seed": rng,
... "random_seed": 42,
... }
... )
>>> model.fit(X, y)
Inference data...
>>> model.score(X, y) # doctest: +ELLIPSIS
r2 ...
r2_std ...
dtype: float64
>>> X_new = rng.normal(loc=0, scale=1, size=(20,2))
>>> model.predict(X_new)
Inference data...
>>> model.score(X, y)
r2 0.390344
r2_std 0.081135
dtype: float64
"""

def __init__(self, sample_kwargs: Optional[Dict[str, Any]] = None):
Expand Down Expand Up @@ -123,7 +123,6 @@ def predict(self, X):
# Ensure random_seed is used in sample_prior_predictive() and
# sample_posterior_predictive() if provided in sample_kwargs.
random_seed = self.sample_kwargs.get("random_seed", None)

self._data_setter(X)
with self: # sample with new input data
post_pred = pm.sample_posterior_predictive(
Expand All @@ -137,18 +136,19 @@ def predict(self, X):
def score(self, X, y) -> pd.Series:
"""Score the Bayesian :math:`R^2` given inputs ``X`` and outputs ``y``.

Note that the score is based on a comparison of the observed data ``y`` and the
model's expected value of the data, `mu`.

.. caution::

The Bayesian :math:`R^2` is not the same as the traditional coefficient of
determination, https://en.wikipedia.org/wiki/Coefficient_of_determination.

"""
yhat = self.predict(X)
yhat = az.extract(
yhat, group="posterior_predictive", var_names="y_hat"
).T.values
mu = self.predict(X)
mu = az.extract(mu, group="posterior_predictive", var_names="mu").T.values
# Note: First argument must be a 1D array
return r2_score(y.flatten(), yhat)
return r2_score(y.flatten(), mu)

def calculate_impact(self, y_true, y_pred):
pre_data = xr.DataArray(y_true, dims=["obs_ind"])
Expand Down
Loading