diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py index 37204052..3f9f495a 100644 --- a/causalpy/experiments/diff_in_diff.py +++ b/causalpy/experiments/diff_in_diff.py @@ -104,7 +104,7 @@ def __init__( # fit model if isinstance(self.model, PyMCModel): - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])} self.model.fit(X=self.X, y=self.y, coords=COORDS) elif isinstance(self.model, RegressorMixin): self.model.fit(X=self.X, y=self.y) diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py index 14ec3749..362063c9 100644 --- a/causalpy/experiments/interrupted_time_series.py +++ b/causalpy/experiments/interrupted_time_series.py @@ -110,7 +110,7 @@ def __init__( # fit the model to the observed (pre-intervention) data if isinstance(self.model, PyMCModel): - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.pre_X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.pre_X.shape[0])} self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS) elif isinstance(self.model, RegressorMixin): self.model.fit(X=self.pre_X, y=self.pre_y) diff --git a/causalpy/experiments/prepostnegd.py b/causalpy/experiments/prepostnegd.py index c33d89dc..79c39832 100644 --- a/causalpy/experiments/prepostnegd.py +++ b/causalpy/experiments/prepostnegd.py @@ -113,7 +113,7 @@ def __init__( # fit the model to the observed (pre-intervention) data if isinstance(self.model, PyMCModel): - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])} self.model.fit(X=self.X, y=self.y, coords=COORDS) elif isinstance(self.model, RegressorMixin): raise NotImplementedError("Not implemented for OLS model") diff --git a/causalpy/experiments/regression_discontinuity.py b/causalpy/experiments/regression_discontinuity.py index da4f98aa..82905ec3 100644 --- a/causalpy/experiments/regression_discontinuity.py +++ b/causalpy/experiments/regression_discontinuity.py @@ -124,7 +124,7 @@ def __init__( # fit model if isinstance(self.model, PyMCModel): # fit the model to the observed (pre-intervention) data - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])} self.model.fit(X=self.X, y=self.y, coords=COORDS) elif isinstance(self.model, RegressorMixin): self.model.fit(X=self.X, y=self.y) diff --git a/causalpy/experiments/regression_kink.py b/causalpy/experiments/regression_kink.py index 95ad3fcc..5a124bce 100644 --- a/causalpy/experiments/regression_kink.py +++ b/causalpy/experiments/regression_kink.py @@ -84,7 +84,7 @@ def __init__( self.y, self.X = np.asarray(y), np.asarray(X) self.outcome_variable_name = y.design_info.column_names[0] - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])} self.model.fit(X=self.X, y=self.y, coords=COORDS) # score the goodness of fit to all data diff --git a/causalpy/experiments/synthetic_control.py b/causalpy/experiments/synthetic_control.py index a1bdecbb..31306e2a 100644 --- a/causalpy/experiments/synthetic_control.py +++ b/causalpy/experiments/synthetic_control.py @@ -105,7 +105,7 @@ def __init__( # fit the model to the observed (pre-intervention) data if isinstance(self.model, PyMCModel): - COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.pre_X.shape[0])} + COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.pre_X.shape[0])} self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS) elif isinstance(self.model, RegressorMixin): self.model.fit(X=self.pre_X, y=self.pre_y) diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index c221ef57..f487862e 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -87,9 +87,20 @@ def _data_setter(self, X) -> None: This method is used internally to register new data for the model for prediction. + + NOTE: We are actively changing the `X`. Often, this matrix will have a different + number of rows than the original data. So to make the shapes work, we need to + update all data nodes in the model to have the correct shape. The values are not + used, so we set them to 0. In our case, we just have data nodes X and y, but if + in the future we get more complex models with more data nodes, then we'll need + to update all of them - ideally programmatically. """ + new_no_of_observations = X.shape[0] with self: - pm.set_data({"X": X}) + pm.set_data( + {"X": X, "y": np.zeros(new_no_of_observations)}, + coords={"obs_ind": np.arange(new_no_of_observations)}, + ) def fit(self, X, y, coords: Optional[Dict[str, Any]] = None) -> None: """Draw samples from posterior, prior predictive, and posterior predictive @@ -117,7 +128,6 @@ def predict(self, X): .. caution:: Results in KeyError if model hasn't been fit. - """ # Ensure random_seed is used in sample_prior_predictive() and @@ -206,7 +216,7 @@ class LinearRegression(PyMCModel): >>> lr = LinearRegression(sample_kwargs={"progressbar": False}) >>> lr.fit(X, y, coords={ ... 'coeffs': ['x', 'treated'], - ... 'obs_indx': np.arange(rd.shape[0]) + ... 'obs_ind': np.arange(rd.shape[0]) ... }, ... ) Inference data... @@ -451,7 +461,7 @@ class PropensityScore(PyMCModel): >>> ps = PropensityScore(sample_kwargs={"progressbar": False}) >>> ps.fit(X, t, coords={ ... 'coeffs': ['age', 'race'], - ... 'obs_indx': np.arange(df.shape[0]) + ... 'obs_ind': np.arange(df.shape[0]) ... }, ... ) Inference...