
Commit e2ef4f1

Merge branch 'main' into ajb/ets_rework

2 parents 3022d7b + 76abbdd commit e2ef4f1

File tree

5 files changed: +382 −27 lines changed

aeon/forecasting/__init__.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -5,9 +5,11 @@
     "BaseForecaster",
     "RegressionForecaster",
     "ETSForecaster",
+    "TVPForecaster",
 ]
 
 from aeon.forecasting._ets import ETSForecaster
 from aeon.forecasting._naive import NaiveForecaster
 from aeon.forecasting._regression import RegressionForecaster
+from aeon.forecasting._tvp import TVPForecaster
 from aeon.forecasting.base import BaseForecaster
```

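With the export above, the new forecaster is importable from the package root. A minimal usage sketch, mirroring the call pattern used in the new test file below; the series here is illustrative, any univariate 1D numpy array works:

```python
import numpy as np

from aeon.forecasting import TVPForecaster

# Illustrative noisy sine series.
rng = np.random.default_rng(0)
y = np.sin(np.linspace(0, 10, 200)) + 0.1 * rng.normal(size=200)

forecaster = TVPForecaster(window=5, horizon=1, var=0.01, beta_var=0.01)
p = forecaster.forecast(y)  # fit on y and return the one-step-ahead forecast
print(p)
```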
aeon/forecasting/_tvp.py

Lines changed: 115 additions & 0 deletions
```python
"""Time-Varying Parameter (TVP) Forecaster using Kalman filter."""

import numpy as np

from aeon.forecasting.base import BaseForecaster


class TVPForecaster(BaseForecaster):
    r"""Time-Varying Parameter (TVP) Forecaster using Kalman filter, as in [1]_.

    This forecaster models the target series using a time-varying linear
    autoregression:

    .. math::

        \hat{y}_t = \beta_{0,t} + \beta_{1,t} y_{t-1} + \dots + \beta_{k,t} y_{t-k}

    where the coefficients :math:`\beta_t` evolve based on the observations
    :math:`y_t`. At each step, a weight vector is calculated based on the latest
    residual. This is used to adjust the :math:`\beta` parameter values and the
    estimate of the parameter variance.

    TVP is related to stochastic gradient descent (SGD) regression, with the
    update weight being the dynamically calculated Kalman gain, based on the
    covariance of the parameters, rather than a fixed learning rate.

    Parameters
    ----------
    window : int
        Number of autoregressive lags to use, called ``window`` for consistency
        with ``RegressionForecaster``.
    var : float, default=0.01
        Observation noise variance. ``var`` controls the influence of recency in
        the update. A small ``var`` (such as the default 0.01) means the
        parameters will be more affected by recent values. A large ``var``
        (e.g., 1.0 or more) means the observations are treated as noisy, so the
        filter adjusts the parameters less to match recent values.
    beta_var : float, default=0.01
        State evolution noise variance, applied to all coefficients at each
        step. A small ``beta_var`` leads to slowly evolving parameters.

    References
    ----------
    .. [1] Durbin & Koopman, Time Series Analysis by State Space Methods,
       Oxford University Press, 2nd Edition, 2012.
    """

    def __init__(self, window, horizon=1, var=0.01, beta_var=0.01):
        self.window = window
        self.var = var
        self.beta_var = beta_var
        super().__init__(axis=1, horizon=horizon)

    def _fit(self, y, exog=None):
        y = y.squeeze()

        # Create autoregressive design matrix
        X = np.lib.stride_tricks.sliding_window_view(y, window_shape=self.window)
        X = X[: -self.horizon]
        ones = np.ones((X.shape[0], 1))
        X = np.hstack([ones, X])  # Add intercept column

        y_train = y[self.window + self.horizon - 1 :]

        # Kalman filter initialisation
        k = X.shape[1]  # number of coefficients (lags + intercept)
        beta = np.zeros(k)
        beta_covariance = np.eye(k)
        beta_var = self.beta_var * np.eye(k)

        for t in range(len(y_train)):
            x_t = X[t]
            y_t = y_train[t]

            # Predict covariance
            beta_covariance = beta_covariance + beta_var

            # Forecast error and Kalman gain
            error_t = y_t - x_t @ beta
            total_variance = x_t @ beta_covariance @ x_t + self.var
            kalman_weight = beta_covariance @ x_t / total_variance

            # Update beta parameters with Kalman weights times error.
            beta = beta + kalman_weight * error_t
            beta_covariance = (
                beta_covariance - np.outer(kalman_weight, x_t) @ beta_covariance
            )

        self._beta = beta
        self._last_window = y[-self.window :]
        self.forecast_ = (
            np.insert(self._last_window, 0, 1.0) @ self._beta
        )  # include intercept
        return self

    def _predict(self, y, exog=None):
        y = y.squeeze()
        x_t = np.insert(y[-self.window :], 0, 1.0)  # include intercept term
        y_hat = x_t @ self._beta
        return y_hat

    @classmethod
    def _get_test_params(cls, parameter_set: str = "default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default='default'
            Name of the parameter set to return.

        Returns
        -------
        dict
            Dictionary of testing parameter settings.
        """
        return {"window": 4}
```
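For reference, the loop in `_fit` is one pass of the standard Kalman filter for a random-walk coefficient model. A sketch of the recursion it implements, writing `P` for `beta_covariance`, `Q` for the `beta_var` matrix and `σ²` for `var`:

```latex
\begin{aligned}
P_{t\mid t-1} &= P_{t-1} + Q                      && \text{predict coefficient covariance} \\
e_t &= y_t - x_t^\top \beta_{t-1}                 && \text{one-step forecast error} \\
S_t &= x_t^\top P_{t\mid t-1}\, x_t + \sigma^2    && \text{total forecast variance} \\
K_t &= P_{t\mid t-1}\, x_t / S_t                  && \text{Kalman gain} \\
\beta_t &= \beta_{t-1} + K_t\, e_t                && \text{coefficient update} \\
P_t &= \left(I - K_t x_t^\top\right) P_{t\mid t-1} && \text{covariance update}
\end{aligned}
```

The last line matches `beta_covariance - np.outer(kalman_weight, x_t) @ beta_covariance` in the code, since `K_t x_tᵀ P` equals `outer(K_t, x_t) @ P`.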

aeon/forecasting/tests/test_tvp.py

Lines changed: 90 additions & 0 deletions
```python
"""Test TVP forecaster.

Tests include convergence properties described in Durbin & Koopman, 2012.
"""

import numpy as np

from aeon.forecasting._tvp import TVPForecaster


def test_direct():
    """Test that forecast matches the first step of direct_forecast."""
    expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
    tvp = TVPForecaster(window=5, horizon=1, var=0.01, beta_var=0.01)
    p = tvp.forecast(expected)
    p2 = tvp.direct_forecast(expected, prediction_horizon=5)
    assert p == p2[0]


def test_static_ar1_convergence_to_ols():
    """Test TVPForecaster converges to the OLS solution for a static AR(1) process."""
    # Simulate AR(1) data with constant parameters
    rng = np.random.RandomState(0)
    true_phi = 0.6
    true_intercept = 2.0
    noise_std = 0.5
    n = 500
    y = np.zeros(n)
    # Initialise y[0] near the steady-state mean to avoid startup bias
    y[0] = true_intercept / (1 - true_phi)
    for t in range(1, n):
        y[t] = true_intercept + true_phi * y[t - 1] + rng.normal(0, noise_std)
    # Fit with beta_var=0 (no parameter drift) and observation variance = noise_var
    forecaster = TVPForecaster(window=1, horizon=1, var=noise_std**2, beta_var=0.0)
    forecaster.fit(y)
    beta_est = forecaster._beta  # [intercept, phi] estimated
    # Compute static OLS estimates for comparison
    X = np.vstack(
        [np.ones(n - 1), y[: n - 1]]
    ).T  # regress y[t] on [1, y[t-1]] for t=1..n-1
    y_resp = y[1:]
    beta_ols, *_ = np.linalg.lstsq(X, y_resp, rcond=None)
    # The TVP forecaster (with no drift) should converge to OLS estimates
    assert beta_est.shape == (2,)
    # Check that estimated parameters are close to OLS solution
    assert np.allclose(beta_est, beta_ols, atol=0.1)
    # Also check they are close to true parameters
    assert abs(beta_est[0] - true_intercept) < 0.2
    assert abs(beta_est[1] - true_phi) < 0.1


def test_tvp_adapts_to_changing_coefficient():
    """Test TVP adapts its parameters when the true AR(1) coefficient changes."""
    rng = np.random.RandomState(42)
    # Piecewise AR(1): phi changes from 0.2 to 0.8 at t=100, intercept remains 1.0
    n = 200
    phi1, phi2 = 0.2, 0.8
    intercept = 1.0
    noise_std = 0.05
    y = np.zeros(n)
    # Start near the mean of the first regime
    y[0] = intercept / (1 - phi1)
    # First half (t=1 to 99) with phi1
    for t in range(1, 100):
        y[t] = intercept + phi1 * y[t - 1] + rng.normal(0, noise_std)
    # Second half (t=100 to 199) with phi2
    for t in range(100, n):
        y[t] = intercept + phi2 * y[t - 1] + rng.normal(0, noise_std)
    # Fit TVPForecaster with nonzero beta_var to allow parameter drift
    forecaster = TVPForecaster(window=1, horizon=1, var=noise_std**2, beta_var=0.1)
    forecaster.fit(y)
    beta_final = forecaster._beta
    # Compute OLS on first and second half segments for reference
    X1 = np.vstack([np.ones(99), y[:99]]).T
    y1 = y[1:100]
    beta1_ols, *_ = np.linalg.lstsq(X1, y1, rcond=None)
    # use points 100..198 to predict 101..199
    X2 = np.vstack([np.ones(n - 101), y[100 : n - 1]]).T
    y2 = y[101:n]
    beta2_ols, *_ = np.linalg.lstsq(X2, y2, rcond=None)
    # The final estimated phi should be much closer to phi2 than phi1
    estimated_intercept, estimated_phi = beta_final[0], beta_final[1]
    # Validate that the phi coefficient increased towards phi2
    assert estimated_phi > 0.5  # moved well above the initial ~0.2
    assert abs(estimated_phi - phi2) < 0.1  # close to the new true phi
    # Validate intercept remains reasonable (around true intercept)
    assert abs(estimated_intercept - intercept) < 0.5
    # Check that final phi is closer to second-half OLS estimate than first-half
    assert abs(estimated_phi - beta2_ols[1]) < abs(estimated_phi - beta1_ols[1])
```
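The first convergence test leans on a standard identity: with `beta_var=0` the coefficients are modelled as constant, and the Kalman recursion reduces to recursive least squares. A sketch of why the estimate approaches OLS, under the priors used in `_fit` (`β₀ = 0`, `P₀ = I`):

```latex
% With Q = 0, the Kalman posterior mean after n observations is the
% regularised least-squares estimate
\beta_n = \left(P_0^{-1} + \sigma^{-2} X^\top X\right)^{-1} \sigma^{-2} X^\top y .
% As n grows, \sigma^{-2} X^\top X dominates the fixed prior term, so
\beta_n \longrightarrow \left(X^\top X\right)^{-1} X^\top y = \hat{\beta}_{\mathrm{OLS}} ,
% which is why allclose(beta_est, beta_ols, atol=0.1) holds at n = 500.
```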

docs/api_reference/forecasting.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -11,4 +11,5 @@
     NaiveForecaster
     RegressionForecaster
     ETSForecaster
+    TVPForecaster
 ```
````
