@@ -11,23 +11,88 @@ def __init__(self, fit_intercept=True):
         Notes
         -----
-        Given data matrix *X* and target vector *y*, the maximum-likelihood estimate
-        for the regression coefficients, :math:`\beta`, is:
+        Given data matrix **X** and target vector **y**, the maximum-likelihood
+        estimate for the regression coefficients, :math:`\beta`, is:

         .. math::

-            \hat{\beta} =
-                \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}
+            \hat{\beta} = \Sigma^{-1} \mathbf{X}^\top \mathbf{y}
+
+        where :math:`\Sigma^{-1} = (\mathbf{X}^\top \mathbf{X})^{-1}`.

         Parameters
         ----------
         fit_intercept : bool
-            Whether to fit an additional intercept term in addition to the
-            model coefficients. Default is True.
+            Whether to fit an intercept term in addition to the model
+            coefficients. Default is True.
         """
         self.beta = None
+        self.sigma_inv = None
         self.fit_intercept = fit_intercept

+        self._is_fit = False
+
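(Editorial aside, not part of the diff.) The closed-form estimate in the Notes above is just the normal-equation solution and can be sanity-checked directly with NumPy; the sketch below uses a synthetic, well-conditioned design matrix and compares against `np.linalg.lstsq`:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 3)                    # data matrix
y = rng.randn(50, 1)                    # targets
Xd = np.c_[np.ones(X.shape[0]), X]      # design matrix with an intercept column

sigma_inv = np.linalg.inv(Xd.T @ Xd)    # \Sigma^{-1} = (X^T X)^{-1}
beta_hat = sigma_inv @ Xd.T @ y         # \hat{\beta} = \Sigma^{-1} X^T y

beta_ref, *_ = np.linalg.lstsq(Xd, y, rcond=None)
print(np.allclose(beta_hat, beta_ref))  # True up to numerical error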
+    def update(self, x, y):
+        r"""
+        Incrementally update the least-squares coefficients on a new example
+        via recursive least-squares (RLS) [1]_.
+
+        Notes
+        -----
+        The RLS algorithm [2]_ is used to efficiently update the regression
+        parameters as new examples become available. For a new example
+        :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the parameter updates are
+
+        .. math::
+
+            \beta_{t+1} = \left(
+                \mathbf{X}_{1:t}^\top \mathbf{X}_{1:t} +
+                    \mathbf{x}_{t+1}\mathbf{x}_{t+1}^\top \right)^{-1}
+                        \left( \mathbf{X}_{1:t}^\top \mathbf{Y}_{1:t} +
+                            \mathbf{x}_{t+1}^\top \mathbf{y}_{t+1} \right)
+
+        where :math:`\beta_{t+1}` are the updated regression coefficients, and
+        :math:`\mathbf{X}_{1:t}` and :math:`\mathbf{Y}_{1:t}` are the examples
+        and targets observed from timestep 1 to *t*.
+
+        To perform the above update efficiently, the RLS algorithm makes use of
+        the Sherman-Morrison formula [3]_ to avoid re-inverting the covariance
+        matrix on each new update.
+
+        References
+        ----------
+        .. [1] Gauss, C. F. (1821) _Theoria combinationis observationum
+           erroribus minimis obnoxiae_, Werke, 4. Göttingen
+        .. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
+        .. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+
+        Parameters
+        ----------
+        x : :py:class:`ndarray <numpy.ndarray>` of shape `(1, M)`
+            A single example of rank `M`
+        y : :py:class:`ndarray <numpy.ndarray>` of shape `(1, K)`
+            A `K`-dimensional target vector for the current example
+        """
+        if not self._is_fit:
+            raise RuntimeError("You must call the `fit` method before calling `update`")
+
+        x, y = np.atleast_2d(x), np.atleast_2d(y)
+        beta, S_inv = self.beta, self.sigma_inv
+
+        X1, Y1 = x.shape[0], y.shape[0]
+        err_str = f"First dimension of x and y must be 1, but got {X1} and {Y1}"
+        assert X1 == Y1 == 1, err_str
+
+        # convert x to a design vector if we're fitting an intercept
+        if self.fit_intercept:
+            x = np.c_[1, x]
+
+        # update the inverse of the covariance matrix via Sherman-Morrison
+        S_inv -= (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)
+
+        # update the model coefficients
+        beta += S_inv @ x.T @ (y - x @ beta)
+
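(Editorial aside, not part of the diff.) A quick way to check the Sherman-Morrison/RLS update above is to compare an incremental `update` against refitting from scratch on the augmented data. The sketch assumes the surrounding class is named `LinearRegression`; the two coefficient estimates should agree up to numerical error:

import numpy as np

rng = np.random.RandomState(0)
X, y = rng.randn(100, 3), rng.randn(100, 1)
x_new, y_new = rng.randn(1, 3), rng.randn(1, 1)

# fit on the first 100 examples, then fold in one more via RLS
lm = LinearRegression(fit_intercept=True)
lm.fit(X, y)
lm.update(x_new, y_new)

# refit from scratch on all 101 examples
lm_batch = LinearRegression(fit_intercept=True)
lm_batch.fit(np.vstack([X, x_new]), np.vstack([y, y_new]))

print(np.allclose(lm.beta, lm_batch.beta))  # True: the rank-1 update matches the batch solution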
     def fit(self, X, y):
         """
         Fit the regression coefficients via maximum likelihood.
@@ -44,8 +109,10 @@ def fit(self, X, y):
         if self.fit_intercept:
             X = np.c_[np.ones(X.shape[0]), X]

-        pseudo_inverse = np.linalg.inv(X.T @ X) @ X.T
-        self.beta = np.dot(pseudo_inverse, y)
+        self.sigma_inv = np.linalg.pinv(X.T @ X)
+        self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)
+
+        self._is_fit = True

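(Editorial aside, not part of the diff.) Switching from `inv` to `pinv` also keeps `fit` well-behaved when `X.T @ X` is singular, e.g. with linearly dependent columns; the pseudo-inverse then yields the minimum-norm least-squares solution, the same one `np.linalg.lstsq` returns. A rough sketch:

import numpy as np

rng = np.random.RandomState(1)
X = rng.randn(50, 2)
X = np.c_[X, X[:, :1]]                   # duplicate the first column -> rank-deficient
y = rng.randn(50, 1)
Xd = np.c_[np.ones(X.shape[0]), X]       # design matrix with intercept column

# X.T @ X is exactly singular here, so a plain inverse is undefined;
# pinv still gives the minimum-norm least-squares solution
beta_pinv = np.linalg.pinv(Xd.T @ Xd) @ Xd.T @ y
beta_lstsq, *_ = np.linalg.lstsq(Xd, y, rcond=None)
print(np.allclose(beta_pinv, beta_lstsq))  # True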
     def predict(self, X):
         """
@@ -166,22 +233,22 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
             \left(
                 \sum_{i=0}^N y_i \log(\hat{y}_i) +
                     (1-y_i) \log(1-\hat{y}_i)
-            \right) - R(\mathbf{b}, \gamma)
+                \right) - R(\mathbf{b}, \gamma)
             \right]
-
+
         where
-
+
         .. math::
-
+
             R(\mathbf{b}, \gamma) = \left\{
                 \begin{array}{lr}
                     \frac{\gamma}{2} ||\mathbf{b}||_2^2 & :\texttt{ penalty = 'l2'}\\
                     \gamma ||\mathbf{b}||_1 & :\texttt{ penalty = 'l1'}
                 \end{array}
             \right.
-
-        is a regularization penalty, :math:`\gamma` is a regularization weight,
-        `N` is the number of examples in **y**, and **b** is the vector of model
+
+        is a regularization penalty, :math:`\gamma` is a regularization weight,
+        `N` is the number of examples in **y**, and **b** is the vector of model
         coefficients.

         Parameters
@@ -251,10 +318,10 @@ def _NLL(self, X, y, y_pred):
             \right]
         """
         N, M = X.shape
-        beta, gamma = self.beta, self.gamma
+        beta, gamma = self.beta, self.gamma
         order = 2 if self.penalty == "l2" else 1
         norm_beta = np.linalg.norm(beta, ord=order)
-
+
         nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
         penalty = (gamma / 2) * norm_beta ** 2 if order == 2 else gamma * norm_beta
         return (penalty + nll) / N
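(Editorial aside, not part of the diff.) The return value above is the average penalized NLL over the batch; for an `l2` penalty it can be reproduced by hand from a handful of predicted probabilities. `beta` and `gamma` below are stand-ins for the fitted model attributes:

import numpy as np

y = np.array([1, 0, 1, 1])                       # binary targets
y_pred = np.array([0.9, 0.2, 0.7, 0.6])          # predicted probabilities
beta, gamma = np.array([0.5, -1.0, 0.25]), 0.1   # hypothetical coefficients and penalty weight

nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
l2_penalty = (gamma / 2) * np.linalg.norm(beta, ord=2) ** 2
print((nll + l2_penalty) / y.shape[0])           # average penalized NLL over N = 4 examples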