+ #![allow(clippy::needless_range_loop)]
//! # Elastic Net
//!
+ //! Elastic net is an extension of [linear regression](../linear_regression/index.html) that adds regularization penalties to the loss function during training.
+ //! Just like in ordinary linear regression, you assume a linear relationship between the input variables and the target variable.
+ //! Unlike linear regression, however, elastic net shrinks the model coefficients by penalizing their magnitude.
+ //! In particular, the elastic net coefficient estimates \\(\beta\\) are the values that minimize
+ //!
+ //! \\[L(\alpha, \beta) = \vert \boldsymbol{y} - \boldsymbol{X}\beta\vert^2 + \lambda_1 \vert \beta \vert_1 + \lambda_2 \vert \beta \vert^2\\]
+ //!
+ //! where \\(\lambda_1 = \alpha l_{1r}\\), \\(\lambda_2 = \alpha (1 - l_{1r})\\), and \\(l_{1r}\\) is the l1 ratio, the elastic net mixing parameter.
+ //!
+ //! In essence, elastic net combines both the [L1](../lasso/index.html) and [L2](../ridge_regression/index.html) penalties during training,
+ //! which can result in better performance than a model with either penalty alone on some problems.
+ //! The elastic net is particularly useful when the number of predictors (p) is much bigger than the number of observations (n).
+ //!
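+ //! For example, with \\(\alpha = 0.5\\) and \\(l_{1r} = 0.5\\) the two weights come out equal, \\(\lambda_1 = 0.5 \cdot 0.5 = 0.25\\) and \\(\lambda_2 = 0.5 \cdot (1 - 0.5) = 0.25\\), so the L1 and L2 penalties contribute evenly.
+ //!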
+ //! Example:
+ //!
+ //! ```
+ //! use smartcore::linalg::naive::dense_matrix::*;
+ //! use smartcore::linear::elastic_net::*;
+ //!
+ //! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
+ //! let x = DenseMatrix::from_2d_array(&[
+ //!     &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+ //!     &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
+ //!     &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
+ //!     &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
+ //!     &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
+ //!     &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
+ //!     &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
+ //!     &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
+ //!     &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
+ //!     &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
+ //!     &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
+ //!     &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
+ //!     &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
+ //!     &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
+ //!     &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
+ //!     &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
+ //! ]);
+ //!
+ //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
+ //!     100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
+ //!
+ //! let y_hat = ElasticNet::fit(&x, &y, Default::default())
+ //!     .and_then(|lr| lr.predict(&x)).unwrap();
+ //! ```
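+ //!
+ //! The defaults can also be overridden. A minimal sketch, assuming only the public
+ //! fields of `ElasticNetParameters` declared below (the values are illustrative,
+ //! not tuned recommendations):
+ //!
+ //! ```
+ //! use smartcore::linear::elastic_net::*;
+ //!
+ //! let params = ElasticNetParameters {
+ //!     alpha: 0.5,      // overall regularization strength
+ //!     l1_ratio: 0.5,   // even mix of the L1 and L2 penalties
+ //!     normalize: true, // center and scale the columns of X before fitting
+ //!     tol: 1e-4,       // convergence tolerance for the optimizer
+ //!     max_iter: 1000,  // cap on optimizer iterations
+ //! };
+ //! // `params` would then replace `Default::default()` in the call to `ElasticNet::fit`.
+ //! ```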
//!
//! ## References:
//!
@@ -19,17 +65,24 @@ use crate::math::num::RealNumber;

use crate::linear::lasso_optimizer::InteriorPointOptimizer;

- /// Ridge Regression parameters
+ /// Elastic net parameters
#[derive(Serialize, Deserialize, Debug)]
pub struct ElasticNetParameters<T: RealNumber> {
+     /// Regularization parameter.
    pub alpha: T,
+     /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
+     /// For l1_ratio = 0 the penalty is an L2 penalty.
+     /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub l1_ratio: T,
+     /// If true, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub normalize: bool,
+     /// The tolerance for the optimization.
    pub tol: T,
+     /// The maximum number of iterations.
    pub max_iter: usize,
}

- /// Ridge regression
+ /// Elastic net
#[derive(Serialize, Deserialize, Debug)]
pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
    coefficients: M,
@@ -56,7 +109,7 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for ElasticNet<T, M> {
}

impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
-     /// Fits ridge regression to your data.
+     /// Fits elastic net regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target values
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
@@ -81,7 +134,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
        let (w, b) = if parameters.normalize {
            let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;

-             let (x, y, gamma) = Self::augment_X_and_y(&scaled_x, y, l2_reg);
+             let (x, y, gamma) = Self::augment_x_and_y(&scaled_x, y, l2_reg);

            let mut optimizer = InteriorPointOptimizer::new(&x, p);

@@ -102,7 +155,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {

            (w, b)
        } else {
-             let (x, y, gamma) = Self::augment_X_and_y(x, y, l2_reg);
+             let (x, y, gamma) = Self::augment_x_and_y(x, y, l2_reg);

            let mut optimizer = InteriorPointOptimizer::new(&x, p);

@@ -159,7 +212,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
        Ok((scaled_x, col_mean, col_std))
    }

-     fn augment_X_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
+     fn augment_x_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
        let (n, p) = x.shape();

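+         // The elastic net problem is reduced to an equivalent lasso problem on
+         // augmented data (cf. Zou & Hastie, 2005): X is extended with
+         // sqrt(l2_reg) * I rows, y with zeros, and gamma rescales the
+         // augmented problem back to the original scale.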
        let gamma = T::one() / (T::one() + l2_reg).sqrt();