Merge pull request #23 from smartcorelib/ridge

VolodymyrOrlov · web-flow · commit 82464f41e45b · 2020-11-11T17:59:24.000-08:00
Ridge regression
diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs
@@ -48,6 +48,7 @@ pub mod nalgebra_bindings;
 pub mod ndarray_bindings;
 /// QR factorization that factors a matrix into a product of an orthogonal matrix and an upper triangular matrix.
 pub mod qr;
+pub mod stats;
 /// Singular value decomposition.
 pub mod svd;
 
@@ -60,6 +61,7 @@ use cholesky::CholeskyDecomposableMatrix;
 use evd::EVDDecomposableMatrix;
 use lu::LUDecomposableMatrix;
 use qr::QRDecomposableMatrix;
+use stats::MatrixStats;
 use svd::SVDDecomposableMatrix;
 
 /// Column or row vector
@@ -168,6 +170,30 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
     ///assert_eq!(a.unique(), vec![-7., -6., -2., 1., 2., 3., 4.]);
     /// ```
     fn unique(&self) -> Vec<T>;
+
+    /// Returns the arithmetic mean: the element sum divided by the element count.
+    fn mean(&self) -> T {
+        T::from_usize(self.len()).map(|count| self.sum() / count).unwrap()
+    }
+    /// Computes the population variance (divisor `n`) as the mean squared deviation.
+    fn var(&self) -> T {
+        let n = self.len();
+        let div = T::from_usize(n).unwrap();
+        // Two passes: `E[x^2] - mean^2` cancels catastrophically for large means
+        // and can round below zero, which would turn `std()` into NaN.
+        let mu = self.sum() / div;
+
+        let mut sum = T::zero();
+        for i in 0..n {
+            let d = self.get(i) - mu;
+            sum += d * d;
+        }
+        sum / div
+    }
+    /// Returns the standard deviation, i.e. the square root of `var()`.
+    fn std(&self) -> T {
+        T::sqrt(self.var())
+    }
 }
 
 /// Generic matrix type.
@@ -515,6 +541,7 @@ pub trait Matrix<T: RealNumber>:
     + QRDecomposableMatrix<T>
     + LUDecomposableMatrix<T>
     + CholeskyDecomposableMatrix<T>
+    + MatrixStats<T>
     + PartialEq
     + Display
 {
@@ -550,3 +577,29 @@ impl<'a, T: RealNumber, M: BaseMatrix<T>> Iterator for RowIter<'a, T, M> {
         res
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::linalg::BaseVector;
+
+    /// Mean of `[1, 2, 3]` is exactly 2.
+    #[test]
+    fn mean() {
+        let values = vec![1., 2., 3.];
+        assert_eq!(values.mean(), 2.0);
+    }
+
+    /// Population standard deviation of `[1, 2, 3]` is sqrt(2/3), about 0.816.
+    #[test]
+    fn std() {
+        let deviation: f64 = vec![1., 2., 3.].std();
+        assert!((deviation - 0.81).abs() < 1e-2);
+    }
+
+    /// Population variance of `[1, 2, 3, 4]` is 1.25.
+    #[test]
+    fn var() {
+        let variance: f64 = vec![1., 2., 3., 4.].var();
+        assert!((variance - 1.25).abs() < std::f64::EPSILON);
+    }
+}
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs
@@ -11,6 +11,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
 use crate::linalg::evd::EVDDecomposableMatrix;
 use crate::linalg::lu::LUDecomposableMatrix;
 use crate::linalg::qr::QRDecomposableMatrix;
+use crate::linalg::stats::MatrixStats;
 use crate::linalg::svd::SVDDecomposableMatrix;
 use crate::linalg::Matrix;
 pub use crate::linalg::{BaseMatrix, BaseVector};
@@ -443,6 +444,8 @@ impl<T: RealNumber> LUDecomposableMatrix<T> for DenseMatrix<T> {}
 
 impl<T: RealNumber> CholeskyDecomposableMatrix<T> for DenseMatrix<T> {}
 
+impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {} // no overrides: the trait's default methods apply
+
 impl<T: RealNumber> Matrix<T> for DenseMatrix<T> {}
 
 impl<T: RealNumber> PartialEq for DenseMatrix<T> {
diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs
@@ -46,6 +46,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
 use crate::linalg::evd::EVDDecomposableMatrix;
 use crate::linalg::lu::LUDecomposableMatrix;
 use crate::linalg::qr::QRDecomposableMatrix;
+use crate::linalg::stats::MatrixStats;
 use crate::linalg::svd::SVDDecomposableMatrix;
 use crate::linalg::Matrix as SmartCoreMatrix;
 use crate::linalg::{BaseMatrix, BaseVector};
@@ -546,6 +547,11 @@ impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Su
 {
 }
 
+impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
+    MatrixStats<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
+{
+} // no overrides: the trait's default methods apply
+
 impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
     SmartCoreMatrix<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
 {
diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs
@@ -53,6 +53,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
 use crate::linalg::evd::EVDDecomposableMatrix;
 use crate::linalg::lu::LUDecomposableMatrix;
 use crate::linalg::qr::QRDecomposableMatrix;
+use crate::linalg::stats::MatrixStats;
 use crate::linalg::svd::SVDDecomposableMatrix;
 use crate::linalg::Matrix;
 use crate::linalg::{BaseMatrix, BaseVector};
@@ -496,6 +497,11 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi
 {
 }
 
+impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
+    MatrixStats<T> for ArrayBase<OwnedRepr<T>, Ix2>
+{
+} // no overrides: the trait's default methods apply
+
 impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum> Matrix<T>
     for ArrayBase<OwnedRepr<T>, Ix2>
 {
diff --git a/src/linalg/stats.rs b/src/linalg/stats.rs
@@ -0,0 +1,166 @@
+//! # Various Statistical Methods
+//!
+//! This module provides reference implementations for various statistical functions.
+//! Concrete implementations of the `BaseMatrix` trait are free to override these methods for better performance.
+
+use crate::linalg::BaseMatrix;
+use crate::math::num::RealNumber;
+
+/// Default per-axis statistics (mean, variance, std, scaling) for any `BaseMatrix`.
+pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
+    /// Computes the arithmetic mean along the specified axis.
+    ///
+    /// * `axis` - `0` averages down each column (one result per column); any other
+    ///   value averages across each row (one result per row).
+    ///
+    /// Panics if the number of averaged elements cannot be converted to `T`.
+    /// With zero averaged elements each result is `0 / 0`.
+    fn mean(&self, axis: u8) -> Vec<T> {
+        let (rows, cols) = self.shape();
+        // Produce `n` results, each the average of `m` elements.
+        let (n, m) = if axis == 0 { (cols, rows) } else { (rows, cols) };
+        let div = T::from_usize(m).unwrap();
+
+        let mut x: Vec<T> = vec![T::zero(); n];
+        for (i, total) in x.iter_mut().enumerate() {
+            for j in 0..m {
+                *total += match axis {
+                    0 => self.get(j, i),
+                    _ => self.get(i, j),
+                };
+            }
+            *total /= div;
+        }
+
+        x
+    }
+
+    /// Computes the population variance (divisor = element count) along the specified axis.
+    ///
+    /// `axis == 0` yields one value per column, any other value one per row. Deviations
+    /// are taken from a precomputed mean because `E[x^2] - mean^2` cancels badly for
+    /// large means and can round below zero, which would make `std` return NaN.
+    fn var(&self, axis: u8) -> Vec<T> {
+        let (rows, cols) = self.shape();
+        let (n, m) = if axis == 0 { (cols, rows) } else { (rows, cols) };
+        let div = T::from_usize(m).unwrap();
+        let at = |i: usize, j: usize| if axis == 0 { self.get(j, i) } else { self.get(i, j) };
+
+        let mut x: Vec<T> = vec![T::zero(); n];
+        for (i, out) in x.iter_mut().enumerate() {
+            // Pass 1: mean of slice `i`.
+            let mut mu = T::zero();
+            for j in 0..m {
+                mu += at(i, j);
+            }
+            mu /= div;
+
+            // Pass 2: mean squared deviation from `mu`.
+            let mut sum = T::zero();
+            for j in 0..m {
+                let d = at(i, j) - mu;
+                sum += d * d;
+            }
+            *out = sum / div;
+        }
+
+        x
+    }
+
+    /// Computes the standard deviation along the specified axis.
+    ///
+    /// This is the element-wise square root of `var(axis)`, so it has one entry per
+    /// column when `axis == 0` and one entry per row otherwise.
+    fn std(&self, axis: u8) -> Vec<T> {
+        let mut x = self.var(axis);
+
+        // Iterate over the result itself instead of re-deriving its length
+        // from the matrix shape.
+        for v in x.iter_mut() {
+            *v = (*v).sqrt();
+        }
+
+        x
+    }
+
+    /// Standardizes values in place: subtracts `mean[i]` and divides by `std[i]`.
+    ///
+    /// * `mean`, `std` - per-column statistics when `axis == 0`, per-row otherwise;
+    ///   indexing panics if either is shorter than that dimension.
+    /// * `axis` - `0` scales each column, any other value scales each row.
+    ///
+    /// A zero entry in `std` divides by zero; no guard is applied here.
+    fn scale_mut(&mut self, mean: &Vec<T>, std: &Vec<T>, axis: u8) {
+        let (rows, cols) = self.shape();
+        let (n, m) = if axis == 0 { (cols, rows) } else { (rows, cols) };
+
+        for i in 0..n {
+            for j in 0..m {
+                // (r, c) is the cell holding the j-th element of slice `i`.
+                let (r, c) = if axis == 0 { (j, i) } else { (i, j) };
+                self.set(r, c, (self.get(r, c) - mean[i]) / std[i]);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
+    use crate::linalg::BaseVector;
+
+    #[test]
+    fn mean() {
+        let matrix = DenseMatrix::from_2d_array(&[
+            &[1., 2., 3., 1., 2.],
+            &[4., 5., 6., 3., 4.],
+            &[7., 8., 9., 5., 6.],
+        ]);
+        // Axis 0 yields one mean per column, axis 1 one mean per row.
+        let per_column = matrix.mean(0);
+        let per_row = matrix.mean(1);
+        assert_eq!(per_column, vec![4., 5., 6., 3., 4.]);
+        assert_eq!(per_row, vec![1.8, 4.4, 7.]);
+    }
+
+    #[test]
+    fn std() {
+        let matrix = DenseMatrix::from_2d_array(&[
+            &[1., 2., 3., 1., 2.],
+            &[4., 5., 6., 3., 4.],
+            &[7., 8., 9., 5., 6.],
+        ]);
+        let by_column = vec![2.44, 2.44, 2.44, 1.63, 1.63];
+        let by_row = vec![0.74, 1.01, 1.41];
+        // Expected values are rounded to two decimals, hence the loose tolerance.
+        assert!(matrix.std(0).approximate_eq(&by_column, 1e-2));
+        assert!(matrix.std(1).approximate_eq(&by_row, 1e-2));
+    }
+
+    #[test]
+    fn var() {
+        let matrix = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]);
+        let (by_column, by_row) = (vec![4., 4., 4., 4.], vec![1.25, 1.25]);
+        let eps = std::f64::EPSILON;
+
+        assert!(matrix.var(0).approximate_eq(&by_column, eps));
+        assert!(matrix.var(1).approximate_eq(&by_row, eps));
+    }
+
+    #[test]
+    fn scale() {
+        let mut by_row = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
+        let mut by_column = by_row.clone();
+        // Column-wise: each column becomes -1 / +1.
+        let (mu, sigma) = (by_column.mean(0), by_column.std(0));
+        by_column.scale_mut(&mu, &sigma, 0);
+        let expected = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]);
+        assert!(by_column.approximate_eq(&expected, std::f32::EPSILON));
+        // Row-wise: each row becomes roughly -1.22 / 0 / 1.22.
+        let (mu, sigma) = (by_row.mean(1), by_row.std(1));
+        by_row.scale_mut(&mu, &sigma, 1);
+        let expected = DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]);
+        assert!(by_row.approximate_eq(&expected, 1e-2));
+    }
+}
diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs
@@ -154,8 +154,8 @@ impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
     }
 
     /// Get estimates regression coefficients
-    pub fn coefficients(&self) -> M {
-        self.coefficients.clone()
+    pub fn coefficients(&self) -> &M {
+        &self.coefficients
     }
 
     /// Get estimate of intercept
diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs
diff --git a/src/linear/mod.rs b/src/linear/mod.rs
diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs

Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;`
`46`	`46`	`use crate::linalg::evd::EVDDecomposableMatrix;`
`47`	`47`	`use crate::linalg::lu::LUDecomposableMatrix;`
`48`	`48`	`use crate::linalg::qr::QRDecomposableMatrix;`
	`49`	`+use crate::linalg::stats::MatrixStats;`
`49`	`50`	`use crate::linalg::svd::SVDDecomposableMatrix;`
`50`	`51`	`use crate::linalg::Matrix as SmartCoreMatrix;`
`51`	`52`	`use crate::linalg::{BaseMatrix, BaseVector};`
`@@ -546,6 +547,11 @@ impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Su`
`546`	`547`	`{`
`547`	`548`	`}`
`548`	`549`
	`550`	`+impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>`
	`551`	`+ MatrixStats<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>`
	`552`	`+{`
	`553`	`+}`
	`554`	`+`
`549`	`555`	`impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>`
`550`	`556`	`SmartCoreMatrix<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>`
`551`	`557`	`{`
Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;`
`53`	`53`	`use crate::linalg::evd::EVDDecomposableMatrix;`
`54`	`54`	`use crate::linalg::lu::LUDecomposableMatrix;`
`55`	`55`	`use crate::linalg::qr::QRDecomposableMatrix;`
	`56`	`+use crate::linalg::stats::MatrixStats;`
`56`	`57`	`use crate::linalg::svd::SVDDecomposableMatrix;`
`57`	`58`	`use crate::linalg::Matrix;`
`58`	`59`	`use crate::linalg::{BaseMatrix, BaseVector};`
`@@ -496,6 +497,11 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi`
`496`	`497`	`{`
`497`	`498`	`}`
`498`	`499`
	`500`	`+impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>`
	`501`	`+ MatrixStats<T> for ArrayBase<OwnedRepr<T>, Ix2>`
	`502`	`+{`
	`503`	`+}`
	`504`	`+`
`499`	`505`	`impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum> Matrix<T>`
`500`	`506`	`for ArrayBase<OwnedRepr<T>, Ix2>`
`501`	`507`	`{`
Original file line number	Diff line number	Diff line change
`@@ -154,8 +154,8 @@ impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {`
`154`	`154`	`}`
`155`	`155`
`156`	`156`	`/// Get estimates regression coefficients`
`157`		`- pub fn coefficients(&self) -> M {`
`158`		`- self.coefficients.clone()`
	`157`	`+ pub fn coefficients(&self) -> &M {`
	`158`	`+ &self.coefficients`
`159`	`159`	`}`
`160`	`160`
`161`	`161`	`/// Get estimate of intercept`