Skip to content

Commit ab7f466

Browse files
Volodymyr OrlovVolodymyr Orlov
authored andcommitted
feat: + ridge regression
1 parent b8fea67 commit ab7f466

File tree

7 files changed

+526
-0
lines changed

7 files changed

+526
-0
lines changed

src/linalg/mod.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub mod nalgebra_bindings;
4848
pub mod ndarray_bindings;
4949
/// QR factorization that factors a matrix into a product of an orthogonal matrix and an upper triangular matrix.
5050
pub mod qr;
51+
pub mod stats;
5152
/// Singular value decomposition.
5253
pub mod svd;
5354

@@ -60,6 +61,7 @@ use cholesky::CholeskyDecomposableMatrix;
6061
use evd::EVDDecomposableMatrix;
6162
use lu::LUDecomposableMatrix;
6263
use qr::QRDecomposableMatrix;
64+
use stats::MatrixStats;
6365
use svd::SVDDecomposableMatrix;
6466

6567
/// Column or row vector
@@ -163,6 +165,32 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
163165
///assert_eq!(a.unique(), vec![-7., -6., -2., 1., 2., 3., 4.]);
164166
/// ```
165167
fn unique(&self) -> Vec<T>;
168+
169+
/// Compute the arithmetic mean.
170+
fn mean(&self) -> T {
171+
let n = self.len();
172+
let mut mean = T::zero();
173+
174+
for i in 0..n {
175+
mean += self.get(i);
176+
}
177+
mean / T::from_usize(n).unwrap()
178+
}
179+
/// Compute the standard deviation.
180+
fn std(&self) -> T {
181+
let n = self.len();
182+
183+
let mut mu = T::zero();
184+
let mut sum = T::zero();
185+
let div = T::from_usize(n).unwrap();
186+
for i in 0..n {
187+
let xi = self.get(i);
188+
mu += xi;
189+
sum += xi * xi;
190+
}
191+
mu /= div;
192+
(sum / div - mu * mu).sqrt()
193+
}
166194
}
167195

168196
/// Generic matrix type.
@@ -510,6 +538,7 @@ pub trait Matrix<T: RealNumber>:
510538
+ QRDecomposableMatrix<T>
511539
+ LUDecomposableMatrix<T>
512540
+ CholeskyDecomposableMatrix<T>
541+
+ MatrixStats<T>
513542
+ PartialEq
514543
+ Display
515544
{
@@ -545,3 +574,22 @@ impl<'a, T: RealNumber, M: BaseMatrix<T>> Iterator for RowIter<'a, T, M> {
545574
res
546575
}
547576
}
577+
578+
#[cfg(test)]
579+
mod tests {
580+
use crate::linalg::BaseVector;
581+
582+
#[test]
583+
fn mean() {
584+
let m = vec![1., 2., 3.];
585+
586+
assert_eq!(m.mean(), 2.0);
587+
}
588+
589+
#[test]
590+
fn std() {
591+
let m = vec![1., 2., 3.];
592+
593+
assert!((m.std() - 0.81f64).abs() < 1e-2);
594+
}
595+
}

src/linalg/naive/dense_matrix.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
1212
use crate::linalg::evd::EVDDecomposableMatrix;
1313
use crate::linalg::lu::LUDecomposableMatrix;
1414
use crate::linalg::qr::QRDecomposableMatrix;
15+
use crate::linalg::stats::MatrixStats;
1516
use crate::linalg::svd::SVDDecomposableMatrix;
1617
use crate::linalg::Matrix;
1718
pub use crate::linalg::{BaseMatrix, BaseVector};
@@ -445,6 +446,8 @@ impl<T: RealNumber> LUDecomposableMatrix<T> for DenseMatrix<T> {}
445446

446447
impl<T: RealNumber> CholeskyDecomposableMatrix<T> for DenseMatrix<T> {}
447448

449+
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
450+
448451
impl<T: RealNumber> Matrix<T> for DenseMatrix<T> {}
449452

450453
impl<T: RealNumber> PartialEq for DenseMatrix<T> {

src/linalg/nalgebra_bindings.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
4646
use crate::linalg::evd::EVDDecomposableMatrix;
4747
use crate::linalg::lu::LUDecomposableMatrix;
4848
use crate::linalg::qr::QRDecomposableMatrix;
49+
use crate::linalg::stats::MatrixStats;
4950
use crate::linalg::svd::SVDDecomposableMatrix;
5051
use crate::linalg::Matrix as SmartCoreMatrix;
5152
use crate::linalg::{BaseMatrix, BaseVector};
@@ -550,6 +551,11 @@ impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Su
550551
{
551552
}
552553

554+
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
555+
MatrixStats<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
556+
{
557+
}
558+
553559
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
554560
SmartCoreMatrix<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
555561
{

src/linalg/ndarray_bindings.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix;
5353
use crate::linalg::evd::EVDDecomposableMatrix;
5454
use crate::linalg::lu::LUDecomposableMatrix;
5555
use crate::linalg::qr::QRDecomposableMatrix;
56+
use crate::linalg::stats::MatrixStats;
5657
use crate::linalg::svd::SVDDecomposableMatrix;
5758
use crate::linalg::Matrix;
5859
use crate::linalg::{BaseMatrix, BaseVector};
@@ -500,6 +501,11 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi
500501
{
501502
}
502503

504+
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
505+
MatrixStats<T> for ArrayBase<OwnedRepr<T>, Ix2>
506+
{
507+
}
508+
503509
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum> Matrix<T>
504510
for ArrayBase<OwnedRepr<T>, Ix2>
505511
{

src/linalg/stats.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//! # Various Statistical Methods
2+
//!
3+
//!
4+
5+
use crate::linalg::BaseMatrix;
6+
use crate::math::num::RealNumber;
7+
8+
/// Defines baseline implementations for various statistical functions
9+
pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
10+
/// Compute the arithmetic mean along the specified axis.
11+
fn mean(&self, axis: u8) -> Vec<T> {
12+
let (n, m) = match axis {
13+
0 => {
14+
let (n, m) = self.shape();
15+
(m, n)
16+
}
17+
_ => self.shape(),
18+
};
19+
20+
let mut x: Vec<T> = vec![T::zero(); n];
21+
22+
let div = T::from_usize(m).unwrap();
23+
24+
for i in 0..n {
25+
for j in 0..m {
26+
x[i] += match axis {
27+
0 => self.get(j, i),
28+
_ => self.get(i, j),
29+
};
30+
}
31+
x[i] /= div;
32+
}
33+
34+
x
35+
}
36+
37+
/// Compute the standard deviation along the specified axis.
38+
fn std(&self, axis: u8) -> Vec<T> {
39+
let (n, m) = match axis {
40+
0 => {
41+
let (n, m) = self.shape();
42+
(m, n)
43+
}
44+
_ => self.shape(),
45+
};
46+
47+
let mut x: Vec<T> = vec![T::zero(); n];
48+
49+
let div = T::from_usize(m).unwrap();
50+
51+
for i in 0..n {
52+
let mut mu = T::zero();
53+
let mut sum = T::zero();
54+
for j in 0..m {
55+
let a = match axis {
56+
0 => self.get(j, i),
57+
_ => self.get(i, j),
58+
};
59+
mu += a;
60+
sum += a * a;
61+
}
62+
mu /= div;
63+
x[i] = (sum / div - mu * mu).sqrt();
64+
}
65+
66+
x
67+
}
68+
69+
/// standardize values by removing the mean and scaling to unit variance
70+
fn scale_mut(&mut self, mean: &Vec<T>, std: &Vec<T>, axis: u8) {
71+
let (n, m) = match axis {
72+
0 => {
73+
let (n, m) = self.shape();
74+
(m, n)
75+
}
76+
_ => self.shape(),
77+
};
78+
79+
for i in 0..n {
80+
for j in 0..m {
81+
match axis {
82+
0 => self.set(j, i, (self.get(j, i) - mean[i]) / std[i]),
83+
_ => self.set(i, j, (self.get(i, j) - mean[i]) / std[i]),
84+
}
85+
}
86+
}
87+
}
88+
}
89+
90+
#[cfg(test)]
91+
mod tests {
92+
use super::*;
93+
use crate::linalg::naive::dense_matrix::DenseMatrix;
94+
use crate::linalg::BaseVector;
95+
96+
#[test]
97+
fn mean() {
98+
let m = DenseMatrix::from_2d_array(&[
99+
&[1., 2., 3., 1., 2.],
100+
&[4., 5., 6., 3., 4.],
101+
&[7., 8., 9., 5., 6.],
102+
]);
103+
let expected_0 = vec![4., 5., 6., 3., 4.];
104+
let expected_1 = vec![1.8, 4.4, 7.];
105+
106+
assert_eq!(m.mean(0), expected_0);
107+
assert_eq!(m.mean(1), expected_1);
108+
}
109+
110+
#[test]
111+
fn std() {
112+
let m = DenseMatrix::from_2d_array(&[
113+
&[1., 2., 3., 1., 2.],
114+
&[4., 5., 6., 3., 4.],
115+
&[7., 8., 9., 5., 6.],
116+
]);
117+
let expected_0 = vec![2.44, 2.44, 2.44, 1.63, 1.63];
118+
let expected_1 = vec![0.74, 1.01, 1.41];
119+
120+
assert!(m.std(0).approximate_eq(&expected_0, 1e-2));
121+
assert!(m.std(1).approximate_eq(&expected_1, 1e-2));
122+
}
123+
124+
#[test]
125+
fn scale() {
126+
let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
127+
let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]);
128+
let expected_1 = DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]);
129+
130+
{
131+
let mut m = m.clone();
132+
m.scale_mut(&m.mean(0), &m.std(0), 0);
133+
assert!(m.approximate_eq(&expected_0, std::f32::EPSILON));
134+
}
135+
136+
m.scale_mut(&m.mean(1), &m.std(1), 1);
137+
assert!(m.approximate_eq(&expected_1, 1e-2));
138+
}
139+
}

src/linear/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@
2222
2323
pub mod linear_regression;
2424
pub mod logistic_regression;
25+
pub mod ridge_regression;

0 commit comments

Comments
 (0)