
Commit 9b22197

Volodymyr Orlov authored and committed
fix: clippy, documentation and formatting
1 parent a2be9e1 commit 9b22197

7 files changed: 80 additions, 62 deletions


src/linalg/mod.rs

Lines changed: 5 additions & 5 deletions
@@ -281,8 +281,8 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
 
         let mut result = Self::zeros(n);
 
-        for i in 0..n {
-            result.set(i, self.get(index[i]));
+        for (i, idx) in index.iter().enumerate() {
+            result.set(i, self.get(*idx));
         }
 
         result
@@ -639,11 +639,11 @@ pub trait BaseMatrix<T: RealNumber>: Clone + Debug {
             _ => Self::zeros(n, index.len()),
         };
 
-        for i in 0..index.len() {
+        for (i, idx) in index.iter().enumerate() {
             for j in 0..k {
                 match axis {
-                    0 => result.set(i, j, self.get(index[i], j)),
-                    _ => result.set(j, i, self.get(j, index[i])),
+                    0 => result.set(i, j, self.get(*idx, j)),
+                    _ => result.set(j, i, self.get(j, *idx)),
                 };
             }
         }
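Both hunks fix clippy's `needless_range_loop` lint: indexing a slice with a manual `0..n` counter is replaced by iterating the slice itself via `iter().enumerate()`. A minimal standalone sketch of the same transformation (hypothetical `gather` helper, not part of the crate):

fn gather(values: &[f64], index: &[usize]) -> Vec<f64> {
    let mut result = vec![0.0; index.len()];
    // `for i in 0..index.len() { result[i] = values[index[i]]; }` would
    // trigger clippy::needless_range_loop; iterating `index` directly
    // removes the redundant bounds-checked lookup of `index[i]`.
    for (i, idx) in index.iter().enumerate() {
        result[i] = values[*idx];
    }
    result
}

fn main() {
    let values = [10.0, 20.0, 30.0, 40.0];
    assert_eq!(gather(&values, &[3, 0]), vec![40.0, 10.0]);
}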

src/linear/logistic_regression.rs

Lines changed: 7 additions & 5 deletions
@@ -69,8 +69,7 @@ use crate::optimization::FunctionOrder;
 
 /// Logistic Regression parameters
 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct LogisticRegressionParameters {
-}
+pub struct LogisticRegressionParameters {}
 
 /// Logistic Regression
 #[derive(Serialize, Deserialize, Debug)]
@@ -105,8 +104,7 @@ struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
 
 impl Default for LogisticRegressionParameters {
     fn default() -> Self {
-        LogisticRegressionParameters {
-        }
+        LogisticRegressionParameters {}
     }
 }
 
@@ -231,7 +229,11 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
     /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
     /// * `y` - target class values
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
-    pub fn fit(x: &M, y: &M::RowVector, _parameters: LogisticRegressionParameters) -> Result<LogisticRegression<T, M>, Failed> {
+    pub fn fit(
+        x: &M,
+        y: &M::RowVector,
+        _parameters: LogisticRegressionParameters,
+    ) -> Result<LogisticRegression<T, M>, Failed> {
         let y_m = M::from_row_vector(y.clone());
         let (x_nrows, num_attributes) = x.shape();
         let (_, y_nrows) = y_m.shape();
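These are pure formatting fixes: rustfmt collapses the empty parameter struct onto one line and splits the over-long `fit` signature one argument per line. Call sites are unchanged; a sketch of a typical call (toy data, assuming the `DenseMatrix` import path the crate uses at this commit):

use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::linear::logistic_regression::LogisticRegression;

fn main() {
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 2.0],
        &[1.1, 2.1],
        &[9.0, 9.5],
        &[9.1, 9.4],
    ]);
    let y = vec![0.0, 0.0, 1.0, 1.0];
    // `LogisticRegressionParameters` has no fields yet, so
    // `Default::default()` is the natural argument.
    let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
    let y_hat = lr.predict(&x).unwrap();
    assert_eq!(y_hat.len(), 4);
}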

src/model_selection/kfold.rs

Lines changed: 6 additions & 23 deletions
@@ -1,30 +1,13 @@
 //! # KFold
 //!
-//! In statistics and machine learning we usually split our data into multiple subsets: training data and testing data (and sometimes to validate),
-//! and fit our model on the train data, in order to make predictions on the test data. We do that to avoid overfitting or underfitting model to our data.
-//! Overfitting is bad because the model we trained fits trained data too well and can’t make any inferences on new data.
-//! Underfitted is bad because the model is undetrained and does not fit the training data well.
-//! Splitting data into multiple subsets helps to find the right combination of hyperparameters, estimate model performance and choose the right model for
-//! your data.
-//!
-//! In SmartCore you can split your data into training and test datasets using `train_test_split` function.
+//! Defines k-fold cross validator.
 
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
+use crate::model_selection::BaseKFold;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
 
-/// An interface for the K-Folds cross-validator
-pub trait BaseKFold {
-    /// An iterator over indices that split data into training and test set.
-    type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
-    /// Return a tuple containing the the training set indices for that split and
-    /// the testing set indices for that split.
-    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
-    /// Returns the number of splits
-    fn n_splits(&self) -> usize;
-}
-
 /// K-Folds cross-validator
 pub struct KFold {
     /// Number of folds. Must be at least 2.
@@ -101,12 +84,12 @@ impl KFold {
 }
 
 /// An iterator over indices that split data into training and test set.
-pub struct BaseKFoldIter {
+pub struct KFoldIter {
     indices: Vec<usize>,
     test_indices: Vec<Vec<bool>>,
 }
 
-impl Iterator for BaseKFoldIter {
+impl Iterator for KFoldIter {
     type Item = (Vec<usize>, Vec<usize>);
 
     fn next(&mut self) -> Option<(Vec<usize>, Vec<usize>)> {
@@ -133,7 +116,7 @@ impl Iterator for BaseKFoldIter {
 
 /// Abstract class for all KFold functionalities
 impl BaseKFold for KFold {
-    type Output = BaseKFoldIter;
+    type Output = KFoldIter;
 
     fn n_splits(&self) -> usize {
         self.n_splits
@@ -148,7 +131,7 @@ impl BaseKFold for KFold {
         let mut test_indices = self.test_masks(x);
         test_indices.reverse();
 
-        BaseKFoldIter {
+        KFoldIter {
             indices,
             test_indices,
         }
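With `BaseKFold` re-homed in `model_selection` and the iterator renamed from `BaseKFoldIter` to `KFoldIter`, nothing changes from the caller's side. A usage sketch (toy matrix; assumes `KFold`'s fields are public, as the struct-update syntax in the crate's own test below suggests):

use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::model_selection::{BaseKFold, KFold};

fn main() {
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 2.0],
        &[3.0, 4.0],
        &[5.0, 6.0],
        &[7.0, 8.0],
    ]);
    let cv = KFold {
        n_splits: 2,
        ..KFold::default()
    };
    // `split` comes from the `BaseKFold` trait and yields one
    // (train_indices, test_indices) tuple per fold.
    for (train_idx, test_idx) in cv.split(&x) {
        println!("train: {:?}, test: {:?}", train_idx, test_idx);
    }
}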

src/model_selection/mod.rs

Lines changed: 36 additions & 7 deletions
@@ -14,15 +14,27 @@ use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-use crate::model_selection::kfold::BaseKFold;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
 
-pub mod kfold;
+pub(crate) mod kfold;
+
+pub use kfold::{KFold, KFoldIter};
+
+/// An interface for the K-Folds cross-validator
+pub trait BaseKFold {
+    /// An iterator over indices that split data into training and test set.
+    type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
+    /// Return a tuple containing the the training set indices for that split and
+    /// the testing set indices for that split.
+    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
+    /// Returns the number of splits
+    fn n_splits(&self) -> usize;
+}
 
 /// Splits data into 2 disjoint datasets.
 /// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
-/// * `y` - target values, should be of size _M_
+/// * `y` - target values, should be of size _N_
 /// * `test_size`, (0, 1] - the proportion of the dataset to include in the test split.
 /// * `shuffle`, - whether or not to shuffle the data before splitting
 pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
@@ -65,22 +77,33 @@
     (x_train, x_test, y_train, y_test)
 }
 
+/// Cross validation results.
 #[derive(Clone, Debug)]
 pub struct CrossValidationResult<T: RealNumber> {
+    /// Vector with test scores on each cv split
     pub test_score: Vec<T>,
+    /// Vector with training scores on each cv split
     pub train_score: Vec<T>,
 }
 
 impl<T: RealNumber> CrossValidationResult<T> {
+    /// Average test score
     pub fn mean_test_score(&self) -> T {
         self.test_score.sum() / T::from_usize(self.test_score.len()).unwrap()
     }
-
+    /// Average training score
     pub fn mean_train_score(&self) -> T {
         self.train_score.sum() / T::from_usize(self.train_score.len()).unwrap()
     }
 }
 
+/// Evaluate an estimator by cross-validation using given metric.
+/// * `fit_estimator` - a `fit` function of an estimator
+/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
+/// * `y` - target values, should be of size _N_
+/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
+/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
+/// * `score` - a metric to use for evaluation, see [metrics](../metrics/index.html)
 pub fn cross_validate<T, M, H, E, K, F, S>(
     fit_estimator: F,
     x: &M,
@@ -302,7 +325,6 @@ mod tests {
 
     #[test]
     fn test_some_classifier() {
-
         let x = DenseMatrix::from_2d_array(&[
             &[5.1, 3.5, 1.4, 0.2],
             &[4.9, 3.0, 1.4, 0.2],
@@ -334,8 +356,15 @@
             ..KFold::default()
         };
 
-        let results =
-            cross_validate(DecisionTreeClassifier::fit, &x, &y, Default::default(), cv, &accuracy).unwrap();
+        let results = cross_validate(
+            DecisionTreeClassifier::fit,
+            &x,
+            &y,
+            Default::default(),
+            cv,
+            &accuracy,
+        )
+        .unwrap();
 
         println!("{}", results.mean_test_score());
         println!("{}", results.mean_train_score());
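The corrected doc line matters: `y` must hold one target per sample (_N_ entries), not one per attribute. A `train_test_split` usage sketch (toy data; assumes the `(x, y, test_size, shuffle)` argument order documented above and the 4-tuple return shown in the hunk):

use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::model_selection::train_test_split;

fn main() {
    // N = 5 samples, M = 2 attributes; y therefore has 5 entries.
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 2.0],
        &[3.0, 4.0],
        &[5.0, 6.0],
        &[7.0, 8.0],
        &[9.0, 10.0],
    ]);
    let y = vec![0.0, 0.0, 1.0, 1.0, 1.0];
    // Hold out roughly 20% of the rows, shuffling before the split.
    let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2, true);
    assert_eq!(x_train.shape().0 + x_test.shape().0, 5);
    assert_eq!(y_train.len() + y_test.len(), 5);
}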

src/naive_bayes/multinomial.rs

Lines changed: 1 addition & 1 deletion
@@ -188,7 +188,7 @@ pub struct MultinomialNB<T: RealNumber, M: Matrix<T>> {
     inner: BaseNaiveBayes<T, M, MultinomialNBDistribution<T>>,
 }
 
-impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for MultinomialNB <T, M> {
+impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for MultinomialNB<T, M> {
     fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
         self.predict(x)
     }

src/svm/svc.rs

Lines changed: 13 additions & 7 deletions
@@ -167,8 +167,8 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVCParameters<T, M
             epoch: self.epoch,
             c: self.c,
             tol: self.tol,
-            kernel: kernel,
-            m: PhantomData
+            kernel,
+            m: PhantomData,
         }
     }
 }
@@ -180,12 +180,14 @@ impl<T: RealNumber, M: Matrix<T>> Default for SVCParameters<T, M, LinearKernel>
             c: T::one(),
             tol: T::from_f64(1e-3).unwrap(),
             kernel: Kernels::linear(),
-            m: PhantomData
+            m: PhantomData,
         }
     }
 }
 
-impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector> for SVC<T, M, K> {
+impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
+    for SVC<T, M, K>
+{
     fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
         self.predict(x)
     }
@@ -743,10 +745,12 @@ mod tests {
         let y_hat = SVC::fit(
             &x,
             &y,
-            SVCParameters::default().with_c(200.0).with_kernel(Kernels::linear()),
+            SVCParameters::default()
+                .with_c(200.0)
+                .with_kernel(Kernels::linear()),
         )
         .and_then(|lr| lr.predict(&x))
-            .unwrap();
+        .unwrap();
 
         assert!(accuracy(&y_hat, &y) >= 0.9);
     }
@@ -784,7 +788,9 @@ mod tests {
         let y_hat = SVC::fit(
             &x,
             &y,
-            SVCParameters::default().with_c(1.0).with_kernel(Kernels::rbf(0.7)),
+            SVCParameters::default()
+                .with_c(1.0)
+                .with_kernel(Kernels::rbf(0.7)),
        )
         .and_then(|lr| lr.predict(&x))
         .unwrap();
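Besides the rustfmt line splits, `kernel: kernel,` becomes the field-init shorthand `kernel,`, which is what clippy's `redundant_field_names` lint asks for. A standalone illustration of the idiom (hypothetical `Config` type, not from the crate):

struct Config {
    c: f64,
    kernel: String,
}

fn make_config(kernel: String) -> Config {
    // When a local binding and a struct field share a name,
    // `kernel: kernel` is flagged by clippy::redundant_field_names;
    // the shorthand below is the idiomatic form.
    Config { c: 1.0, kernel }
}

fn main() {
    let cfg = make_config(String::from("linear"));
    println!("c = {}, kernel = {}", cfg.c, cfg.kernel);
}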
@@ -784,7 +788,9 @@ mod tests {
784788
let y_hat = SVC::fit(
785789
&x,
786790
&y,
787-
SVCParameters::default().with_c(1.0).with_kernel(Kernels::rbf(0.7)),
791+
SVCParameters::default()
792+
.with_c(1.0)
793+
.with_kernel(Kernels::rbf(0.7)),
788794
)
789795
.and_then(|lr| lr.predict(&x))
790796
.unwrap();

src/svm/svr.rs

Lines changed: 12 additions & 14 deletions
@@ -134,7 +134,7 @@ struct Cache<T: Clone> {
     data: Vec<RefCell<Option<Vec<T>>>>,
 }
 
-impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M, K> { 
+impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M, K> {
     /// Epsilon in the epsilon-SVR model.
     pub fn with_eps(mut self, eps: T) -> Self {
         self.eps = eps;
@@ -153,11 +153,11 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M
     /// The kernel function.
     pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVRParameters<T, M, KK> {
         SVRParameters {
-            eps: self.eps, 
+            eps: self.eps,
             c: self.c,
             tol: self.tol,
-            kernel: kernel,
-            m: PhantomData
+            kernel,
+            m: PhantomData,
         }
     }
 }
@@ -169,12 +169,14 @@ impl<T: RealNumber, M: Matrix<T>> Default for SVRParameters<T, M, LinearKernel>
             c: T::one(),
             tol: T::from_f64(1e-3).unwrap(),
             kernel: Kernels::linear(),
-            m: PhantomData
+            m: PhantomData,
         }
     }
 }
 
-impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector> for SVR<T, M, K> {
+impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
+    for SVR<T, M, K>
+{
     fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
         self.predict(x)
     }
@@ -188,7 +190,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
     /// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values.
     pub fn fit(
         x: &M,
-        y: &M::RowVector, 
+        y: &M::RowVector,
         parameters: SVRParameters<T, M, K>,
     ) -> Result<SVR<T, M, K>, Failed> {
         let (n, _) = x.shape();
@@ -544,13 +546,9 @@ mod tests {
             114.2, 115.7, 116.9,
         ];
 
-        let y_hat = SVR::fit(
-            &x,
-            &y,
-            SVRParameters::default().with_eps(2.0).with_c(10.0),
-        )
-        .and_then(|lr| lr.predict(&x))
-        .unwrap();
+        let y_hat = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0))
+            .and_then(|lr| lr.predict(&x))
+            .unwrap();
 
         assert!(mean_squared_error(&y_hat, &y) < 2.5);
     }
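One design detail worth noting in `with_kernel`: it is a type-changing builder. It takes `SVRParameters<T, M, K>` and returns `SVRParameters<T, M, KK>`, so the kernel type is tracked in the parameter struct's type itself. A minimal sketch of the pattern (hypothetical `Params`, `Linear`, and `Rbf` types, simplified from the crate's):

struct Linear;
struct Rbf {
    gamma: f64,
}

struct Params<K> {
    c: f64,
    kernel: K,
}

impl<K> Params<K> {
    // Swapping the kernel changes the *type* of the parameter struct,
    // so the builder returns Params<KK> rather than Self.
    fn with_kernel<KK>(self, kernel: KK) -> Params<KK> {
        Params { c: self.c, kernel }
    }
}

impl Default for Params<Linear> {
    fn default() -> Self {
        Params {
            c: 1.0,
            kernel: Linear,
        }
    }
}

fn main() {
    // Start from the linear default, then switch kernel types.
    let p = Params::<Linear>::default().with_kernel(Rbf { gamma: 0.7 });
    println!("c = {}, gamma = {}", p.c, p.kernel.gamma);
}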
