Skip to content

Commit 051023e

Browse files
Merge pull request #47 from smartcorelib/development
Release, v0.2.0
2 parents 6602de0 + bb9a05b commit 051023e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+8829
-834
lines changed

.circleci/config.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
version: 2.1
22

3+
workflows:
4+
version: 2.1
5+
build:
6+
jobs:
7+
- build
8+
- clippy
39
jobs:
410
build:
511
docker:
@@ -24,3 +30,14 @@ jobs:
2430
paths:
2531
- "~/.cargo"
2632
- "./target"
33+
clippy:
34+
docker:
35+
- image: circleci/rust:latest
36+
steps:
37+
- checkout
38+
- run:
39+
name: Install cargo clippy
40+
command: rustup component add clippy
41+
- run:
42+
name: Run cargo clippy
43+
command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings

Cargo.toml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "smartcore"
33
description = "The most advanced machine learning library in rust."
44
homepage = "https://smartcorelib.org"
5-
version = "0.1.0"
5+
version = "0.2.0"
66
authors = ["SmartCore Developers"]
77
edition = "2018"
88
license = "Apache-2.0"
@@ -19,11 +19,12 @@ nalgebra-bindings = ["nalgebra"]
1919
datasets = []
2020

2121
[dependencies]
22-
ndarray = { version = "0.13", optional = true }
23-
nalgebra = { version = "0.22.0", optional = true }
22+
ndarray = { version = "0.14", optional = true }
23+
nalgebra = { version = "0.23.0", optional = true }
2424
num-traits = "0.2.12"
2525
num = "0.3.0"
2626
rand = "0.7.3"
27+
rand_distr = "0.3.0"
2728
serde = { version = "1.0.115", features = ["derive"] }
2829
serde_derive = "1.0.115"
2930

@@ -34,4 +35,9 @@ bincode = "1.3.1"
3435

3536
[[bench]]
3637
name = "distance"
37-
harness = false
38+
harness = false
39+
40+
[[bench]]
41+
name = "naive_bayes"
42+
harness = false
43+
required-features = ["ndarray-bindings", "nalgebra-bindings"]

benches/naive_bayes.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use criterion::BenchmarkId;
2+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
3+
4+
use nalgebra::DMatrix;
5+
use ndarray::Array2;
6+
use smartcore::linalg::naive::dense_matrix::DenseMatrix;
7+
use smartcore::linalg::BaseMatrix;
8+
use smartcore::linalg::BaseVector;
9+
use smartcore::naive_bayes::gaussian::GaussianNB;
10+
11+
pub fn gaussian_naive_bayes_fit_benchmark(c: &mut Criterion) {
12+
let mut group = c.benchmark_group("GaussianNB::fit");
13+
14+
for n_samples in [100_usize, 1000_usize, 10000_usize].iter() {
15+
for n_features in [10_usize, 100_usize, 1000_usize].iter() {
16+
let x = DenseMatrix::<f64>::rand(*n_samples, *n_features);
17+
let y: Vec<f64> = (0..*n_samples)
18+
.map(|i| (i % *n_samples / 5_usize) as f64)
19+
.collect::<Vec<f64>>();
20+
group.bench_with_input(
21+
BenchmarkId::from_parameter(format!(
22+
"n_samples: {}, n_features: {}",
23+
n_samples, n_features
24+
)),
25+
n_samples,
26+
|b, _| {
27+
b.iter(|| {
28+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
29+
})
30+
},
31+
);
32+
}
33+
}
34+
group.finish();
35+
}
36+
37+
pub fn gaussian_naive_matrix_datastructure(c: &mut Criterion) {
38+
let mut group = c.benchmark_group("GaussianNB");
39+
let classes = (0..10000).map(|i| (i % 25) as f64).collect::<Vec<f64>>();
40+
41+
group.bench_function("DenseMatrix", |b| {
42+
let x = DenseMatrix::<f64>::rand(10000, 500);
43+
let y = <DenseMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
44+
45+
b.iter(|| {
46+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
47+
})
48+
});
49+
50+
group.bench_function("ndarray", |b| {
51+
let x = Array2::<f64>::rand(10000, 500);
52+
let y = <Array2<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
53+
54+
b.iter(|| {
55+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
56+
})
57+
});
58+
59+
group.bench_function("ndalgebra", |b| {
60+
let x = DMatrix::<f64>::rand(10000, 500);
61+
let y = <DMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
62+
63+
b.iter(|| {
64+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
65+
})
66+
});
67+
}
68+
criterion_group!(
69+
benches,
70+
gaussian_naive_bayes_fit_benchmark,
71+
gaussian_naive_matrix_datastructure
72+
);
73+
criterion_main!(benches);

src/algorithm/neighbour/bbd_tree.rs

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,11 @@ impl<T: RealNumber> BBDTree<T> {
4444

4545
let (n, _) = data.shape();
4646

47-
let mut index = vec![0; n];
48-
for i in 0..n {
49-
index[i] = i;
50-
}
47+
let index = (0..n).collect::<Vec<_>>();
5148

5249
let mut tree = BBDTree {
53-
nodes: nodes,
54-
index: index,
50+
nodes,
51+
index,
5552
root: 0,
5653
};
5754

@@ -64,7 +61,7 @@ impl<T: RealNumber> BBDTree<T> {
6461

6562
pub(in crate) fn clustering(
6663
&self,
67-
centroids: &Vec<Vec<T>>,
64+
centroids: &[Vec<T>],
6865
sums: &mut Vec<Vec<T>>,
6966
counts: &mut Vec<usize>,
7067
membership: &mut Vec<usize>,
@@ -92,8 +89,8 @@ impl<T: RealNumber> BBDTree<T> {
9289
fn filter(
9390
&self,
9491
node: usize,
95-
centroids: &Vec<Vec<T>>,
96-
candidates: &Vec<usize>,
92+
centroids: &[Vec<T>],
93+
candidates: &[usize],
9794
k: usize,
9895
sums: &mut Vec<Vec<T>>,
9996
counts: &mut Vec<usize>,
@@ -113,19 +110,19 @@ impl<T: RealNumber> BBDTree<T> {
113110
}
114111
}
115112

116-
if !self.nodes[node].lower.is_none() {
113+
if self.nodes[node].lower.is_some() {
117114
let mut new_candidates = vec![0; k];
118115
let mut newk = 0;
119116

120-
for i in 0..k {
117+
for candidate in candidates.iter().take(k) {
121118
if !BBDTree::prune(
122119
&self.nodes[node].center,
123120
&self.nodes[node].radius,
124121
centroids,
125122
closest,
126-
candidates[i],
123+
*candidate,
127124
) {
128-
new_candidates[newk] = candidates[i];
125+
new_candidates[newk] = *candidate;
129126
newk += 1;
130127
}
131128
}
@@ -134,15 +131,15 @@ impl<T: RealNumber> BBDTree<T> {
134131
return self.filter(
135132
self.nodes[node].lower.unwrap(),
136133
centroids,
137-
&mut new_candidates,
134+
&new_candidates,
138135
newk,
139136
sums,
140137
counts,
141138
membership,
142139
) + self.filter(
143140
self.nodes[node].upper.unwrap(),
144141
centroids,
145-
&mut new_candidates,
142+
&new_candidates,
146143
newk,
147144
sums,
148145
counts,
@@ -152,7 +149,7 @@ impl<T: RealNumber> BBDTree<T> {
152149
}
153150

154151
for i in 0..d {
155-
sums[closest][i] = sums[closest][i] + self.nodes[node].sum[i];
152+
sums[closest][i] += self.nodes[node].sum[i];
156153
}
157154

158155
counts[closest] += self.nodes[node].count;
@@ -166,9 +163,9 @@ impl<T: RealNumber> BBDTree<T> {
166163
}
167164

168165
fn prune(
169-
center: &Vec<T>,
170-
radius: &Vec<T>,
171-
centroids: &Vec<Vec<T>>,
166+
center: &[T],
167+
radius: &[T],
168+
centroids: &[Vec<T>],
172169
best_index: usize,
173170
test_index: usize,
174171
) -> bool {
@@ -184,11 +181,11 @@ impl<T: RealNumber> BBDTree<T> {
184181
let mut rhs = T::zero();
185182
for i in 0..d {
186183
let diff = test[i] - best[i];
187-
lhs = lhs + diff * diff;
184+
lhs += diff * diff;
188185
if diff > T::zero() {
189-
rhs = rhs + (center[i] + radius[i] - best[i]) * diff;
186+
rhs += (center[i] + radius[i] - best[i]) * diff;
190187
} else {
191-
rhs = rhs + (center[i] - radius[i] - best[i]) * diff;
188+
rhs += (center[i] - radius[i] - best[i]) * diff;
192189
}
193190
}
194191

@@ -244,7 +241,7 @@ impl<T: RealNumber> BBDTree<T> {
244241
if end > begin + 1 {
245242
let len = end - begin;
246243
for i in 0..d {
247-
node.sum[i] = node.sum[i] * T::from(len).unwrap();
244+
node.sum[i] *= T::from(len).unwrap();
248245
}
249246
}
250247

@@ -261,9 +258,7 @@ impl<T: RealNumber> BBDTree<T> {
261258
let mut i2_good = data.get(self.index[i2], split_index) >= split_cutoff;
262259

263260
if !i1_good && !i2_good {
264-
let temp = self.index[i1];
265-
self.index[i1] = self.index[i2];
266-
self.index[i2] = temp;
261+
self.index.swap(i1, i2);
267262
i1_good = true;
268263
i2_good = true;
269264
}
@@ -287,8 +282,8 @@ impl<T: RealNumber> BBDTree<T> {
287282
}
288283

289284
let mut mean = vec![T::zero(); d];
290-
for i in 0..d {
291-
mean[i] = node.sum[i] / T::from(node.count).unwrap();
285+
for (i, mean_i) in mean.iter_mut().enumerate().take(d) {
286+
*mean_i = node.sum[i] / T::from(node.count).unwrap();
292287
}
293288

294289
node.cost = BBDTree::node_cost(&self.nodes[node.lower.unwrap()], &mean)
@@ -297,12 +292,12 @@ impl<T: RealNumber> BBDTree<T> {
297292
self.add_node(node)
298293
}
299294

300-
fn node_cost(node: &BBDTreeNode<T>, center: &Vec<T>) -> T {
295+
fn node_cost(node: &BBDTreeNode<T>, center: &[T]) -> T {
301296
let d = center.len();
302297
let mut scatter = T::zero();
303-
for i in 0..d {
304-
let x = (node.sum[i] / T::from(node.count).unwrap()) - center[i];
305-
scatter = scatter + x * x;
298+
for (i, center_i) in center.iter().enumerate().take(d) {
299+
let x = (node.sum[i] / T::from(node.count).unwrap()) - *center_i;
300+
scatter += x * x;
306301
}
307302
node.cost + T::from(node.count).unwrap() * scatter
308303
}

0 commit comments

Comments
 (0)