
Commit 97dece9

Merge pull request #41 from smartcorelib/nb_documentation
feat: NB documentation
2 parents 2c892aa + 8ca13a7 commit 97dece9

7 files changed: +168 −11 lines changed

benches/naive_bayes.rs

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ use ndarray::Array2;
 use smartcore::linalg::naive::dense_matrix::DenseMatrix;
 use smartcore::linalg::BaseMatrix;
 use smartcore::linalg::BaseVector;
-use smartcore::naive_bayes::GaussianNB;
+use smartcore::naive_bayes::gaussian::GaussianNB;
 
 pub fn gaussian_naive_bayes_fit_benchmark(c: &mut Criterion) {
     let mut group = c.benchmark_group("GaussianNB::fit");
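The only change here is the import path. As the src/naive_bayes/mod.rs hunk below shows, the NB types are no longer re-exported from the module root; each variant now lives in its own public submodule. A minimal sketch of the migration for downstream code, assuming an otherwise unchanged caller:

    // Before this commit:
    // use smartcore::naive_bayes::GaussianNB;

    // After this commit:
    use smartcore::naive_bayes::gaussian::GaussianNB;

The type itself is unchanged; only the path moved.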

src/naive_bayes/bernoulli.rs

Lines changed: 35 additions & 0 deletions
@@ -1,3 +1,38 @@
+//! # Bernoulli Naive Bayes
+//!
+//! The Bernoulli Naive Bayes classifier is a variant of [Naive Bayes](../index.html) for data that is distributed according to a multivariate Bernoulli distribution.
+//! It is used for discrete data with binary features. One example of a binary feature is whether or not a word occurs in a text.
+//!
+//! Example:
+//!
+//! ```
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! use smartcore::naive_bayes::bernoulli::BernoulliNB;
+//!
+//! // Training data points are:
+//! // Chinese Beijing Chinese (class: China)
+//! // Chinese Chinese Shanghai (class: China)
+//! // Chinese Macao (class: China)
+//! // Tokyo Japan Chinese (class: Japan)
+//! let x = DenseMatrix::<f64>::from_2d_array(&[
+//!     &[1., 1., 0., 0., 0., 0.],
+//!     &[0., 1., 0., 0., 1., 0.],
+//!     &[0., 1., 0., 1., 0., 0.],
+//!     &[0., 1., 1., 0., 0., 1.],
+//! ]);
+//! let y = vec![0., 0., 0., 1.];
+//!
+//! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
+//!
+//! // Testing data point is:
+//! // Chinese Chinese Chinese Tokyo Japan
+//! let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 1., 1., 0., 0., 1.]]);
+//! let y_hat = nb.predict(&x_test).unwrap();
+//! ```
+//!
+//! ## References:
+//!
+//! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
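Reading the example: the six columns appear to follow the alphabetized vocabulary Beijing, Chinese, Japan, Macao, Shanghai, Tokyo (an inference from the data, not stated in the code). Unlike the multinomial variant below, Bernoulli NB also penalizes the non-occurrence of a feature; the standard per-feature likelihood for this model is

\\[ P(x_i | y) = P(i | y)x_i + (1 - P(i | y))(1 - x_i) \\]

so a word absent from a document (\\(x_i = 0\\)) still contributes a factor \\(1 - P(i | y)\\) to the product. On this data the Bernoulli model assigns the test document to Japan (class 1), matching the worked example in the referenced IR book chapter.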

src/naive_bayes/categorical.rs

Lines changed: 32 additions & 0 deletions
@@ -1,3 +1,35 @@
+//! # Categorical Naive Bayes
+//!
+//! Categorical Naive Bayes is a variant of [Naive Bayes](../index.html) for categorically distributed data.
+//! It assumes that each feature has its own categorical distribution.
+//!
+//! Example:
+//!
+//! ```
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! use smartcore::naive_bayes::categorical::CategoricalNB;
+//!
+//! let x = DenseMatrix::from_2d_array(&[
+//!     &[3., 4., 0., 1.],
+//!     &[3., 0., 0., 1.],
+//!     &[4., 4., 1., 2.],
+//!     &[4., 2., 4., 3.],
+//!     &[4., 2., 4., 2.],
+//!     &[4., 1., 1., 0.],
+//!     &[1., 1., 1., 1.],
+//!     &[0., 4., 1., 0.],
+//!     &[0., 3., 2., 1.],
+//!     &[0., 3., 1., 1.],
+//!     &[3., 4., 0., 1.],
+//!     &[3., 4., 2., 4.],
+//!     &[0., 3., 1., 2.],
+//!     &[0., 4., 1., 2.],
+//! ]);
+//! let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.];
+//!
+//! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
+//! let y_hat = nb.predict(&x).unwrap();
+//! ```
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
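Here each feature value is a category index (e.g. values 0–4 in the first column). The usual smoothed per-feature likelihood for this model, and presumably what fit estimates here, is

\\[ P(x_i = t | y = c) = \frac{N_{tic} + \alpha}{N_c + \alpha n_i} \\]

where \\(N_{tic}\\) counts training samples of class c with category t in feature i, \\(N_c\\) is the number of samples of class c, \\(n_i\\) is the number of categories of feature i, and \\(\alpha\\) is the smoothing parameter carried by the parameters struct (Default::default() above).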

src/naive_bayes/gaussian.rs

Lines changed: 24 additions & 0 deletions
@@ -1,3 +1,27 @@
+//! # Gaussian Naive Bayes
+//!
+//! Gaussian Naive Bayes is a variant of [Naive Bayes](../index.html) for data that follows a Gaussian distribution:
+//! it supports continuous-valued features conforming to a normal distribution.
+//!
+//! Example:
+//!
+//! ```
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! use smartcore::naive_bayes::gaussian::GaussianNB;
+//!
+//! let x = DenseMatrix::from_2d_array(&[
+//!     &[-1., -1.],
+//!     &[-2., -1.],
+//!     &[-3., -2.],
+//!     &[ 1.,  1.],
+//!     &[ 2.,  1.],
+//!     &[ 3.,  2.],
+//! ]);
+//! let y = vec![1., 1., 1., 2., 2., 2.];
+//!
+//! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
+//! let y_hat = nb.predict(&x).unwrap();
+//! ```
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
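Gaussian NB estimates, for each class y and each feature, a mean \\(\mu_y\\) and variance \\(\sigma_y^2\\) from the training rows of that class; the per-feature likelihood plugged into the argmax rule (see the module-level docs below) is the normal density

\\[ P(x_i | y) = \frac{1}{\sqrt{2\pi\sigma_y^2}}\exp\left(-\frac{(x_i - \mu_y)^2}{2\sigma_y^2}\right) \\]

In the example the two classes form well-separated clusters around (−2, −1.33) and (2, 1.33), so y_hat should reproduce y.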

src/naive_bayes/mod.rs

Lines changed: 41 additions & 9 deletions
@@ -1,3 +1,40 @@
+//! # Naive Bayes
+//!
+//! Naive Bayes (NB) is a simple but powerful machine learning algorithm.
+//! The Naive Bayes classifier is based on Bayes’ theorem with an assumption of conditional independence
+//! between every pair of features given the value of the class variable.
+//!
+//! Bayes’ theorem can be written as
+//!
+//! \\[ P(y | X) = \frac{P(y)P(X| y)}{P(X)} \\]
+//!
+//! where
+//!
+//! * \\(X = (x_1,...x_n)\\) represents the predictors.
+//! * \\(P(y | X)\\) is the probability of class _y_ given the data X.
+//! * \\(P(X| y)\\) is the probability of data X given the class _y_.
+//! * \\(P(y)\\) is the probability of class y. This is called the prior probability of y.
+//! * \\(P(X)\\) is the probability of the data (regardless of the class value).
+//!
+//! The naive conditional independence assumption lets us rewrite this equation as
+//!
+//! \\[ P(y | x_1,...x_n) = \frac{P(y)\prod_{i=1}^nP(x_i|y)}{P(x_1,...x_n)} \\]
+//!
+//!
+//! The denominator can be removed since \\(P(x_1,...x_n)\\) is constant for all the entries in the dataset.
+//!
+//! \\[ P(y | x_1,...x_n) \propto P(y)\prod_{i=1}^nP(x_i|y) \\]
+//!
+//! To find the class y for predictors X we use this equation:
+//!
+//! \\[ y = \underset{y}{\operatorname{argmax}} P(y)\prod_{i=1}^nP(x_i|y) \\]
+//!
+//! ## References:
+//!
+//! * ["Machine Learning: A Probabilistic Perspective", Kevin P. Murphy, 2012, Chapter 3](https://mitpress.mit.edu/books/machine-learning-1)
+//!
+//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
+//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;

@@ -64,12 +101,7 @@ impl<T: RealNumber, M: Matrix<T>, D: NBDistribution<T, M>> BaseNaiveBayes<T, M,
         Ok(y_hat)
     }
 }
-mod bernoulli;
-mod categorical;
-mod gaussian;
-mod multinomial;
-
-pub use bernoulli::{BernoulliNB, BernoulliNBParameters};
-pub use categorical::{CategoricalNB, CategoricalNBParameters};
-pub use gaussian::{GaussianNB, GaussianNBParameters};
-pub use multinomial::{MultinomialNB, MultinomialNBParameters};
+pub mod bernoulli;
+pub mod categorical;
+pub mod gaussian;
+pub mod multinomial;

src/naive_bayes/multinomial.rs

Lines changed: 35 additions & 0 deletions
@@ -1,3 +1,38 @@
+//! # Multinomial Naive Bayes
+//!
+//! The Multinomial Naive Bayes classifier is a variant of [Naive Bayes](../index.html) for multinomially distributed data.
+//! It is often used for discrete data with predictors representing the number of times an event was observed in a particular instance,
+//! for example, the frequency of words present in a document.
+//!
+//! Example:
+//!
+//! ```
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! use smartcore::naive_bayes::multinomial::MultinomialNB;
+//!
+//! // Training data points are:
+//! // Chinese Beijing Chinese (class: China)
+//! // Chinese Chinese Shanghai (class: China)
+//! // Chinese Macao (class: China)
+//! // Tokyo Japan Chinese (class: Japan)
+//! let x = DenseMatrix::<f64>::from_2d_array(&[
+//!     &[1., 2., 0., 0., 0., 0.],
+//!     &[0., 2., 0., 0., 1., 0.],
+//!     &[0., 1., 0., 1., 0., 0.],
+//!     &[0., 1., 1., 0., 0., 1.],
+//! ]);
+//! let y = vec![0., 0., 0., 1.];
+//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
+//!
+//! // Testing data point is:
+//! // Chinese Chinese Chinese Tokyo Japan
+//! let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 3., 1., 0., 0., 1.]]);
+//! let y_hat = nb.predict(&x_test).unwrap();
+//! ```
+//!
+//! ## References:
+//!
+//! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
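This is the worked example from Chapter 13 of the referenced IR book, with token counts in place of the Bernoulli variant's binary indicators (note the 2s and 3s for repeated "Chinese"). With Laplace smoothing (\\(\alpha = 1\\), assuming the implementation follows the book here), the class-conditional term for "Chinese" given China is \\((5+1)/(8+6) = 3/7\\), and for "Tokyo" or "Japan" given China it is \\((0+1)/(8+6) = 1/14\\), so for the test document

\\[ P(China | d) \propto \frac{3}{4} \cdot \left(\frac{3}{7}\right)^3 \cdot \frac{1}{14} \cdot \frac{1}{14} \approx 0.0003 \\]

\\[ P(Japan | d) \propto \frac{1}{4} \cdot \left(\frac{2}{9}\right)^3 \cdot \frac{2}{9} \cdot \frac{2}{9} \approx 0.0001 \\]

and the multinomial model labels the test document China (class 0), unlike the Bernoulli variant above.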

src/svm/svc.rs

Lines changed: 0 additions & 1 deletion
@@ -28,7 +28,6 @@
 //!
 //! ```
 //! use smartcore::linalg::naive::dense_matrix::*;
-//! use smartcore::linear::linear_regression::*;
 //! use smartcore::svm::Kernels;
 //! use smartcore::svm::svc::{SVC, SVCParameters};
 //!
