
Commit 1e10697

Naive bayes (#68)

Authored by sfsf9797 and ddbourgin

* readme first draft
* added gaussianNB
* added __init__ file
* unit test for gaussianNB
* update readme
* update readme
* Move naive bayes classifier under linear models
* Add more stringent tests for GaussianNBClassifier
* Overhaul GaussianNBClassifier: fix log posterior calc, fix attribute names + descriptions, expand documentation

Co-authored-by: ddbourgin <ddbourgin@gmail.com>

1 parent 7c210a6 · commit 1e10697

File tree: 4 files changed, +285 -0 lines changed

numpy_ml/linear_models/README.md (+1)

@@ -8,6 +8,7 @@ The `lm.py` module implements:
 3. [Bayesian linear regression](https://en.wikipedia.org/wiki/Bayesian_linear_regression) with maximum a posteriori parameter estimates via [conjugacy](https://en.wikipedia.org/wiki/Conjugate_prior#Table_of_conjugate_distributions)
    - Known coefficient prior mean and known error variance
    - Known coefficient prior mean and unknown error variance
+4. [Naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) with Gaussian feature likelihoods.
 
 ## Plots
 <p align="center">

numpy_ml/linear_models/__init__.py (+1)

@@ -1 +1,2 @@
 from .lm import *
+from .naive_bayes import *

numpy_ml/linear_models/naive_bayes.py (+211, new file)

@@ -0,0 +1,211 @@
import numpy as np


class GaussianNBClassifier:
    def __init__(self, eps=1e-6):
        r"""
        A naive Bayes classifier for real-valued data.

        Notes
        -----
        The naive Bayes model assumes the features of each training example
        :math:`\mathbf{x}` are mutually independent given the example label
        :math:`y`:

        .. math::

            P(\mathbf{x}_i \mid y_i) = \prod_{j=1}^M P(x_{i,j} \mid y_i)

        where :math:`M` is the dimensionality of the `i`th example
        :math:`\mathbf{x}_i` and :math:`y_i` is the label associated with the
        `i`th example.

        Combining the conditional independence assumption with a simple
        application of Bayes' theorem gives the naive Bayes classification
        rule:

        .. math::

            \hat{y} &= \arg \max_y P(y \mid \mathbf{x}) \\
                    &= \arg \max_y P(y) P(\mathbf{x} \mid y) \\
                    &= \arg \max_y P(y) \prod_{j=1}^M P(x_j \mid y)

        In the final expression, the prior class probability :math:`P(y)` can
        be specified in advance or estimated empirically from the training
        data.

        In the Gaussian version of the naive Bayes model, the feature
        likelihood is assumed to be normally distributed for each class:

        .. math::

            \mathbf{x}_i \mid y_i = c, \theta \sim \mathcal{N}(\mu_c, \Sigma_c)

        where :math:`\theta` is the set of model parameters: :math:`\{\mu_1,
        \Sigma_1, \ldots, \mu_K, \Sigma_K\}`, :math:`K` is the total number of
        unique classes present in the data, and the parameters for the
        Gaussian associated with class :math:`c`, :math:`\mu_c` and
        :math:`\Sigma_c` (where :math:`1 \leq c \leq K`), are estimated via
        MLE from the set of training examples with label :math:`c`.

        Parameters
        ----------
        eps : float
            A value added to the variance to prevent numerical error. Default
            is 1e-6.

        Attributes
        ----------
        parameters : dict
            Dictionary of model parameters: "mean", the `(K, M)` array of
            feature means under each class, "sigma", the `(K, M)` array of
            feature variances under each class, and "prior", the `(K,)` array
            of empirical prior probabilities for each class label.
        hyperparameters : dict
            Dictionary of model hyperparameters.
        labels : :py:class:`ndarray <numpy.ndarray>` of shape `(K,)`
            An array containing the unique class labels for the training
            examples.
        """
        self.labels = None
        self.hyperparameters = {"eps": eps}
        self.parameters = {
            "mean": None,  # shape: (K, M)
            "sigma": None,  # shape: (K, M)
            "prior": None,  # shape: (K,)
        }

    def fit(self, X, y):
        """
        Fit the model parameters via maximum likelihood.

        Notes
        -----
        The model parameters are stored in the :py:attr:`parameters` attribute.
        The following keys are present:

            mean : :py:class:`ndarray <numpy.ndarray>` of shape `(K, M)`
                Feature means for each of the `K` label classes
            sigma : :py:class:`ndarray <numpy.ndarray>` of shape `(K, M)`
                Feature variances for each of the `K` label classes
            prior : :py:class:`ndarray <numpy.ndarray>` of shape `(K,)`
                Prior probability of each of the `K` label classes, estimated
                empirically from the training data

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            A dataset consisting of `N` examples, each of dimension `M`
        y : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            The class label for each of the `N` examples in `X`

        Returns
        -------
        self : object
        """
        P = self.parameters
        H = self.hyperparameters

        self.labels = np.unique(y)

        K = len(self.labels)
        N, M = X.shape

        P["mean"] = np.zeros((K, M))
        P["sigma"] = np.zeros((K, M))
        P["prior"] = np.zeros((K,))

        for i, c in enumerate(self.labels):
            X_c = X[y == c, :]

            P["mean"][i, :] = np.mean(X_c, axis=0)
            P["sigma"][i, :] = np.var(X_c, axis=0) + H["eps"]
            P["prior"][i] = X_c.shape[0] / N
        return self

    def predict(self, X):
        """
        Use the trained classifier to predict the class label for each example
        in **X**.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            A dataset of `N` examples, each of dimension `M`

        Returns
        -------
        labels : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            The predicted class labels for each example in `X`
        """
        return self.labels[self._log_posterior(X).argmax(axis=1)]

    def _log_posterior(self, X):
        r"""
        Compute the (unnormalized) log posterior for each class.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            A dataset of `N` examples, each of dimension `M`

        Returns
        -------
        log_posterior : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
            Unnormalized log posterior probability of each class for each
            example in `X`
        """
        K = len(self.labels)
        log_posterior = np.zeros((X.shape[0], K))
        for i in range(K):
            log_posterior[:, i] = self._log_class_posterior(X, i)
        return log_posterior

    def _log_class_posterior(self, X, class_idx):
        r"""
        Compute the (unnormalized) log posterior for the label at index
        `class_idx` in :py:attr:`labels`.

        Notes
        -----
        The unnormalized log posterior for example :math:`\mathbf{x}_i` and
        class :math:`c` is:

        .. math::

            \log P(y_i = c \mid \mathbf{x}_i, \theta)
                &\propto \log P(y = c \mid \theta) +
                    \log P(\mathbf{x}_i \mid y_i = c, \theta) \\
                &\propto \log P(y = c \mid \theta) +
                    \sum_{j=1}^M \log P(x_j \mid y_i = c, \theta)

        In the Gaussian naive Bayes model, the feature likelihood for class
        :math:`c`, :math:`P(\mathbf{x}_i \mid y_i = c, \theta)`, is assumed to
        be normally distributed:

        .. math::

            \mathbf{x}_i \mid y_i = c, \theta \sim \mathcal{N}(\mu_c, \Sigma_c)

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            A dataset of `N` examples, each of dimension `M`
        class_idx : int
            The index of the current class in :py:attr:`labels`

        Returns
        -------
        log_class_posterior : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            Unnormalized log probability of the label at index `class_idx`
            in :py:attr:`labels` for each example in `X`
        """
        P = self.parameters
        mu = P["mean"][class_idx]
        prior = P["prior"][class_idx]
        sigsq = P["sigma"][class_idx]

        # log likelihood of each example under N(mu, diag(sigsq)), summed
        # over the M (conditionally independent) features
        log_likelihood = -0.5 * np.sum(np.log(2 * np.pi * sigsq))
        log_likelihood -= 0.5 * np.sum(((X - mu) ** 2) / sigsq, axis=1)
        return log_likelihood + np.log(prior)
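
For orientation, here is a minimal usage sketch of the class added above. The synthetic two-class data is illustrative only; the `GaussianNBClassifier` API (`fit`, `predict`, and the `parameters` dict) is taken directly from the file in this diff.

import numpy as np
from numpy_ml.linear_models import GaussianNBClassifier

# Synthetic two-class data: class 0 centered at -1, class 1 centered at +1
rng = np.random.RandomState(0)
X0 = rng.randn(50, 3) - 1.0
X1 = rng.randn(50, 3) + 1.0
X = np.vstack([X0, X1])            # shape (100, 3)
y = np.array([0] * 50 + [1] * 50)  # shape (100,)

nb = GaussianNBClassifier(eps=1e-6)
nb.fit(X, y)

# Per-class MLE estimates populated by `fit`
print(nb.parameters["mean"].shape)   # (2, 3): feature means per class
print(nb.parameters["sigma"].shape)  # (2, 3): feature variances per class
print(nb.parameters["prior"])        # [0.5, 0.5]: empirical class priors

# Predicted labels for two new points; with classes this well separated the
# result should be array([0, 1])
print(nb.predict(np.array([[-1.2, -0.8, -1.1], [0.9, 1.3, 0.7]])))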

numpy_ml/tests/test_naive_bayes.py (+72, new file)

@@ -0,0 +1,72 @@
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from sklearn import naive_bayes

from numpy_ml.linear_models import GaussianNBClassifier
from numpy_ml.utils.testing import random_tensor


def test_GaussianNB(N=10):
    np.random.seed(12345)
    N = np.inf if N is None else N

    i = 1
    while i < N + 1:
        n_ex = np.random.randint(1, 300)
        n_feats = np.random.randint(1, 100)
        n_classes = np.random.randint(2, 10)

        X = random_tensor((n_ex, n_feats), standardize=True)
        y = np.random.randint(0, n_classes, size=n_ex)

        X_test = random_tensor((n_ex, n_feats), standardize=True)

        NB = GaussianNBClassifier(eps=1e-09)
        NB.fit(X, y)

        preds = NB.predict(X_test)

        sklearn_NB = naive_bayes.GaussianNB()
        sklearn_NB.fit(X, y)

        sk_preds = sklearn_NB.predict(X_test)

        # use a separate loop variable so the trial counter `i` isn't clobbered
        for k in range(len(NB.labels)):
            P = NB.parameters
            jointi = np.log(sklearn_NB.class_prior_[k])
            jointi_mine = np.log(P["prior"][k])

            np.testing.assert_almost_equal(jointi, jointi_mine)

            n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * sklearn_NB.sigma_[k, :]))
            n_ij_mine = -0.5 * np.sum(np.log(2.0 * np.pi * P["sigma"][k]))

            np.testing.assert_almost_equal(n_ij_mine, n_ij)

            n_ij2 = n_ij - 0.5 * np.sum(
                ((X_test - sklearn_NB.theta_[k, :]) ** 2) / (sklearn_NB.sigma_[k, :]), 1
            )

            n_ij2_mine = n_ij_mine - 0.5 * np.sum(
                ((X_test - P["mean"][k]) ** 2) / (P["sigma"][k]), 1
            )
            np.testing.assert_almost_equal(n_ij2_mine, n_ij2, decimal=4)

            llh = jointi + n_ij2
            llh_mine = jointi_mine + n_ij2_mine

            np.testing.assert_almost_equal(llh_mine, llh, decimal=4)

        np.testing.assert_almost_equal(P["prior"], sklearn_NB.class_prior_)
        np.testing.assert_almost_equal(P["mean"], sklearn_NB.theta_)
        np.testing.assert_almost_equal(P["sigma"], sklearn_NB.sigma_)
        np.testing.assert_almost_equal(
            sklearn_NB._joint_log_likelihood(X_test),
            NB._log_posterior(X_test),
            decimal=4,
        )
        np.testing.assert_almost_equal(preds, sk_preds)
        print("PASSED")
        i += 1
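
As a reference for what the assertions above check: for each class :math:`c`, both implementations compute the same unnormalized joint log likelihood (the `jointi + n_ij2` terms in the test, and `log_likelihood + np.log(prior)` in `_log_class_posterior`), writing :math:`\sigma_{c,j}^2` for the `j`th diagonal entry of :math:`\Sigma_c`:

\log P(y = c \mid \mathbf{x}, \theta) \;\propto\;
    \log P(y = c)
    - \frac{1}{2} \sum_{j=1}^{M} \log\left(2 \pi \sigma_{c,j}^2\right)
    - \frac{1}{2} \sum_{j=1}^{M} \frac{(x_j - \mu_{c,j})^2}{\sigma_{c,j}^2}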
