Skip to content

Commit f260898

Browse files
JPFrancoia authored and
jnothman committed
[MRG] Correcting length of explained_variance_ratio_, eigen solver (scikit-learn#7632)
1 parent 3c18735 commit f260898

File tree

3 files changed

+46
-21
lines changed

3 files changed

+46
-21
lines changed

doc/whats_new.rst

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,31 @@ Bug fixes
9898
`#6497 <https://github.com/scikit-learn/scikit-learn/pull/6497>`_
9999
by `Sebastian Säger`_
100100

101+
- Attribute ``explained_variance_ratio_`` of
102+
:class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
103+
with the SVD and Eigen solvers now has the same length. (`#7632
104+
<https://github.com/scikit-learn/scikit-learn/pull/7632>`_).
105+
By `JPFrancoia`_
106+
101107
- Fixes issue in :ref:`univariate_feature_selection` where score
102108
functions were not accepting multi-label targets. (`#7676
103109
<https://github.com/scikit-learn/scikit-learn/pull/7676>`_)
104110
by `Mohammed Affan`_
105-
111+
112+
113+
API changes summary
114+
-------------------
115+
116+
Linear, kernelized and related models
117+
118+
- Length of ``explained_variance_ratio_`` of
119+
:class:`discriminant_analysis.LinearDiscriminantAnalysis`
120+
changed for both Eigen and SVD solvers. The attribute now has a length
121+
of ``min(n_components, n_classes - 1)``. (`#7632
122+
<https://github.com/scikit-learn/scikit-learn/pull/7632>`_).
123+
By `JPFrancoia`_
124+
125+
106126
.. _changes_0_18:
107127

108128
Version 0.18
@@ -571,8 +591,8 @@ Decomposition, manifold learning and clustering
571591
:class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
572592
and :class:`manifold.SpectralEmbedding` (`#5012 <https://github.com/scikit-learn/scikit-learn/pull/5012>`_). By `Peter Fischer`_.
573593

574-
- Attribute ``explained_variance_ratio_`` calculated with the SVD solver of
575-
:class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
594+
- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
595+
of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
576596
correct results. By `JPFrancoia`_
577597

578598
Preprocessing and feature selection

sklearn/discriminant_analysis.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,8 @@ def _solve_lsqr(self, X, y, shrinkage):
293293
self.means_ = _class_means(X, y)
294294
self.covariance_ = _class_cov(X, y, self.priors_, shrinkage)
295295
self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T
296-
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T))
297-
+ np.log(self.priors_))
296+
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +
297+
np.log(self.priors_))
298298

299299
def _solve_eigen(self, X, y, shrinkage):
300300
"""Eigenvalue solver.
@@ -336,15 +336,16 @@ class scatter). This solver supports both classification and
336336
Sb = St - Sw # between scatter
337337

338338
evals, evecs = linalg.eigh(Sb, Sw)
339-
self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1]
339+
self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
340+
)[::-1][:self._max_components]
340341
evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors
341342
# evecs /= np.linalg.norm(evecs, axis=0) # doesn't work with numpy 1.6
342343
evecs /= np.apply_along_axis(np.linalg.norm, 0, evecs)
343344

344345
self.scalings_ = evecs
345346
self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
346-
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T))
347-
+ np.log(self.priors_))
347+
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +
348+
np.log(self.priors_))
348349

349350
def _solve_svd(self, X, y):
350351
"""SVD solver.
@@ -400,12 +401,12 @@ def _solve_svd(self, X, y):
400401
_, S, V = linalg.svd(X, full_matrices=0)
401402

402403
self.explained_variance_ratio_ = (S**2 / np.sum(
403-
S**2))[:self.n_components]
404+
S**2))[:self._max_components]
404405
rank = np.sum(S > self.tol * S[0])
405406
self.scalings_ = np.dot(scalings, V.T[:, :rank])
406407
coef = np.dot(self.means_ - self.xbar_, self.scalings_)
407-
self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1)
408-
+ np.log(self.priors_))
408+
self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1) +
409+
np.log(self.priors_))
409410
self.coef_ = np.dot(coef, self.scalings_.T)
410411
self.intercept_ -= np.dot(self.xbar_, self.coef_.T)
411412

@@ -457,6 +458,13 @@ def fit(self, X, y, store_covariance=None, tol=None):
457458
UserWarning)
458459
self.priors_ = self.priors_ / self.priors_.sum()
459460

461+
# Get the maximum number of components
462+
if self.n_components is None:
463+
self._max_components = len(self.classes_) - 1
464+
else:
465+
self._max_components = min(len(self.classes_) - 1,
466+
self.n_components)
467+
460468
if self.solver == 'svd':
461469
if self.shrinkage is not None:
462470
raise NotImplementedError('shrinkage not supported')
@@ -497,9 +505,8 @@ def transform(self, X):
497505
X_new = np.dot(X - self.xbar_, self.scalings_)
498506
elif self.solver == 'eigen':
499507
X_new = np.dot(X, self.scalings_)
500-
n_components = X.shape[1] if self.n_components is None \
501-
else self.n_components
502-
return X_new[:, :n_components]
508+
509+
return X_new[:, :self._max_components]
503510

504511
def predict_proba(self, X):
505512
"""Estimate probability.

sklearn/tests/test_discriminant_analysis.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,19 +171,17 @@ def test_lda_explained_variance_ratio():
171171
clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")
172172
clf_lda_eigen.fit(X, y)
173173
assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3)
174+
assert_equal(clf_lda_eigen.explained_variance_ratio_.shape, (2,),
175+
"Unexpected length for explained_variance_ratio_")
174176

175177
clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
176178
clf_lda_svd.fit(X, y)
177179
assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3)
180+
assert_equal(clf_lda_svd.explained_variance_ratio_.shape, (2,),
181+
"Unexpected length for explained_variance_ratio_")
178182

179-
tested_length = min(clf_lda_svd.explained_variance_ratio_.shape[0],
180-
clf_lda_eigen.explained_variance_ratio_.shape[0])
181-
182-
# NOTE: clf_lda_eigen.explained_variance_ratio_ is not of n_components
183-
# length. Make it the same length as clf_lda_svd.explained_variance_ratio_
184-
# before comparison.
185183
assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_,
186-
clf_lda_eigen.explained_variance_ratio_[:tested_length])
184+
clf_lda_eigen.explained_variance_ratio_)
187185

188186

189187
def test_lda_orthogonality():

0 commit comments

Comments
 (0)