Skip to content

Commit a9611d0

Browse files
authored
TST Extend tests for scipy.sparse.*array in sklearn/ensemble/tests/test_weight_boosting.py (scikit-learn#27148)
1 parent 8eef0e7 commit a9611d0

File tree

1 file changed

+128
-98
lines changed

1 file changed

+128
-98
lines changed

sklearn/ensemble/tests/test_weight_boosting.py

Lines changed: 128 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
import numpy as np
66
import pytest
7-
from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix
87

98
from sklearn import datasets
109
from sklearn.base import BaseEstimator, clone
@@ -22,6 +21,13 @@
2221
assert_array_equal,
2322
assert_array_less,
2423
)
24+
from sklearn.utils.fixes import (
25+
COO_CONTAINERS,
26+
CSC_CONTAINERS,
27+
CSR_CONTAINERS,
28+
DOK_CONTAINERS,
29+
LIL_CONTAINERS,
30+
)
2531

2632
# Common random state
2733
rng = np.random.RandomState(0)
@@ -308,7 +314,20 @@ def test_sample_weights_infinite():
308314
clf.fit(iris.data, iris.target)
309315

310316

311-
def test_sparse_classification():
317+
@pytest.mark.parametrize(
318+
"sparse_container, expected_internal_type",
319+
zip(
320+
[
321+
*CSC_CONTAINERS,
322+
*CSR_CONTAINERS,
323+
*LIL_CONTAINERS,
324+
*COO_CONTAINERS,
325+
*DOK_CONTAINERS,
326+
],
327+
CSC_CONTAINERS + 4 * CSR_CONTAINERS,
328+
),
329+
)
330+
def test_sparse_classification(sparse_container, expected_internal_type):
312331
# Check classification with sparse input.
313332

314333
class CustomSVC(SVC):
@@ -328,80 +347,92 @@ def fit(self, X, y, sample_weight=None):
328347

329348
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
330349

331-
for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix, dok_matrix]:
332-
X_train_sparse = sparse_format(X_train)
333-
X_test_sparse = sparse_format(X_test)
334-
335-
# Trained on sparse format
336-
sparse_classifier = AdaBoostClassifier(
337-
estimator=CustomSVC(probability=True),
338-
random_state=1,
339-
algorithm="SAMME",
340-
).fit(X_train_sparse, y_train)
341-
342-
# Trained on dense format
343-
dense_classifier = AdaBoostClassifier(
344-
estimator=CustomSVC(probability=True),
345-
random_state=1,
346-
algorithm="SAMME",
347-
).fit(X_train, y_train)
348-
349-
# predict
350-
sparse_results = sparse_classifier.predict(X_test_sparse)
351-
dense_results = dense_classifier.predict(X_test)
352-
assert_array_equal(sparse_results, dense_results)
353-
354-
# decision_function
355-
sparse_results = sparse_classifier.decision_function(X_test_sparse)
356-
dense_results = dense_classifier.decision_function(X_test)
357-
assert_array_almost_equal(sparse_results, dense_results)
358-
359-
# predict_log_proba
360-
sparse_results = sparse_classifier.predict_log_proba(X_test_sparse)
361-
dense_results = dense_classifier.predict_log_proba(X_test)
362-
assert_array_almost_equal(sparse_results, dense_results)
363-
364-
# predict_proba
365-
sparse_results = sparse_classifier.predict_proba(X_test_sparse)
366-
dense_results = dense_classifier.predict_proba(X_test)
367-
assert_array_almost_equal(sparse_results, dense_results)
368-
369-
# score
370-
sparse_results = sparse_classifier.score(X_test_sparse, y_test)
371-
dense_results = dense_classifier.score(X_test, y_test)
372-
assert_array_almost_equal(sparse_results, dense_results)
373-
374-
# staged_decision_function
375-
sparse_results = sparse_classifier.staged_decision_function(X_test_sparse)
376-
dense_results = dense_classifier.staged_decision_function(X_test)
377-
for sprase_res, dense_res in zip(sparse_results, dense_results):
378-
assert_array_almost_equal(sprase_res, dense_res)
379-
380-
# staged_predict
381-
sparse_results = sparse_classifier.staged_predict(X_test_sparse)
382-
dense_results = dense_classifier.staged_predict(X_test)
383-
for sprase_res, dense_res in zip(sparse_results, dense_results):
384-
assert_array_equal(sprase_res, dense_res)
385-
386-
# staged_predict_proba
387-
sparse_results = sparse_classifier.staged_predict_proba(X_test_sparse)
388-
dense_results = dense_classifier.staged_predict_proba(X_test)
389-
for sprase_res, dense_res in zip(sparse_results, dense_results):
390-
assert_array_almost_equal(sprase_res, dense_res)
391-
392-
# staged_score
393-
sparse_results = sparse_classifier.staged_score(X_test_sparse, y_test)
394-
dense_results = dense_classifier.staged_score(X_test, y_test)
395-
for sprase_res, dense_res in zip(sparse_results, dense_results):
396-
assert_array_equal(sprase_res, dense_res)
397-
398-
# Verify sparsity of data is maintained during training
399-
types = [i.data_type_ for i in sparse_classifier.estimators_]
400-
401-
assert all([(t == csc_matrix or t == csr_matrix) for t in types])
402-
403-
404-
def test_sparse_regression():
350+
X_train_sparse = sparse_container(X_train)
351+
X_test_sparse = sparse_container(X_test)
352+
353+
# Trained on sparse format
354+
sparse_classifier = AdaBoostClassifier(
355+
estimator=CustomSVC(probability=True),
356+
random_state=1,
357+
algorithm="SAMME",
358+
).fit(X_train_sparse, y_train)
359+
360+
# Trained on dense format
361+
dense_classifier = AdaBoostClassifier(
362+
estimator=CustomSVC(probability=True),
363+
random_state=1,
364+
algorithm="SAMME",
365+
).fit(X_train, y_train)
366+
367+
# predict
368+
sparse_clf_results = sparse_classifier.predict(X_test_sparse)
369+
dense_clf_results = dense_classifier.predict(X_test)
370+
assert_array_equal(sparse_clf_results, dense_clf_results)
371+
372+
# decision_function
373+
sparse_clf_results = sparse_classifier.decision_function(X_test_sparse)
374+
dense_clf_results = dense_classifier.decision_function(X_test)
375+
assert_array_almost_equal(sparse_clf_results, dense_clf_results)
376+
377+
# predict_log_proba
378+
sparse_clf_results = sparse_classifier.predict_log_proba(X_test_sparse)
379+
dense_clf_results = dense_classifier.predict_log_proba(X_test)
380+
assert_array_almost_equal(sparse_clf_results, dense_clf_results)
381+
382+
# predict_proba
383+
sparse_clf_results = sparse_classifier.predict_proba(X_test_sparse)
384+
dense_clf_results = dense_classifier.predict_proba(X_test)
385+
assert_array_almost_equal(sparse_clf_results, dense_clf_results)
386+
387+
# score
388+
sparse_clf_results = sparse_classifier.score(X_test_sparse, y_test)
389+
dense_clf_results = dense_classifier.score(X_test, y_test)
390+
assert_array_almost_equal(sparse_clf_results, dense_clf_results)
391+
392+
# staged_decision_function
393+
sparse_clf_results = sparse_classifier.staged_decision_function(X_test_sparse)
394+
dense_clf_results = dense_classifier.staged_decision_function(X_test)
395+
for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
396+
assert_array_almost_equal(sparse_clf_res, dense_clf_res)
397+
398+
# staged_predict
399+
sparse_clf_results = sparse_classifier.staged_predict(X_test_sparse)
400+
dense_clf_results = dense_classifier.staged_predict(X_test)
401+
for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
402+
assert_array_equal(sparse_clf_res, dense_clf_res)
403+
404+
# staged_predict_proba
405+
sparse_clf_results = sparse_classifier.staged_predict_proba(X_test_sparse)
406+
dense_clf_results = dense_classifier.staged_predict_proba(X_test)
407+
for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
408+
assert_array_almost_equal(sparse_clf_res, dense_clf_res)
409+
410+
# staged_score
411+
sparse_clf_results = sparse_classifier.staged_score(X_test_sparse, y_test)
412+
dense_clf_results = dense_classifier.staged_score(X_test, y_test)
413+
for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
414+
assert_array_equal(sparse_clf_res, dense_clf_res)
415+
416+
# Verify sparsity of data is maintained during training
417+
types = [i.data_type_ for i in sparse_classifier.estimators_]
418+
419+
assert all([t == expected_internal_type for t in types])
420+
421+
422+
@pytest.mark.parametrize(
423+
"sparse_container, expected_internal_type",
424+
zip(
425+
[
426+
*CSC_CONTAINERS,
427+
*CSR_CONTAINERS,
428+
*LIL_CONTAINERS,
429+
*COO_CONTAINERS,
430+
*DOK_CONTAINERS,
431+
],
432+
CSC_CONTAINERS + 4 * CSR_CONTAINERS,
433+
),
434+
)
435+
def test_sparse_regression(sparse_container, expected_internal_type):
405436
# Check regression with sparse input.
406437

407438
class CustomSVR(SVR):
@@ -419,34 +450,33 @@ def fit(self, X, y, sample_weight=None):
419450

420451
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
421452

422-
for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix, dok_matrix]:
423-
X_train_sparse = sparse_format(X_train)
424-
X_test_sparse = sparse_format(X_test)
453+
X_train_sparse = sparse_container(X_train)
454+
X_test_sparse = sparse_container(X_test)
425455

426-
# Trained on sparse format
427-
sparse_classifier = AdaBoostRegressor(
428-
estimator=CustomSVR(), random_state=1
429-
).fit(X_train_sparse, y_train)
456+
# Trained on sparse format
457+
sparse_regressor = AdaBoostRegressor(estimator=CustomSVR(), random_state=1).fit(
458+
X_train_sparse, y_train
459+
)
430460

431-
# Trained on dense format
432-
dense_classifier = dense_results = AdaBoostRegressor(
433-
estimator=CustomSVR(), random_state=1
434-
).fit(X_train, y_train)
461+
# Trained on dense format
462+
dense_regressor = AdaBoostRegressor(estimator=CustomSVR(), random_state=1).fit(
463+
X_train, y_train
464+
)
435465

436-
# predict
437-
sparse_results = sparse_classifier.predict(X_test_sparse)
438-
dense_results = dense_classifier.predict(X_test)
439-
assert_array_almost_equal(sparse_results, dense_results)
466+
# predict
467+
sparse_regr_results = sparse_regressor.predict(X_test_sparse)
468+
dense_regr_results = dense_regressor.predict(X_test)
469+
assert_array_almost_equal(sparse_regr_results, dense_regr_results)
440470

441-
# staged_predict
442-
sparse_results = sparse_classifier.staged_predict(X_test_sparse)
443-
dense_results = dense_classifier.staged_predict(X_test)
444-
for sprase_res, dense_res in zip(sparse_results, dense_results):
445-
assert_array_almost_equal(sprase_res, dense_res)
471+
# staged_predict
472+
sparse_regr_results = sparse_regressor.staged_predict(X_test_sparse)
473+
dense_regr_results = dense_regressor.staged_predict(X_test)
474+
for sparse_regr_res, dense_regr_res in zip(sparse_regr_results, dense_regr_results):
475+
assert_array_almost_equal(sparse_regr_res, dense_regr_res)
446476

447-
types = [i.data_type_ for i in sparse_classifier.estimators_]
477+
types = [i.data_type_ for i in sparse_regressor.estimators_]
448478

449-
assert all([(t == csc_matrix or t == csr_matrix) for t in types])
479+
assert all([t == expected_internal_type for t in types])
450480

451481

452482
def test_sample_weight_adaboost_regressor():

0 commit comments

Comments
 (0)