Skip to content

Commit 2800ed5

Browse files
authored
TST Extend tests for scipy.sparse.*array in model_selection/tests/test_validation.py (scikit-learn#27366)
1 parent 3c9495c commit 2800ed5

File tree

1 file changed

+33 -24 lines changed

sklearn/model_selection/tests/test_validation.py

Lines changed: 33 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
import numpy as np
1212
import pytest
13-
from scipy.sparse import coo_matrix, csr_matrix, issparse
13+
from scipy.sparse import issparse
1414

1515
from sklearn.base import BaseEstimator, clone
1616
from sklearn.cluster import KMeans
@@ -88,6 +88,7 @@
8888
assert_array_almost_equal,
8989
assert_array_equal,
9090
)
91+
from sklearn.utils.fixes import COO_CONTAINERS, CSR_CONTAINERS
9192
from sklearn.utils.validation import _num_samples
9293

9394

@@ -244,11 +245,11 @@ def fit(
244245
"MockClassifier extra fit_param sparse_param.shape "
245246
"is ({0}, {1}), should be ({2}, {3})"
246247
)
247-
assert sparse_param.shape == P_sparse.shape, fmt.format(
248+
assert sparse_param.shape == P.shape, fmt.format(
248249
sparse_param.shape[0],
249250
sparse_param.shape[1],
250-
P_sparse.shape[0],
251-
P_sparse.shape[1],
251+
P.shape[0],
252+
P.shape[1],
252253
)
253254
return self
254255

@@ -270,16 +271,17 @@ def get_params(self, deep=False):
270271
# XXX: use 2D array, since 1D X is being detected as a single sample in
271272
# check_consistent_length
272273
X = np.ones((10, 2))
273-
X_sparse = coo_matrix(X)
274274
y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
275275
# The number of samples per class needs to be > n_splits,
276276
# for StratifiedKFold(n_splits=3)
277277
y2 = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
278-
P_sparse = coo_matrix(np.eye(5))
278+
P = np.eye(5)
279279

280280

281-
def test_cross_val_score():
281+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
282+
def test_cross_val_score(coo_container):
282283
clf = MockClassifier()
284+
X_sparse = coo_container(X)
283285

284286
for a in range(-10, 10):
285287
clf.a = a
@@ -399,7 +401,8 @@ def test_cross_validate_nested_estimator():
399401

400402

401403
@pytest.mark.parametrize("use_sparse", [False, True])
402-
def test_cross_validate(use_sparse: bool):
404+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
405+
def test_cross_validate(use_sparse: bool, csr_container):
403406
# Compute train and test mse/r2 scores
404407
cv = KFold()
405408

@@ -412,8 +415,8 @@ def test_cross_validate(use_sparse: bool):
412415
clf = SVC(kernel="linear", random_state=0)
413416

414417
if use_sparse:
415-
X_reg = csr_matrix(X_reg)
416-
X_clf = csr_matrix(X_clf)
418+
X_reg = csr_container(X_reg)
419+
X_clf = csr_container(X_clf)
417420

418421
for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
419422
# It's okay to evaluate regression metrics on classification too
@@ -681,15 +684,16 @@ def test_cross_val_score_precomputed():
681684
cross_val_score(svm, linear_kernel.tolist(), y)
682685

683686

684-
def test_cross_val_score_fit_params():
687+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
688+
def test_cross_val_score_fit_params(coo_container):
685689
clf = MockClassifier()
686690
n_samples = X.shape[0]
687691
n_classes = len(np.unique(y))
688692

689-
W_sparse = coo_matrix(
693+
W_sparse = coo_container(
690694
(np.array([1]), (np.array([1]), np.array([0]))), shape=(10, 1)
691695
)
692-
P_sparse = coo_matrix(np.eye(5))
696+
P_sparse = coo_container(np.eye(5))
693697

694698
DUMMY_INT = 42
695699
DUMMY_STR = "42"
@@ -775,10 +779,11 @@ def test_cross_val_score_with_score_func_regression():
775779
assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
776780

777781

778-
def test_permutation_score():
782+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
783+
def test_permutation_score(coo_container):
779784
iris = load_iris()
780785
X = iris.data
781-
X_sparse = coo_matrix(X)
786+
X_sparse = coo_container(X)
782787
y = iris.target
783788
svm = SVC(kernel="linear")
784789
cv = StratifiedKFold(2)
@@ -914,7 +919,8 @@ def test_cross_val_score_multilabel():
914919
assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
915920

916921

917-
def test_cross_val_predict():
922+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
923+
def test_cross_val_predict(coo_container):
918924
X, y = load_diabetes(return_X_y=True)
919925
cv = KFold()
920926

@@ -938,7 +944,7 @@ def test_cross_val_predict():
938944

939945
Xsp = X.copy()
940946
Xsp *= Xsp > np.median(Xsp)
941-
Xsp = coo_matrix(Xsp)
947+
Xsp = coo_container(Xsp)
942948
preds = cross_val_predict(est, Xsp, y)
943949
assert_array_almost_equal(len(preds), len(y))
944950

@@ -1054,10 +1060,11 @@ def test_cross_val_predict_predict_log_proba_shape():
10541060
assert preds.shape == (150, 3)
10551061

10561062

1057-
def test_cross_val_predict_input_types():
1063+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
1064+
def test_cross_val_predict_input_types(coo_container):
10581065
iris = load_iris()
10591066
X, y = iris.data, iris.target
1060-
X_sparse = coo_matrix(X)
1067+
X_sparse = coo_container(X)
10611068
multioutput_y = np.column_stack([y, y[::-1]])
10621069

10631070
clf = Ridge(fit_intercept=False, random_state=0)
@@ -1162,11 +1169,12 @@ def test_cross_val_predict_y_none():
11621169
assert_allclose(X, y_hat_proba)
11631170

11641171

1165-
def test_cross_val_score_sparse_fit_params():
1172+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
1173+
def test_cross_val_score_sparse_fit_params(coo_container):
11661174
iris = load_iris()
11671175
X, y = iris.data, iris.target
11681176
clf = MockClassifier()
1169-
fit_params = {"sparse_sample_weight": coo_matrix(np.eye(X.shape[0]))}
1177+
fit_params = {"sparse_sample_weight": coo_container(np.eye(X.shape[0]))}
11701178
a = cross_val_score(clf, X, y, params=fit_params, cv=3)
11711179
assert_array_equal(a, np.ones(3))
11721180

@@ -1739,7 +1747,8 @@ def test_check_is_permutation():
17391747
assert not _check_is_permutation(np.hstack((p, 0)), 100)
17401748

17411749

1742-
def test_cross_val_predict_sparse_prediction():
1750+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
1751+
def test_cross_val_predict_sparse_prediction(csr_container):
17431752
# check that cross_val_predict gives same result for sparse and dense input
17441753
X, y = make_multilabel_classification(
17451754
n_classes=2,
@@ -1748,8 +1757,8 @@ def test_cross_val_predict_sparse_prediction():
17481757
return_indicator=True,
17491758
random_state=1,
17501759
)
1751-
X_sparse = csr_matrix(X)
1752-
y_sparse = csr_matrix(y)
1760+
X_sparse = csr_container(X)
1761+
y_sparse = csr_container(y)
17531762
classif = OneVsRestClassifier(SVC(kernel="linear"))
17541763
preds = cross_val_predict(classif, X, y, cv=10)
17551764
preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10)

0 commit comments

Comments (0)