Skip to content

Commit 856fbd0

Browse files
authored
TST Extend tests for scipy.sparse.*array in sklearn/cluster/tests/test_dbscan.py + test_birch.py + test_column_transformer.py (scikit-learn#27097)
1 parent 7ad7090 commit 856fbd0

File tree

3 files changed: 43 additions and 27 deletions.

sklearn/cluster/tests/test_birch.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44

55
import numpy as np
66
import pytest
7-
from scipy import sparse
87

98
from sklearn.cluster import AgglomerativeClustering, Birch
109
from sklearn.cluster.tests.common import generate_clustered_data
1110
from sklearn.datasets import make_blobs
1211
from sklearn.exceptions import ConvergenceWarning
1312
from sklearn.metrics import pairwise_distances_argmin, v_measure_score
1413
from sklearn.utils._testing import assert_allclose, assert_array_equal
14+
from sklearn.utils.fixes import CSR_CONTAINERS
1515

1616

1717
def test_n_samples_leaves_roots(global_random_seed, global_dtype):
@@ -93,14 +93,15 @@ def test_n_clusters(global_random_seed, global_dtype):
9393
brc4.fit(X)
9494

9595

96-
def test_sparse_X(global_random_seed, global_dtype):
96+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
97+
def test_sparse_X(global_random_seed, global_dtype, csr_container):
9798
# Test that sparse and dense data give same results
9899
X, y = make_blobs(n_samples=100, centers=10, random_state=global_random_seed)
99100
X = X.astype(global_dtype, copy=False)
100101
brc = Birch(n_clusters=10)
101102
brc.fit(X)
102103

103-
csr = sparse.csr_matrix(X)
104+
csr = csr_container(X)
104105
brc_sparse = Birch(n_clusters=10)
105106
brc_sparse.fit(csr)
106107

sklearn/cluster/tests/test_dbscan.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77

88
import numpy as np
99
import pytest
10-
from scipy import sparse
1110
from scipy.spatial import distance
1211

1312
from sklearn.cluster import DBSCAN, dbscan
1413
from sklearn.cluster.tests.common import generate_clustered_data
1514
from sklearn.metrics.pairwise import pairwise_distances
1615
from sklearn.neighbors import NearestNeighbors
1716
from sklearn.utils._testing import assert_array_equal
17+
from sklearn.utils.fixes import CSR_CONTAINERS, LIL_CONTAINERS
1818

1919
n_clusters = 3
2020
X = generate_clustered_data(n_clusters=n_clusters)
@@ -66,8 +66,9 @@ def test_dbscan_feature():
6666
assert n_clusters_2 == n_clusters
6767

6868

69-
def test_dbscan_sparse():
70-
core_sparse, labels_sparse = dbscan(sparse.lil_matrix(X), eps=0.8, min_samples=10)
69+
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
70+
def test_dbscan_sparse(lil_container):
71+
core_sparse, labels_sparse = dbscan(lil_container(X), eps=0.8, min_samples=10)
7172
core_dense, labels_dense = dbscan(X, eps=0.8, min_samples=10)
7273
assert_array_equal(core_dense, core_sparse)
7374
assert_array_equal(labels_dense, labels_sparse)
@@ -106,27 +107,28 @@ def test_dbscan_sparse_precomputed_different_eps():
106107
assert_array_equal(dbscan_lower[1], dbscan_higher[1])
107108

108109

109-
@pytest.mark.parametrize("use_sparse", [True, False])
110110
@pytest.mark.parametrize("metric", ["precomputed", "minkowski"])
111-
def test_dbscan_input_not_modified(use_sparse, metric):
111+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS + [None])
112+
def test_dbscan_input_not_modified(metric, csr_container):
112113
# test that the input is not modified by dbscan
113114
X = np.random.RandomState(0).rand(10, 10)
114-
X = sparse.csr_matrix(X) if use_sparse else X
115+
X = csr_container(X) if csr_container is not None else X
115116
X_copy = X.copy()
116117
dbscan(X, metric=metric)
117118

118-
if use_sparse:
119+
if csr_container is not None:
119120
assert_array_equal(X.toarray(), X_copy.toarray())
120121
else:
121122
assert_array_equal(X, X_copy)
122123

123124

124-
def test_dbscan_no_core_samples():
125+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
126+
def test_dbscan_no_core_samples(csr_container):
125127
rng = np.random.RandomState(0)
126128
X = rng.rand(40, 10)
127129
X[X < 0.8] = 0
128130

129-
for X_ in [X, sparse.csr_matrix(X)]:
131+
for X_ in [X, csr_container(X)]:
130132
db = DBSCAN(min_samples=6).fit(X_)
131133
assert_array_equal(db.components_, np.empty((0, X_.shape[1])))
132134
assert_array_equal(db.labels_, -1)
@@ -391,7 +393,8 @@ def test_dbscan_precomputed_metric_with_degenerate_input_arrays():
391393
assert len(set(labels)) == 1
392394

393395

394-
def test_dbscan_precomputed_metric_with_initial_rows_zero():
396+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
397+
def test_dbscan_precomputed_metric_with_initial_rows_zero(csr_container):
395398
# sample matrix with initial two row all zero
396399
ar = np.array(
397400
[
@@ -404,6 +407,6 @@ def test_dbscan_precomputed_metric_with_initial_rows_zero():
404407
[0.0, 0.0, 0.0, 0.0, 0.3, 0.1, 0.0],
405408
]
406409
)
407-
matrix = sparse.csr_matrix(ar)
410+
matrix = csr_container(ar)
408411
labels = DBSCAN(eps=0.2, metric="precomputed", min_samples=2).fit(matrix).labels_
409412
assert_array_equal(labels, [-1, -1, 0, 0, 0, 1, 1])

sklearn/compose/tests/test_column_transformer.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
assert_almost_equal,
3434
assert_array_equal,
3535
)
36+
from sklearn.utils.fixes import CSR_CONTAINERS
3637

3738

3839
class Trans(TransformerMixin, BaseEstimator):
@@ -58,12 +59,15 @@ def transform(self, X):
5859

5960

6061
class SparseMatrixTrans(BaseEstimator):
62+
def __init__(self, csr_container):
63+
self.csr_container = csr_container
64+
6165
def fit(self, X, y=None):
6266
return self
6367

6468
def transform(self, X, y=None):
6569
n_samples = len(X)
66-
return sparse.eye(n_samples, n_samples).tocsr()
70+
return self.csr_container(sparse.eye(n_samples, n_samples))
6771

6872

6973
class TransNo2D(BaseEstimator):
@@ -431,14 +435,15 @@ def test_column_transformer_output_indices_df():
431435
assert_array_equal(X_trans[:, []], X_trans[:, ct.output_indices_["remainder"]])
432436

433437

434-
def test_column_transformer_sparse_array():
435-
X_sparse = sparse.eye(3, 2).tocsr()
438+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
439+
def test_column_transformer_sparse_array(csr_container):
440+
X_sparse = csr_container(sparse.eye(3, 2))
436441

437442
# no distinction between 1D and 2D
438-
X_res_first = X_sparse[:, 0]
443+
X_res_first = X_sparse[:, [0]]
439444
X_res_both = X_sparse
440445

441-
for col in [0, [0], slice(0, 1)]:
446+
for col in [(0,), [0], slice(0, 1)]:
442447
for remainder, res in [("drop", X_res_first), ("passthrough", X_res_both)]:
443448
ct = ColumnTransformer(
444449
[("trans", Trans(), col)], remainder=remainder, sparse_threshold=0.8
@@ -474,10 +479,11 @@ def test_column_transformer_list():
474479
assert_array_equal(ct.fit(X_list).transform(X_list), expected_result)
475480

476481

477-
def test_column_transformer_sparse_stacking():
482+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
483+
def test_column_transformer_sparse_stacking(csr_container):
478484
X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
479485
col_trans = ColumnTransformer(
480-
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(), 1)],
486+
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(csr_container), 1)],
481487
sparse_threshold=0.8,
482488
)
483489
col_trans.fit(X_array)
@@ -489,7 +495,7 @@ def test_column_transformer_sparse_stacking():
489495
assert col_trans.transformers_[-1][0] != "remainder"
490496

491497
col_trans = ColumnTransformer(
492-
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(), 1)],
498+
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(csr_container), 1)],
493499
sparse_threshold=0.1,
494500
)
495501
col_trans.fit(X_array)
@@ -999,11 +1005,14 @@ def test_column_transformer_drops_all_remainder_transformer():
9991005
assert_array_equal(ct.transformers_[-1][2], [1, 2])
10001006

10011007

1002-
def test_column_transformer_sparse_remainder_transformer():
1008+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
1009+
def test_column_transformer_sparse_remainder_transformer(csr_container):
10031010
X_array = np.array([[0, 1, 2], [2, 4, 6], [8, 6, 4]]).T
10041011

10051012
ct = ColumnTransformer(
1006-
[("trans1", Trans(), [0])], remainder=SparseMatrixTrans(), sparse_threshold=0.8
1013+
[("trans1", Trans(), [0])],
1014+
remainder=SparseMatrixTrans(csr_container),
1015+
sparse_threshold=0.8,
10071016
)
10081017

10091018
X_trans = ct.fit_transform(X_array)
@@ -1020,10 +1029,13 @@ def test_column_transformer_sparse_remainder_transformer():
10201029
assert_array_equal(ct.transformers_[-1][2], [1, 2])
10211030

10221031

1023-
def test_column_transformer_drop_all_sparse_remainder_transformer():
1032+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
1033+
def test_column_transformer_drop_all_sparse_remainder_transformer(csr_container):
10241034
X_array = np.array([[0, 1, 2], [2, 4, 6], [8, 6, 4]]).T
10251035
ct = ColumnTransformer(
1026-
[("trans1", "drop", [0])], remainder=SparseMatrixTrans(), sparse_threshold=0.8
1036+
[("trans1", "drop", [0])],
1037+
remainder=SparseMatrixTrans(csr_container),
1038+
sparse_threshold=0.8,
10271039
)
10281040

10291041
X_trans = ct.fit_transform(X_array)
@@ -1231,7 +1243,7 @@ def test_column_transformer_negative_column_indexes():
12311243
assert_array_equal(tf_1.fit_transform(X), tf_2.fit_transform(X))
12321244

12331245

1234-
@pytest.mark.parametrize("array_type", [np.asarray, sparse.csr_matrix])
1246+
@pytest.mark.parametrize("array_type", [np.asarray, *CSR_CONTAINERS])
12351247
def test_column_transformer_mask_indexing(array_type):
12361248
# Regression test for #14510
12371249
# Boolean array-like does not behave as boolean array with sparse matrices.

0 commit comments

Comments
 (0)