
Commit 1fbf5fb

TST Speed up some of the slowest tests (scikit-learn#27383)
1 parent 04b3a5c commit 1fbf5fb

File tree

6 files changed, +29 -25 lines changed


sklearn/decomposition/_dict_learning.py

Lines changed: 7 additions & 7 deletions
@@ -1692,7 +1692,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
     >>> from sklearn.datasets import make_sparse_coded_signal
     >>> from sklearn.decomposition import DictionaryLearning
     >>> X, dictionary, code = make_sparse_coded_signal(
-    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,
+    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
     ...     random_state=42,
     ... )
     >>> dict_learner = DictionaryLearning(
@@ -1704,15 +1704,15 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
     We can check the level of sparsity of `X_transformed`:

     >>> np.mean(X_transformed == 0)
-    0.41...
+    0.52...

     We can compare the average squared euclidean norm of the reconstruction
     error of the sparse coded signal relative to the squared euclidean norm of
     the original signal:

     >>> X_hat = X_transformed @ dict_learner.components_
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    0.07...
+    0.05...
     """

     _parameter_constraints: dict = {
@@ -2062,16 +2062,16 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
     >>> from sklearn.datasets import make_sparse_coded_signal
     >>> from sklearn.decomposition import MiniBatchDictionaryLearning
     >>> X, dictionary, code = make_sparse_coded_signal(
-    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,
+    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
     ...     random_state=42)
     >>> dict_learner = MiniBatchDictionaryLearning(
     ...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',
-    ...     transform_alpha=0.1, random_state=42)
+    ...     transform_alpha=0.1, max_iter=20, random_state=42)
     >>> X_transformed = dict_learner.fit_transform(X)

     We can check the level of sparsity of `X_transformed`:

-    >>> np.mean(X_transformed == 0) < 0.5
+    >>> np.mean(X_transformed == 0) > 0.5
     True

     We can compare the average squared euclidean norm of the reconstruction
@@ -2080,7 +2080,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):

     >>> X_hat = X_transformed @ dict_learner.components_
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    0.057...
+    0.052...
     """

     _parameter_constraints: dict = {
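
For reference, here is a minimal sketch of the shrunken DictionaryLearning docstring example as a standalone script. The transform_algorithm and transform_alpha arguments are not visible in this hunk and are assumed to mirror the MiniBatchDictionaryLearning example; the printed sparsity and reconstruction-error values are only rough targets, not exact outputs.

# Sketch only: standalone version of the updated docstring example (n_samples=30).
import numpy as np
from sklearn.datasets import make_sparse_coded_signal
from sklearn.decomposition import DictionaryLearning

X, dictionary, code = make_sparse_coded_signal(
    n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
    random_state=42,
)
# Constructor arguments below are assumptions (elided in the hunk above).
dict_learner = DictionaryLearning(
    n_components=15, transform_algorithm="lasso_lars", transform_alpha=0.1,
    random_state=42,
)
X_transformed = dict_learner.fit_transform(X)
print(np.mean(X_transformed == 0))  # roughly 0.52 per the updated doctest
X_hat = X_transformed @ dict_learner.components_
# Relative reconstruction error, roughly 0.05 per the updated doctest.
print(np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1)))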

sklearn/decomposition/tests/test_dict_learning.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def test_sparse_encode_shapes_omp():
     for n_components, n_samples in itertools.product([1, 5], [1, 9]):
         X_ = rng.randn(n_samples, n_features)
         dictionary = rng.randn(n_components, n_features)
-        for algorithm, n_jobs in itertools.product(algorithms, [1, 3]):
+        for algorithm, n_jobs in itertools.product(algorithms, [1, 2]):
             code = sparse_encode(X_, dictionary, algorithm=algorithm, n_jobs=n_jobs)
             assert code.shape == (n_samples, n_components)

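
A hedged sketch of the call this test exercises: sparse_encode returns a code matrix of shape (n_samples, n_components) regardless of n_jobs, which is why lowering n_jobs from 3 to 2 does not weaken the check. The array sizes below are illustrative, not the ones the test iterates over.

# Sketch only: shape of the sparse code is independent of the n_jobs setting.
import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
X = rng.randn(9, 8)           # 9 samples, 8 features (illustrative sizes)
dictionary = rng.randn(5, 8)  # 5 dictionary atoms with matching n_features
code = sparse_encode(X, dictionary, algorithm="omp", n_jobs=2)
assert code.shape == (9, 5)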

sklearn/decomposition/tests/test_sparse_pca.py

Lines changed: 2 additions & 2 deletions
@@ -120,12 +120,12 @@ def test_initialization():
 def test_mini_batch_correct_shapes():
     rng = np.random.RandomState(0)
     X = rng.randn(12, 10)
-    pca = MiniBatchSparsePCA(n_components=8, random_state=rng)
+    pca = MiniBatchSparsePCA(n_components=8, max_iter=1, random_state=rng)
     U = pca.fit_transform(X)
     assert pca.components_.shape == (8, 10)
     assert U.shape == (12, 8)
     # test overcomplete decomposition
-    pca = MiniBatchSparsePCA(n_components=13, random_state=rng)
+    pca = MiniBatchSparsePCA(n_components=13, max_iter=1, random_state=rng)
     U = pca.fit_transform(X)
     assert pca.components_.shape == (13, 10)
     assert U.shape == (12, 13)
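
A hedged sketch of the property this test relies on: the component and transform shapes follow from n_components and the input shape alone, so max_iter=1 speeds up the fit without weakening the assertions.

# Sketch only; a single pass over the data is enough for the shape checks.
import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

rng = np.random.RandomState(0)
X = rng.randn(12, 10)                    # 12 samples, 10 features
pca = MiniBatchSparsePCA(n_components=8, max_iter=1, random_state=rng)
U = pca.fit_transform(X)
assert pca.components_.shape == (8, 10)  # (n_components, n_features)
assert U.shape == (12, 8)                # (n_samples, n_components)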

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 13 additions & 8 deletions
@@ -99,10 +99,15 @@ def test_classification_toy(loss, global_random_seed):
 def test_classification_synthetic(loss, global_random_seed):
     # Test GradientBoostingClassifier on synthetic dataset used by
     # Hastie et al. in ESLII - Figure 10.9
-    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=global_random_seed)
+    # Note that Figure 10.9 reuses the dataset generated for figure 10.2
+    # and should have 2_000 train data points and 10_000 test data points.
+    # Here we intentionally use a smaller variant to make the test run faster,
+    # but the conclusions are still the same, despite the smaller datasets.
+    X, y = datasets.make_hastie_10_2(n_samples=2000, random_state=global_random_seed)

-    X_train, X_test = X[:2000], X[2000:]
-    y_train, y_test = y[:2000], y[2000:]
+    split_idx = 500
+    X_train, X_test = X[:split_idx], X[split_idx:]
+    y_train, y_test = y[:split_idx], y[split_idx:]

     # Increasing the number of trees should decrease the test error
     common_params = {
@@ -111,13 +116,13 @@ def test_classification_synthetic(loss, global_random_seed):
         "loss": loss,
         "random_state": global_random_seed,
     }
-    gbrt_100_stumps = GradientBoostingClassifier(n_estimators=100, **common_params)
-    gbrt_100_stumps.fit(X_train, y_train)
+    gbrt_10_stumps = GradientBoostingClassifier(n_estimators=10, **common_params)
+    gbrt_10_stumps.fit(X_train, y_train)

-    gbrt_200_stumps = GradientBoostingClassifier(n_estimators=200, **common_params)
-    gbrt_200_stumps.fit(X_train, y_train)
+    gbrt_50_stumps = GradientBoostingClassifier(n_estimators=50, **common_params)
+    gbrt_50_stumps.fit(X_train, y_train)

-    assert gbrt_100_stumps.score(X_test, y_test) < gbrt_200_stumps.score(X_test, y_test)
+    assert gbrt_10_stumps.score(X_test, y_test) < gbrt_50_stumps.score(X_test, y_test)

     # Decision stumps are better suited for this dataset with a large number of
     # estimators.
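
A hedged, standalone sketch of the property being asserted: on the shrunken Hastie dataset, boosting with more stumps is expected to score at least as well on held-out data as boosting with fewer stumps. The max_depth, learning_rate, and loss values below are assumptions, since common_params is only partially visible in this hunk, and the real test is parametrized over losses and a global random seed.

# Sketch only: illustrative hyperparameters, not the test's exact common_params.
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier

X, y = datasets.make_hastie_10_2(n_samples=2000, random_state=0)
X_train, X_test = X[:500], X[500:]
y_train, y_test = y[:500], y[500:]

common = {"max_depth": 1, "learning_rate": 1.0, "loss": "log_loss", "random_state": 0}
gbrt_10 = GradientBoostingClassifier(n_estimators=10, **common).fit(X_train, y_train)
gbrt_50 = GradientBoostingClassifier(n_estimators=50, **common).fit(X_train, y_train)
# With more stumps, the held-out accuracy is expected to improve.
print(gbrt_10.score(X_test, y_test), gbrt_50.score(X_test, y_test))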

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 4 additions & 5 deletions
@@ -74,7 +74,6 @@
 )  # type: ignore

 P = (1, 2, 3, 4, np.inf)
-JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys())

 # Filter deprecation warnings.
 neighbors.kneighbors_graph = ignore_warnings(neighbors.kneighbors_graph)
@@ -2044,10 +2043,10 @@ def test_same_radius_neighbors_parallel(algorithm):
     assert_allclose(graph, graph_parallel)


-@pytest.mark.parametrize("backend", JOBLIB_BACKENDS)
+@pytest.mark.parametrize("backend", ["threading", "loky"])
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
 def test_knn_forcing_backend(backend, algorithm):
-    # Non-regression test which ensure the knn methods are properly working
+    # Non-regression test which ensures the knn methods are properly working
     # even when forcing the global joblib backend.
     with joblib.parallel_backend(backend):
         X, y = datasets.make_classification(
@@ -2056,12 +2055,12 @@ def test_knn_forcing_backend(backend, algorithm):
         X_train, X_test, y_train, y_test = train_test_split(X, y)

         clf = neighbors.KNeighborsClassifier(
-            n_neighbors=3, algorithm=algorithm, n_jobs=3
+            n_neighbors=3, algorithm=algorithm, n_jobs=2
         )
         clf.fit(X_train, y_train)
         clf.predict(X_test)
         clf.kneighbors(X_test)
-        clf.kneighbors_graph(X_test, mode="distance").toarray()
+        clf.kneighbors_graph(X_test, mode="distance")


 def test_dtype_convert():
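
For context, a hedged sketch of forcing a joblib backend around the k-NN estimator, which is what the test exercises; the dataset sizes and the "brute" algorithm below are illustrative assumptions rather than the test's exact parametrization.

# Sketch only: run the k-NN methods under an explicitly forced joblib backend.
import joblib
from sklearn import datasets, neighbors
from sklearn.model_selection import train_test_split

X, y = datasets.make_classification(n_samples=30, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

with joblib.parallel_backend("threading"):  # "loky" is the other backend kept
    clf = neighbors.KNeighborsClassifier(n_neighbors=3, algorithm="brute", n_jobs=2)
    clf.fit(X_train, y_train)
    clf.predict(X_test)
    clf.kneighbors(X_test)
    clf.kneighbors_graph(X_test, mode="distance")  # sparse graph; no need to densify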

sklearn/tree/tests/test_tree.py

Lines changed: 2 additions & 2 deletions
@@ -804,10 +804,10 @@ def test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input(
     )


-def test_min_impurity_decrease():
+def test_min_impurity_decrease(global_random_seed):
     # test if min_impurity_decrease ensure that a split is made only if
     # if the impurity decrease is at least that value
-    X, y = datasets.make_classification(n_samples=10000, random_state=42)
+    X, y = datasets.make_classification(n_samples=100, random_state=global_random_seed)

     # test both DepthFirstTreeBuilder and BestFirstTreeBuilder
     # by setting max_leaf_nodes
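
A hedged sketch of what min_impurity_decrease controls, independent of the dataset size the test now uses: a split is only made when it decreases the weighted impurity by at least the given amount, so raising the threshold can only suppress splits and the resulting tree is no deeper.

# Sketch only: a stricter impurity-decrease threshold prunes splits, so the
# resulting tree is at most as deep as the unconstrained one on the same data.
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

X, y = datasets.make_classification(n_samples=100, random_state=0)

loose = DecisionTreeClassifier(min_impurity_decrease=0.0, random_state=0).fit(X, y)
strict = DecisionTreeClassifier(min_impurity_decrease=0.2, random_state=0).fit(X, y)
assert strict.get_depth() <= loose.get_depth()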
