Skip to content

Commit bc8eb66

Browse files
authored
MAINT Remove some unwanted side effects in our test suite (scikit-learn#29584)
1 parent 15d5a06 commit bc8eb66

File tree

6 files changed

+53
-15
lines changed

6 files changed

+53
-15
lines changed

sklearn/gaussian_process/tests/test_kernels.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737

3838
X = np.random.RandomState(0).normal(0, 1, (5, 2))
3939
Y = np.random.RandomState(0).normal(0, 1, (6, 2))
40+
# Set shared test data as read-only to avoid unintentional in-place
41+
# modifications that would introduce side-effects between tests.
42+
X.flags.writeable = False
43+
Y.flags.writeable = False
4044

4145
kernel_rbf_plus_white = RBF(length_scale=2.0) + WhiteKernel(noise_level=3.0)
4246
kernels = [
@@ -70,6 +74,7 @@
7074
@pytest.mark.parametrize("kernel", kernels)
7175
def test_kernel_gradient(kernel):
7276
# Compare analytic and numeric gradient of kernels.
77+
kernel = clone(kernel) # make tests independent of one-another
7378
K, K_gradient = kernel(X, eval_gradient=True)
7479

7580
assert K_gradient.shape[0] == X.shape[0]
@@ -97,6 +102,7 @@ def eval_kernel_for_theta(theta):
97102
)
98103
def test_kernel_theta(kernel):
99104
# Check that parameter vector theta of kernel is set correctly.
105+
kernel = clone(kernel) # make tests independent of one-another
100106
theta = kernel.theta
101107
_, K_gradient = kernel(X, eval_gradient=True)
102108

@@ -154,6 +160,7 @@ def test_kernel_theta(kernel):
154160
],
155161
)
156162
def test_auto_vs_cross(kernel):
163+
kernel = clone(kernel) # make tests independent of one-another
157164
# Auto-correlation and cross-correlation should be consistent.
158165
K_auto = kernel(X)
159166
K_cross = kernel(X, X)
@@ -162,6 +169,7 @@ def test_auto_vs_cross(kernel):
162169

163170
@pytest.mark.parametrize("kernel", kernels)
164171
def test_kernel_diag(kernel):
172+
kernel = clone(kernel) # make tests independent of one-another
165173
# Test that diag method of kernel returns consistent results.
166174
K_call_diag = np.diag(kernel(X))
167175
K_diag = kernel.diag(X)
@@ -182,12 +190,12 @@ def test_kernel_anisotropic():
182190
kernel = 3.0 * RBF([0.5, 2.0])
183191

184192
K = kernel(X)
185-
X1 = np.array(X)
193+
X1 = X.copy()
186194
X1[:, 0] *= 4
187195
K1 = 3.0 * RBF(2.0)(X1)
188196
assert_almost_equal(K, K1)
189197

190-
X2 = np.array(X)
198+
X2 = X.copy()
191199
X2[:, 1] /= 4
192200
K2 = 3.0 * RBF(0.5)(X2)
193201
assert_almost_equal(K, K2)
@@ -202,13 +210,15 @@ def test_kernel_anisotropic():
202210
"kernel", [kernel for kernel in kernels if kernel.is_stationary()]
203211
)
204212
def test_kernel_stationary(kernel):
213+
kernel = clone(kernel) # make tests independent of one-another
205214
# Test stationarity of kernels.
206215
K = kernel(X, X + 1)
207216
assert_almost_equal(K[0, 0], np.diag(K))
208217

209218

210219
@pytest.mark.parametrize("kernel", kernels)
211220
def test_kernel_input_type(kernel):
221+
kernel = clone(kernel) # make tests independent of one-another
212222
# Test whether the kernel is for vectors or structured data
213223
if isinstance(kernel, Exponentiation):
214224
assert kernel.requires_vector_input == kernel.kernel.requires_vector_input
@@ -237,6 +247,7 @@ def check_hyperparameters_equal(kernel1, kernel2):
237247

238248
@pytest.mark.parametrize("kernel", kernels)
239249
def test_kernel_clone(kernel):
250+
kernel = clone(kernel) # make tests independent of one-another
240251
# Test that sklearn's clone works correctly on kernels.
241252
kernel_cloned = clone(kernel)
242253

@@ -254,6 +265,7 @@ def test_kernel_clone(kernel):
254265

255266
@pytest.mark.parametrize("kernel", kernels)
256267
def test_kernel_clone_after_set_params(kernel):
268+
kernel = clone(kernel) # make tests independent of one-another
257269
# This test is to verify that using set_params does not
258270
# break clone on kernels.
259271
# This used to break because in kernels such as the RBF, non-trivial
@@ -312,6 +324,7 @@ def test_matern_kernel():
312324

313325
@pytest.mark.parametrize("kernel", kernels)
314326
def test_kernel_versus_pairwise(kernel):
327+
kernel = clone(kernel) # make tests independent of one-another
315328
# Check that GP kernels can also be used as pairwise kernels.
316329

317330
# Test auto-kernel
@@ -330,6 +343,7 @@ def test_kernel_versus_pairwise(kernel):
330343

331344
@pytest.mark.parametrize("kernel", kernels)
332345
def test_set_get_params(kernel):
346+
kernel = clone(kernel) # make tests independent of one-another
333347
# Check that set_params()/get_params() is consistent with kernel.theta.
334348

335349
# Test get_params()
@@ -372,6 +386,7 @@ def test_set_get_params(kernel):
372386

373387
@pytest.mark.parametrize("kernel", kernels)
374388
def test_repr_kernels(kernel):
389+
kernel = clone(kernel) # make tests independent of one-another
375390
# Smoke-test for repr in kernels.
376391

377392
repr(kernel)

sklearn/metrics/_plot/tests/test_roc_curve_display.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from numpy.testing import assert_allclose
44
from scipy.integrate import trapezoid
55

6+
from sklearn import clone
67
from sklearn.compose import make_column_transformer
78
from sklearn.datasets import load_breast_cancer, load_iris
89
from sklearn.exceptions import NotFittedError
@@ -16,7 +17,11 @@
1617

1718
@pytest.fixture(scope="module")
1819
def data():
19-
return load_iris(return_X_y=True)
20+
X, y = load_iris(return_X_y=True)
21+
# Avoid introducing test dependencies by mistake.
22+
X.flags.writeable = False
23+
y.flags.writeable = False
24+
return X, y
2025

2126

2227
@pytest.fixture(scope="module")
@@ -218,6 +223,8 @@ def test_roc_curve_display_complex_pipeline(pyplot, data_binary, clf, constructo
218223
"""Check the behaviour with complex pipeline."""
219224
X, y = data_binary
220225

226+
clf = clone(clf)
227+
221228
if constructor_name == "from_estimator":
222229
with pytest.raises(NotFittedError):
223230
RocCurveDisplay.from_estimator(clf, X, y)

sklearn/neighbors/tests/test_nca.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@
2222
from sklearn.utils.validation import validate_data
2323

2424
rng = check_random_state(0)
25-
# load and shuffle iris dataset
25+
# Load and shuffle the iris dataset.
2626
iris = load_iris()
2727
perm = rng.permutation(iris.target.size)
2828
iris_data = iris.data[perm]
2929
iris_target = iris.target[perm]
30+
# Mark the test data read-only to avoid introducing dependencies between tests.
31+
iris_data.flags.writeable = False
32+
iris_target.flags.writeable = False
3033
EPS = np.finfo(float).eps
3134

3235

@@ -414,8 +417,8 @@ def test_no_verbose(capsys):
414417

415418

416419
def test_singleton_class():
417-
X = iris_data
418-
y = iris_target
420+
X = iris_data.copy()
421+
y = iris_target.copy()
419422

420423
# one singleton class
421424
singleton_class = 1

sklearn/tests/test_kernel_approximation.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
X /= X.sum(axis=1)[:, np.newaxis]
3232
Y /= Y.sum(axis=1)[:, np.newaxis]
3333

34+
# Make sure X and Y are not writable to avoid introducing dependencies between
35+
# tests.
36+
X.flags.writeable = False
37+
Y.flags.writeable = False
38+
3439

3540
@pytest.mark.parametrize("gamma", [0.1, 1, 2.5])
3641
@pytest.mark.parametrize("degree, n_components", [(1, 500), (2, 500), (3, 5000)])
@@ -95,8 +100,8 @@ def test_additive_chi2_sampler(csr_container):
95100

96101
# compute exact kernel
97102
# abbreviations for easier formula
98-
X_ = X[:, np.newaxis, :]
99-
Y_ = Y[np.newaxis, :, :]
103+
X_ = X[:, np.newaxis, :].copy()
104+
Y_ = Y[np.newaxis, :, :].copy()
100105

101106
large_kernel = 2 * X_ * Y_ / (X_ + Y_)
102107

@@ -163,11 +168,12 @@ def test_skewed_chi2_sampler():
163168
# set on negative component but greater than c to ensure that the kernel
164169
# approximation is valid on the group (-c; +\infty) endowed with the skewed
165170
# multiplication.
166-
Y[0, 0] = -c / 2.0
171+
Y_ = Y.copy()
172+
Y_[0, 0] = -c / 2.0
167173

168174
# abbreviations for easier formula
169175
X_c = (X + c)[:, np.newaxis, :]
170-
Y_c = (Y + c)[np.newaxis, :, :]
176+
Y_c = (Y_ + c)[np.newaxis, :, :]
171177

172178
# we do it in log-space in the hope that it's more stable
173179
# this array is n_samples_x x n_samples_y big x n_features
@@ -180,15 +186,15 @@ def test_skewed_chi2_sampler():
180186
# approximate kernel mapping
181187
transform = SkewedChi2Sampler(skewedness=c, n_components=1000, random_state=42)
182188
X_trans = transform.fit_transform(X)
183-
Y_trans = transform.transform(Y)
189+
Y_trans = transform.transform(Y_)
184190

185191
kernel_approx = np.dot(X_trans, Y_trans.T)
186192
assert_array_almost_equal(kernel, kernel_approx, 1)
187193
assert np.isfinite(kernel).all(), "NaNs found in the Gram matrix"
188194
assert np.isfinite(kernel_approx).all(), "NaNs found in the approximate Gram matrix"
189195

190196
# test that an error is raised when inputs contain values smaller than -c
191-
Y_neg = Y.copy()
197+
Y_neg = Y_.copy()
192198
Y_neg[0, 0] = -c * 2.0
193199
msg = "X may not contain entries smaller than -skewedness"
194200
with pytest.raises(ValueError, match=msg):

sklearn/tests/test_pipeline.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,13 @@
5353
from sklearn.utils.fixes import CSR_CONTAINERS
5454
from sklearn.utils.validation import _check_feature_names, check_is_fitted
5555

56+
# Load shared data sets for the tests in this module. Mark them
57+
# read-only to avoid unintentional in-place modifications that would introduce
58+
# side-effects between tests.
5659
iris = load_iris()
60+
iris.data.flags.writeable = False
61+
iris.target.flags.writeable = False
62+
5763

5864
JUNK_FOOD_DOCS = (
5965
"the pizza pizza beer copyright",
@@ -507,7 +513,7 @@ def test_predict_methods_with_predict_params(method_name):
507513
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
508514
def test_feature_union(csr_container):
509515
# basic sanity check for feature union
510-
X = iris.data
516+
X = iris.data.copy()
511517
X -= X.mean(axis=0)
512518
y = iris.target
513519
svd = TruncatedSVD(n_components=2, random_state=0)
@@ -1592,7 +1598,7 @@ def fit(self, X, y=None, **fit_params):
15921598
def test_pipeline_missing_values_leniency():
15931599
# check that the pipeline leaves the missing-values validation to
15941600
# the underlying transformers and predictors.
1595-
X, y = iris.data, iris.target
1601+
X, y = iris.data.copy(), iris.target.copy()
15961602
mask = np.random.choice([1, 0], X.shape, p=[0.1, 0.9]).astype(bool)
15971603
X[mask] = np.nan
15981604
pipe = make_pipeline(SimpleImputer(), LogisticRegression())

sklearn/tree/tests/test_tree.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2686,7 +2686,8 @@ def test_regression_tree_missing_values_toy(Tree, X, criterion):
26862686
tree = Tree(criterion=criterion, random_state=0).fit(X, y)
26872687
tree_ref = clone(tree).fit(y.reshape(-1, 1), y)
26882688

2689-
assert all(tree.tree_.impurity >= 0) # MSE should always be positive
2689+
impurity = tree.tree_.impurity
2690+
assert all(impurity >= 0), impurity.min() # MSE should always be positive
26902691

26912692
# Note: the impurity matches after the first split only on greedy trees
26922693
if Tree is DecisionTreeRegressor:

0 commit comments

Comments
 (0)