Skip to content

Commit 9f6592f

Browse files
TST Extend tests for scipy.sparse.*array in test_polynomial.py (scikit-learn#27166)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent 1884eb7 commit 9f6592f

File tree

3 files changed

+67
-46
lines changed

3 files changed

+67
-46
lines changed

doc/whats_new/v1.4.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,8 @@ and classes are impacted:
119119
:user:`Lohit SundaramahaLingam <lohitslohit>`;
120120
- :class:`neural_network.BernoulliRBM` in :pr:`27252` by
121121
:user:`Yao Xiao <Charlie-XIAO>`.
122+
- :class:`preprocessing.PolynomialFeatures` in :pr:`27166` by
123+
:user:`Mohit Joshi <work-mohit>`.
122124

123125
Changelog
124126
---------

sklearn/preprocessing/_polynomial.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -495,7 +495,7 @@ def transform(self, X):
495495
if combi:
496496
out_col = 1
497497
for col_idx in combi:
498-
out_col = X[:, col_idx].multiply(out_col)
498+
out_col = X[:, [col_idx]].multiply(out_col)
499499
columns.append(out_col)
500500
else:
501501
bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))

sklearn/preprocessing/tests/test_polynomial.py

Lines changed: 64 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,12 @@
2020
_get_sizeof_LARGEST_INT_t,
2121
)
2222
from sklearn.utils._testing import assert_array_almost_equal
23-
from sklearn.utils.fixes import parse_version, sp_version
23+
from sklearn.utils.fixes import (
24+
CSC_CONTAINERS,
25+
CSR_CONTAINERS,
26+
parse_version,
27+
sp_version,
28+
)
2429

2530

2631
@pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer))
@@ -522,27 +527,24 @@ def single_feature_degree3():
522527
((2, 3), False, True, []),
523528
],
524529
)
525-
@pytest.mark.parametrize(
526-
"sparse_X",
527-
[False, sparse.csr_matrix, sparse.csc_matrix],
528-
)
530+
@pytest.mark.parametrize("X_container", [None] + CSR_CONTAINERS + CSC_CONTAINERS)
529531
def test_polynomial_features_one_feature(
530532
single_feature_degree3,
531533
degree,
532534
include_bias,
533535
interaction_only,
534536
indices,
535-
sparse_X,
537+
X_container,
536538
):
537539
"""Test PolynomialFeatures on single feature up to degree 3."""
538540
X, P = single_feature_degree3
539-
if sparse_X:
540-
X = sparse_X(X)
541+
if X_container is not None:
542+
X = X_container(X)
541543
tf = PolynomialFeatures(
542544
degree=degree, include_bias=include_bias, interaction_only=interaction_only
543545
).fit(X)
544546
out = tf.transform(X)
545-
if sparse_X:
547+
if X_container is not None:
546548
out = out.toarray()
547549
assert_allclose(out, P[:, indices])
548550
if tf.n_output_features_ > 0:
@@ -596,27 +598,24 @@ def two_features_degree3():
596598
((3, 3), False, True, []), # would need 3 input features
597599
],
598600
)
599-
@pytest.mark.parametrize(
600-
"sparse_X",
601-
[False, sparse.csr_matrix, sparse.csc_matrix],
602-
)
601+
@pytest.mark.parametrize("X_container", [None] + CSR_CONTAINERS + CSC_CONTAINERS)
603602
def test_polynomial_features_two_features(
604603
two_features_degree3,
605604
degree,
606605
include_bias,
607606
interaction_only,
608607
indices,
609-
sparse_X,
608+
X_container,
610609
):
611610
"""Test PolynomialFeatures on 2 features up to degree 3."""
612611
X, P = two_features_degree3
613-
if sparse_X:
614-
X = sparse_X(X)
612+
if X_container is not None:
613+
X = X_container(X)
615614
tf = PolynomialFeatures(
616615
degree=degree, include_bias=include_bias, interaction_only=interaction_only
617616
).fit(X)
618617
out = tf.transform(X)
619-
if sparse_X:
618+
if X_container is not None:
620619
out = out.toarray()
621620
assert_allclose(out, P[:, indices])
622621
if tf.n_output_features_ > 0:
@@ -712,10 +711,13 @@ def test_polynomial_feature_names():
712711
(4, False, True, np.float64),
713712
],
714713
)
715-
def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
714+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
715+
def test_polynomial_features_csc_X(
716+
deg, include_bias, interaction_only, dtype, csc_container
717+
):
716718
rng = np.random.RandomState(0)
717719
X = rng.randint(0, 2, (100, 2))
718-
X_csc = sparse.csc_matrix(X)
720+
X_csc = csc_container(X)
719721

720722
est = PolynomialFeatures(
721723
deg, include_bias=include_bias, interaction_only=interaction_only
@@ -739,10 +741,13 @@ def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
739741
(3, False, True, np.float64),
740742
],
741743
)
742-
def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
744+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
745+
def test_polynomial_features_csr_X(
746+
deg, include_bias, interaction_only, dtype, csr_container
747+
):
743748
rng = np.random.RandomState(0)
744749
X = rng.randint(0, 2, (100, 2))
745-
X_csr = sparse.csr_matrix(X)
750+
X_csr = csr_container(X)
746751

747752
est = PolynomialFeatures(
748753
deg, include_bias=include_bias, interaction_only=interaction_only
@@ -761,17 +766,14 @@ def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
761766
)
762767
@pytest.mark.parametrize("interaction_only", [True, False])
763768
@pytest.mark.parametrize("include_bias", [True, False])
769+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
764770
def test_num_combinations(
765-
n_features,
766-
min_degree,
767-
max_degree,
768-
interaction_only,
769-
include_bias,
771+
n_features, min_degree, max_degree, interaction_only, include_bias, csr_container
770772
):
771773
"""
772774
Test that n_output_features_ is calculated correctly.
773775
"""
774-
x = sparse.csr_matrix(([1], ([0], [n_features - 1])))
776+
x = csr_container(([1], ([0], [n_features - 1])))
775777
est = PolynomialFeatures(
776778
degree=max_degree,
777779
interaction_only=interaction_only,
@@ -799,8 +801,11 @@ def test_num_combinations(
799801
(3, False, True, np.float64),
800802
],
801803
)
802-
def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, dtype):
803-
X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
804+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
805+
def test_polynomial_features_csr_X_floats(
806+
deg, include_bias, interaction_only, dtype, csr_container
807+
):
808+
X_csr = csr_container(sparse_random(1000, 10, 0.5, random_state=0))
804809
X = X_csr.toarray()
805810

806811
est = PolynomialFeatures(
@@ -831,8 +836,11 @@ def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, d
831836
(2, 3, False),
832837
],
833838
)
834-
def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_only):
835-
X_csr = sparse_random(3, 10, 1.0, random_state=0).tocsr()
839+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
840+
def test_polynomial_features_csr_X_zero_row(
841+
zero_row_index, deg, interaction_only, csr_container
842+
):
843+
X_csr = csr_container(sparse_random(3, 10, 1.0, random_state=0))
836844
X_csr[zero_row_index, :] = 0.0
837845
X = X_csr.toarray()
838846

@@ -851,8 +859,11 @@ def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_onl
851859
["include_bias", "interaction_only"],
852860
[(True, True), (True, False), (False, True), (False, False)],
853861
)
854-
def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
855-
X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
862+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
863+
def test_polynomial_features_csr_X_degree_4(
864+
include_bias, interaction_only, csr_container
865+
):
866+
X_csr = csr_container(sparse_random(1000, 10, 0.5, random_state=0))
856867
X = X_csr.toarray()
857868

858869
est = PolynomialFeatures(
@@ -881,8 +892,9 @@ def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
881892
(3, 3, False),
882893
],
883894
)
884-
def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
885-
X_csr = sparse_random(1000, dim, 0.5, random_state=0).tocsr()
895+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
896+
def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only, csr_container):
897+
X_csr = csr_container(sparse_random(1000, dim, 0.5, random_state=0))
886898
X = X_csr.toarray()
887899

888900
est = PolynomialFeatures(deg, interaction_only=interaction_only)
@@ -896,8 +908,9 @@ def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
896908

897909
@pytest.mark.parametrize("interaction_only", [True, False])
898910
@pytest.mark.parametrize("include_bias", [True, False])
911+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
899912
def test_csr_polynomial_expansion_index_overflow_non_regression(
900-
interaction_only, include_bias
913+
interaction_only, include_bias, csr_container
901914
):
902915
"""Check the automatic index dtype promotion to `np.int64` when needed.
903916
@@ -925,7 +938,7 @@ def degree_2_calc(d, i, j):
925938
col = np.array(
926939
[n_features - 2, n_features - 1, n_features - 2, n_features - 1], dtype=np.int64
927940
)
928-
X = sparse.csr_matrix(
941+
X = csr_container(
929942
(data, (row, col)),
930943
shape=(n_samples, n_features),
931944
dtype=data_dtype,
@@ -1028,8 +1041,9 @@ def degree_2_calc(d, i, j):
10281041
)
10291042
@pytest.mark.parametrize("interaction_only", [True, False])
10301043
@pytest.mark.parametrize("include_bias", [True, False])
1044+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
10311045
def test_csr_polynomial_expansion_index_overflow(
1032-
degree, n_features, interaction_only, include_bias
1046+
degree, n_features, interaction_only, include_bias, csr_container
10331047
):
10341048
"""Tests known edge-cases to the dtype promotion strategy and custom
10351049
Cython code, including a current bug in the upstream
@@ -1050,7 +1064,7 @@ def test_csr_polynomial_expansion_index_overflow(
10501064
n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1]
10511065
)
10521066

1053-
X = sparse.csr_matrix((data, (row, col)))
1067+
X = csr_container((data, (row, col)))
10541068
pf = PolynomialFeatures(
10551069
interaction_only=interaction_only, include_bias=include_bias, degree=degree
10561070
)
@@ -1131,12 +1145,15 @@ def test_csr_polynomial_expansion_index_overflow(
11311145

11321146
@pytest.mark.parametrize("interaction_only", [True, False])
11331147
@pytest.mark.parametrize("include_bias", [True, False])
1134-
def test_csr_polynomial_expansion_too_large_to_index(interaction_only, include_bias):
1148+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
1149+
def test_csr_polynomial_expansion_too_large_to_index(
1150+
interaction_only, include_bias, csr_container
1151+
):
11351152
n_features = np.iinfo(np.int64).max // 2
11361153
data = [1.0]
11371154
row = [0]
11381155
col = [n_features - 1]
1139-
X = sparse.csr_matrix((data, (row, col)))
1156+
X = csr_container((data, (row, col)))
11401157
pf = PolynomialFeatures(
11411158
interaction_only=interaction_only, include_bias=include_bias, degree=(2, 2)
11421159
)
@@ -1150,7 +1167,8 @@ def test_csr_polynomial_expansion_too_large_to_index(interaction_only, include_b
11501167
pf.fit_transform(X)
11511168

11521169

1153-
def test_polynomial_features_behaviour_on_zero_degree():
1170+
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
1171+
def test_polynomial_features_behaviour_on_zero_degree(sparse_container):
11541172
"""Check that PolynomialFeatures raises error when degree=0 and include_bias=False,
11551173
and output a single constant column when include_bias=True
11561174
"""
@@ -1171,7 +1189,7 @@ def test_polynomial_features_behaviour_on_zero_degree():
11711189
with pytest.raises(ValueError, match=err_msg):
11721190
poly.fit_transform(X)
11731191

1174-
for _X in [X, sparse.csr_matrix(X), sparse.csc_matrix(X)]:
1192+
for _X in [X, sparse_container(X)]:
11751193
poly = PolynomialFeatures(degree=0, include_bias=True)
11761194
output = poly.fit_transform(_X)
11771195
# convert to dense array if needed
@@ -1202,7 +1220,8 @@ def test_sizeof_LARGEST_INT_t():
12021220
),
12031221
run=True,
12041222
)
1205-
def test_csr_polynomial_expansion_windows_fail():
1223+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
1224+
def test_csr_polynomial_expansion_windows_fail(csr_container):
12061225
# Minimum needed to ensure integer overflow occurs while guaranteeing an
12071226
# int64-indexable output.
12081227
n_features = int(np.iinfo(np.int64).max ** (1 / 3) + 3)
@@ -1223,7 +1242,7 @@ def test_csr_polynomial_expansion_windows_fail():
12231242
int(n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1])
12241243
)
12251244

1226-
X = sparse.csr_matrix((data, (row, col)))
1245+
X = csr_container((data, (row, col)))
12271246
pf = PolynomialFeatures(interaction_only=False, include_bias=False, degree=3)
12281247
if sys.maxsize <= 2**32:
12291248
msg = (

0 commit comments

Comments
 (0)