20
20
_get_sizeof_LARGEST_INT_t ,
21
21
)
22
22
from sklearn .utils ._testing import assert_array_almost_equal
23
- from sklearn .utils .fixes import parse_version , sp_version
23
+ from sklearn .utils .fixes import (
24
+ CSC_CONTAINERS ,
25
+ CSR_CONTAINERS ,
26
+ parse_version ,
27
+ sp_version ,
28
+ )
24
29
25
30
26
31
@pytest .mark .parametrize ("est" , (PolynomialFeatures , SplineTransformer ))
@@ -522,27 +527,24 @@ def single_feature_degree3():
522
527
((2 , 3 ), False , True , []),
523
528
],
524
529
)
525
- @pytest .mark .parametrize (
526
- "sparse_X" ,
527
- [False , sparse .csr_matrix , sparse .csc_matrix ],
528
- )
530
+ @pytest .mark .parametrize ("X_container" , [None ] + CSR_CONTAINERS + CSC_CONTAINERS )
529
531
def test_polynomial_features_one_feature (
530
532
single_feature_degree3 ,
531
533
degree ,
532
534
include_bias ,
533
535
interaction_only ,
534
536
indices ,
535
- sparse_X ,
537
+ X_container ,
536
538
):
537
539
"""Test PolynomialFeatures on single feature up to degree 3."""
538
540
X , P = single_feature_degree3
539
- if sparse_X :
540
- X = sparse_X (X )
541
+ if X_container is not None :
542
+ X = X_container (X )
541
543
tf = PolynomialFeatures (
542
544
degree = degree , include_bias = include_bias , interaction_only = interaction_only
543
545
).fit (X )
544
546
out = tf .transform (X )
545
- if sparse_X :
547
+ if X_container is not None :
546
548
out = out .toarray ()
547
549
assert_allclose (out , P [:, indices ])
548
550
if tf .n_output_features_ > 0 :
@@ -596,27 +598,24 @@ def two_features_degree3():
596
598
((3 , 3 ), False , True , []), # would need 3 input features
597
599
],
598
600
)
599
- @pytest .mark .parametrize (
600
- "sparse_X" ,
601
- [False , sparse .csr_matrix , sparse .csc_matrix ],
602
- )
601
+ @pytest .mark .parametrize ("X_container" , [None ] + CSR_CONTAINERS + CSC_CONTAINERS )
603
602
def test_polynomial_features_two_features (
604
603
two_features_degree3 ,
605
604
degree ,
606
605
include_bias ,
607
606
interaction_only ,
608
607
indices ,
609
- sparse_X ,
608
+ X_container ,
610
609
):
611
610
"""Test PolynomialFeatures on 2 features up to degree 3."""
612
611
X , P = two_features_degree3
613
- if sparse_X :
614
- X = sparse_X (X )
612
+ if X_container is not None :
613
+ X = X_container (X )
615
614
tf = PolynomialFeatures (
616
615
degree = degree , include_bias = include_bias , interaction_only = interaction_only
617
616
).fit (X )
618
617
out = tf .transform (X )
619
- if sparse_X :
618
+ if X_container is not None :
620
619
out = out .toarray ()
621
620
assert_allclose (out , P [:, indices ])
622
621
if tf .n_output_features_ > 0 :
@@ -712,10 +711,13 @@ def test_polynomial_feature_names():
712
711
(4 , False , True , np .float64 ),
713
712
],
714
713
)
715
- def test_polynomial_features_csc_X (deg , include_bias , interaction_only , dtype ):
714
+ @pytest .mark .parametrize ("csc_container" , CSC_CONTAINERS )
715
+ def test_polynomial_features_csc_X (
716
+ deg , include_bias , interaction_only , dtype , csc_container
717
+ ):
716
718
rng = np .random .RandomState (0 )
717
719
X = rng .randint (0 , 2 , (100 , 2 ))
718
- X_csc = sparse . csc_matrix (X )
720
+ X_csc = csc_container (X )
719
721
720
722
est = PolynomialFeatures (
721
723
deg , include_bias = include_bias , interaction_only = interaction_only
@@ -739,10 +741,13 @@ def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
739
741
(3 , False , True , np .float64 ),
740
742
],
741
743
)
742
- def test_polynomial_features_csr_X (deg , include_bias , interaction_only , dtype ):
744
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
745
+ def test_polynomial_features_csr_X (
746
+ deg , include_bias , interaction_only , dtype , csr_container
747
+ ):
743
748
rng = np .random .RandomState (0 )
744
749
X = rng .randint (0 , 2 , (100 , 2 ))
745
- X_csr = sparse . csr_matrix (X )
750
+ X_csr = csr_container (X )
746
751
747
752
est = PolynomialFeatures (
748
753
deg , include_bias = include_bias , interaction_only = interaction_only
@@ -761,17 +766,14 @@ def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
761
766
)
762
767
@pytest .mark .parametrize ("interaction_only" , [True , False ])
763
768
@pytest .mark .parametrize ("include_bias" , [True , False ])
769
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
764
770
def test_num_combinations (
765
- n_features ,
766
- min_degree ,
767
- max_degree ,
768
- interaction_only ,
769
- include_bias ,
771
+ n_features , min_degree , max_degree , interaction_only , include_bias , csr_container
770
772
):
771
773
"""
772
774
Test that n_output_features_ is calculated correctly.
773
775
"""
774
- x = sparse . csr_matrix (([1 ], ([0 ], [n_features - 1 ])))
776
+ x = csr_container (([1 ], ([0 ], [n_features - 1 ])))
775
777
est = PolynomialFeatures (
776
778
degree = max_degree ,
777
779
interaction_only = interaction_only ,
@@ -799,8 +801,11 @@ def test_num_combinations(
799
801
(3 , False , True , np .float64 ),
800
802
],
801
803
)
802
- def test_polynomial_features_csr_X_floats (deg , include_bias , interaction_only , dtype ):
803
- X_csr = sparse_random (1000 , 10 , 0.5 , random_state = 0 ).tocsr ()
804
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
805
+ def test_polynomial_features_csr_X_floats (
806
+ deg , include_bias , interaction_only , dtype , csr_container
807
+ ):
808
+ X_csr = csr_container (sparse_random (1000 , 10 , 0.5 , random_state = 0 ))
804
809
X = X_csr .toarray ()
805
810
806
811
est = PolynomialFeatures (
@@ -831,8 +836,11 @@ def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, d
831
836
(2 , 3 , False ),
832
837
],
833
838
)
834
- def test_polynomial_features_csr_X_zero_row (zero_row_index , deg , interaction_only ):
835
- X_csr = sparse_random (3 , 10 , 1.0 , random_state = 0 ).tocsr ()
839
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
840
+ def test_polynomial_features_csr_X_zero_row (
841
+ zero_row_index , deg , interaction_only , csr_container
842
+ ):
843
+ X_csr = csr_container (sparse_random (3 , 10 , 1.0 , random_state = 0 ))
836
844
X_csr [zero_row_index , :] = 0.0
837
845
X = X_csr .toarray ()
838
846
@@ -851,8 +859,11 @@ def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_onl
851
859
["include_bias" , "interaction_only" ],
852
860
[(True , True ), (True , False ), (False , True ), (False , False )],
853
861
)
854
- def test_polynomial_features_csr_X_degree_4 (include_bias , interaction_only ):
855
- X_csr = sparse_random (1000 , 10 , 0.5 , random_state = 0 ).tocsr ()
862
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
863
+ def test_polynomial_features_csr_X_degree_4 (
864
+ include_bias , interaction_only , csr_container
865
+ ):
866
+ X_csr = csr_container (sparse_random (1000 , 10 , 0.5 , random_state = 0 ))
856
867
X = X_csr .toarray ()
857
868
858
869
est = PolynomialFeatures (
@@ -881,8 +892,9 @@ def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
881
892
(3 , 3 , False ),
882
893
],
883
894
)
884
- def test_polynomial_features_csr_X_dim_edges (deg , dim , interaction_only ):
885
- X_csr = sparse_random (1000 , dim , 0.5 , random_state = 0 ).tocsr ()
895
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
896
+ def test_polynomial_features_csr_X_dim_edges (deg , dim , interaction_only , csr_container ):
897
+ X_csr = csr_container (sparse_random (1000 , dim , 0.5 , random_state = 0 ))
886
898
X = X_csr .toarray ()
887
899
888
900
est = PolynomialFeatures (deg , interaction_only = interaction_only )
@@ -896,8 +908,9 @@ def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
896
908
897
909
@pytest .mark .parametrize ("interaction_only" , [True , False ])
898
910
@pytest .mark .parametrize ("include_bias" , [True , False ])
911
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
899
912
def test_csr_polynomial_expansion_index_overflow_non_regression (
900
- interaction_only , include_bias
913
+ interaction_only , include_bias , csr_container
901
914
):
902
915
"""Check the automatic index dtype promotion to `np.int64` when needed.
903
916
@@ -925,7 +938,7 @@ def degree_2_calc(d, i, j):
925
938
col = np .array (
926
939
[n_features - 2 , n_features - 1 , n_features - 2 , n_features - 1 ], dtype = np .int64
927
940
)
928
- X = sparse . csr_matrix (
941
+ X = csr_container (
929
942
(data , (row , col )),
930
943
shape = (n_samples , n_features ),
931
944
dtype = data_dtype ,
@@ -1028,8 +1041,9 @@ def degree_2_calc(d, i, j):
1028
1041
)
1029
1042
@pytest .mark .parametrize ("interaction_only" , [True , False ])
1030
1043
@pytest .mark .parametrize ("include_bias" , [True , False ])
1044
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
1031
1045
def test_csr_polynomial_expansion_index_overflow (
1032
- degree , n_features , interaction_only , include_bias
1046
+ degree , n_features , interaction_only , include_bias , csr_container
1033
1047
):
1034
1048
"""Tests known edge-cases to the dtype promotion strategy and custom
1035
1049
Cython code, including a current bug in the upstream
@@ -1050,7 +1064,7 @@ def test_csr_polynomial_expansion_index_overflow(
1050
1064
n_features * (n_features + 1 ) * (n_features + 2 ) // 6 + expected_indices [1 ]
1051
1065
)
1052
1066
1053
- X = sparse . csr_matrix ((data , (row , col )))
1067
+ X = csr_container ((data , (row , col )))
1054
1068
pf = PolynomialFeatures (
1055
1069
interaction_only = interaction_only , include_bias = include_bias , degree = degree
1056
1070
)
@@ -1131,12 +1145,15 @@ def test_csr_polynomial_expansion_index_overflow(
1131
1145
1132
1146
@pytest .mark .parametrize ("interaction_only" , [True , False ])
1133
1147
@pytest .mark .parametrize ("include_bias" , [True , False ])
1134
- def test_csr_polynomial_expansion_too_large_to_index (interaction_only , include_bias ):
1148
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
1149
+ def test_csr_polynomial_expansion_too_large_to_index (
1150
+ interaction_only , include_bias , csr_container
1151
+ ):
1135
1152
n_features = np .iinfo (np .int64 ).max // 2
1136
1153
data = [1.0 ]
1137
1154
row = [0 ]
1138
1155
col = [n_features - 1 ]
1139
- X = sparse . csr_matrix ((data , (row , col )))
1156
+ X = csr_container ((data , (row , col )))
1140
1157
pf = PolynomialFeatures (
1141
1158
interaction_only = interaction_only , include_bias = include_bias , degree = (2 , 2 )
1142
1159
)
@@ -1150,7 +1167,8 @@ def test_csr_polynomial_expansion_too_large_to_index(interaction_only, include_b
1150
1167
pf .fit_transform (X )
1151
1168
1152
1169
1153
- def test_polynomial_features_behaviour_on_zero_degree ():
1170
+ @pytest .mark .parametrize ("sparse_container" , CSR_CONTAINERS + CSC_CONTAINERS )
1171
+ def test_polynomial_features_behaviour_on_zero_degree (sparse_container ):
1154
1172
"""Check that PolynomialFeatures raises error when degree=0 and include_bias=False,
1155
1173
and output a single constant column when include_bias=True
1156
1174
"""
@@ -1171,7 +1189,7 @@ def test_polynomial_features_behaviour_on_zero_degree():
1171
1189
with pytest .raises (ValueError , match = err_msg ):
1172
1190
poly .fit_transform (X )
1173
1191
1174
- for _X in [X , sparse . csr_matrix ( X ), sparse . csc_matrix (X )]:
1192
+ for _X in [X , sparse_container (X )]:
1175
1193
poly = PolynomialFeatures (degree = 0 , include_bias = True )
1176
1194
output = poly .fit_transform (_X )
1177
1195
# convert to dense array if needed
@@ -1202,7 +1220,8 @@ def test_sizeof_LARGEST_INT_t():
1202
1220
),
1203
1221
run = True ,
1204
1222
)
1205
- def test_csr_polynomial_expansion_windows_fail ():
1223
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
1224
+ def test_csr_polynomial_expansion_windows_fail (csr_container ):
1206
1225
# Minimum needed to ensure integer overflow occurs while guaranteeing an
1207
1226
# int64-indexable output.
1208
1227
n_features = int (np .iinfo (np .int64 ).max ** (1 / 3 ) + 3 )
@@ -1223,7 +1242,7 @@ def test_csr_polynomial_expansion_windows_fail():
1223
1242
int (n_features * (n_features + 1 ) * (n_features + 2 ) // 6 + expected_indices [1 ])
1224
1243
)
1225
1244
1226
- X = sparse . csr_matrix ((data , (row , col )))
1245
+ X = csr_container ((data , (row , col )))
1227
1246
pf = PolynomialFeatures (interaction_only = False , include_bias = False , degree = 3 )
1228
1247
if sys .maxsize <= 2 ** 32 :
1229
1248
msg = (
0 commit comments