61
61
StratifiedShuffleSplit ,
62
62
train_test_split ,
63
63
)
64
- from sklearn .model_selection ._search import BaseSearchCV
64
+ from sklearn .model_selection ._search import (
65
+ BaseSearchCV ,
66
+ _yield_masked_array_for_each_param ,
67
+ )
65
68
from sklearn .model_selection .tests .common import OneTimeSplitter
66
69
from sklearn .naive_bayes import ComplementNB
67
70
from sklearn .neighbors import KernelDensity , KNeighborsClassifier , LocalOutlierFactor
68
- from sklearn .pipeline import Pipeline
69
- from sklearn .preprocessing import OneHotEncoder , OrdinalEncoder , StandardScaler
71
+ from sklearn .pipeline import Pipeline , make_pipeline
72
+ from sklearn .preprocessing import (
73
+ OneHotEncoder ,
74
+ OrdinalEncoder ,
75
+ SplineTransformer ,
76
+ StandardScaler ,
77
+ )
70
78
from sklearn .svm import SVC , LinearSVC
71
79
from sklearn .tests .metadata_routing_common import (
72
80
ConsumingScorer ,
@@ -2724,6 +2732,37 @@ def test_search_with_estimators_issue_29157():
2724
2732
assert grid_search .cv_results_ ["param_enc__enc" ].dtype == object
2725
2733
2726
2734
2735
def test_cv_results_multi_size_array():
    """Check that GridSearchCV works with params that are arrays of different sizes.

    Non-regression test for #29277.
    """
    n_features = 10
    # Reuse `n_features` so the data width and the knot-array width cannot
    # drift apart, and seed the generator so the test is deterministic.
    X, y = make_classification(n_features=n_features, random_state=0)

    spline_reg_pipe = make_pipeline(
        SplineTransformer(extrapolation="periodic"),
        LogisticRegression(),
    )

    # Each candidate is a 2D knots array with `n_features` columns but a
    # different number of rows (10, 11 and 12), i.e. arrays of different sizes.
    n_knots_list = [n_features * i for i in [10, 11, 12]]
    knots_list = [
        np.linspace(0, np.pi * 2, n_knots).reshape((-1, n_features))
        for n_knots in n_knots_list
    ]
    spline_reg_pipe_cv = GridSearchCV(
        estimator=spline_reg_pipe,
        param_grid={
            "splinetransformer__knots": knots_list,
        },
    )

    spline_reg_pipe_cv.fit(X, y)
    # The differently-shaped arrays must be stored as individual objects.
    assert (
        spline_reg_pipe_cv.cv_results_["param_splinetransformer__knots"].dtype == object
    )
2764
+
2765
+
2727
2766
@pytest .mark .parametrize (
2728
2767
"array_namespace, device, dtype" , yield_namespace_device_dtype_combinations ()
2729
2768
)
@@ -2747,3 +2786,77 @@ def test_array_api_search_cv_classifier(SearchCV, array_namespace, device, dtype
2747
2786
)
2748
2787
searcher .fit (X_xp , y_xp )
2749
2788
searcher .score (X_xp , y_xp )
2789
+
2790
+
2791
# Shared module-level instances: the parametrized cases must reference the
# very same objects in both the candidate params and `expected`.
ordinal_encoder = OrdinalEncoder()
one_hot_encoder = OneHotEncoder()

# Handing the list of tuples straight to `MaskedArray` would auto-convert it
# to a 2D array, so start from an object-dtype array and fill it element-wise.
ma_with_tuples = np.ma.MaskedArray(np.empty(2), mask=True, dtype=object)
ma_with_tuples[0], ma_with_tuples[1] = (1, 2), (3, 4)
2801
+
2802
+
2803
@pytest.mark.parametrize(
    ("candidate_params", "expected"),
    [
        pytest.param(
            [{"foo": 1}, {"foo": 2}],
            [
                ("param_foo", np.ma.MaskedArray(np.array([1, 2]))),
            ],
            id="simple numeric, single param",
        ),
        pytest.param(
            [{"foo": 1, "bar": 3}, {"foo": 2, "bar": 4}, {"foo": 3}],
            [
                ("param_foo", np.ma.MaskedArray(np.array([1, 2, 3]))),
                (
                    "param_bar",
                    np.ma.MaskedArray(np.array([3, 4, 0]), mask=[False, False, True]),
                ),
            ],
            id="simple numeric, one param is missing in one round",
        ),
        pytest.param(
            [{"foo": [[1], [2], [3]]}, {"foo": [[1], [2]]}],
            [
                (
                    "param_foo",
                    np.ma.MaskedArray([[[1], [2], [3]], [[1], [2]]], dtype=object),
                ),
            ],
            id="lists of different lengths",
        ),
        pytest.param(
            [{"foo": (1, 2)}, {"foo": (3, 4)}],
            [
                (
                    "param_foo",
                    ma_with_tuples,
                ),
            ],
            id="lists tuples",
        ),
        pytest.param(
            [{"foo": ordinal_encoder}, {"foo": one_hot_encoder}],
            [
                (
                    "param_foo",
                    np.ma.MaskedArray([ordinal_encoder, one_hot_encoder], dtype=object),
                ),
            ],
            id="estimators",
        ),
    ],
)
def test_yield_masked_array_for_each_param(candidate_params, expected):
    """Check the (key, masked array) pairs yielded for a list of candidates.

    For every expected entry, the yielded key, dtype, values and mask must
    match.
    """
    result = list(_yield_masked_array_for_each_param(candidate_params))
    # `zip` stops at the shorter input, so without this check a missing or
    # extra yielded entry would go unnoticed.
    assert len(result) == len(expected)
    for (key, value), (expected_key, expected_value) in zip(result, expected):
        assert key == expected_key
        assert value.dtype == expected_value.dtype
        np.testing.assert_array_equal(value, expected_value)
        np.testing.assert_array_equal(value.mask, expected_value.mask)
0 commit comments