@@ -1513,24 +1513,6 @@ def test_most_frequent(expected, array, dtype, extra_value, n_repeat):
1513
1513
)
1514
1514
1515
1515
1516
@pytest.mark.parametrize(
    "initial_strategy", ["mean", "median", "most_frequent", "constant"]
)
def test_iterative_imputer_keep_empty_features(initial_strategy):
    """Check `IterativeImputer` with `keep_empty_features=True` on an empty feature.

    The middle column of `X` contains only missing values. For every initial
    strategy, that column is expected to come out filled with zeros, both from
    `fit_transform` and from a subsequent `transform` on the same data.
    """
    # Column 1 is entirely NaN, i.e. an "empty" feature.
    X = np.array([[1, np.nan, 2], [3, np.nan, np.nan]])

    imputer = IterativeImputer(
        initial_strategy=initial_strategy, keep_empty_features=True
    )

    # Fit first, then re-apply the fitted imputer: the same expectation holds
    # for both outputs.
    for impute in (imputer.fit_transform, imputer.transform):
        empty_column = impute(X)[:, 1]
        assert_allclose(empty_column, 0)
1532
-
1533
-
1534
1516
def test_iterative_imputer_constant_fill_value ():
1535
1517
"""Check that we propagate properly the parameter `fill_value`."""
1536
1518
X = np .array ([[- 1 , 2 , 3 , - 1 ], [4 , - 1 , 5 , - 1 ], [6 , 7 , - 1 , - 1 ], [8 , 9 , 0 , - 1 ]])
@@ -1786,3 +1768,70 @@ def test_simple_imputer_constant_fill_value_casting():
1786
1768
)
1787
1769
X_trans = imputer .fit_transform (X_float32 )
1788
1770
assert X_trans .dtype == X_float32 .dtype
1771
+
1772
+
1773
@pytest.mark.parametrize("strategy", ["mean", "median", "most_frequent", "constant"])
def test_iterative_imputer_no_empty_features(strategy):
    """Check that `keep_empty_features` has no effect without empty features.

    Every column of `X` holds at least one observed value, so the imputed
    output must be the same whether `keep_empty_features` is `True` or `False`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    """
    # One missing value per column; no column is fully missing.
    X = np.array([[np.nan, 0, 1], [2, np.nan, 3], [4, 5, np.nan]])

    # Both imputers share everything except the `keep_empty_features` flag.
    shared_params = {"initial_strategy": strategy, "fill_value": 1}

    X_dropping = IterativeImputer(
        keep_empty_features=False, **shared_params
    ).fit_transform(X)
    X_keeping = IterativeImputer(
        keep_empty_features=True, **shared_params
    ).fit_transform(X)

    assert_allclose(X_dropping, X_keeping)
1797
+
1798
+
1799
@pytest.mark.parametrize("strategy", ["mean", "median", "most_frequent", "constant"])
@pytest.mark.parametrize(
    "X_test",
    [
        np.array([[1, 2, 3, 4], [5, 6, 7, 8]]),  # without empty feature
        np.array([[np.nan, 2, 3, 4], [np.nan, 6, 7, 8]]),  # empty feature at column 0
        np.array([[1, 2, 3, np.nan], [5, 6, 7, np.nan]]),  # empty feature at column 3
    ],
)
def test_iterative_imputer_with_empty_features(strategy, X_test):
    """Check `keep_empty_features` when the training data has an empty feature.

    With `keep_empty_features=True`, the empty feature is imputed with the value
    defined by the initial imputation; the remaining columns must match what is
    obtained with `keep_empty_features=False`. For each setting, the transformed
    test data must have as many columns as the transformed training data.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    """
    # Column 0 is entirely NaN; the other columns each have one missing value.
    X_train = np.array(
        [[np.nan, np.nan, 0, 1], [np.nan, 2, np.nan, 3], [np.nan, 4, 5, np.nan]]
    )

    # Run the same fit/transform sequence for both settings of the flag and
    # store the (train, test) outputs keyed by that flag.
    outputs = {}
    for keep_empty in (False, True):
        imputer = IterativeImputer(
            initial_strategy=strategy, fill_value=0, keep_empty_features=keep_empty
        )
        train_out = imputer.fit_transform(X_train)
        test_out = imputer.transform(X_test)
        outputs[keep_empty] = (train_out, test_out)

    train_dropped, test_dropped = outputs[False]
    train_kept, test_kept = outputs[True]

    # Apart from the leading empty column, both settings impute identically.
    assert_allclose(train_dropped, train_kept[:, 1:])
    # The kept empty column is filled with zeros.
    assert_allclose(train_kept[:, 0], 0)

    # `transform` yields the same number of columns as `fit_transform`.
    assert train_dropped.shape[1] == test_dropped.shape[1]
    assert train_kept.shape[1] == test_kept.shape[1]
0 commit comments