Skip to content

Commit 18cf8d0

Browse files
FIX PowerTransformer raise when "box-cox" has nan column (scikit-learn#26400)
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent d29f78e commit 18cf8d0

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

doc/whats_new/v1.3.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,10 @@ Changelog
628628
The `sample_interval_` attribute is deprecated and will be removed in 1.5.
629629
:pr:`25190` by :user:`Vincent Maladière <Vincent-Maladiere>`.
630630

631+
- |Fix| :class:`preprocessing.PowerTransformer` now correctly raises an error when
632+
using `method="box-cox"` on data with a constant `np.nan` column.
633+
:pr:`26400` by :user:`Yao Xiao <Charlie-XIAO>`.
634+
631635
:mod:`sklearn.svm`
632636
..................
633637

sklearn/preprocessing/_data.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3311,9 +3311,13 @@ def _box_cox_optimize(self, x):
33113311
33123312
Here we use scipy builtins, which use the Brent optimizer.
33133313
"""
3314+
mask = np.isnan(x)
3315+
if np.all(mask):
3316+
raise ValueError("Column must not be all nan.")
3317+
33143318
# the computation of lambda is influenced by NaNs so we need to
33153319
# get rid of them
3316-
_, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)
3320+
_, lmbda = stats.boxcox(x[~mask], lmbda=None)
33173321

33183322
return lmbda
33193323

sklearn/preprocessing/tests/test_data.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,6 +2527,21 @@ def test_power_transformer_copy_False(method, standardize):
25272527
assert X_trans is X_inv_trans
25282528

25292529

2530+
def test_power_transformer_box_cox_raise_all_nans_col():
2531+
"""Check that box-cox raises informative when a column contains all nans.
2532+
2533+
Non-regression test for gh-26303
2534+
"""
2535+
X = rng.random_sample((4, 5))
2536+
X[:, 0] = np.nan
2537+
2538+
err_msg = "Column must not be all nan."
2539+
2540+
pt = PowerTransformer(method="box-cox")
2541+
with pytest.raises(ValueError, match=err_msg):
2542+
pt.fit_transform(X)
2543+
2544+
25302545
@pytest.mark.parametrize(
25312546
"X_2",
25322547
[

0 commit comments

Comments
 (0)