From 3f1a73473eb3d52104d095f786e9994ca7b044bd Mon Sep 17 00:00:00 2001 From: heoh Date: Tue, 10 Jun 2025 14:31:12 +0000 Subject: [PATCH 1/3] BUG: Fix infer_dtype result for float with embedded pd.NA (#61621) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/lib.pyx | 8 +++++--- pandas/tests/dtypes/test_inference.py | 9 +++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 03a386708323d..9ea1ba0bb811f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -711,6 +711,7 @@ Timezones Numeric ^^^^^^^ +- Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`) - Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`) - Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`) - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index cded299f77876..5e9d4d7e85cec 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1751,7 +1751,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return "complex" elif util.is_float_object(val): - if is_float_array(values): + if is_float_array(values, skipna=skipna): return "floating" elif is_integer_float_array(values, skipna=skipna): if is_integer_na_array(values, skipna=skipna): @@ -1953,9 +1953,11 @@ cdef class FloatValidator(Validator): # Note: only python-exposed for tests -cpdef bint is_float_array(ndarray values): +cpdef bint is_float_array(ndarray values, bint skipna=True): cdef: - FloatValidator validator = FloatValidator(values.size, values.dtype) + FloatValidator validator = FloatValidator(values.size, + values.dtype, + skipna=skipna) return validator.validate(values) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index db98751324ebc..23eedc64f36d8 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1387,6 +1387,15 @@ def test_infer_dtype_period_with_na(self, na_value): arr = np.array([na_value, Period("2011-01", freq="D"), na_value]) assert lib.infer_dtype(arr, skipna=True) == "period" + @pytest.mark.parametrize("na_value", [pd.NA, np.nan]) + def test_infer_dtype_numeric_with_na(self, na_value): + # GH61621 + arr = Series([1, 2, na_value], dtype=object) + assert lib.infer_dtype(arr, skipna=True) == "integer" + + arr = Series([1.0, 2.0, na_value], dtype=object) + assert lib.infer_dtype(arr, skipna=True) == "floating" + def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) assert lib.infer_dtype(arr, skipna=True) == "floating" From a6b3962427977f678929b182eae935a394175086 Mon Sep 17 00:00:00 2001 From: heoh Date: Tue, 10 Jun 2025 15:08:58 +0000 Subject: [PATCH 2/3] Fix is_float_array stub arguments inconsistent --- pandas/_libs/lib.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 331233f37f63d..310cd3c3d76ec 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -60,7 +60,7 @@ def is_time_array(values: np.ndarray, skipna: bool = ...): ... def is_date_array(values: np.ndarray, skipna: bool = ...): ... def is_datetime_array(values: np.ndarray, skipna: bool = ...): ... def is_string_array(values: np.ndarray, skipna: bool = ...): ... -def is_float_array(values: np.ndarray): ... +def is_float_array(values: np.ndarray, skipna: bool = ...): ... def is_integer_array(values: np.ndarray, skipna: bool = ...): ... def is_bool_array(values: np.ndarray, skipna: bool = ...): ... def fast_multiget( From 33bd950ee730273a36d37b20629ad704093f77f4 Mon Sep 17 00:00:00 2001 From: heoh Date: Tue, 10 Jun 2025 16:15:01 +0000 Subject: [PATCH 3/3] Fix side effects --- pandas/core/dtypes/cast.py | 5 +---- pandas/tests/extension/test_arrow.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dae04ba6244d4..996ebdac8d0c3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1086,10 +1086,7 @@ def convert_dtypes( elif ( infer_objects and input_array.dtype == object - and ( - isinstance(inferred_dtype, str) - and inferred_dtype == "mixed-integer-float" - ) + and inferred_dtype == "floating" ): inferred_dtype = pandas_dtype_func("Float64") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fc5930ebcd8ac..b79301ec9552a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3090,7 +3090,7 @@ def test_infer_dtype_pyarrow_dtype(data, request): res = lib.infer_dtype(data) assert res != "unknown-array" - if data._hasna and res in ["floating", "datetime64", "timedelta64"]: + if data._hasna and res in ["datetime64", "timedelta64"]: mark = pytest.mark.xfail( reason="in infer_dtype pd.NA is not ignored in these cases " "even with skipna=True in the list(data) check below"