From 2d8b55eb485ebb422f1fd1a3935c3f566ce2e4e4 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Tue, 3 Jun 2025 22:54:31 +0900 Subject: [PATCH 1/5] Fix Index.equal --- pandas/core/indexes/base.py | 6 +--- pandas/tests/frame/test_arithmetic.py | 17 +++++----- pandas/tests/indexes/test_base.py | 49 +++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4e1ea07907cdb..aed8287926810 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool: # quickly return if the lengths are different return False - if ( - isinstance(self.dtype, StringDtype) - and self.dtype.na_value is np.nan - and other.dtype != self.dtype - ): + if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype: # TODO(infer_string) can we avoid this special case? # special case for object behavior return other.equals(self.astype(object)) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bc69ec388bf0c..3c931504c380e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2183,19 +2183,18 @@ def test_enum_column_equality(): tm.assert_series_equal(result, expected) -def test_mixed_col_index_dtype(using_infer_string): +def test_mixed_col_index_dtype(): # GH 47382 df1 = DataFrame(columns=list("abc"), data=1.0, index=[0]) df2 = DataFrame(columns=list("abc"), data=0.0, index=[0]) df1.columns = df2.columns.astype("string") result = df1 + df2 expected = DataFrame(columns=list("abc"), data=1.0, index=[0]) - if using_infer_string: - # df2.columns.dtype will be "str" instead of object, - # so the aligned result will be "string", not object - if HAS_PYARROW: - dtype = "string[pyarrow]" - else: - dtype = "string" - expected.columns = expected.columns.astype(dtype) + + if HAS_PYARROW: + dtype = "string[pyarrow]" + else: + dtype = "string" + 
expected.columns = expected.columns.astype(dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5b75bd9afd6df..7caffa8726b3a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -40,6 +40,7 @@ ensure_index, ensure_index_from_sequences, ) +from pandas.testing import assert_series_equal class TestIndex: @@ -1717,3 +1718,51 @@ def test_is_monotonic_pyarrow_list_type(): idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) assert not idx.is_monotonic_increasing assert not idx.is_monotonic_decreasing + + +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=pd.NA), + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=np.nan), + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_index_equals_different_string_dtype(dtype): + # GH 61099 + idx_obj = Index(["a", "b", "c"]) + idx_str = Index(["a", "b", "c"], dtype=dtype) + + assert idx_obj.equals(idx_str) + assert idx_str.equals(idx_obj) + + +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=pd.NA), + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=np.nan), + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_index_comparison_different_string_dtype(dtype): + # GH 61099 + idx = Index(["a", "b", "c"]) + s_obj = Series([1, 2, 3], index=idx) + s_str = Series([4, 5, 6], index=idx.astype(dtype)) + + expected = Series([True, True, True], index=["a", "b", "c"]) + result = s_obj < s_str + assert_series_equal(result, expected) From 509c74c79fd536f78f836aeea6fdf7e082b3b1c7 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Wed, 4 Jun 2025 07:31:18 +0900 Subject: [PATCH 2/5] Fix test_mixed_col_index_dtype --- pandas/tests/frame/test_arithmetic.py | 24 
+++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3c931504c380e..657ad8770bd54 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from pandas.compat import HAS_PYARROW +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -2183,18 +2183,28 @@ def test_enum_column_equality(): tm.assert_series_equal(result, expected) -def test_mixed_col_index_dtype(): +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=pd.NA), + marks=td.skip_if_no("pyarrow"), + ), + pytest.param( + pd.StringDtype(storage="pyarrow", na_value=np.nan), + marks=td.skip_if_no("pyarrow"), + ), + ], +) +def test_mixed_col_index_dtype(dtype): # GH 47382 df1 = DataFrame(columns=list("abc"), data=1.0, index=[0]) df2 = DataFrame(columns=list("abc"), data=0.0, index=[0]) - df1.columns = df2.columns.astype("string") + df1.columns = df2.columns.astype(dtype) result = df1 + df2 expected = DataFrame(columns=list("abc"), data=1.0, index=[0]) - if HAS_PYARROW: - dtype = "string[pyarrow]" - else: - dtype = "string" expected.columns = expected.columns.astype(dtype) tm.assert_frame_equal(result, expected) From 0ed742d58b4d9d5f5c43174f24a8a89c88b0c62f Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Wed, 4 Jun 2025 22:38:23 +0900 Subject: [PATCH 3/5] Fix test_mixed_col_index_dtype --- pandas/tests/frame/test_arithmetic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 657ad8770bd54..e6a86dad8abf6 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2188,11 +2188,11 @@ def test_enum_column_equality(): [ "string[python]", pytest.param( - 
pd.StringDtype(storage="pyarrow", na_value=pd.NA), + "string[pyarrow]", marks=td.skip_if_no("pyarrow"), ), pytest.param( - pd.StringDtype(storage="pyarrow", na_value=np.nan), + "str", marks=td.skip_if_no("pyarrow"), ), ], From c79df8069770150a24467dc1235afbcdc6f9418d Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Fri, 6 Jun 2025 17:07:09 +0900 Subject: [PATCH 4/5] Fix parametrize for tests --- pandas/tests/indexes/test_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7caffa8726b3a..fa3510996b3b7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1725,11 +1725,11 @@ def test_is_monotonic_pyarrow_list_type(): [ "string[python]", pytest.param( - pd.StringDtype(storage="pyarrow", na_value=pd.NA), + "string[pyarrow]", marks=td.skip_if_no("pyarrow"), ), pytest.param( - pd.StringDtype(storage="pyarrow", na_value=np.nan), + "str", marks=td.skip_if_no("pyarrow"), ), ], @@ -1748,11 +1748,11 @@ def test_index_equals_different_string_dtype(dtype): [ "string[python]", pytest.param( - pd.StringDtype(storage="pyarrow", na_value=pd.NA), + "string[pyarrow]", marks=td.skip_if_no("pyarrow"), ), pytest.param( - pd.StringDtype(storage="pyarrow", na_value=np.nan), + "str", marks=td.skip_if_no("pyarrow"), ), ], From 5ac26810e12dc8c93552d354a6eb3fcd516906e4 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 12 Jun 2025 22:25:10 +0900 Subject: [PATCH 5/5] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 03a386708323d..af57d1d4f185d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -742,6 +742,7 @@ Indexing - Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) - Bug in :meth:`DataFrame.from_records` throwing a 
``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) +- Bug in :meth:`Index.equals` when one :class:`Index` has a string dtype and the other has a different dtype, which affected comparisons between :class:`Series` with such indexes (:issue:`61099`) - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)