From 99ae64f11bf1f205e85149ec3178c364bd33bdce Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 1 May 2025 07:43:35 -0600 Subject: [PATCH 1/7] Add back getattr for ExtensionArrays --- doc/whats-new.rst | 16 ++++++++++++++-- xarray/core/extension_array.py | 5 ++++- xarray/tests/test_duck_array_ops.py | 5 +++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 76fb5d42aa9..dc7165eda9a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,11 +16,23 @@ What's New np.random.seed(123456) -.. _whats-new.2025.04.0: +.. _whats-new.2025.05.0: -v2025.04.0 (unreleased) +v2025.05.0 (unreleased) ----------------------- +Bug fixes +~~~~~~~~~ + +- Allow accessing arbitrary attributes on Pandas ExtensionArrays. + By `Deepak Cherian Any: + return getattr(self.array, attr) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ff84041f8f1..3b75d3d6b2a 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1094,3 +1094,8 @@ def test_extension_array_singleton_equality(categorical1): def test_extension_array_repr(int1): int_duck_array = PandasExtensionArray(int1) assert repr(int1) in repr(int_duck_array) + + +def test_extension_array_attr(): + array = pd.Categorical(["cat2", "cat1", "cat2", "cat3", "cat1"]) + assert (array.categories == PandasExtensionArray(array).categories).all() From 6d4fded416c74ade23b63dde45b2631d0d36d19f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 May 2025 12:17:05 -0600 Subject: [PATCH 2/7] Fix --- xarray/core/extension_array.py | 9 ++++++++- xarray/tests/test_duck_array_ops.py | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index 11855ee5755..dc030f311c9 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -144,4 +144,11 @@ def __array__( return np.asarray(self.array, dtype=dtype) def __getattr__(self, attr: str) -> Any: - return getattr(self.array, attr) + # without __deepcopy__ or __copy__, the object is first constructed and then the sub-objects are attached: + # "A shallow copy constructs a new compound object and then (to the extent possible) inserts references into it to the objects found in the original" + + # "A deep copy constructs a new compound object and then, recursively, inserts copies into it of the objects found in the original." + # So without array, this method then calls getattr for "array" again while looking for __setstate__ + # (which is apparently the first thing sought in copy.copy from the under-construction copied object) + # because of self.array. __getattribute__ bypasses the lookup mechanism of __getattr__. + # See stackoverflow.com/questions/40583131/python-deepcopy-with-custom-getattr-and-setattr as well. + return getattr(super().__getattribute__("array"), attr) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 3b75d3d6b2a..deda022efd9 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime as dt +import pickle import warnings import numpy as np @@ -1098,4 +1099,8 @@ def test_extension_array_repr(int1): def test_extension_array_attr(): array = pd.Categorical(["cat2", "cat1", "cat2", "cat3", "cat1"]) - assert (array.categories == PandasExtensionArray(array).categories).all() + wrapped = PandasExtensionArray(array) + assert (array.categories == wrapped.categories).all() + assert array.nbytes == wrapped.nbytes + + assert (pickle.loads(pickle.dumps(wrapped)) == wrapped).all() From b72f56067d3dd90ce27369df1bfb5d01268fd7a3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 May 2025 12:22:57 -0600 Subject: [PATCH 3/7] fix test --- xarray/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index af7db7294a8..11f9e88c69b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -297,7 +297,7 @@ def test_repr(self) -> None: var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364 var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423 var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555 - var4 (dim1) category 64B 'b' 'c' 'b' 'a' 'c' 'a' 'c' 'a' + var4 (dim1) category 32B 'b' 'c' 'b' 'a' 'c' 'a' 'c' 'a' Attributes: foo: bar""".format( data["dim3"].dtype, From 2186d497fe54651b99ad8abea2afcca93c3e9bc1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 May 2025 12:23:22 -0600 Subject: [PATCH 4/7] fix bad merge --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 515bc7679eb..71cd3dceac1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,7 +50,6 @@ Internal Changes .. _whats-new.2025.04.0: -v2025.04.0 (Apr 28, 2025) v2025.04.0 (Apr 29, 2025) ------------------------- From 4a5acf212d68dcff6c98b837ba6536b6ef40d1be Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 May 2025 12:26:53 -0600 Subject: [PATCH 5/7] More test --- xarray/tests/test_duck_array_ops.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index deda022efd9..4052d414f63 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1100,7 +1100,14 @@ def test_extension_array_repr(int1): def test_extension_array_attr(): array = pd.Categorical(["cat2", "cat1", "cat2", "cat3", "cat1"]) wrapped = PandasExtensionArray(array) - assert (array.categories == wrapped.categories).all() + assert_array_equal(array.categories, wrapped.categories) assert array.nbytes == wrapped.nbytes - assert (pickle.loads(pickle.dumps(wrapped)) == wrapped).all() + roundtripped = pickle.loads(pickle.dumps(wrapped)) + assert isinstance(roundtripped, PandasExtensionArray) + assert (roundtripped == wrapped).all() + + interval_array = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], closed="right") + wrapped = PandasExtensionArray(interval_array) + assert_array_equal(wrapped.left, interval_array.left, strict=True) + assert wrapped.closed == interval_array.closed From fc9298a3290b139cbd4a12885a666223454507b8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 May 2025 12:45:16 -0600 Subject: [PATCH 6/7] fix docs --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 71cd3dceac1..f4dea1cd2aa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,7 +38,7 @@ Bug fixes ~~~~~~~~~ - Allow accessing arbitrary attributes on Pandas ExtensionArrays. - By `Deepak Cherian `_. Documentation From 93405b81ab62468d29f0d0f1c952f6a7aaf3e295 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 8 May 2025 09:04:43 -0600 Subject: [PATCH 7/7] Update xarray/core/extension_array.py Co-authored-by: Ilan Gold --- xarray/core/extension_array.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index dc030f311c9..096a427e425 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -144,11 +144,8 @@ def __array__( return np.asarray(self.array, dtype=dtype) def __getattr__(self, attr: str) -> Any: - # without __deepcopy__ or __copy__, the object is first constructed and then the sub-objects are attached: - # "A shallow copy constructs a new compound object and then (to the extent possible) inserts references into it to the objects found in the original" + - # "A deep copy constructs a new compound object and then, recursively, inserts copies into it of the objects found in the original." - # So without array, this method then calls getattr for "array" again while looking for __setstate__ - # (which is apparently the first thing sought in copy.copy from the under-construction copied object) - # because of self.array. __getattribute__ bypasses the lookup mechanism of __getattr__. - # See stackoverflow.com/questions/40583131/python-deepcopy-with-custom-getattr-and-setattr as well. + # with __deepcopy__ or __copy__, the object is first constructed and then the sub-objects are attached (see https://docs.python.org/3/library/copy.html) + # Thus, if we didn't have `super().__getattribute__("array")` this method would call `self.array` (i.e., `getattr(self, "array")`) again while looking for `__setstate__` + # (which is apparently the first thing sought in copy.copy from the under-construction copied object), + # which would cause a recursion error since `array` is not present on the object when it is being constructed during `__{deep}copy__`. return getattr(super().__getattribute__("array"), attr)