Skip to content

Commit edf47aa

Browse files
authored
Clean-up indexing adapter classes (#10355)
1 parent 90ee309 commit edf47aa

File tree

10 files changed

+205
-151
lines changed

10 files changed

+205
-151
lines changed

.github/workflows/benchmarks.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
env:
1717
ASV_DIR: "./asv_bench"
18-
CONDA_ENV_FILE: ci/requirements/environment.yml
18+
CONDA_ENV_FILE: ci/requirements/environment-benchmark.yml
1919

2020
steps:
2121
# We need the full repo to avoid this issue
@@ -29,7 +29,7 @@ jobs:
2929
with:
3030
micromamba-version: "1.5.10-0"
3131
environment-file: ${{env.CONDA_ENV_FILE}}
32-
environment-name: xarray-tests
32+
environment-name: xarray-benchmark
3333
cache-environment: true
3434
cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark"
3535
# add "build" because of https://github.com/airspeed-velocity/asv/issues/1385

asv_bench/asv.conf.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
// },
6161
"matrix": {
6262
"setuptools_scm": [""], // GH6609
63-
"numpy": [""],
63+
"numpy": ["2.2"],
6464
"pandas": [""],
6565
"netcdf4": [""],
6666
"scipy": [""],

asv_bench/benchmarks/repr.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,31 @@ def time_repr(self):
5757

5858
def time_repr_html(self):
5959
self.da._repr_html_()
60+
61+
62+
class ReprPandasRangeIndex:
63+
# displaying a memory-saving pandas.RangeIndex shouldn't trigger memory
64+
# expensive conversion into a numpy array
65+
def setup(self):
66+
index = xr.indexes.PandasIndex(pd.RangeIndex(1_000_000), "x")
67+
self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index))
68+
69+
def time_repr(self):
70+
repr(self.ds.x)
71+
72+
def time_repr_html(self):
73+
self.ds.x._repr_html_()
74+
75+
76+
class ReprXarrayRangeIndex:
77+
# displaying an Xarray RangeIndex shouldn't trigger memory-expensive conversion
78+
# of its lazy coordinate into a numpy array
79+
def setup(self):
80+
index = xr.indexes.RangeIndex.arange(1_000_000, dim="x")
81+
self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index))
82+
83+
def time_repr(self):
84+
repr(self.ds.x)
85+
86+
def time_repr_html(self):
87+
self.ds.x._repr_html_()
ci/requirements/environment-benchmark.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: xarray-benchmark
2+
channels:
3+
- conda-forge
4+
- nodefaults
5+
dependencies:
6+
- bottleneck
7+
- cftime
8+
- dask-core
9+
- distributed
10+
- flox
11+
- netcdf4
12+
- numba
13+
- numbagg
14+
- numexpr
15+
- numpy>=2.2,<2.3 # https://github.com/numba/numba/issues/10105
16+
- opt_einsum
17+
- packaging
18+
- pandas
19+
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
20+
- sparse
21+
- scipy
22+
- toolz
23+
- zarr

doc/whats-new.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Documentation
3333
Internal Changes
3434
~~~~~~~~~~~~~~~~
3535

36+
- Refactored the ``PandasIndexingAdapter`` and
37+
``CoordinateTransformIndexingAdapter`` internal indexing classes. Coordinate
38+
variables that wrap a :py:class:`pandas.RangeIndex`, a
39+
:py:class:`pandas.MultiIndex` or an
40+
:py:class:`xarray.indexes.CoordinateTransform` are now displayed as lazy variables
41+
in the Xarray data reprs (:pull:`10355`).
42+
By `Benoit Bovy <https://github.com/benbovy>`_.
3643

3744
.. _whats-new.2025.07.0:
3845

xarray/core/formatting.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
from xarray.core.datatree_render import RenderDataTree
2121
from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype, ravel
2222
from xarray.core.extension_array import PandasExtensionArray
23-
from xarray.core.indexing import MemoryCachedArray
23+
from xarray.core.indexing import (
24+
BasicIndexer,
25+
ExplicitlyIndexed,
26+
MemoryCachedArray,
27+
)
2428
from xarray.core.options import OPTIONS, _get_boolean_with_default
2529
from xarray.core.treenode import group_subtrees
2630
from xarray.core.utils import is_duck_array
@@ -87,6 +91,8 @@ def first_n_items(array, n_desired):
8791

8892
if n_desired < array.size:
8993
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
94+
if isinstance(array, ExplicitlyIndexed):
95+
indexer = BasicIndexer(indexer)
9096
array = array[indexer]
9197

9298
# We pass variable objects in to handle indexing
@@ -111,6 +117,8 @@ def last_n_items(array, n_desired):
111117

112118
if n_desired < array.size:
113119
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
120+
if isinstance(array, ExplicitlyIndexed):
121+
indexer = BasicIndexer(indexer)
114122
array = array[indexer]
115123

116124
# We pass variable objects in to handle indexing
@@ -659,6 +667,7 @@ def short_array_repr(array):
659667
def short_data_repr(array):
660668
"""Format "data" for DataArray and Variable."""
661669
internal_data = getattr(array, "variable", array)._data
670+
662671
if isinstance(array, np.ndarray):
663672
return short_array_repr(array)
664673
elif is_duck_array(internal_data):

0 commit comments

Comments
 (0)