From 9fcfc3e6081c397a030dfcef59aecf302c9baaf1 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 13:40:18 -0400 Subject: [PATCH 1/7] test querying unnamed da --- xarray/tests/test_dataarray.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 95b6036712c..1c2d74891ac 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4659,6 +4659,7 @@ def test_query(self, backend, engine, parser): bb = DataArray(data=b, dims=["x"], name="b") cc = DataArray(data=c, dims=["y"], name="c") dd = DataArray(data=d, dims=["z"], name="d") + nn = DataArray(data=a, dims=["x"], name=None) elif backend == "dask": import dask.array as da @@ -4667,6 +4668,7 @@ def test_query(self, backend, engine, parser): bb = DataArray(data=da.from_array(b, chunks=3), dims=["x"], name="b") cc = DataArray(data=da.from_array(c, chunks=7), dims=["y"], name="c") dd = DataArray(data=da.from_array(d, chunks=12), dims=["z"], name="d") + nn = DataArray(data=da.from_array(a, chunks=3), dims=["x"], name=None) # query single dim, single variable actual = aa.query(x="a > 5", engine=engine, parser=parser) @@ -4704,6 +4706,11 @@ def test_query(self, backend, engine, parser): with pytest.raises(UndefinedVariableError): aa.query(x="spam > 50") # name not present + # test with nameless dataarray (GH issue 5492) + actual = nn.query(x="x > 5", engine=engine, parser=parser) + expect = nn.isel(x=(nn.x > 5)) + assert_identical(expect, actual) + @requires_scipy @pytest.mark.parametrize("use_dask", [True, False]) def test_curvefit(self, use_dask): From a469bc73aa09b075590f42db9f0df7a176ff09c7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 13:42:31 -0400 Subject: [PATCH 2/7] use _from_temp_dataset if no name --- xarray/core/dataarray.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 
eab4413d5ce..693722664b4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4490,7 +4490,8 @@ def query( Dimensions without coordinates: x """ - ds = self._to_dataset_whole(shallow_copy=True) + name = _THIS_ARRAY if self.name is None else self.name + ds = self._to_dataset_whole(name=name, shallow_copy=True) ds = ds.query( queries=queries, parser=parser, @@ -4498,7 +4499,8 @@ def query( missing_dims=missing_dims, **queries_kwargs, ) - return ds[self.name] + da = self._from_temp_dataset(ds) if name is _THIS_ARRAY else ds[name] + return da def curvefit( self, From ec599268149f66ef96c64bd05f375968478bb909 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 13:52:12 -0400 Subject: [PATCH 3/7] what's new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5ff3ff20b6a..71b5d591ce7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,6 +73,9 @@ Bug fixes - Fix the ``repr`` of :py:class:`Variable` objects with ``display_expand_data=True`` (:pull:`5406`) By `Justus Magin `_. +- Fixed :py:func:`xarray.DataArray.query` to not fail with an unnamed dataarray + (:issue:`5492`, :pull:`5493`). + By `Tom Nicholas `_. 
Documentation From b89acffa5c0656ea49fca8b0e96fb76e7300cff3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 14:57:09 -0400 Subject: [PATCH 4/7] removed two unnecessary intermediate variables --- xarray/core/dataarray.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 693722664b4..32fc73941a7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4491,16 +4491,14 @@ def query( """ name = _THIS_ARRAY if self.name is None else self.name - ds = self._to_dataset_whole(name=name, shallow_copy=True) - ds = ds.query( + ds = self._to_dataset_whole(name=name, shallow_copy=True).query( queries=queries, parser=parser, engine=engine, missing_dims=missing_dims, **queries_kwargs, ) - da = self._from_temp_dataset(ds) if name is _THIS_ARRAY else ds[name] - return da + return self._from_temp_dataset(ds) if name is _THIS_ARRAY else ds[name] def curvefit( self, From 851c405c72448dd6106d9f771cd8818d06fb0aec Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 16:56:49 -0400 Subject: [PATCH 5/7] removed shallow copy --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 32fc73941a7..50dedc1cc3a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4491,7 +4491,7 @@ def query( """ name = _THIS_ARRAY if self.name is None else self.name - ds = self._to_dataset_whole(name=name, shallow_copy=True).query( + ds = self._to_dataset_whole(name=name).query( queries=queries, parser=parser, engine=engine, From 68a505ce26959ac29ec3c0bbdab66c3b7f09344a Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 18 Jun 2021 17:48:15 -0400 Subject: [PATCH 6/7] reference values in unnamed dataarrays as 'self' --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 20 ++++++++++++++++++-- xarray/tests/test_dataarray.py | 10 ++++++---- 3 files changed, 27 
insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 71b5d591ce7..18af20ac318 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,9 @@ New Features By `Thomas Hirtz `_. - allow passing a function to ``combine_attrs`` (:pull:`4896`). By `Justus Magin `_. +- The values stored in an unnamed dataarray can now be referenced in a call to + py:func:`xarray.DataArray.query` via 'self' (:issue:`5492`, :pull:`5493`). + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 50dedc1cc3a..e6dc3879c9f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4437,6 +4437,8 @@ def query( dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the values in the array. + The values stored in unnamed dataarrays can be referenced in queries as 'self'. + Parameters ---------- queries : dict, optional @@ -4488,9 +4490,20 @@ def query( array([3, 4]) Dimensions without coordinates: x + + >>> da = xr.DataArray(np.arange(0, 5, 1), dims="x", name=None) + >>> da + + array([0, 1, 2, 3, 4]) + Dimensions without coordinates: x + >>> da.query(x="self > 2") + + array([3, 4]) + Dimensions without coordinates: x """ - name = _THIS_ARRAY if self.name is None else self.name + # Naming unnamed dataarrays as 'self' allows querying their values still + name = 'self' if self.name is None else self.name ds = self._to_dataset_whole(name=name).query( queries=queries, parser=parser, @@ -4498,7 +4511,10 @@ def query( missing_dims=missing_dims, **queries_kwargs, ) - return self._from_temp_dataset(ds) if name is _THIS_ARRAY else ds[name] + da = ds[name] + if name == 'self': + da.name = None + return da def curvefit( self, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1c2d74891ac..df9160a03bd 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4659,7 +4659,8 
@@ def test_query(self, backend, engine, parser): bb = DataArray(data=b, dims=["x"], name="b") cc = DataArray(data=c, dims=["y"], name="c") dd = DataArray(data=d, dims=["z"], name="d") - nn = DataArray(data=a, dims=["x"], name=None) + nn = DataArray(data=a, dims=["x"]) + nn.name = None elif backend == "dask": import dask.array as da @@ -4668,7 +4669,8 @@ def test_query(self, backend, engine, parser): bb = DataArray(data=da.from_array(b, chunks=3), dims=["x"], name="b") cc = DataArray(data=da.from_array(c, chunks=7), dims=["y"], name="c") dd = DataArray(data=da.from_array(d, chunks=12), dims=["z"], name="d") - nn = DataArray(data=da.from_array(a, chunks=3), dims=["x"], name=None) + nn = DataArray(data=da.from_array(a, chunks=3), dims=["x"]) + nn.name = None # query single dim, single variable actual = aa.query(x="a > 5", engine=engine, parser=parser) @@ -4707,8 +4709,8 @@ def test_query(self, backend, engine, parser): aa.query(x="spam > 50") # name not present # test with nameless dataarray (GH issue 5492) - actual = nn.query(x="x > 5", engine=engine, parser=parser) - expect = nn.isel(x=(nn.x > 5)) + actual = nn.query(x="self > 5", engine=engine, parser=parser) + expect = nn.isel(x=(nn > 5)) assert_identical(expect, actual) @requires_scipy From 3603ccd8555d5dc09b7ab7abfdeb4c566f4edf75 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 21 Jun 2021 13:28:54 -0400 Subject: [PATCH 7/7] replace self with name for named dataarrays --- doc/whats-new.rst | 2 +- xarray/core/dataarray.py | 20 ++++++++++++++++---- xarray/tests/test_dataarray.py | 5 +++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 18af20ac318..667e75de0b4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,7 +44,7 @@ New Features By `Thomas Hirtz `_. - allow passing a function to ``combine_attrs`` (:pull:`4896`). By `Justus Magin `_. 
-- The values stored in an unnamed dataarray can now be referenced in a call to +- The values stored in a dataarray can now be referenced in a call to py:func:`xarray.DataArray.query` via 'self' (:issue:`5492`, :pull:`5493`). By `Tom Nicholas `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e6dc3879c9f..ed0dcd45500 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4437,7 +4437,7 @@ def query( dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the values in the array. - The values stored in unnamed dataarrays can be referenced in queries as 'self'. + The values stored in dataarrays can also be referenced in queries as 'self'. Parameters ---------- @@ -4502,8 +4502,19 @@ def query( Dimensions without coordinates: x """ - # Naming unnamed dataarrays as 'self' allows querying their values still - name = 'self' if self.name is None else self.name + if self.name is None: + # Naming unnamed dataarrays as 'self' allows querying their values still + name = "self" + else: + # For consistency allow named dataarrays to be referred to as 'self' also + name = self.name + queries = either_dict_or_kwargs(queries, queries_kwargs, "query") + queries = { + d: (q.replace("self", name) if isinstance(q, str) else q) + for d, q in queries.items() + } + queries_kwargs = {} + ds = self._to_dataset_whole(name=name).query( queries=queries, parser=parser, @@ -4511,8 +4522,9 @@ def query( missing_dims=missing_dims, **queries_kwargs, ) + da = ds[name] - if name == 'self': + if name == "self": da.name = None return da diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index df9160a03bd..a0b7afc8786 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4713,6 +4713,11 @@ def test_query(self, backend, engine, parser): expect = nn.isel(x=(nn > 5)) assert_identical(expect, actual) + # test referring to named dataarray as self + actual = 
aa.query(x="self > 5", engine=engine, parser=parser) + expect = aa.isel(x=(aa > 5)) + assert_identical(expect, actual) + @requires_scipy @pytest.mark.parametrize("use_dask", [True, False]) def test_curvefit(self, use_dask):