Skip to content

Commit 1a91802

Browse files
committed
Merge branch 'main' into groupby-aggs-using-numpy-groupies
* main: (23 commits) Vectorize groupby binary ops (pydata#6160) Speed-up multi-index html repr + add display_values_threshold option (pydata#6400) [pre-commit.ci] pre-commit autoupdate (pydata#6422) Fix concat scalar coord dtype (pydata#6418) use the `DaskIndexingAdapter` for `duck dask` arrays (pydata#6414) Weighted quantile (pydata#6059) upgrade `sphinx` (pydata#6415) Add kwarg-only breaking change to whats-new (pydata#6409) [pre-commit.ci] pre-commit autoupdate (pydata#6396) fix DataArray groupby returning a Dataset (pydata#6394) reindex: fix missing variable metadata (pydata#6389) [skip-ci] Add benchmarks for groupby math (pydata#6390) Fix concat with scalar coordinate (pydata#6385) isel: convert IndexVariable to Variable if index is dropped (pydata#6388) fix dataset groupby combine dataarray func (pydata#6386) fix concat with variable or dataarray as dim (pydata#6387) pydata#6367 Fix for time units checking could produce "unhashable type" error (pydata#6368) Explicit indexes (pydata#5692) Remove test_rasterio_vrt_network (pydata#6371) Allow write_empty_chunks to be set in Zarr encoding (pydata#6348) ...
2 parents 2694dbe + 2e93d54 commit 1a91802

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

55 files changed

+5289
-2290
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ repos:
1919
hooks:
2020
- id: isort
2121
- repo: https://github.com/asottile/pyupgrade
22-
rev: v2.31.0
22+
rev: v2.31.1
2323
hooks:
2424
- id: pyupgrade
2525
args:
@@ -45,7 +45,7 @@ repos:
4545
# - id: velin
4646
# args: ["--write", "--compact"]
4747
- repo: https://github.com/pre-commit/mirrors-mypy
48-
rev: v0.931
48+
rev: v0.942
4949
hooks:
5050
- id: mypy
5151
# Copied from setup.cfg
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
version: 2
2+
23
build:
34
os: ubuntu-20.04
45
tools:
56
python: mambaforge-4.10
6-
sphinx:
7-
fail_on_warning: true
7+
88
conda:
99
environment: ci/requirements/doc.yml
10+
11+
sphinx:
12+
fail_on_warning: true
13+
1014
formats: []

asv_bench/benchmarks/groupby.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ def setup(self, *args, **kwargs):
1717
}
1818
)
1919
self.ds2d = self.ds1d.expand_dims(z=10)
20+
self.ds1d_mean = self.ds1d.groupby("b").mean()
21+
self.ds2d_mean = self.ds2d.groupby("b").mean()
2022

2123
@parameterized(["ndim"], [(1, 2)])
2224
def time_init(self, ndim):
@@ -32,15 +34,30 @@ def time_agg_large_num_groups(self, method, ndim):
3234
ds = getattr(self, f"ds{ndim}d")
3335
getattr(ds.groupby("b"), method)()
3436

37+
def time_groupby_binary_op_1d(self):
38+
self.ds1d - self.ds1d_mean
39+
40+
def time_groupby_binary_op_2d(self):
41+
self.ds2d - self.ds2d_mean
42+
43+
def peakmem_groupby_binary_op_1d(self):
44+
self.ds1d - self.ds1d_mean
45+
46+
def peakmem_groupby_binary_op_2d(self):
47+
self.ds2d - self.ds2d_mean
48+
3549

3650
class GroupByDask(GroupBy):
3751
def setup(self, *args, **kwargs):
3852
requires_dask()
3953
super().setup(**kwargs)
54+
4055
self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2))
4156
self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50})
4257
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2))
4358
self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5})
59+
self.ds1d_mean = self.ds1d.groupby("b").mean()
60+
self.ds2d_mean = self.ds2d.groupby("b").mean()
4461

4562

4663
class GroupByPandasDataFrame(GroupBy):
@@ -52,6 +69,13 @@ def setup(self, *args, **kwargs):
5269

5370
super().setup(**kwargs)
5471
self.ds1d = self.ds1d.to_dataframe()
72+
self.ds1d_mean = self.ds1d.groupby("b").mean()
73+
74+
def time_groupby_binary_op_2d(self):
75+
raise NotImplementedError
76+
77+
def peakmem_groupby_binary_op_2d(self):
78+
raise NotImplementedError
5579

5680

5781
class GroupByDaskDataFrame(GroupBy):
@@ -64,6 +88,13 @@ def setup(self, *args, **kwargs):
6488
requires_dask()
6589
super().setup(**kwargs)
6690
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
91+
self.ds1d_mean = self.ds1d.groupby("b").mean()
92+
93+
def time_groupby_binary_op_2d(self):
94+
raise NotImplementedError
95+
96+
def peakmem_groupby_binary_op_2d(self):
97+
raise NotImplementedError
6798

6899

69100
class Resample:
@@ -75,6 +106,8 @@ def setup(self, *args, **kwargs):
75106
coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
76107
)
77108
self.ds2d = self.ds1d.expand_dims(z=10)
109+
self.ds1d_mean = self.ds1d.resample(time="48H").mean()
110+
self.ds2d_mean = self.ds2d.resample(time="48H").mean()
78111

79112
@parameterized(["ndim"], [(1, 2)])
80113
def time_init(self, ndim):
@@ -90,6 +123,18 @@ def time_agg_large_num_groups(self, method, ndim):
90123
ds = getattr(self, f"ds{ndim}d")
91124
getattr(ds.resample(time="48H"), method)()
92125

126+
def time_groupby_binary_op_1d(self):
127+
self.ds1d - self.ds1d_mean
128+
129+
def time_groupby_binary_op_2d(self):
130+
self.ds2d - self.ds2d_mean
131+
132+
def peakmem_groupby_binary_op_1d(self):
133+
self.ds1d - self.ds1d_mean
134+
135+
def peakmem_groupby_binary_op_2d(self):
136+
self.ds2d - self.ds2d_mean
137+
93138

94139
class ResampleDask(Resample):
95140
def setup(self, *args, **kwargs):

ci/requirements/doc.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ channels:
44
- conda-forge
55
- nodefaults
66
dependencies:
7-
- python=3.8
7+
- python=3.9
88
- bottleneck
99
- cartopy
1010
- cfgrib>=0.9
1111
- dask-core>=2.30
1212
- h5netcdf>=0.7.4
1313
- ipykernel
1414
- ipython
15-
- ipython_genutils # remove once `nbconvert` fixed its dependencies
1615
- iris>=2.3
16+
- jinja2<3.1 # remove once nbconvert fixed the use of removed functions
1717
- jupyter_client
1818
- matplotlib-base
1919
- nbsphinx
@@ -34,7 +34,7 @@ dependencies:
3434
- sphinx-book-theme >= 0.0.38
3535
- sphinx-copybutton
3636
- sphinx-panels
37-
- sphinx<4
37+
- sphinx!=4.4.0
3838
- zarr>=2.4
3939
- pip:
4040
- sphinxext-rediraffe

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,7 @@ Dataset
944944

945945
DatasetWeighted
946946
DatasetWeighted.mean
947+
DatasetWeighted.quantile
947948
DatasetWeighted.sum
948949
DatasetWeighted.std
949950
DatasetWeighted.var
@@ -958,6 +959,7 @@ DataArray
958959

959960
DataArrayWeighted
960961
DataArrayWeighted.mean
962+
DataArrayWeighted.quantile
961963
DataArrayWeighted.sum
962964
DataArrayWeighted.std
963965
DataArrayWeighted.var

doc/developers-meeting.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ The meeting occurs on `Zoom <https://us02web.zoom.us/j/88251613296?pwd=azZsSkU1U
77

88
Notes for the meeting are kept `here <https://hackmd.io/@U4W-olO3TX-hc-cvbjNe4A/xarray-dev-meeting/edit>`__.
99

10-
There is a `GitHub issue <https://github.com/pydata/xarray/issues/4001>`__ for changes to the meeting.
10+
There is a :issue:`GitHub issue <4001>` for changes to the meeting.
1111

1212
You can subscribe to this calendar to be notified of changes:
1313

doc/internals/extending-xarray.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@ easy to inadvertently use internal APIs when subclassing, which means that your
1818
code may break when xarray upgrades. Furthermore, many builtin methods will
1919
only return native xarray objects.
2020

21-
The standard advice is to use `composition over inheritance`__, but
21+
The standard advice is to use :issue:`composition over inheritance <706>`, but
2222
reimplementing an API as large as xarray's on your own objects can be an onerous
2323
task, even if most methods are only forwarding to xarray implementations.
2424

25-
__ https://github.com/pydata/xarray/issues/706
26-
2725
If you simply want the ability to call a function with the syntax of a
2826
method call, then the builtin :py:meth:`~xarray.DataArray.pipe` method (copied
2927
from pandas) may suffice.

doc/roadmap.rst

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,7 @@ xarray's data model, e.g., as attributes on the ``Dataset`` and
114114
coordinates in xarray operations, but will no longer would need to have
115115
a one-to-one correspondence with coordinate variables. Instead, an index
116116
should be able to refer to multiple (possibly multidimensional)
117-
coordinates that define it. See `GH
118-
1603 <https://github.com/pydata/xarray/issues/1603>`__ for full details
117+
coordinates that define it. See :issue:`1603` for full details.
119118

120119
Specific tasks:
121120

@@ -182,11 +181,9 @@ backends means that users can not easily build backend interface for
182181
xarray in third-party libraries.
183182

184183
The idea of refactoring the backends API and exposing it to users was
185-
originally proposed in `GH
186-
1970 <https://github.com/pydata/xarray/issues/1970>`__. The idea would
187-
be to develop a well tested and generic backend base class and
188-
associated utilities for external use. Specific tasks for this
189-
development would include:
184+
originally proposed in :issue:`1970`. The idea would be to develop a
185+
well tested and generic backend base class and associated utilities
186+
for external use. Specific tasks for this development would include:
190187

191188
- Exposing an abstract backend for writing new storage systems.
192189
- Exposing utilities for features like automatic closing of files,
@@ -225,7 +222,7 @@ examples include:
225222

226223
A new tree-like data structure which is essentially a structured hierarchical
227224
collection of Datasets could represent these cases, and would instead map to
228-
multiple netCDF groups (see `GH4118 <https://github.com/pydata/xarray/issues/4118>`__.).
225+
multiple netCDF groups (see :issue:`4118`).
229226

230227
Currently there are several libraries which have wrapped xarray in order to build
231228
domain-specific data structures (e.g. `xarray-multiscale <https://github.com/JaneliaSciComp/xarray-multiscale>`__.),

doc/tutorials-and-videos.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,20 @@ Videos
1818
.. panels::
1919
:card: text-center
2020

21+
---
22+
Xdev Python Tutorial Seminar Series 2022 Thinking with Xarray : High-level computation patterns | Deepak Cherian
23+
^^^
24+
.. raw:: html
25+
26+
<iframe width="100%" src="https://www.youtube.com/embed/TSw3GF_d2y8" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
27+
28+
---
29+
Xdev Python Tutorial Seminar Series 2021 seminar introducing xarray (2 of 2) | Anderson Banihirwe
30+
^^^
31+
.. raw:: html
32+
33+
<iframe width="100%" src="https://www.youtube.com/embed/2H_4drBwORY" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
34+
2135
---
2236
Xdev Python Tutorial Seminar Series 2021 seminar introducing xarray (1 of 2) | Anderson Banihirwe
2337
^^^

doc/user-guide/computation.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ Weighted array reductions
265265

266266
:py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted`
267267
and :py:meth:`Dataset.weighted` array reduction methods. They currently
268-
support weighted ``sum``, ``mean``, ``std`` and ``var``.
268+
support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``.
269269

270270
.. ipython:: python
271271
@@ -293,6 +293,12 @@ Calculate the weighted mean:
293293
294294
weighted_prec.mean(dim="month")
295295
296+
Calculate the weighted quantile:
297+
298+
.. ipython:: python
299+
300+
weighted_prec.quantile(q=0.5, dim="month")
301+
296302
The weighted sum corresponds to:
297303

298304
.. ipython:: python

0 commit comments

Comments (0)