|
5 | 5 | import numpy as np
|
6 | 6 | import pandas as pd
|
7 | 7 | from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined]
|
| 8 | +from packaging.version import Version |
8 | 9 |
|
9 | 10 | # remove once numpy 2.0 is the oldest supported version
|
10 | 11 | try:
|
|
18 | 19 | try:
|
19 | 20 | import bottleneck as bn
|
20 | 21 |
|
21 |
| - _USE_BOTTLENECK = True |
| 22 | + _BOTTLENECK_AVAILABLE = True |
22 | 23 | except ImportError:
|
23 | 24 | # use numpy methods instead
|
24 | 25 | bn = np
|
25 |
| - _USE_BOTTLENECK = False |
| 26 | + _BOTTLENECK_AVAILABLE = False |
| 27 | + |
# Optional accelerator: numbagg is only used when it is importable and recent
# enough (>= 0.5.0).
try:
    import numbagg
except ImportError:
    # numbagg is not installed: alias numpy so attribute lookups on `numbagg`
    # keep working, and record that the accelerated path is unavailable.
    numbagg = np
    _HAS_NUMBAGG = False
else:
    _HAS_NUMBAGG = Version(numbagg.__version__) >= Version("0.5.0")
26 | 36 |
|
27 | 37 |
|
28 | 38 | def _select_along_axis(values, idx, axis):
|
@@ -161,13 +171,30 @@ def __setitem__(self, key, value):
|
161 | 171 | self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions)
|
162 | 172 |
|
163 | 173 |
|
164 |
| -def _create_bottleneck_method(name, npmodule=np): |
| 174 | +def _create_method(name, npmodule=np): |
165 | 175 | def f(values, axis=None, **kwargs):
|
166 | 176 | dtype = kwargs.get("dtype", None)
|
167 | 177 | bn_func = getattr(bn, name, None)
|
| 178 | + nba_func = getattr(numbagg, name, None) |
168 | 179 |
|
169 | 180 | if (
|
170 |
| - _USE_BOTTLENECK |
| 181 | + _HAS_NUMBAGG |
| 182 | + and OPTIONS["use_numbagg"] |
| 183 | + and isinstance(values, np.ndarray) |
| 184 | + and nba_func is not None |
| 185 | + # numbagg uses ddof=1 only, but numpy uses ddof=0 by default |
| 186 | + and (("var" in name or "std" in name) and kwargs.get("ddof", 0) == 1) |
| 187 | + # TODO: bool? |
| 188 | + and values.dtype.kind in "uifc" |
| 189 | + # and values.dtype.isnative |
| 190 | + and (dtype is None or np.dtype(dtype) == values.dtype) |
| 191 | + ): |
| 192 | + # numbagg does not take care dtype, ddof |
| 193 | + kwargs.pop("dtype", None) |
| 194 | + kwargs.pop("ddof", None) |
| 195 | + result = nba_func(values, axis=axis, **kwargs) |
| 196 | + elif ( |
| 197 | + _BOTTLENECK_AVAILABLE |
171 | 198 | and OPTIONS["use_bottleneck"]
|
172 | 199 | and isinstance(values, np.ndarray)
|
173 | 200 | and bn_func is not None
|
@@ -233,14 +260,14 @@ def least_squares(lhs, rhs, rcond=None, skipna=False):
|
233 | 260 | return coeffs, residuals
|
234 | 261 |
|
235 | 262 |
|
236 |
| -nanmin = _create_bottleneck_method("nanmin") |
237 |
| -nanmax = _create_bottleneck_method("nanmax") |
238 |
| -nanmean = _create_bottleneck_method("nanmean") |
239 |
| -nanmedian = _create_bottleneck_method("nanmedian") |
240 |
| -nanvar = _create_bottleneck_method("nanvar") |
241 |
| -nanstd = _create_bottleneck_method("nanstd") |
242 |
| -nanprod = _create_bottleneck_method("nanprod") |
243 |
| -nancumsum = _create_bottleneck_method("nancumsum") |
244 |
| -nancumprod = _create_bottleneck_method("nancumprod") |
245 |
| -nanargmin = _create_bottleneck_method("nanargmin") |
246 |
| -nanargmax = _create_bottleneck_method("nanargmax") |
# Module-level NaN-aware reductions, each built by `_create_method` from the
# name of the reduction it wraps. The wrappers are created in the same order
# as the names are listed.
(
    nanmin,
    nanmax,
    nanmean,
    nanmedian,
    nanvar,
    nanstd,
    nanprod,
    nancumsum,
    nancumprod,
    nanargmin,
    nanargmax,
) = (
    _create_method(reduction_name)
    for reduction_name in (
        "nanmin",
        "nanmax",
        "nanmean",
        "nanmedian",
        "nanvar",
        "nanstd",
        "nanprod",
        "nancumsum",
        "nancumprod",
        "nanargmin",
        "nanargmax",
    )
)
0 commit comments