Refactor utils.interpolate module (#210)

dnerini · aperezhortal · loforest · web-flow · commit 69061dd4177f · 2021-07-04T10:43:44.000+02:00
* Rename to iwdinterp2d; adapt docstrings, variable names, and interface

* Fix interface

* Initial wrapper to scipy's method

* Aesthetics

* Use mode=N-D to do multivariate interpolation

* Fix black check

By upgrading black to latest version

* Support parameters of the original function

* Refactor idw method

* Aesthetics

* Implement checks as a decorator

* Add tests

* Aesthetics

* Remove unnecessary test

* Remove unnecessary test

* Test for wrong number of dimensions

* Chunking of the dst grid is done in the preamble

* Fix chunking

* Simplify idw

* Add LRU caching of interpolator classes

* Small adjustments

* Change default interp method to idw

* Adapt test

* Adapt examples

* Silence some warnings

And other super minor changes

* Fix black check

* Fix tests

* Aesthetics

* Fix docstrings

Or at least try...

* Fix univariate interpolation

And include test

* Fix black check

* Super minor changes

* Update precommit config

* Define RBF in docstring

* Add function to append extra kwrds to docstrings

* Rename decorator

* Test idw with k=1 or k=None

* Test that all outputs are finite nuumberrs

* Update pysteps/utils/interpolate.py

Co-authored-by: Loris Foresti &lt;39999237+loforest@users.noreply.github.com&gt;

* Improve variable naming

* Workaround for TypeError issue with Pillow 8.3

* Add abbreviation for inverse distance weighting

* Fix typo

* Docstring polishing in idwinterp2d

* Small refactoring

* Docstring polishing

* Improve docstrings

* Adapt variable naming

* Inputs must be ndarrays

* Fix typo

* Make the offset contant a user-selectable parameter

Co-authored-by: Andres Perez Hortal &lt;16256571+aperezhortal@users.noreply.github.com&gt;
Co-authored-by: Loris Foresti &lt;39999237+loforest@users.noreply.github.com&gt;
Co-authored-by: Seppo Pulkkinen &lt;pulkkins@gmail.com&gt;
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: 20.8b1
+    rev: 21.6b0
     hooks:
     - id: black
       language_version: python3
diff --git a/examples/LK_buffer_mask.py b/examples/LK_buffer_mask.py
@@ -169,16 +169,9 @@
 # By comparing the velocity of the motion fields, we can easily notice
 # the negative bias that is introduced by the the erroneous interpretation of
 # velocities near the maximum range of the radars.
-# Please note that we are setting a small shape parameter ``epsilon`` for the
-# interpolation routine. This will produce a smoother motion field.
 
-interp_kwargs = {"epsilon": 5}  # use a small shape parameter for interpolation
-UV1 = dense_lucaskanade(
-    R, dense=True, fd_kwargs=fd_kwargs1, interp_kwargs=interp_kwargs
-)
-UV2 = dense_lucaskanade(
-    R, dense=True, fd_kwargs=fd_kwargs2, interp_kwargs=interp_kwargs
-)
+UV1 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs1)
+UV2 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs2)
 
 V1 = np.sqrt(UV1[0] ** 2 + UV1[1] ** 2)
 V2 = np.sqrt(UV2[0] ** 2 + UV2[1] ** 2)
diff --git a/examples/optical_flow_methods_convergence.py b/examples/optical_flow_methods_convergence.py
@@ -290,7 +290,7 @@ def plot_optflow_method_convergence(input_precip, optflow_method_name, motion_ty
         axis=0
     )
 
-    cmap = get_cmap("jet")
+    cmap = get_cmap("jet").copy()
     cmap.set_under("grey", alpha=0.25)
     cmap.set_over("none")
 
diff --git a/pysteps/decorators.py b/pysteps/decorators.py
@@ -11,14 +11,34 @@
 
     postprocess_import
     check_input_frames
+    prepare_interpolator
+    memoize
 """
 import inspect
+import uuid
 from collections import defaultdict
 from functools import wraps
 
 import numpy as np
 
 
+def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
+    """
+    Update the functions docstrings by replacing the `{extra_kwargs_doc}` occurences in
+    the docstring by the `extra_kwargs_doc_text` value.
+    """
+    # Clean up indentation from docstrings for the
+    # docstrings to be merged correctly.
+    extra_kwargs_doc = inspect.cleandoc(extra_kwargs_doc_text)
+    target_func.__doc__ = inspect.cleandoc(target_func.__doc__)
+
+    # Add extra kwargs docstrings
+    target_func.__doc__ = target_func.__doc__.format_map(
+        defaultdict(str, extra_kwargs_doc=extra_kwargs_doc)
+    )
+    return target_func
+
+
 def postprocess_import(fillna=np.nan, dtype="double"):
     """
     Postprocess the imported precipitation data.
@@ -82,19 +102,7 @@ def _import_with_postprocessing(*args, **kwargs):
                 By default, np.nan is used.
             """
 
-        # Clean up indentation from docstrings for the
-        # docstrings to be merged correctly.
-        extra_kwargs_doc = inspect.cleandoc(extra_kwargs_doc)
-        _import_with_postprocessing.__doc__ = inspect.cleandoc(
-            _import_with_postprocessing.__doc__
-        )
-
-        # Add extra kwargs docstrings
-        _import_with_postprocessing.__doc__ = (
-            _import_with_postprocessing.__doc__.format_map(
-                defaultdict(str, extra_kwargs_doc=extra_kwargs_doc)
-            )
-        )
+        _add_extra_kwrds_to_docstrings(_import_with_postprocessing, extra_kwargs_doc)
 
         return _import_with_postprocessing
 
@@ -140,3 +148,138 @@ def new_function(*args, **kwargs):
         return new_function
 
     return _check_input_frames
+
+
+def prepare_interpolator(nchunks=4):
+    """
+    Check that all the inputs have the correct shape, and that all values are
+    finite. It also split the destination grid in  `nchunks` parts, and process each
+    part independently.
+    """
+
+    def _preamble_interpolation(interpolator):
+        @wraps(interpolator)
+        def _interpolator_with_preamble(xy_coord, values, xgrid, ygrid, **kwargs):
+            nonlocal nchunks  # https://stackoverflow.com/questions/5630409/
+
+            values = values.copy()
+            xy_coord = xy_coord.copy()
+
+            input_ndims = values.ndim
+            input_nvars = 1 if input_ndims == 1 else values.shape[1]
+            input_nsamples = values.shape[0]
+
+            coord_ndims = xy_coord.ndim
+            coord_nsamples = xy_coord.shape[0]
+
+            grid_shape = (ygrid.size, xgrid.size)
+
+            if np.any(~np.isfinite(values)):
+                raise ValueError("argument 'values' contains non-finite values")
+            if np.any(~np.isfinite(xy_coord)):
+                raise ValueError("argument 'xy_coord' contains non-finite values")
+
+            if input_ndims > 2:
+                raise ValueError(
+                    "argument 'values' must have 1 (n) or 2 dimensions (n, m), "
+                    f"but it has {input_ndims}"
+                )
+            if not coord_ndims == 2:
+                raise ValueError(
+                    "argument 'xy_coord' must have 2 dimensions (n, 2), "
+                    f"but it has {coord_ndims}"
+                )
+
+            if not input_nsamples == coord_nsamples:
+                raise ValueError(
+                    "the number of samples in argument 'values' does not match the "
+                    f"number of coordinates {input_nsamples}!={coord_nsamples}"
+                )
+
+            # only one sample, return uniform output
+            if input_nsamples == 1:
+                output_array = np.ones((input_nvars,) + grid_shape)
+                for n, v in enumerate(values[0, ...]):
+                    output_array[n, ...] *= v
+                return output_array.squeeze()
+
+            # all equal elements, return uniform output
+            if values.max() == values.min():
+                return np.ones((input_nvars,) + grid_shape) * values.ravel()[0]
+
+            # split grid in n chunks
+            nchunks = int(kwargs.get("nchunks", nchunks) ** 0.5)
+            if nchunks > 1:
+                subxgrids = np.array_split(xgrid, nchunks)
+                subxgrids = [x for x in subxgrids if x.size > 0]
+                subygrids = np.array_split(ygrid, nchunks)
+                subygrids = [y for y in subygrids if y.size > 0]
+
+                # generate a unique identifier to be used for caching
+                # intermediate results
+                kwargs["hkey"] = uuid.uuid1().int
+            else:
+                subxgrids = [xgrid]
+                subygrids = [ygrid]
+
+            interpolated = np.zeros((input_nvars,) + grid_shape)
+            indx = 0
+            for subxgrid in subxgrids:
+                deltax = subxgrid.size
+                indy = 0
+                for subygrid in subygrids:
+                    deltay = subygrid.size
+                    interpolated[
+                        :, indy : (indy + deltay), indx : (indx + deltax)
+                    ] = interpolator(xy_coord, values, subxgrid, subygrid, **kwargs)
+                    indy += deltay
+                indx += deltax
+
+            return interpolated.squeeze()
+
+        extra_kwargs_doc = """
+            nchunks: int, optional
+                Split and process the destination grid in nchunks.
+                Useful for large grids to limit the memory footprint.
+            """
+
+        _add_extra_kwrds_to_docstrings(_interpolator_with_preamble, extra_kwargs_doc)
+
+        return _interpolator_with_preamble
+
+    return _preamble_interpolation
+
+
+def memoize(maxsize=10):
+    """
+    Add a Least Recently Used (LRU) cache to any function.
+    Caching is purely based on the optional keyword argument 'hkey', which needs
+    to be a hashable.
+
+    Parameters
+    ----------
+    maxsize: int, optional
+        The maximum number of elements stored in the LRU cache.
+    """
+
+    def _memoize(func):
+        cache = dict()
+        hkeys = []
+
+        @wraps(func)
+        def _func_with_cache(*args, **kwargs):
+            hkey = kwargs.pop("hkey", None)
+            if hkey in cache:
+                return cache[hkey]
+            result = func(*args, **kwargs)
+            if hkey is not None:
+                cache[hkey] = result
+                hkeys.append(hkey)
+                if len(hkeys) > maxsize:
+                    cache.pop(hkeys.pop(0))
+
+            return result
+
+        return _func_with_cache
+
+    return _memoize
diff --git a/pysteps/io/exporters.py b/pysteps/io/exporters.py
@@ -510,14 +510,14 @@ def initialize_forecast_exporter_netcdf(
     pr = pyproj.Proj(metadata["projection"])
     lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
 
-    var_lon = ncf.createVariable("lon", np.float, dimensions=("y", "x"))
+    var_lon = ncf.createVariable("lon", float, dimensions=("y", "x"))
     var_lon[:] = lon.reshape(shape)
     var_lon.standard_name = "longitude"
     var_lon.long_name = "longitude coordinate"
     # TODO(exporters): Don't hard-code the unit.
     var_lon.units = "degrees_east"
 
-    var_lat = ncf.createVariable("lat", np.float, dimensions=("y", "x"))
+    var_lat = ncf.createVariable("lat", float, dimensions=("y", "x"))
     var_lat[:] = lat.reshape(shape)
     var_lat.standard_name = "latitude"
     var_lat.long_name = "latitude coordinate"
@@ -533,22 +533,20 @@ def initialize_forecast_exporter_netcdf(
     ) = _convert_proj4_to_grid_mapping(metadata["projection"])
     # skip writing the grid mapping if a matching name was not found
     if grid_mapping_var_name is not None:
-        var_gm = ncf.createVariable(grid_mapping_var_name, np.int, dimensions=())
+        var_gm = ncf.createVariable(grid_mapping_var_name, int, dimensions=())
         var_gm.grid_mapping_name = grid_mapping_name
         for i in grid_mapping_params.items():
             var_gm.setncattr(i[0], i[1])
 
     if incremental == "member" or n_ens_gt_one:
-        var_ens_num = ncf.createVariable(
-            "ens_number", np.int, dimensions=("ens_number",)
-        )
+        var_ens_num = ncf.createVariable("ens_number", int, dimensions=("ens_number",))
         if incremental != "member":
             var_ens_num[:] = list(range(1, n_ens_members + 1))
         var_ens_num.long_name = "ensemble member"
         var_ens_num.standard_name = "realization"
         var_ens_num.units = ""
 
-    var_time = ncf.createVariable("time", np.int, dimensions=("time",))
+    var_time = ncf.createVariable("time", int, dimensions=("time",))
     if incremental != "timestep":
         var_time[:] = [i * timestep * 60 for i in range(1, n_timesteps + 1)]
     var_time.long_name = "forecast time"
diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
@@ -39,7 +39,7 @@
 |   ypixelsize     | grid resolution in y-direction                           |
 +------------------+----------------------------------------------------------+
 |   cartesian_unit | the physical unit of the cartesian x- and y-coordinates: |
-|                  | e.g. 'm' or 'km'                                         | 
+|                  | e.g. 'm' or 'km'                                         |
 +------------------+----------------------------------------------------------+
 |   yorigin        | a string specifying the location of the first element in |
 |                  | the data raster w.r.t. y-axis:                           |
@@ -934,12 +934,12 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
     metadata = geodata
 
     # import gif file
-    B = Image.open(filename)
+    img = Image.open(filename)
 
     if product.lower() in ["azc", "rzc", "precip"]:
 
         # convert 8-bit GIF colortable to RGB values
-        Brgb = B.convert("RGB")
+        img_rgb = img.convert("RGB")
 
         # load lookup table
         if product.lower() == "azc":
@@ -954,12 +954,12 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
         lut = dict(zip(zip(lut[:, 1], lut[:, 2], lut[:, 3]), lut[:, -1]))
 
         # apply lookup table conversion
-        precip = np.zeros(len(Brgb.getdata()))
-        for i, dn in enumerate(Brgb.getdata()):
+        precip = np.zeros(len(img_rgb.getdata()))
+        for i, dn in enumerate(img_rgb.getdata()):
             precip[i] = lut.get(dn, np.nan)
 
         # convert to original shape
-        width, height = B.size
+        width, height = img.size
         precip = precip.reshape(height, width)
 
         # set values outside observational range to NaN,
@@ -970,7 +970,7 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
     elif product.lower() in ["aqc", "cpc", "acquire ", "combiprecip"]:
 
         # convert digital numbers to physical values
-        B = np.array(B, dtype=int)
+        img = np.array(img).astype(int)
 
         # build lookup table [mm/5min]
         lut = np.zeros(256)
@@ -985,7 +985,7 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
                 lut[i] = (10.0 ** ((i - 71.5) / 20.0) / a) ** (1.0 / b)
 
         # apply lookup table
-        precip = lut[B]
+        precip = lut[img]
 
     else:
         raise ValueError("unknown product %s" % product)
diff --git a/pysteps/motion/lucaskanade.py b/pysteps/motion/lucaskanade.py
@@ -49,7 +49,7 @@ def dense_lucaskanade(
     lk_kwargs=None,
     fd_method="shitomasi",
     fd_kwargs=None,
-    interp_method="rbfinterp2d",
+    interp_method="idwinterp2d",
     interp_kwargs=None,
     dense=True,
     nr_std_outlier=3,
diff --git a/pysteps/nowcasts/sseps.py b/pysteps/nowcasts/sseps.py
@@ -230,9 +230,9 @@ def forecast(
         )
 
     if np.isscalar(win_size):
-        win_size = (np.int(win_size), np.int(win_size))
+        win_size = (int(win_size), int(win_size))
     else:
-        win_size = tuple([np.int(win_size[i]) for i in range(2)])
+        win_size = tuple([int(win_size[i]) for i in range(2)])
 
     timestep = metadata["accutime"]
     kmperpixel = metadata["xpixelsize"] / 1000
diff --git a/pysteps/tests/test_decorators.py b/pysteps/tests/test_decorators.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+import time
+
+from pysteps.decorators import memoize
+
+
+def test_memoize():
+    @memoize(maxsize=1)
+    def _slow_function(x, **kwargs):
+        time.sleep(1)
+        return x
+
+    for i in range(2):
+        out = _slow_function(i, hkey=i)
+        assert out == i
+
+    # cached result
+    t0 = time.monotonic()
+    out = _slow_function(1, hkey=1)
+    assert time.monotonic() - t0 < 1
+    assert out == 1
+
+    # maxsize exceeded
+    t0 = time.monotonic()
+    out = _slow_function(0, hkey=0)
+    assert time.monotonic() - t0 >= 1
+    assert out == 0
+
+    # no hash
+    t0 = time.monotonic()
+    out = _slow_function(1)
+    assert time.monotonic() - t0 >= 1
+    assert out == 1
diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py
@@ -27,7 +27,7 @@
     (5, 6, 2, "sprog", None, "spatial", 3, 8.35),
     (5, 6, 2, "obs", None, "spatial", 3, 8.30),
     (5, 6, 2, None, "cdf", "spatial", 3, 0.60),
-    (5, 6, 2, None, "mean", "spatial", 3, 1.30),
+    (5, 6, 2, None, "mean", "spatial", 3, 1.35),
     (5, 6, 2, "incremental", "cdf", "spectral", 3, 0.60),
 ]
 
diff --git a/pysteps/tests/test_utils_interpolate.py b/pysteps/tests/test_utils_interpolate.py
diff --git a/pysteps/utils/cleansing.py b/pysteps/utils/cleansing.py
diff --git a/pysteps/utils/images.py b/pysteps/utils/images.py
diff --git a/pysteps/utils/interface.py b/pysteps/utils/interface.py
diff --git a/pysteps/utils/interpolate.py b/pysteps/utils/interpolate.py

Original file line number	Diff line number	Diff line change
`@@ -290,7 +290,7 @@ def plot_optflow_method_convergence(input_precip, optflow_method_name, motion_ty`
`290`	`290`	`axis=0`
`291`	`291`	`)`
`292`	`292`
`293`		`- cmap = get_cmap("jet")`
	`293`	`+ cmap = get_cmap("jet").copy()`
`294`	`294`	`cmap.set_under("grey", alpha=0.25)`
`295`	`295`	`cmap.set_over("none")`
`296`	`296`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@`
`27`	`27`	`(5, 6, 2, "sprog", None, "spatial", 3, 8.35),`
`28`	`28`	`(5, 6, 2, "obs", None, "spatial", 3, 8.30),`
`29`	`29`	`(5, 6, 2, None, "cdf", "spatial", 3, 0.60),`
`30`		`- (5, 6, 2, None, "mean", "spatial", 3, 1.30),`
	`30`	`+ (5, 6, 2, None, "mean", "spatial", 3, 1.35),`
`31`	`31`	`(5, 6, 2, "incremental", "cdf", "spectral", 3, 0.60),`
`32`	`32`	`]`
`33`	`33`