pydata · shazelquist · Oct 13, 2021 · Oct 15, 2021 · Oct 15, 2021 · Oct 15, 2021
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v0.19.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- Added an optional ``tolerance`` argument to :py:func:`isin` for matching numerically close values (:issue:`5587`, :pull:`5862`).
+  By `Shane Hazelquist <https://github.com/shazelquist>`_.
 - Added a :py:func:`get_options` method to xarray's root namespace (:issue:`5698`, :pull:`5716`)
   By `Pushkar Kopparla <https://github.com/pkopparla>`_.
 - Xarray now does a better job rendering variable names that are long LaTeX sequences when plotting (:issue:`5681`, :pull:`5682`).

diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -1384,15 +1384,16 @@ def notnull(self, keep_attrs: bool = None):
             keep_attrs=keep_attrs,
         )
 
-    def isin(self, test_elements):
+    def isin(self, test_elements, tolerance=None):
         """Tests each value in the array for whether it is in test elements.
 
         Parameters
         ----------
         test_elements : array_like
             The values against which to test each value of `element`.
-            This argument is flattened if an array or array_like.
             See numpy notes for behavior with non-array-like parameters.
+        tolerance : scalar, optional
+            If given and non-zero, values closer than this to a test element match.
 
         Returns
         -------
@@ -1407,6 +1408,12 @@ def isin(self, test_elements):
         array([ True, False,  True])
         Dimensions without coordinates: x
 
+        >>> array = xr.DataArray([1, 2, 3], dims="x")
+        >>> array.isin([1.1, 2.9], tolerance=0.2)
+        <xarray.DataArray (x: 3)>
+        array([ True, False,  True])
+        Dimensions without coordinates: x
+
         See Also
         --------
         numpy.isin
@@ -1427,6 +1434,15 @@ def isin(self, test_elements):
             # second argument
             test_elements = test_elements.data
 
+        if tolerance:
+            # non-zero tolerance arguments
+            return apply_ufunc(
+                duck_array_ops.isin_tolerance,
+                self,
+                kwargs=dict(test_elements=test_elements, tolerance=tolerance),
+                dask="allowed",
+            )
+
         return apply_ufunc(
             duck_array_ops.isin,
             self,

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
@@ -467,6 +467,39 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
         return np.where(isnull(array), np.nan, array.astype(dtype))
 
 
+def isin_tolerance(self, test_elements, tolerance):
+    """Test whether each value of an array is close to any test element.
+
+    Parameters
+    ----------
+    self : array_like
+        Array whose values are tested.
+    test_elements : array_like
+        Values to compare against; converted with ``np.asarray``.
+    tolerance : scalar
+        Maximum distance (exclusive) between a value and a test element for
+        them to count as equal. Only its absolute value is used.
+
+    Returns
+    -------
+    array_like
+        Boolean array with the same shape as ``self``.
+
+    Notes
+    -----
+    All pairwise differences are formed at once, so an intermediate array of
+    shape ``(*self.shape, *test_elements.shape)`` is needed; this can require
+    a lot of memory for large inputs.
+    """
+    candidates = np.asarray(test_elements)
+    n_own = len(self.shape)
+    # The outer difference appends the axes of ``candidates`` after those of
+    # ``self``; reducing over exactly these axes restores the shape of ``self``.
+    candidate_axes = tuple(range(n_own, n_own + candidates.ndim))
+    distances = np.abs(np.subtract.outer(self, candidates))
+    return (distances < abs(tolerance)).any(candidate_axes)
+
+
 def timedelta_to_numeric(value, datetime_unit="ns", dtype=float):
     """Convert a timedelta-like object to numerical values.
 

diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
@@ -7,14 +7,15 @@
 import pytest
 from numpy import array, nan
 
-from xarray import DataArray, Dataset, cftime_range, concat
+from xarray import DataArray, Dataset, cftime_range, concat, testing
 from xarray.core import dtypes, duck_array_ops
 from xarray.core.duck_array_ops import (
     array_notnull_equiv,
     concatenate,
     count,
     first,
     gradient,
+    isin_tolerance,
     last,
     least_squares,
     mean,
@@ -892,3 +893,49 @@ def test_push_dask():
             dask.array.from_array(array, chunks=(1, 2, 3, 2, 2, 1, 1)), axis=0, n=None
         )
     np.testing.assert_equal(actual, expected)
+
+
+@pytest.mark.parametrize("shape", [(200,), (10, 10, 2), (4, 50)])
+@pytest.mark.parametrize("tolerance", [1e-2, 1e-4, 1e-6])
+@pytest.mark.parametrize("dask_for_A", [True, False])
+@pytest.mark.parametrize("dask_for_B", [True, False])
+def test_isin_tolerance(shape, tolerance, dask_for_A, dask_for_B):
+    """Check ``isin_tolerance`` for numpy and dask inputs of several shapes.
+
+    Every even-indexed value of the tested array gets a counterpart in the
+    test elements that is shifted by half the tolerance (in both directions),
+    so it must match; odd-indexed values only see a far-away sentinel and
+    neighbours 0.1 apart, so they must not match.
+    """
+    use_dask = dask_for_A or dask_for_B
+    if use_dask and not has_dask:
+        pytest.skip("requires dask")
+    if use_dask:
+        import dask.array
+
+    in_margin = tolerance / 2  # offset that stays inside the tolerance
+    sentinel = -99.0  # far from every value in ``values``
+    values = np.arange(-10.0, 10.0, 0.1).reshape(shape)
+    # True at even flat indices: the positions expected to match.
+    matches = (np.arange(values.size) % 2 == 0).reshape(shape)
+
+    # Build everything with numpy first; dask.array.from_array is only applied
+    # to plain numpy arrays below.
+    arrayA = dask.array.from_array(values) if dask_for_A else values
+    expected = dask.array.from_array(matches) if dask_for_A else matches
+    # The result follows the type of the first argument only: test elements
+    # are converted with np.asarray inside isin_tolerance.
+    expected_type = dask_array_type if dask_for_A else np.ndarray
+
+    for offset_direction in (1, -1):
+        # Matching positions get value +/- in_margin; the rest get the
+        # sentinel.
+        shifted = values + offset_direction * in_margin
+        elements = np.where(matches, shifted, sentinel)
+        arrayB = dask.array.from_array(elements) if dask_for_B else elements
+
+        actual = isin_tolerance(arrayA, arrayB, tolerance)
+
+        testing.assert_duckarray_equal(actual, expected)
+        assert isinstance(actual, expected_type)
+
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
@@ -454,6 +454,11 @@ def test_dataarray_property(prop):
             False,
             marks=xfail(reason="Missing implementation for np.isin"),
         ),
+        param(
+            do("isin_tolerance", [1 - 1e-7, 2, 3 + 1e-7], tolerance=1e-6),
+            False,
+            marks=xfail(reason="Missing implementation for isin_tolerance"),
+        ),
         param(
             do("item", (1, 1)),
             False,