Feature detector extensions (#225)

pulkkins · web-flow · commit 3485498c0870 · 2021-08-03T14:34:53.000+03:00
* Remove unnecessary encoding lines

* Docstring revisions

* Add interface documentation

* Docstring revision

* Remove unnecessary encoding line

* Add tests

* Remove unnecessary import

* Add option to restrict the number of blobs based on their scale-space intensity

* Add max_num_features keyword argument

* Add tests for max_num_features

* Add documentation about feature selection when max_num_features is not None

* Add area to cell attributes and option to choose the maximum number of cells by area

* Add tests for max_num_features

* Add normalization of blob intensities by multiplication with sigma**2

* Fix typo in if clause

* Adjust the number of columns to include cell area

* Sord IDs to ensure correct comparison

* Fix incorrect indexing

* Remove unused metadata variable

* Rename input to input_field to avoid redefining a built-in
diff --git a/pysteps/feature/blob.py b/pysteps/feature/blob.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 pysteps.feature.blob
 ====================
@@ -15,6 +14,8 @@
 
 from pysteps.exceptions import MissingOptionalDependency
 
+from scipy.ndimage import gaussian_laplace
+
 try:
     from skimage import feature
 
@@ -25,6 +26,7 @@
 
 def detection(
     input_image,
+    max_num_features=None,
     method="log",
     threshold=0.5,
     min_sigma=3,
@@ -47,6 +49,11 @@ def detection(
     ----------
     input_image: array_like
         Array of shape (m, n) containing the input image. Nan values are ignored.
+    max_num_features : int, optional
+        The maximum number of blobs to detect. Set to None for no restriction.
+        If specified, the most significant blobs are chosen based on their
+        intensities in the corresponding Laplacian of Gaussian (LoG)-filtered
+        images.
     method: {'log', 'dog', 'doh'}, optional
         The method to use: 'log' = Laplacian of Gaussian, 'dog' = Difference of
         Gaussian, 'doh' = Determinant of Hessian.
@@ -95,6 +102,14 @@ def detection(
         **kwargs,
     )
 
+    if max_num_features is not None and blobs.shape[0] > max_num_features:
+        blob_intensities = []
+        for i in range(blobs.shape[0]):
+            gl_image = -gaussian_laplace(input_image, blobs[i, 2]) * blobs[i, 2] ** 2
+            blob_intensities.append(gl_image[int(blobs[i, 0]), int(blobs[i, 1])])
+        idx = np.argsort(blob_intensities)[::-1]
+        blobs = blobs[idx[:max_num_features], :]
+
     if not return_sigmas:
         return np.column_stack([blobs[:, 1], blobs[:, 0]])
     else:
diff --git a/pysteps/feature/interface.py b/pysteps/feature/interface.py
@@ -1,10 +1,27 @@
-# -*- coding: utf-8 -*-
 """
 pysteps.feature.interface
 =========================
 
 Interface for the feature detection module. It returns a callable function for
-detecting features.
+detecting features from two-dimensional images.
+
+The feature detectors implement the following interface:
+
+    ``detection(input_image, **keywords)``
+
+The input is a two-dimensional image. Additional arguments to the specific
+method can be given via **keywords. The output is an array of shape (n, m),
+where each row corresponds to one of the n features. The first two columns
+contain the coordinates (x, y) of the features, and additional information can
+be specified in the remaining columns.
+
+All implemented methods support the following keyword arguments:
+
++------------------+-----------------------------------------------------+
+|       Key        |                Value                                |
++==================+=====================================================+
+| max_num_features | maximum number of features to detect                |
++------------------+-----------------------------------------------------+
 
 .. autosummary::
     :toctree: ../generated/
@@ -23,10 +40,7 @@
 
 
 def get_method(name):
-    """Return a callable function for computing detection.
-
-    Description:
-    Return a callable function for detecting features on input images .
+    """Return a callable function for feature detection.
 
     Implemented methods:
 
diff --git a/pysteps/feature/shitomasi.py b/pysteps/feature/shitomasi.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 pysteps.feature.shitomasi
 =========================
@@ -27,6 +26,7 @@
 def detection(
     input_image,
     max_corners=1000,
+    max_num_features=None,
     quality_level=0.01,
     min_distance=10,
     block_size=5,
@@ -73,6 +73,10 @@ def detection(
         method.
         It represents the maximum number of points to be tracked (corners).
         If set to zero, all detected corners are used.
+    max_num_features: int, optional
+        If specified, this argument is substituted for max_corners. Set to None
+        for no restriction. Added for compatibility with the feature detector
+        interface.
     quality_level: float, optional
         The ``qualityLevel`` parameter in the `Shi-Tomasi`_ corner detection
         method.
@@ -148,7 +152,7 @@ def detection(
     mask = (-1 * mask + 1).astype("uint8")
 
     params = dict(
-        maxCorners=max_corners,
+        maxCorners=max_num_features if max_num_features is not None else max_corners,
         qualityLevel=quality_level,
         minDistance=min_distance,
         blockSize=block_size,
diff --git a/pysteps/feature/tstorm.py b/pysteps/feature/tstorm.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 pysteps.feature.tstorm
 ======================
@@ -49,6 +48,7 @@
 
 def detection(
     input_image,
+    max_num_features=None,
     minref=35,
     maxref=48,
     mindiff=6,
@@ -69,6 +69,9 @@ def detection(
     input_image: array-like
         Array of shape (m,n) containing input image, usually maximum reflectivity in
         dBZ with a resolution of 1 km. Nan values are ignored.
+    max_num_features : int, optional
+        The maximum number of cells to detect. Set to None for no restriction.
+        If specified, the most significant cells are chosen based on their area.
     minref: float, optional
         Lower threshold for object detection. Lower values will be set to NaN.
         The default is 35 dBZ.
@@ -163,10 +166,22 @@ def detection(
 
     cells_id, labels = get_profile(areas, binary, input_image, loc_max, time, minref)
 
+    if max_num_features is not None:
+        idx = np.argsort(cells_id.area.to_numpy())[::-1]
+
     if not output_feat:
-        return cells_id, labels
+        if max_num_features is None:
+            return cells_id, labels
+        else:
+            for i in idx[max_num_features:]:
+                labels[labels == cells_id.ID[i]] = 0
+            return cells_id.loc[idx[:max_num_features]], labels
     if output_feat:
-        return np.column_stack([np.array(cells_id.cen_x), np.array(cells_id.cen_y)])
+        out = np.column_stack([np.array(cells_id.cen_x), np.array(cells_id.cen_y)])
+        if max_num_features is not None:
+            out = out[idx[:max_num_features], :]
+
+        return out
 
 
 def breakup(ref, minval, maxima):
@@ -219,7 +234,7 @@ def get_profile(areas, binary, ref, loc_max, time, minref):
     cells_id = pd.DataFrame(
         data=None,
         index=range(len(cell_labels)),
-        columns=["ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont"],
+        columns=["ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont", "area"],
     )
     cells_id.time = time
     for n in range(len(cell_labels)):
@@ -235,6 +250,7 @@ def get_profile(areas, binary, ref, loc_max, time, minref):
         cells_id.cen_x.iloc[n] = int(np.nanmean(cells_id.x[n]))  # int(x[0])
         cells_id.cen_y.iloc[n] = int(np.nanmean(cells_id.y[n]))  # int(y[0])
         cells_id.max_ref.iloc[n] = maxref
+        cells_id.area.iloc[n] = len(cells_id.x.iloc[n])
         labels[cells == cell_labels[n]] = ID
 
     return cells_id, labels
diff --git a/pysteps/tests/test_feature.py b/pysteps/tests/test_feature.py
@@ -0,0 +1,24 @@
+import pytest
+import numpy as np
+from pysteps import feature
+from pysteps.tests.helpers import get_precipitation_fields
+
+arg_names = ["method", "max_num_features"]
+arg_values = [("blob", None), ("blob", 5), ("shitomasi", None), ("shitomasi", 5)]
+
+
+@pytest.mark.parametrize(arg_names, arg_values)
+def test_feature(method, max_num_features):
+    input_field, _ = get_precipitation_fields(0, 0, True, True, None, "mch")
+
+    detector = feature.get_method(method)
+
+    kwargs = {"max_num_features": max_num_features}
+    output = detector(input_field.squeeze(), **kwargs)
+
+    assert isinstance(output, np.ndarray)
+    assert output.ndim == 2
+    assert output.shape[0] > 0
+    if max_num_features is not None:
+        assert output.shape[0] <= max_num_features
+    assert output.shape[1] == 2
diff --git a/pysteps/tests/test_feature_tstorm.py b/pysteps/tests/test_feature_tstorm.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-import datetime as dt
-
 import numpy as np
 import pytest
 
@@ -14,17 +10,20 @@
 except ModuleNotFoundError:
     pass
 
-arg_names = ("source", "output_feat", "dry_input")
+arg_names = ("source", "output_feat", "dry_input", "max_num_features")
 
 arg_values = [
-    ("mch", False, False),
-    ("mch", True, False),
-    ("mch", False, True),
+    ("mch", False, False, None),
+    ("mch", False, False, 5),
+    ("mch", True, False, None),
+    ("mch", True, False, 5),
+    ("mch", False, True, None),
+    ("mch", False, True, 5),
 ]
 
 
 @pytest.mark.parametrize(arg_names, arg_values)
-def test_feature_tstorm_detection(source, output_feat, dry_input):
+def test_feature_tstorm_detection(source, output_feat, dry_input, max_num_features):
 
     pytest.importorskip("skimage")
     pytest.importorskip("pandas")
@@ -37,18 +36,24 @@ def test_feature_tstorm_detection(source, output_feat, dry_input):
         input = np.zeros((50, 50))
 
     time = "000"
-    output = detection(input, time=time, output_feat=output_feat)
+    output = detection(
+        input, time=time, output_feat=output_feat, max_num_features=max_num_features
+    )
 
     if output_feat:
         assert isinstance(output, np.ndarray)
         assert output.ndim == 2
         assert output.shape[1] == 2
+        if max_num_features is not None:
+            assert output.shape[0] <= max_num_features
     else:
         assert isinstance(output, tuple)
         assert len(output) == 2
         assert isinstance(output[0], DataFrame)
         assert isinstance(output[1], np.ndarray)
-        assert output[0].shape[1] == 8
+        if max_num_features is not None:
+            assert output[0].shape[0] <= max_num_features
+        assert output[0].shape[1] == 9
         assert list(output[0].columns) == [
             "ID",
             "time",
@@ -58,13 +63,14 @@ def test_feature_tstorm_detection(source, output_feat, dry_input):
             "cen_y",
             "max_ref",
             "cont",
+            "area",
         ]
         assert (output[0].time == time).all()
         assert output[1].ndim == 2
         assert output[1].shape == input.shape
         if not dry_input:
             assert output[0].shape[0] > 0
-            assert list(output[0].ID) == list(np.unique(output[1]))[1:]
+            assert sorted(list(output[0].ID)) == sorted(list(np.unique(output[1]))[1:])
         else:
             assert output[0].shape[0] == 0
             assert output[1].sum() == 0
diff --git a/pysteps/tests/test_tracking_tdating.py b/pysteps/tests/test_tracking_tdating.py
@@ -42,12 +42,12 @@ def test_tracking_tdating_dating(source, dry_input):
     assert len(output[2]) == input.shape[0]
     assert isinstance(output[1][0], pandas.DataFrame)
     assert isinstance(output[2][0], np.ndarray)
-    assert output[1][0].shape[1] == 8
+    assert output[1][0].shape[1] == 9
     assert output[2][0].shape == input.shape[1:]
     if not dry_input:
         assert len(output[0]) > 0
         assert isinstance(output[0][0], pandas.DataFrame)
-        assert output[0][0].shape[1] == 8
+        assert output[0][0].shape[1] == 9
     else:
         assert len(output[0]) == 0
         assert output[1][0].shape[0] == 0