[MRG] Log number of rejected samples in raw.get_data (#5824)

cbrnr · agramfort · commit d97fcc0c9898 · 2019-01-13T11:04:57.000+01:00
* Log number of rejected samples in raw.get_data

* Better names and f-strings

* Add single test

* Increase verbosity to get the log message

* Add verbose parameter to raw.get_data

* Remove f-strings because of Python 3.5 :-(
diff --git a/mne/io/base.py b/mne/io/base.py
@@ -933,8 +933,9 @@ def __setitem__(self, item, value):
         # set the data
         self._data[sel, start:stop] = value
 
+    @verbose
     def get_data(self, picks=None, start=0, stop=None,
-                 reject_by_annotation=None, return_times=False):
+                 reject_by_annotation=None, return_times=False, verbose=None):
         """Get data in the given range.
 
         Parameters
@@ -953,6 +954,10 @@ def get_data(self, picks=None, start=0, stop=None,
             'bad' are omitted. If 'NaN', the bad samples are filled with NaNs.
         return_times : bool
             Whether to return times as well. Defaults to False.
+        verbose : bool, str, int, or None
+            If not None, override default verbose level (see
+            :func:`mne.verbose` and :ref:`Logging documentation <tut_logging>`
+            for more). Defaults to self.verbose.
 
         Returns
         -------
@@ -985,29 +990,43 @@ def get_data(self, picks=None, start=0, stop=None,
             if return_times:
                 return data, times
             return data
-
-        used = np.ones(stop - start, bool)
+        n_samples = stop - start  # total number of samples
+        used = np.ones(n_samples, bool)
         for onset, end in zip(onsets, ends):
             if onset >= end:
                 continue
             used[onset - start: end - start] = False
         used = np.concatenate([[False], used, [False]])
         starts = np.where(~used[:-1] & used[1:])[0] + start
         stops = np.where(used[:-1] & ~used[1:])[0] + start
-        if reject_by_annotation == 'omit':
-
-            data = np.zeros((len(picks), (stops - starts).sum()))
-            times = np.zeros(data.shape[1])
-            idx = 0
-            for start, stop in zip(starts, stops):  # get the data
-                if start == stop:
-                    continue
-                end = idx + stop - start
-                data[:, idx:end], times[idx:end] = self[picks, start:stop]
-                idx = end
+        n_kept = (stops - starts).sum()  # kept samples
+        n_rejected = n_samples - n_kept  # rejected samples
+        if n_rejected > 0:
+            if reject_by_annotation == 'omit':
+                msg = ("Omitting {} of {} ({:.2%}) samples, retaining {}"
+                       " ({:.2%}) samples.")
+                logger.info(msg.format(n_rejected, n_samples,
+                                       n_rejected / n_samples,
+                                       n_kept, n_kept / n_samples))
+                data = np.zeros((len(picks), n_kept))
+                times = np.zeros(data.shape[1])
+                idx = 0
+                for start, stop in zip(starts, stops):  # get the data
+                    if start == stop:
+                        continue
+                    end = idx + stop - start
+                    data[:, idx:end], times[idx:end] = self[picks, start:stop]
+                    idx = end
+            else:
+                msg = ("Setting {} of {} ({:.2%}) samples to NaN, retaining {}"
+                       " ({:.2%}) samples.")
+                logger.info(msg.format(n_rejected, n_samples,
+                                       n_rejected / n_samples,
+                                       n_kept, n_kept / n_samples))
+                data, times = self[picks, start:stop]
+                data[:, ~used[1:-1]] = np.nan
         else:
             data, times = self[picks, start:stop]
-            data[:, ~used[1:-1]] = np.nan
 
         if return_times:
             return data, times
diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py
@@ -17,7 +17,7 @@
 from mne.annotations import _handle_meas_date
 from mne.datasets import testing
 from mne.io import read_raw_fif, RawArray, BaseRaw
-from mne.utils import _TempDir
+from mne.utils import _TempDir, catch_logging
 from mne.io.meas_info import _get_valid_units
 
 
@@ -257,3 +257,27 @@ def test_meas_date_orig_time():
     assert raw.annotations.orig_time is None
     assert raw.annotations.onset[0] == 0.5
     assert raw.annotations.duration[0] == 0.2
+
+
+def test_get_data_reject():
+    """Test get_data logging and output for reject_by_annotation."""
+    fs = 256  # passed to create_info as sfreq
+    ch_names = ["C3", "Cz", "C4"]
+    info = create_info(ch_names, sfreq=fs)
+    raw = RawArray(np.zeros((len(ch_names), 10 * fs)), info)  # 2560 samples
+    raw.set_annotations(Annotations(onset=[2, 4], duration=[3, 2],
+                                    description="bad"))  # spans overlap
+
+    with catch_logging() as log:
+        data = raw.get_data(reject_by_annotation="omit", verbose=True)
+        msg = ('Omitting 1024 of 2560 (40.00%) samples, retaining 1536' +
+               ' (60.00%) samples.')
+        assert log.getvalue().strip() == msg
+    assert data.shape == (len(ch_names), 1536)  # rejected samples are dropped
+    with catch_logging() as log:
+        data = raw.get_data(reject_by_annotation="nan", verbose=True)
+        msg = ('Setting 1024 of 2560 (40.00%) samples to NaN, retaining 1536' +
+               ' (60.00%) samples.')
+        assert log.getvalue().strip() == msg
+    assert data.shape == (len(ch_names), 2560)  # shape doesn't change
+    assert np.isnan(data).sum() == 3072  # 3 channels x 1024 rejected samples