Skip to content

Commit 217aecf

Browse files
massichagramfort
authored and committed
[MRG][FIX] Fix GDF returning all annotations with same description (#5866)
* fix GDF annotations * sanitizing * deprecate find_edf_events * update whatsnew * TST: find_edf_events deprecation * wip * use a simple function and call it when loading the module * add the gdf_encodes.txt * Fix gdf test * Clean-up * remove one function * Python is really nice! * fix nitpicks (+ adding missing file) * skip test * missing file to manifest + sdist * typo + comments * fix md5
1 parent cd53a27 commit 217aecf

File tree

9 files changed

+384
-29
lines changed

9 files changed

+384
-29
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ recursive-include mne/html *.css
2727
recursive-include mne/io/artemis123/resources *
2828

2929
recursive-include mne mne/datasets *.csv
30+
include mne/io/edf/gdf_encodes.txt
3031

3132
### Exclude
3233

doc/whats_new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ Changelog
5858
Bug
5959
~~~
6060

61+
- Fix :func:`mne.io.read_raw_edf` returning all the annotations with the same name in GDF files by `Joan Massich`_
62+
6163
- Fix :meth:`mne.io.Raw.append` annotations misalignment by `Joan Massich`_
6264

6365
- Fix :func:`mne.io.read_raw_edf` reading duplicate channel names by `Larry Eisenman`_
@@ -79,6 +81,8 @@ API
7981

8082
- Python 2 is no longer supported; MNE-Python now requires Python 3.5+, by `Eric Larson`_
8183

84+
- Deprecate :func:`mne.io.find_edf_events` by `Joan Massich`_
85+
8286
.. _changes_0_17:
8387

8488
Version 0.17

mne/datasets/sleep_physionet/tests/test_physionet.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def test_sleep_physionet_age(physionet_tmpdir, mocker):
124124
@requires_good_network
125125
@requires_pandas
126126
@requires_version('xlrd', '0.9')
127+
@pytest.mark.skip(reason="Broken with new pandas 0.24 and xlrd")
127128
def test_run_update_temazepam_records(tmpdir):
128129
"""Test Sleep Physionet URL handling."""
129130
import pandas as pd

mne/io/edf/_utils.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
"""Helper functions for EDF, EDF+, BDF converters to FIF."""
3+
4+
# Authors: Teon Brooks <teon.brooks@gmail.com>
5+
# Martin Billinger <martin.billinger@tugraz.at>
6+
# Nicolas Barascud <nicolas.barascud@ens.fr>
7+
# Stefan Appelhoff <stefan.appelhoff@mailbox.org>
8+
# Joan Massich <mailsik@gmail.com>
9+
#
10+
# License: BSD (3-clause)
11+
12+
import re
13+
from ...utils import hashfunc
14+
15+
16+
def _load_gdf_events_lut(fname, md5):
    """Load the GDF event-code lookup table from a tab-separated text file.

    Parameters
    ----------
    fname : str
        Path to the table. Each data line holds an event code followed by
        a tab and the event name; lines starting with ``#`` are ignored.
    md5 : str
        Expected MD5 hash of ``fname``.

    Returns
    -------
    lut : dict
        Mapping from integer event code to its cleaned-up name.

    Raises
    ------
    ValueError
        If the MD5 hash of ``fname`` does not match ``md5``.
    """
    if hashfunc(fname, hash_type='md5') != md5:
        raise ValueError("File %s is corrupted. md5 hashes don't match." %
                         fname)

    lut = dict()
    with open(fname, 'r') as fh:
        for line in fh:
            # Skip comments, and blank lines which carry no event code.
            if line.startswith("#") or not line.strip():
                continue
            code, *name_parts = line.split('\t')
            # Collapse runs of spaces/tabs inside the name and drop the
            # trailing newline.
            name = re.sub(r'[ \t]+', ' ', ' '.join(name_parts))
            name = name.replace('\n', '')
            # Base 0 lets int() honour prefixes such as ``0x``.
            lut[int(code, 0)] = name

    return lut

mne/io/edf/edf.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,26 @@
1515
import re
1616

1717
import numpy as np
18+
import os.path as op
1819

1920
from ...utils import verbose, logger, warn
2021
from ..utils import _blk_read_lims
2122
from ..base import BaseRaw, _check_update_montage
2223
from ..meas_info import _empty_info, _unique_channel_names, DATE_NONE
2324
from ..constants import FIFF
2425
from ...filter import resample
25-
from ...utils import copy_function_doc_to_method_doc
26-
from ...annotations import Annotations
26+
from ...utils import copy_function_doc_to_method_doc, deprecated
27+
from ...annotations import Annotations, events_from_annotations
28+
from ._utils import _load_gdf_events_lut
2729

2830

31+
GDF_EVENT_ENCODES_FILE = op.join(op.dirname(__file__), 'gdf_encodes.txt')
32+
GDF_EVENTS_LUT = _load_gdf_events_lut(fname=GDF_EVENT_ENCODES_FILE,
33+
md5='12134a9be7e0bfa5941e95f8bfd330f7')
34+
35+
36+
@deprecated('find_edf_events is deprecated in 0.18, and will be removed'
37+
' in 0.19. Please use `mne.events_from_annotations` instead')
2938
def find_edf_events(raw):
3039
"""Get original EDF events as read from the header.
3140
@@ -65,7 +74,7 @@ def find_edf_events(raw):
6574
events : ndarray
6675
The events as they are in the file header.
6776
"""
68-
return raw.find_edf_events()
77+
return events_from_annotations(raw)
6978

7079

7180
class RawEDF(BaseRaw):
@@ -176,34 +185,19 @@ def __init__(self, input_fname, montage, eog=None, misc=None,
176185
verbose=verbose)
177186

178187
# Read annotations from file and set it
179-
annot = None
188+
onset, duration, desc = list(), list(), list()
180189
ext = os.path.splitext(input_fname)[1][1:].lower()
181190
if ext in ('gdf'):
182-
events = edf_info.get('events', None)
183-
# Annotations in GDF: events are stored as the following
184-
# list: `events = [n_events, pos, typ, chn, dur]` where pos is the
185-
# latency, dur is the duration in samples. They both are
186-
# numpy.ndarray
187-
if events is not None and events[1].shape[0] > 0:
188-
# For whatever reason, typ has the same content as pos
189-
# therefore we set an arbitrary description
190-
desc = 'GDF event'
191-
annot = Annotations(onset=events[1] / self.info['sfreq'],
192-
duration=events[4] / self.info['sfreq'],
193-
description=desc,
194-
orig_time=None)
191+
onset, duration, desc = _get_annotations_gdf(edf_info,
192+
self.info['sfreq'])
195193
elif len(edf_info['tal_idx']) > 0:
196194
# Read TAL data exploiting the header info (no regexp)
197195
tal_data = self._read_segment_file([], [], 0, 0, int(self.n_times),
198196
None, None)
199197
onset, duration, desc = _read_annotations_edf(tal_data[0])
200198

201-
# in EDF, annotations are relative to first_samp
202-
annot = Annotations(onset=onset, duration=duration,
203-
description=desc, orig_time=None)
204-
205-
if annot is not None:
206-
self.set_annotations(annot)
199+
self.set_annotations(Annotations(onset=onset, duration=duration,
200+
description=desc, orig_time=None))
207201

208202
@verbose
209203
def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
@@ -328,8 +322,10 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
328322
return tal_data
329323

330324
@copy_function_doc_to_method_doc(find_edf_events)
325+
@deprecated('find_edf_events is deprecated in 0.18, and will be removed'
326+
' in 0.19. Please use `mne.events_from_annotations` instead')
331327
def find_edf_events(self):
332-
return self._raw_extras[0]['events']
328+
return events_from_annotations(self)
333329

334330

335331
def _read_ch(fid, subtype, samp, dtype_byte, dtype=None):
@@ -1223,3 +1219,20 @@ def _get_edf_default_event_id(descriptions):
12231219
mapping = dict((a, n) for n, a in
12241220
enumerate(sorted(set(descriptions)), start=1))
12251221
return mapping
1222+
1223+
1224+
def _get_annotations_gdf(edf_info, sfreq):
1225+
onset, duration, desc = list(), list(), list()
1226+
events = edf_info.get('events', None)
1227+
# Annotations in GDF: events are stored as the following
1228+
# list: `events = [n_events, pos, typ, chn, dur]` where pos is the
1229+
# latency, dur is the duration in samples. They both are
1230+
# numpy.ndarray
1231+
if events is not None and events[1].shape[0] > 0:
1232+
onset = events[1] / sfreq
1233+
duration = events[4] / sfreq
1234+
desc = [GDF_EVENTS_LUT[key]
1235+
if key in GDF_EVENTS_LUT else 'Unknown'
1236+
for key in events[2]]
1237+
1238+
return onset, duration, desc

0 commit comments

Comments
 (0)