Skip to content

Commit 23fa43c

Browse files
authored
add unicode roundtrip for FIF (mne-tools#12080)
1 parent 6aca4ec commit 23fa43c

File tree

4 files changed

+17
-2
lines changed

4 files changed

+17
-2
lines changed

doc/changes/devel.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Bugs
6565
- Fix parsing of eye-link :class:`~mne.Annotations` when ``apply_offsets=False`` is provided to :func:`~mne.io.read_raw_eyelink` (:gh:`12003` by `Mathieu Scheltienne`_)
6666
- Correctly prune channel-specific :class:`~mne.Annotations` when creating :class:`~mne.Epochs` without the channel(s) included in the channel specific annotations (:gh:`12010` by `Mathieu Scheltienne`_)
6767
- Fix :func:`~mne.viz.plot_volume_source_estimates` with :class:`~mne.VolSourceEstimate` which include a list of vertices (:gh:`12025` by `Mathieu Scheltienne`_)
68+
- Add support for non-ASCII characters in Annotations, Evoked comments, etc. when saving to FIFF format (:gh:`12080` by `Daniel McCloy`_)
6869
- Correctly handle passing ``"eyegaze"`` or ``"pupil"`` to :meth:`mne.io.Raw.pick` (:gh:`12019` by `Scott Huberty`_)
6970

7071
API changes

mne/_fiff/tag.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# License: BSD-3-Clause
55

66
from functools import partial
7+
import html
78
import struct
9+
import re
810

911
import numpy as np
1012
from scipy.sparse import csc_matrix, csr_matrix
@@ -265,7 +267,10 @@ def _read_string(fid, tag, shape, rlims):
265267
"""Read a string tag."""
266268
# Always decode to ISO 8859-1 / latin1 (FIFF standard).
267269
d = _frombuffer_rows(fid, tag.size, dtype=">c", shape=shape, rlims=rlims)
268-
return str(d.tobytes().decode("latin1", "ignore"))
270+
string = str(d.tobytes().decode("latin1", "ignore"))
271+
if re.search(r"&#[0-9a-fA-F]{6};", string):
272+
string = html.unescape(string)
273+
return string
269274

270275

271276
def _read_complex_float(fid, tag, shape, rlims):

mne/_fiff/write.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,10 @@ def write_julian(fid, kind, data):
128128

129129
def write_string(fid, kind, data):
130130
"""Write a string tag."""
131-
str_data = str(data).encode("latin1")
131+
try:
132+
str_data = str(data).encode("latin1")
133+
except UnicodeEncodeError:
134+
str_data = str(data).encode("latin1", errors="xmlcharrefreplace")
132135
data_size = len(str_data) # therefore compute size here
133136
my_dtype = ">a" # py2/3 compatible on writing -- don't ask me why
134137
if data_size > 0:

mne/tests/test_evoked.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,12 @@ def test_io_evoked(tmp_path):
263263
ave_complex = read_evokeds(fname_temp)[0]
264264
assert_allclose(ave.data, ave_complex.data.imag)
265265

266+
# test non-ascii comments (gh 11684)
267+
aves1[0].comment = "🙃"
268+
write_evokeds(tmp_path / "evoked-ave.fif", aves1, overwrite=True)
269+
aves1_read = read_evokeds(tmp_path / "evoked-ave.fif")[0]
270+
assert aves1_read.comment == aves1[0].comment
271+
266272
# test warnings on bad filenames
267273
fname2 = tmp_path / "test-bad-name.fif"
268274
with pytest.warns(RuntimeWarning, match="-ave.fif"):

0 commit comments

Comments
 (0)