Skip to content

add OTB (EMG) reader #13303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mne/io/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ __all__ = [
"read_raw_nihon",
"read_raw_nirx",
"read_raw_nsx",
"read_raw_otb",
"read_raw_persyst",
"read_raw_snirf",
"show_fiff",
Expand Down Expand Up @@ -87,5 +88,6 @@ from .nicolet import read_raw_nicolet
from .nihon import read_raw_nihon
from .nirx import read_raw_nirx
from .nsx import read_raw_nsx
from .otb import read_raw_otb
from .persyst import read_raw_persyst
from .snirf import read_raw_snirf
7 changes: 7 additions & 0 deletions mne/io/otb/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""OTB module for reading EMG data in OTB4 and OTB+ formats."""

# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

from .otb import read_raw_otb
251 changes: 251 additions & 0 deletions mne/io/otb/otb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import tarfile
from collections import Counter
from datetime import datetime
from pathlib import Path

import numpy as np
from defusedxml import ElementTree as ET

from ..._fiff.meas_info import create_info
from ...utils import _check_fname, fill_doc, logger, verbose, warn
from ..base import BaseRaw

OTB_PLUS_DTYPE = np.int16 # the file I have is int16; TBD if *all* OTB+ are like that
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@OTBioelettronica there are several TODO items in the code here that maybe you can easily answer. Here's the first: are the data in .sig files always 16-bit signed integer?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is 16-bit when recording EMG and 24-bit when recording EEG. Have a look at https://otbioelettronica.it/download/137/otb-file-structure/2665/otb-structure (page 2, bullet point 'Raw data')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I confirm what @klotz-t wrote! The .sig file is a binary file containing raw data. Data are received from the device and saved immediately. Representation can be on 16-bit or on 24-bit depending on the device configuration. Also, the sampling frequency can be different depending on the device configuration. Most of the configurations are:

  • EEG : 500 Hz, 24-bit
  • EMG: 2000Hz, 16-bit
    there can be others with different sampling frequencies but the bits resolutions are only 16 or 24 bits

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @klotz-t for pointing out the file format spec document (embarassed I didn't find that before asking 😳). @OTBioelettronica is there a similar document for .otb4 format? I didn't see one on the website.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote that document some while ago and, unfortunately, I still have to do it for the .otb4 file format :(

FORMAT_MAPPING = dict(
d="double",
f="single",
i="int",
h="short",
)
OTB_PLUS_FORMAT = FORMAT_MAPPING[OTB_PLUS_DTYPE().dtype.char]


def _parse_patient_xml(tree):
"""Convert an ElementTree to a dict."""

def _parse_sex(sex):
# TODO English-centric; any value of "sex" not starting with "m" or "f" will get
# classed as "unknown". TBD if non-English-like values in the XML are possible
# (e.g. maybe the recording GUI is localized but the XML values are not?)
return dict(m=1, f=2)[sex.lower()[0]] if sex else 0

def _parse_date(dt):
return datetime.fromisoformat(dt).date()

subj_info_mapping = (
("family_name", "last_name", str),
("first_name", "first_name", str),
("weight", "weight", float),
("height", "height", float),
("sex", "sex", _parse_sex),
("birth_date", "birthday", _parse_date),
)
subject_info = dict()
for source, target, func in subj_info_mapping:
if value := tree.find(source):
subject_info[target] = func(value.text)
return subject_info


@fill_doc
class RawOTB(BaseRaw):
"""Raw object from an OTB file.

Parameters
----------
fname : path-like
Path to the OTB file.
%(verbose)s

See Also
--------
mne.io.Raw : Documentation of attributes and methods.
"""

@verbose
def __init__(self, fname, *, verbose=None):
# Adapted from the MATLAB code at:
# https://github.com/OTBioelettronica/OTB-Matlab/tree/main/MATLAB%20Open%20and%20Processing%20OTBFiles
# with permission to relicense as BSD-3 granted here:
# https://github.com/OTBioelettronica/OTB-Python/issues/2#issuecomment-2979135882
fname = str(_check_fname(fname, "read", True, "fname"))
otb_version = "four" if fname.endswith(".otb4") else "plus"
logger.info(f"Loading {fname}")

self.preload = True # lazy loading not supported

highpass = list()
lowpass = list()
ch_names = list()
ch_types = list()

# TODO verify these are the only non-data channel IDs
NON_DATA_CHS = ("Quaternion", "BufferChannel", "RampChannel")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@OTBioelettronica we need to distinguish which channels are EMG data from other channel types. Are these 3 the only non-EMG channel IDs, or are there others?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are also arbitrary auxiliary input channels, e.g., AUX Force, AUX Acceleration, ...

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @klotz-t for answering this!
To recap there are few additional non-EMG channels:

  • Quartenions (intertial sensor)
  • Buffer and Ramp (these are control channels for debugging purposes)
  • AUX (Volt value - BNC connector)
  • Load Cell channels (used with dynamometer)


with tarfile.open(fname, "r") as fid:
fnames = fid.getnames()
# the .sig file is the binary channel data
sig_fname = [_fname for _fname in fnames if _fname.endswith(".sig")]
assert len(sig_fname) == 1 # TODO is this a valid assumption?
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there guaranteed to be only 1 .sig file in each OTB+ archive? The MATLAB code suggests "yes" but it's not totally clear.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a pure user experience, this seems to be true (i.e., using this assumption, our own readers work fine).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, each acquisition contains only one .sig file. However, there are two exceptions to this rule:

  1. Imported acquisitions: A user can import one or more acquisitions into another session. In that case, the corresponding .sig files are copied into the archive, resulting in multiple .sig files.
  2. Novecento device: this device can have many connected probes. Since every probe can have a different configuration (bits resolution and sampling frequency), the OTBiolab25 software split the data in many .sig files (one for each probe) However, Novecento is only supported by our new software, and this case is relevant only for .otb4 files, not .otb+.

sig_fname = sig_fname[0]
data_size_bytes = fid.getmember(sig_fname).size
# the .xml file with the matching basename contains signal metadata
metadata_fname = str(Path(sig_fname).with_suffix(".xml"))
metadata = ET.fromstring(fid.extractfile(metadata_fname).read())
# patient info
patient_info_xml = ET.fromstring(fid.extractfile("patient.xml").read())
# structure of `metadata` is:
# Device
# └ Channels
# ├ Adapter
# | ├ Channel
# | ├ ...
# | └ Channel
# ├ ...
# └ Adapter
# ├ Channel
# ├ ...
# └ Channel
assert metadata.tag == "Device"
sfreq = float(metadata.attrib["SampleFrequency"])
n_chan = int(metadata.attrib["DeviceTotalChannels"])
bit_depth = int(metadata.attrib["ad_bits"])
assert bit_depth == 16, f"expected 16-bit data, got {bit_depth}" # TODO verify
gains = np.full(n_chan, np.nan)
# check in advance where we'll need to append indices to uniquify ch_names
n_ch_by_type = Counter([ch.get("ID") for ch in metadata.iter("Channel")])
dupl_ids = [k for k, v in n_ch_by_type.items() if v > 1]
# iterate over adapters & channels to extract gain, filters, names, etc
for adapter in metadata.iter("Adapter"):
ch_offset = int(adapter.get("ChannelStartIndex"))
adapter_gain = float(adapter.get("Gain"))
# we only care about lowpass/highpass on the data channels
# TODO verify these two are the only non-data adapter types
if adapter.get("ID") not in ("AdapterQuaternions", "AdapterControl"):
highpass.append(float(adapter.get("HighPassFilter")))
lowpass.append(float(adapter.get("LowPassFilter")))

for ch in adapter.iter("Channel"):
# uniquify channel names by appending channel index (if needed)
ix = int(ch.get("Index"))
ch_id = ch.get("ID")
# TODO better to call these "emg_1" etc? should we left-zeropad ix?
ch_names.append(f"{ch_id}_{ix}" if ch_id in dupl_ids else ch_id)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right now we get channel names like "HD10MM0804_0", "HD10MM0804_1", etc. Seems like "EMG_000", "EMG_001" might be more user-friendly.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that channel names such as EMG_001 are more user-friendly (names such as HD10MM0804 refer to specific EMG electrode arrays). Yet, it seems not to be straightforward to automatically distinguish EMG and non-EMG channels as there exist a lot of different grids, and depending on the adapter configuration, the same grid can even be input into multiple adapters (e.g., one can use four 16-channel adapters for one 64-channel grid). Thus, in a self-written reader function, we implemented the number of adapters used for acquiring EMG as a function argument.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you again @klotz-t for pointing this out!
I know is not so user friendly but as you wrote there a lot of different grid and matrices. I want to clarify the nomenclature to make it easier to read.
For example HD10MM0804:

  • HD is the connector type (GR is another one for example)
  • 10MM is the i.e.d (inter electrodic distance)
  • 0804 is the number of rows and columns

# store gains
gains[ix + ch_offset] = float(ch.get("Gain")) * adapter_gain

# TODO verify ch_type for quats, buffer channel, and ramp channel
ch_types.append("misc" if ch_id in NON_DATA_CHS else "emg")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@larsoner is misc the right channel type for these?


# compute number of samples
n_samples, extra = divmod(data_size_bytes, (bit_depth / 8) * n_chan)
if extra != 0:
warn(
f"Number of bytes in file ({data_size_bytes}) not evenly divided by "
f"number of channels ({n_chan}). File may be corrupted or truncated."
)
n_samples = int(n_samples)

# check filter freqs
if len(highpass) > 1:
warn(
"More than one highpass frequency found in file; choosing highest "
f"({max(highpass)})"
)
if len(lowpass) > 1:
warn(
"More than one lowpass frequency found in file; choosing lowest "
f"({min(lowpass)})"
)
highpass = max(highpass)
lowpass = min(lowpass)

# create info
info = create_info(ch_names=ch_names, ch_types=ch_types, sfreq=sfreq)
subject_info = _parse_patient_xml(patient_info_xml)
info.update(subject_info=subject_info)
with info._unlock():
info["highpass"] = highpass
info["lowpass"] = lowpass
if meas_date := patient_info_xml.find("time"):
info["meas_date"] = datetime.fromisoformat(meas_date.text)

# sanity check
if dur := patient_info_xml.find("duration"):
assert float(dur.text) == n_samples / sfreq

# TODO other fields in patient_info_xml:
# protocol_code, place, pathology, commentsPatient, comments

# TODO parse files markers_0.xml, markers_1.xml as annotations

# populate raw_extras
raw_extras = dict(
n_samples=n_samples,
gains=gains,
dtype=OTB_PLUS_DTYPE,
data_size_bytes=data_size_bytes,
sig_fname=sig_fname,
bit_depth=bit_depth,
otb_version=otb_version,
)

super().__init__(
info,
preload=True,
last_samps=(n_samples - 1,),
filenames=[fname],
orig_format=OTB_PLUS_FORMAT,
# orig_units="mV", # TODO verify
raw_extras=[raw_extras],
verbose=verbose,
)

def _preload_data(self, preload):
"""Load raw data from an OTB+ file."""
_extras = self._raw_extras[0]
sig_fname = _extras["sig_fname"]
power_supply = 3.3 if _extras["otb_version"] == "plus" else None
bit_depth = _extras["bit_depth"]
gains = _extras["gains"]

with tarfile.open(self.filenames[0], "r") as fid:
_data = np.frombuffer(
fid.extractfile(sig_fname).read(),
dtype=_extras["dtype"],
)
# TODO is the factor of 1000 (copied from the MATLAB code) to convert Volts
# milliVolts? If so we should remove it (in MNE we always store in SI units, so
# we should keep it in V)
self._data = _data * (1000 * power_supply / 2**bit_depth / gains)[:, np.newaxis]


@fill_doc
def read_raw_otb(fname, verbose=None) -> RawOTB:
"""Reader for an OTB (.otb4/.otb+) recording.

Parameters
----------
fname : path-like
Path to the OTB file.
%(verbose)s

Returns
-------
raw : instance of RawOTB
A Raw object containing OTB data.
See :class:`mne.io.Raw` for documentation of attributes and methods.

See Also
--------
mne.io.Raw : Documentation of attributes and methods of RawPersyst.

Notes
-----
``preload=False`` is not supported for this file format.
"""
return RawOTB(fname, verbose=verbose)
Loading