Skip to content

Commit 4011387

Browse files
authored
Boost parsing performance (#67)
* Use pandas to boost parsing performance by ~58% (from ~26s to ~11s parsing time)
* Remove branching for ~11% boost in performance (~11.1 to ~9.8s parse time)
* Simplify hex string parsing and boost performance by ~18% (9.8s to 8.1s parse time)
* Remove TODO
* Catch and raise ClassificationError
1 parent 62a3a69 commit 4011387

File tree

2 files changed

+14
-39
lines changed

2 files changed

+14
-39
lines changed

backend/classification/file_loading.py

Lines changed: 13 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
Function utilities to convert data acquired on an OpenBCI
33
Cyton board using the SD card logging strategy.
44
5-
TODO: We should look into optimizing this conversion. We currently
6-
convert one line at a time, while a vectorized approach would be much more efficient,
7-
as the conversion of a line does not depend on the other lines.
85
TODO: Consider cropping file (from bed to wake up time) here, before the for loop. Have to consider
96
not all lines hold sample values (i.e. first line with comment and second line with a single timestamp).
107
@@ -17,6 +14,7 @@
1714
from mne import create_info
1815
from mne.io import RawArray
1916
import numpy as np
17+
import pandas as pd
2018

2119
from classification.exceptions import ClassificationError
2220
from classification.config.constants import (
@@ -41,16 +39,19 @@ def get_raw_array(file):
4139
Returns:
4240
- mne.RawArray of the two EEG channels of interest
4341
"""
44-
lines = file.readlines()
45-
eeg_raw = np.zeros((len(lines) - SKIP_ROWS, len(EEG_CHANNELS)))
4642

47-
for index, line in enumerate(lines[SKIP_ROWS:]):
48-
line_splitted = line.decode('utf-8').split(',')
43+
retained_columns = tuple(range(1, len(EEG_CHANNELS) + 1))
4944

50-
if len(line_splitted) < CYTON_TOTAL_NB_CHANNELS:
51-
raise ClassificationError()
45+
try:
46+
eeg_raw = pd.read_csv(file,
47+
skiprows=SKIP_ROWS,
48+
usecols=retained_columns
49+
).to_numpy()
50+
except Exception:
51+
raise ClassificationError()
5252

53-
eeg_raw[index] = _get_decimals_from_hexadecimal_strings(line_splitted)
53+
hexstr_to_int = np.vectorize(_hexstr_to_int)
54+
eeg_raw = hexstr_to_int(eeg_raw)
5455

5556
raw_object = RawArray(
5657
SCALE_V_PER_COUNT * np.transpose(eeg_raw),
@@ -71,38 +72,11 @@ def get_raw_array(file):
7172
return raw_object
7273

7374

74-
def _get_decimals_from_hexadecimal_strings(lines):
75-
"""Converts the array of hexadecimal strings to an array of decimal values of the EEG channels
76-
Input:
77-
- lines: splitted array of two complement hexadecimal
78-
Returns:
79-
- array of decimal values for each EEG channel of interest
80-
"""
81-
return np.array([
82-
_convert_hexadecimal_to_signed_decimal(hex_value)
83-
for hex_value in lines[FILE_COLUMN_OFFSET:FILE_COLUMN_OFFSET + len(EEG_CHANNELS)]
84-
])
85-
86-
87-
def _convert_hexadecimal_to_signed_decimal(hex_value):
88-
"""Converts the hexadecimal value encoded on OpenBCI Cyton SD card to signed decimal
89-
Input:
90-
- hex_value: signed hexadecimal value
91-
Returns:
92-
- decimal value
93-
"""
94-
return _get_twos_complement(hex_value) if len(hex_value) % 2 == 0 else 0
95-
96-
97-
def _get_twos_complement(hexstr):
75+
def _hexstr_to_int(hexstr):
9876
"""Converts a two complement hexadecimal value in a string to a signed float
9977
Input:
10078
- hex_value: signed hexadecimal value
10179
Returns:
10280
- decimal value
10381
"""
104-
bits = len(hexstr) * 4
105-
value = int(hexstr, 16)
106-
if value & (1 << (bits - 1)):
107-
value -= 1 << bits
108-
return value
82+
return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True)

backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ onnxruntime==1.5.2
77
numpy==1.19.2
88
scipy==1.5.2
99
scikit-learn==0.23.2
10+
pandas==1.1.4
1011
requests==2.24.0
1112
hmmlearn==0.2.4
1213
certifi==2020.6.20

0 commit comments

Comments
 (0)