Keep _timeseries data as int16 memmap; apply the bit-volts conversion only when needed (at LFP or waveform extraction)

Thinh Nguyen · Thinh Nguyen · commit f9e5fc291c17 · 2021-03-05T14:29:42.000-06:00
diff --git a/elements_ephys/ephys.py b/elements_ephys/ephys.py
@@ -228,7 +228,11 @@ def make(self, key):
             spikeglx_rec_dir = (root_dir / spikeglx_meta_fp).parent
             spikeglx_recording = spikeglx.SpikeGLX(spikeglx_rec_dir)
 
-            lfp = spikeglx_recording.lf_timeseries[:, :-1].T  # exclude the sync channel
+            lfp_chn_ind = spikeglx_recording.lfmeta.recording_channels[-1::-self.skip_chn_counts]
+
+            # Extract LFP data at specified channels and convert to uV
+            lfp = spikeglx_recording.lf_timeseries[:, lfp_chn_ind]  # (sample x channel)
+            lfp = (lfp * spikeglx_recording.get_channel_bit_volts('lf')[lfp_chn_ind]).T  # (channel x sample)
 
             self.insert1(dict(key,
                               lfp_sampling_rate=spikeglx_recording.lfmeta.meta['imSampRate'],
@@ -237,7 +241,7 @@ def make(self, key):
 
             q_electrodes = probe.ProbeType.Electrode * probe.ElectrodeConfig.Electrode * EphysRecording & key
             electrodes = []
-            for recorded_site in np.arange(lfp.shape[0]):
+            for recorded_site in lfp_chn_ind:
                 shank, shank_col, shank_row, _ = spikeglx_recording.apmeta.shankmap['data'][recorded_site]
                 electrodes.append((q_electrodes
                                    & {'shank': shank,
@@ -247,12 +251,17 @@ def make(self, key):
             chn_lfp = list(zip(electrodes, lfp))
             self.Electrode().insert((
                 {**key, **electrode, 'lfp': d}
-                for electrode, d in chn_lfp[-1::-self._skip_chn_counts]), ignore_extra_fields=True)
+                for electrode, d in chn_lfp), ignore_extra_fields=True)
+
         elif acq_software == 'OpenEphys':
             sess_dir = pathlib.Path(get_session_directory(key))
             loaded_oe = openephys.OpenEphys(sess_dir)
             oe_probe = loaded_oe.probes[probe_sn]
-            lfp = oe_probe.lfp_timeseries
+
+            lfp_chn_ind = np.arange(len(oe_probe.lfp_meta['channels_ids']))[-1::-self.skip_chn_counts]
+
+            lfp = oe_probe.lfp_timeseries[:, lfp_chn_ind]  # (sample x channel)
+            lfp = (lfp * oe_probe.lfp_meta['channels_gains'][lfp_chn_ind]).T  # (channel x sample)
             lfp_timestamps = oe_probe.lfp_timestamps
 
             self.insert1(dict(key,
@@ -262,13 +271,13 @@ def make(self, key):
 
             q_electrodes = probe.ProbeType.Electrode * probe.ElectrodeConfig.Electrode * EphysRecording & key
             electrodes = []
-            for chn_idx in oe_probe.lfp_meta['channels_ids']:
+            for chn_idx in oe_probe.lfp_meta['channels_ids'][lfp_chn_ind]:
                 electrodes.append((q_electrodes & {'electrode': chn_idx}).fetch1('KEY'))
 
             chn_lfp = list(zip(electrodes, lfp))
             self.Electrode().insert((
                 {**key, **electrode, 'lfp': d}
-                for electrode, d in chn_lfp[-1::-self._skip_chn_counts]), ignore_extra_fields=True)
+                for electrode, d in chn_lfp), ignore_extra_fields=True)
 
         else:
             raise NotImplementedError(f'LFP extraction from acquisition software of type {acq_software} is not yet implemented')
diff --git a/elements_ephys/readers/openephys.py b/elements_ephys/readers/openephys.py
@@ -120,11 +120,10 @@ def __init__(self, processor):
     def ap_timeseries(self):
         """
         AP data concatenated across recordings. Shape: (sample x channel)
-        Channels' gains (bit_volts) applied - unit: uV
+        Data are stored as int16 - to convert to microvolts, multiply with self.ap_meta['channels_gains']
         """
         if self._ap_timeseries is None:
             self._ap_timeseries = np.hstack([s.signal for s in self.ap_analog_signals]).T
-            self._ap_timeseries *= self.ap_meta['channels_gains']
         return self._ap_timeseries
 
     @property
@@ -137,11 +136,10 @@ def ap_timestamps(self):
     def lfp_timeseries(self):
         """
         LFP data concatenated across recordings. Shape: (sample x channel)
-        Channels' gains (bit_volts) applied - unit: uV
+        Data are stored as int16 - to convert to microvolts, multiply with self.lfp_meta['channels_gains']
         """
         if self._lfp_timeseries is None:
             self._lfp_timeseries = np.hstack([s.signal for s in self.lfp_analog_signals]).T
-            self._lfp_timeseries *= self.lfp_meta['channels_gains']
         return self._lfp_timeseries
 
     @property
@@ -159,6 +157,7 @@ def extract_spike_waveforms(self, spikes, channel, n_wf=500, wf_win=(-32, 32)):
         :return: waveforms (sample x channel x spike)
         """
         channel_ind = [np.where(self.ap_meta['channels_ids'] == chn)[0][0] for chn in channel]
+        channel_bit_volts = self.ap_meta['channels_gains'][channel_ind]
 
         # ignore spikes at the beginning or end of raw data
         spikes = spikes[np.logical_and(spikes > (-wf_win[0] / self.ap_meta['sample_rate']),
@@ -171,6 +170,7 @@ def extract_spike_waveforms(self, spikes, channel, n_wf=500, wf_win=(-32, 32)):
             spike_indices = np.searchsorted(self.ap_timestamps, spikes, side="left")
             # waveform at each spike: (sample x channel x spike)
             spike_wfs = np.dstack([self.ap_timeseries[int(spk + wf_win[0]):int(spk + wf_win[-1]), channel_ind]
+                                   * channel_bit_volts
                                    for spk in spike_indices])
             return spike_wfs
         else:  # if no spike found, return NaN of size (sample x channel x 1)
diff --git a/elements_ephys/readers/spikeglx.py b/elements_ephys/readers/spikeglx.py
@@ -50,11 +50,11 @@ def apmeta(self):
     def ap_timeseries(self):
         """
         AP data: (sample x channel)
-        Channels' gains (bit_volts) applied - unit: uV
+        Data are stored as np.memmap with dtype: int16
+        - to convert to microvolts, multiply with self.get_channel_bit_volts('ap')
         """
         if self._ap_timeseries is None:
             self._ap_timeseries = self._read_bin(self.root_dir / (self.root_name + '.ap.bin'))
-            self._ap_timeseries *= self.get_channel_bit_volts('ap')
         return self._ap_timeseries
 
     @property
@@ -67,16 +67,16 @@ def lfmeta(self):
     def lf_timeseries(self):
         """
         LFP data: (sample x channel)
-        Channels' gains (bit_volts) applied - unit: uV
+        Data are stored as np.memmap with dtype: int16
+        - to convert to microvolts, multiply with self.get_channel_bit_volts('lf')
         """
         if self._lf_timeseries is None:
             self._lf_timeseries = self._read_bin(self.root_dir / (self.root_name + '.lf.bin'))
-            self._lf_timeseries *= self.get_channel_bit_volts('lf')
         return self._lf_timeseries
 
     def get_channel_bit_volts(self, band='ap'):
         """
-        Extract the AP and LF channels' int16 to microvolts
+        Extract the int16-to-microvolts conversion factors of the recorded AP or LF channels - Sync (SY) channels excluded
         Following the steps specified in: https://billkarsh.github.io/SpikeGLX/Support/SpikeGLX_Datafile_Tools.zip
                 dataVolts = dataInt * Vmax / Imax / gain
         """
@@ -86,11 +86,13 @@ def get_channel_bit_volts(self, band='ap'):
             imax = IMAX[self.apmeta.probe_model]
             imroTbl_data = self.apmeta.imroTbl['data']
             imroTbl_idx = 3
+            chn_ind = self.apmeta.get_recording_channels_indices(exclude_sync=True)
 
         elif band == 'lf':
             imax = IMAX[self.lfmeta.probe_model]
             imroTbl_data = self.lfmeta.imroTbl['data']
             imroTbl_idx = 4
+            chn_ind = self.lfmeta.get_recording_channels_indices(exclude_sync=True)
         else:
             raise ValueError(f'Unsupported band: {band} - Must be "ap" or "lf"')
 
@@ -102,25 +104,26 @@ def get_channel_bit_volts(self, band='ap'):
             # 3A, 3B1, 3B2 (NP 1.0)
             chn_gains = [c[imroTbl_idx] for c in imroTbl_data]
 
-        return vmax / imax / np.array(chn_gains) * 1e6  # convert to uV as well
+        chn_gains = np.array(chn_gains)[chn_ind]
+
+        return vmax / imax / chn_gains * 1e6  # convert to uV as well
 
     def _read_bin(self, fname):
         nchan = self.apmeta.meta['nSavedChans']
         dtype = np.dtype((np.int16, nchan))
         return np.memmap(fname, dtype, 'r')
 
-    def extract_spike_waveforms(self, spikes, channel, n_wf=500, wf_win=(-32, 32), bit_volts=1):
+    def extract_spike_waveforms(self, spikes, channel_ind, n_wf=500, wf_win=(-32, 32)):
         """
         :param spikes: spike times (in second) to extract waveforms
-        :param channel: channel (name, not indices) to extract waveforms
+        :param channel_ind: channel indices (of shankmap) to extract the waveforms from
         :param n_wf: number of spikes per unit to extract the waveforms
         :param wf_win: number of sample pre and post a spike
-        :param bit_volts: scalar required to convert int16 values into microvolts (default of 1)
-        :return: waveforms (sample x channel x spike)
+        :return: waveforms (in uV) - shape: (sample x channel x spike)
         """
+        channel_bit_volts = self.get_channel_bit_volts('ap')[channel_ind]
 
         data = self.ap_timeseries
-        channel_idx = [np.where(self.apmeta.recording_channels == chn)[0][0] for chn in channel]
 
         spikes = np.round(spikes * self.apmeta.meta['imSampRate']).astype(int)  # convert to sample
         # ignore spikes at the beginning or end of raw data
@@ -130,10 +133,12 @@ def extract_spike_waveforms(self, spikes, channel, n_wf=500, wf_win=(-32, 32), b
         spikes = spikes[:n_wf]
         if len(spikes) > 0:
             # waveform at each spike: (sample x channel x spike)
-            spike_wfs = np.dstack([data[int(spk + wf_win[0]):int(spk + wf_win[-1]), channel_idx] for spk in spikes])
-            return spike_wfs * bit_volts
+            spike_wfs = np.dstack([data[int(spk + wf_win[0]):int(spk + wf_win[-1]), channel_ind]
+                                   * channel_bit_volts
+                                   for spk in spikes])
+            return spike_wfs
         else:  # if no spike found, return NaN of size (sample x channel x 1)
-            return np.full((len(range(*wf_win)), len(channel), 1), np.nan)
+            return np.full((len(range(*wf_win)), len(channel_ind), 1), np.nan)
 
 
 class SpikeGLXMeta:
@@ -177,7 +182,9 @@ def __init__(self, meta_filepath):
         self.shankmap = self._parse_shankmap(self.meta['~snsShankMap']) if '~snsShankMap' in self.meta else None
         self.imroTbl = self._parse_imrotbl(self.meta['~imroTbl']) if '~imroTbl' in self.meta else None
 
-        self._recording_channels = None
+        # Channels being recorded, excluding Sync channels - basically a 1-1 mapping to shankmap
+        self.recording_channels = [int(v[0]) for k, v in self.chanmap.items()
+                                   if k != 'shape' and not k.startswith('SY')]
 
     @staticmethod
     def _parse_chanmap(raw):
@@ -208,6 +215,9 @@ def _parse_chanmap(raw):
     @staticmethod
     def _parse_shankmap(raw):
         """
+        The shankmap contains the shank details
+            of each electrode site, for the recorded sites only
+
         https://github.com/billkarsh/SpikeGLX/blob/master/Markdown/UserManual.md#shank-map
         Parse shank map header structure. Converts:
 
@@ -234,6 +244,10 @@ def _parse_shankmap(raw):
     @staticmethod
     def _parse_imrotbl(raw):
         """
+        The imro table contains info for all electrode sites (no sync)
+            for a particular electrode configuration (all 384 sites)
+        Note: not all of these 384 sites are necessarily recorded
+
         https://github.com/billkarsh/SpikeGLX/blob/master/Markdown/UserManual.md#imro-per-channel-settings
         Parse imro tbl structure. Converts:
 
@@ -257,8 +271,17 @@ def _parse_imrotbl(raw):
 
         return res
 
-    @property
-    def recording_channels(self):
+    def get_recording_channels_indices(self, exclude_sync=False):
+        """
+        The indices of recorded channels (in chanmap) with respect to the channels listed in the imro table
+        """
+        recorded_chns_ind = [int(v[0]) for k, v in self.chanmap.items()
+                             if k != 'shape' and (not k.startswith('SY') if exclude_sync else True)]
+        orig_chns_ind = self.get_original_chans()
+        _, _, chns_ind = np.intersect1d(orig_chns_ind, recorded_chns_ind, return_indices=True)
+        return chns_ind
+
+    def get_original_chans(self):
         """
         Because you can selectively save channels, the
         ith channel in the file isn't necessarily the ith acquired channel.
@@ -267,23 +290,20 @@ def recording_channels(self):
         Credit to https://billkarsh.github.io/SpikeGLX/Support/SpikeGLX_Datafile_Tools.zip
             OriginalChans() function
         """
-        if self._recording_channels is None:
-            if self.meta['snsSaveChanSubset'] == 'all':
-                # output = int32, 0 to nSavedChans - 1
-                self._recording_channels = np.arange(0, int(self.meta['nSavedChans']))
-            else:
-                # parse the snsSaveChanSubset string
-                # split at commas
-                chStrList = self.meta['snsSaveChanSubset'].split(sep=',')
-                self._recording_channels = np.arange(0, 0)  # creates an empty array of int32
-                for sL in chStrList:
-                    currList = sL.split(sep=':')
-                    # each set of continuous channels specified by chan1:chan2 inclusive
-                    newChans = np.arange(int(currList[0]), int(currList[min(1, len(currList))]) + 1)
-
-                    self._recording_channels = np.append(self._recording_channels, newChans)
-        return self._recording_channels
-
+        if self.meta['snsSaveChanSubset'] == 'all':
+            # output = int32, 0 to nSavedChans - 1
+            chans = np.arange(0, int(self.meta['nSavedChans']))
+        else:
+            # parse the snsSaveChanSubset string
+            # split at commas
+            chStrList = self.meta['snsSaveChanSubset'].split(sep = ',')
+            chans = np.arange(0, 0)  # creates an empty array of int32
+            for sL in chStrList:
+                currList = sL.split(sep = ':')
+                # each set of continuous channels specified by chan1:chan2 inclusive
+                newChans = np.arange(int(currList[0]), int(currList[min(1, len(currList) - 1)]) + 1)
+                chans = np.append(chans, newChans)
+        return chans
 
 # ============= HELPER FUNCTIONS =============