
Commit cd65539

Merge pull request #184 from JaerongA/datajoint-spikeinterface

Fix & cleanup ephys_no_curation.py & si_spike_sorting.py

Author: Thinh Nguyen
2 parents 90b81e3 + 01ff816

2 files changed: 18 additions, 42 deletions

element_array_ephys/ephys_no_curation.py (2 additions, 2 deletions)

@@ -1603,8 +1603,8 @@ class Waveform(dj.Part):
         -> master
         -> CuratedClustering.Unit
         ---
-        amplitude: float  # (uV) absolute difference between waveform peak and trough
-        duration: float  # (ms) time between waveform peak and trough
+        amplitude=null: float  # (uV) absolute difference between waveform peak and trough
+        duration=null: float  # (ms) time between waveform peak and trough
         halfwidth=null: float  # (ms) spike width at half max amplitude
         pt_ratio=null: float  # absolute amplitude of peak divided by absolute amplitude of trough relative to 0
         repolarization_slope=null: float  # the repolarization slope was defined by fitting a regression line to the first 30us from trough to peak
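
A minimal sketch of the behavior this change enables: with `=null`, DataJoint treats an attribute as nullable, so a unit whose amplitude or duration cannot be computed can still be inserted. The schema name and table below are hypothetical, and a configured DataJoint connection is assumed.

```python
import datajoint as dj

schema = dj.schema("tutorial_waveform")  # hypothetical schema name; needs a configured connection

@schema
class Waveform(dj.Manual):
    definition = """
    unit_id: int
    ---
    amplitude=null: float  # (uV) nullable: may be missing for unusable units
    duration=null: float   # (ms) nullable
    """

# Nullable attributes can be omitted on insert; they are stored as NULL.
Waveform.insert1({"unit_id": 0})
```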

element_array_ephys/spike_sorting/si_spike_sorting.py (16 additions, 40 deletions)
@@ -1,34 +1,17 @@
 """
-The following DataJoint pipeline implements the sequence of steps in the spike-sorting routine featured in the
-"spikeinterface" pipeline.
-Spikeinterface developed by Alessio Buccino, Samuel Garcia, Cole Hurwitz, Jeremy Magland, and Matthias Hennig (https://github.com/SpikeInterface)
-
-The DataJoint pipeline currently incorporated Spikeinterfaces approach of running Kilosort using a container
-
-The follow pipeline features intermediary tables:
-1. PreProcessing - for preprocessing steps (no GPU required)
-    - create recording extractor and link it to a probe
-    - bandpass filtering
-    - common mode referencing
-2. SIClustering - kilosort (MATLAB) - requires GPU and docker/singularity containers
-    - supports kilosort 2.0, 2.5 or 3.0 (https://github.com/MouseLand/Kilosort.git)
-3. PostProcessing - for postprocessing steps (no GPU required)
-    - create waveform extractor object
-    - extract templates, waveforms and snrs
-    - quality_metrics
+The following DataJoint pipeline implements the sequence of steps in the spike-sorting routine featured in the "spikeinterface" pipeline. Spikeinterface was developed by Alessio Buccino, Samuel Garcia, Cole Hurwitz, Jeremy Magland, and Matthias Hennig (https://github.com/SpikeInterface)
 """
 
-import pathlib
 from datetime import datetime
 
 import datajoint as dj
 import pandas as pd
-import probeinterface as pi
 import spikeinterface as si
 from element_array_ephys import get_logger, probe, readers
 from element_interface.utils import find_full_path
 from spikeinterface import exporters, postprocessing, qualitymetrics, sorters
 
+from .. import get_logger, probe, readers
 from . import si_preprocessing
 
 log = get_logger(__name__)
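
The removed docstring walked through the pipeline's three stages; for orientation, here is a sketch of the PreProcessing stage it described (bandpass filtering and common-mode referencing) on a spikeinterface toy recording. Module paths follow recent spikeinterface releases and may differ by version.

```python
import spikeinterface.extractors as se
import spikeinterface.preprocessing as spre

# A toy recording stands in for a real probe recording.
recording, _ = se.toy_example(duration=10, num_channels=4, seed=0)

# Bandpass filter, then common-median reference, as in the PreProcessing step.
rec_filtered = spre.bandpass_filter(recording, freq_min=300, freq_max=6000)
rec_referenced = spre.common_reference(rec_filtered, reference="global", operator="median")
```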
@@ -64,12 +47,6 @@ def activate(
 
 SI_SORTERS = [s.replace("_", ".") for s in si.sorters.sorter_dict.keys()]
 
-SI_READERS = {
-    "Open Ephys": si.extractors.read_openephys,
-    "SpikeGLX": si.extractors.read_spikeglx,
-    "Intan": si.extractors.read_intan,
-}
-
 
 @schema
 class PreProcessing(dj.Imported):
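
For context, `SI_SORTERS` translates spikeinterface's underscore-style sorter names into the dotted names the pipeline stores as clustering methods. A minimal sketch:

```python
import spikeinterface.sorters as ss

# sorter_dict keys use underscores (e.g. "kilosort2_5"); the pipeline
# stores clustering methods with dots (e.g. "kilosort2.5").
SI_SORTERS = [s.replace("_", ".") for s in ss.sorter_dict.keys()]
print(SI_SORTERS)
```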
@@ -125,9 +102,7 @@ def make(self, key):
         output_dir = find_full_path(ephys.get_ephys_root_data_dir(), output_dir)
         recording_dir = output_dir / sorter_name / "recording"
         recording_dir.mkdir(parents=True, exist_ok=True)
-        recording_file = (
-            recording_dir / "si_recording.pkl"
-        )  # recording cache to be created for each key
+        recording_file = recording_dir / "si_recording.pkl"
 
         # Create SI recording extractor object
         if acq_software == "SpikeGLX":
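
The `si_recording.pkl` written here is reloaded by the later SIClustering and PostProcessing steps. A sketch of that round-trip, assuming the pipeline persists the recording with spikeinterface's `dump_to_pickle` (the dump call itself is outside this hunk):

```python
import spikeinterface as si
import spikeinterface.extractors as se

recording, _ = se.toy_example(duration=10, num_channels=4, seed=0)

# Persist the recording object so downstream pipeline steps can reload it.
recording.dump_to_pickle("si_recording.pkl")
loaded = si.load_extractor("si_recording.pkl")
```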
@@ -142,12 +117,15 @@ def make(self, key):
             assert len(oe_probe.recording_info["recording_files"]) == 1
             data_dir = oe_probe.recording_info["recording_files"][0]
         else:
-            raise NotImplementedError(f"Not implemented for {acq_software}")
+            acq_software = acq_software.replace(" ", "").lower()
+            si_extractor: si.extractors.neoextractors = (
+                si.extractors.extractorlist.recording_extractor_full_dict[acq_software]
+            )  # data extractor object
 
         stream_names, stream_ids = si.extractors.get_neo_streams(
-            acq_software.strip().lower(), folder_path=data_dir
+            acq_software, folder_path=data_dir
         )
-        si_recording: si.BaseRecording = SI_READERS[acq_software](
+        si_recording: si.BaseRecording = si_extractor[acq_software](
             folder_path=data_dir, stream_name=stream_names[0]
         )
 
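
The new branch replaces the hard-coded `SI_READERS` table with a lookup into spikeinterface's extractor registry. A sketch of that resolution logic; the data directory is a hypothetical placeholder, and `recording_extractor_full_dict` is assumed to be keyed by normalized lowercase names:

```python
import spikeinterface.extractors as se

acq_software = "Open Ephys"
data_dir = "/path/to/recording"  # hypothetical session directory

# Normalize "Open Ephys" -> "openephys" to match spikeinterface's naming.
name = acq_software.replace(" ", "").lower()
extractor = se.extractorlist.recording_extractor_full_dict[name]

stream_names, stream_ids = se.get_neo_streams(name, folder_path=data_dir)
recording = extractor(folder_path=data_dir, stream_name=stream_names[0])
```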

@@ -205,23 +183,23 @@ def make(self, key):
             ephys.ClusteringTask * ephys.ClusteringParamSet & key
         ).fetch1("clustering_method", "clustering_output_dir", "params")
         output_dir = find_full_path(ephys.get_ephys_root_data_dir(), output_dir)
-
-        # Get sorter method and create output directory.
         sorter_name = clustering_method.replace(".", "_")
         recording_file = output_dir / sorter_name / "recording" / "si_recording.pkl"
         si_recording: si.BaseRecording = si.load_extractor(recording_file)
 
         # Run sorting
+        # Sorting performed in a dedicated docker environment if the sorter is not built in the spikeinterface package.
         si_sorting: si.sorters.BaseSorter = si.sorters.run_sorter(
             sorter_name=sorter_name,
             recording=si_recording,
             output_folder=output_dir / sorter_name / "spike_sorting",
             remove_existing_folder=True,
             verbose=True,
-            docker_image=True,
+            docker_image=sorter_name not in si.sorters.installed_sorters(),
             **params.get("SI_SORTING_PARAMS", {}),
         )
 
+        # Save sorting object
         sorting_save_path = (
             output_dir / sorter_name / "spike_sorting" / "si_sorting.pkl"
         )
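
The `docker_image` change makes containerization conditional: run locally when the sorter is installed, otherwise pull its docker image. A sketch of the same logic in isolation; the sorter name is a hypothetical choice, and Docker is assumed available for the container path:

```python
import spikeinterface.extractors as se
import spikeinterface.sorters as ss

recording, _ = se.toy_example(duration=10, num_channels=4, seed=0)
sorter_name = "kilosort2_5"  # hypothetical choice

sorting = ss.run_sorter(
    sorter_name=sorter_name,
    recording=recording,
    output_folder="spike_sorting",
    remove_existing_folder=True,
    verbose=True,
    # Fall back to a docker image only when the sorter isn't installed locally.
    docker_image=sorter_name not in ss.installed_sorters(),
)
```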
@@ -253,15 +231,14 @@ class PostProcessing(dj.Imported):
     def make(self, key):
         execution_time = datetime.utcnow()
 
-        # Load recording object.
+        # Load recording & sorting object.
         clustering_method, output_dir, params = (
             ephys.ClusteringTask * ephys.ClusteringParamSet & key
         ).fetch1("clustering_method", "clustering_output_dir", "params")
         output_dir = find_full_path(ephys.get_ephys_root_data_dir(), output_dir)
-
-        # Get sorter method and create output directory.
         sorter_name = clustering_method.replace(".", "_")
         output_dir = find_full_path(ephys.get_ephys_root_data_dir(), output_dir)
+
         recording_file = output_dir / sorter_name / "recording" / "si_recording.pkl"
         sorting_file = output_dir / sorter_name / "spike_sorting" / "si_sorting.pkl"

@@ -301,14 +278,13 @@ def make(self, key):
         _ = si.postprocessing.compute_principal_components(
             waveform_extractor=we, **params.get("SI_QUALITY_METRICS_PARAMS", None)
         )
+        metrics = si.qualitymetrics.compute_quality_metrics(waveform_extractor=we)
+
         # Save the output (metrics.csv to the output dir)
         metrics_output_dir = output_dir / sorter_name / "metrics"
         metrics_output_dir.mkdir(parents=True, exist_ok=True)
-
-        metrics = si.qualitymetrics.compute_quality_metrics(waveform_extractor=we)
         metrics.to_csv(metrics_output_dir / "metrics.csv")
 
-        # Save results
         self.insert1(
             {
                 **key,
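
A sketch of the postprocessing calls this hunk reorders, on a toy recording/sorting pair; `extract_waveforms` and the metric computation are standard spikeinterface API, though signatures vary somewhat across versions:

```python
import spikeinterface as si
import spikeinterface.extractors as se
import spikeinterface.postprocessing as spost
import spikeinterface.qualitymetrics as sqm

recording, sorting = se.toy_example(duration=10, num_channels=4, seed=0)

# The waveform extractor underpins both PCA and the quality metrics.
we = si.extract_waveforms(recording, sorting, folder="waveforms", overwrite=True)

_ = spost.compute_principal_components(waveform_extractor=we, n_components=3)
metrics = sqm.compute_quality_metrics(waveform_extractor=we)
metrics.to_csv("metrics.csv")
```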
