Skip to content

Commit 983d61a

Browse files
authored
Merge pull request #10 from ttngu207/main
Ephys pipeline with support for multiple curations
2 parents 70a813b + e98b34f commit 983d61a

File tree

7 files changed

+305
-222
lines changed

7 files changed

+305
-222
lines changed

elements_ephys/ephys.py

Lines changed: 79 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -328,16 +328,6 @@ def insert_new_params(cls, processing_method: str, paramset_idx: int, paramset_d
328328
cls.insert1(param_dict)
329329

330330

331-
@schema
332-
class ClusteringTask(dj.Manual):
333-
definition = """
334-
-> EphysRecording
335-
-> ClusteringParamSet
336-
---
337-
clustering_output_dir: varchar(255) # clustering output directory relative to root data directory
338-
"""
339-
340-
341331
@schema
342332
class ClusterQualityLabel(dj.Lookup):
343333
definition = """
@@ -354,15 +344,82 @@ class ClusterQualityLabel(dj.Lookup):
354344
]
355345

356346

347+
@schema
348+
class ClusteringTask(dj.Manual):
349+
definition = """
350+
-> EphysRecording
351+
-> ClusteringParamSet
352+
---
353+
clustering_output_dir: varchar(255) # clustering output directory relative to root data directory
354+
task_mode='load': enum('load', 'trigger') # 'load': load computed analysis results, 'trigger': trigger computation
355+
"""
356+
357+
357358
@schema
358359
class Clustering(dj.Imported):
360+
"""
361+
A processing table to handle each ClusteringTask:
362+
+ If `task_mode == "trigger"`: trigger clustering analysis according to the ClusteringParamSet (e.g. launch a kilosort job)
363+
+ If `task_mode == "load"`: verify output
364+
"""
359365
definition = """
360366
-> ClusteringTask
361367
---
362-
clustering_time: datetime # time of generation of this set of clustering results
363-
quality_control: bool # has this clustering result undergone quality control?
364-
manual_curation: bool # has manual curation been performed on this clustering result?
365-
clustering_note='': varchar(2000)
368+
clustering_time: datetime # time of generation of this set of clustering results
369+
"""
370+
371+
def make(self, key):
372+
root_dir = pathlib.Path(get_ephys_root_data_dir())
373+
task_mode, output_dir = (ClusteringTask & key).fetch1('task_mode', 'clustering_output_dir')
374+
ks_dir = root_dir / output_dir
375+
376+
if task_mode == 'load':
377+
ks = kilosort.Kilosort(ks_dir) # check if the directory is a valid Kilosort output
378+
creation_time, _, _ = kilosort.extract_clustering_info(ks_dir)
379+
elif task_mode == 'trigger':
380+
raise NotImplementedError('Automatic triggering of clustering analysis is not yet supported')
381+
else:
382+
raise ValueError(f'Unknown task mode: {task_mode}')
383+
384+
self.insert1({**key, 'clustering_time': creation_time})
385+
386+
387+
@schema
388+
class Curation(dj.Manual):
389+
definition = """
390+
-> Clustering
391+
curation_id: int
392+
---
393+
curation_time: datetime # time of generation of this set of curated clustering results
394+
curation_output_dir: varchar(255) # output directory of the curated results, relative to root data directory
395+
quality_control: bool # has this clustering result undergone quality control?
396+
manual_curation: bool # has manual curation been performed on this clustering result?
397+
curation_note='': varchar(2000)
398+
"""
399+
400+
def create1_from_clustering_task(self, key, curation_note=''):
401+
"""
402+
A convenient function to create a new corresponding "Curation" for a particular "ClusteringTask"
403+
"""
404+
if key not in Clustering():
405+
raise ValueError(f'No corresponding entry in Clustering available for: {key}; do `Clustering.populate(key)`')
406+
407+
root_dir = pathlib.Path(get_ephys_root_data_dir())
408+
task_mode, output_dir = (ClusteringTask & key).fetch1('task_mode', 'clustering_output_dir')
409+
ks_dir = root_dir / output_dir
410+
creation_time, is_curated, is_qc = kilosort.extract_clustering_info(ks_dir)
411+
# Synthesize curation_id
412+
curation_id = dj.U().aggr(self & key, n='ifnull(max(curation_id)+1,1)').fetch1('n')
413+
self.insert1({**key, 'curation_id': curation_id,
414+
'curation_time': creation_time, 'curation_output_dir': output_dir,
415+
'quality_control': is_qc, 'manual_curation': is_curated,
416+
'curation_note': curation_note})
417+
418+
419+
@schema
420+
class CuratedClustering(dj.Imported):
421+
definition = """
422+
-> Curation
366423
"""
367424

368425
class Unit(dj.Part):
@@ -380,13 +437,10 @@ class Unit(dj.Part):
380437

381438
def make(self, key):
382439
root_dir = pathlib.Path(get_ephys_root_data_dir())
383-
ks_dir = root_dir / (ClusteringTask & key).fetch1('clustering_output_dir')
440+
ks_dir = root_dir / (Curation & key).fetch1('curation_output_dir')
384441
ks = kilosort.Kilosort(ks_dir)
385442
acq_software = (EphysRecording & key).fetch1('acq_software')
386443

387-
# ---------- Clustering ----------
388-
creation_time, is_curated, is_qc = kilosort.extract_clustering_info(ks_dir)
389-
390444
# ---------- Unit ----------
391445
# -- Remove 0-spike units
392446
withspike_idx = [i for i, u in enumerate(ks.data['cluster_ids']) if (ks.data['spike_clusters'] == u).any()]
@@ -422,15 +476,14 @@ def make(self, key):
422476
'spike_sites': spike_sites[ks.data['spike_clusters'] == unit],
423477
'spike_depths': spike_depths[ks.data['spike_clusters'] == unit]})
424478

425-
self.insert1({**key, 'clustering_time': creation_time,
426-
'quality_control': is_qc, 'manual_curation': is_curated})
479+
self.insert1(key)
427480
self.Unit.insert([{**key, **u} for u in units])
428481

429482

430483
@schema
431484
class Waveform(dj.Imported):
432485
definition = """
433-
-> Clustering.Unit
486+
-> CuratedClustering.Unit
434487
---
435488
peak_chn_waveform_mean: longblob # mean over all spikes at the peak channel for this unit
436489
"""
@@ -446,11 +499,11 @@ class Electrode(dj.Part):
446499

447500
@property
448501
def key_source(self):
449-
return Clustering()
502+
return Curation()
450503

451504
def make(self, key):
452505
root_dir = pathlib.Path(get_ephys_root_data_dir())
453-
ks_dir = root_dir / (ClusteringTask & key).fetch1('clustering_output_dir')
506+
ks_dir = root_dir / (Curation & key).fetch1('curation_output_dir')
454507
ks = kilosort.Kilosort(ks_dir)
455508

456509
acq_software, probe_sn = (EphysRecording * ProbeInsertion & key).fetch1('acq_software', 'probe')
@@ -459,10 +512,10 @@ def make(self, key):
459512
rec_key = (EphysRecording & key).fetch1('KEY')
460513
chn2electrodes = get_neuropixels_chn2electrode_map(rec_key, acq_software)
461514

462-
is_qc = (Clustering & key).fetch1('quality_control')
515+
is_qc = (Curation & key).fetch1('quality_control')
463516

464517
# Get all units
465-
units = {u['unit']: u for u in (Clustering.Unit & key).fetch(as_dict=True, order_by='unit')}
518+
units = {u['unit']: u for u in (CuratedClustering.Unit & key).fetch(as_dict=True, order_by='unit')}
466519

467520
unit_waveforms, unit_peak_waveforms = [], []
468521
if is_qc:
@@ -503,7 +556,7 @@ def make(self, key):
503556
@schema
504557
class ClusterQualityMetrics(dj.Imported):
505558
definition = """
506-
-> Clustering.Unit
559+
-> CuratedClustering.Unit
507560
---
508561
amp: float
509562
snr: float

elements_ephys/readers/kilosort.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from os import path
22
from datetime import datetime
3+
import pathlib
34
import pandas as pd
45
import numpy as np
56
import re
@@ -37,14 +38,19 @@ class Kilosort:
3738
# keys to self.files, .data are file name e.g. self.data['params'], etc.
3839
ks_keys = [path.splitext(i)[0] for i in ks_files]
3940

40-
def __init__(self, dname):
41-
self._dname = dname
41+
def __init__(self, ks_dir):
42+
self._ks_dir = pathlib.Path(ks_dir)
4243
self._files = {}
4344
self._data = None
4445
self._clusters = None
4546

46-
self._info = {'time_created': datetime.fromtimestamp((dname / 'params.py').stat().st_ctime),
47-
'time_modified': datetime.fromtimestamp((dname / 'params.py').stat().st_mtime)}
47+
params_fp = ks_dir / 'params.py'
48+
49+
if not params_fp.exists():
50+
raise FileNotFoundError(f'No Kilosort output found in: {ks_dir}')
51+
52+
self._info = {'time_created': datetime.fromtimestamp(params_fp.stat().st_ctime),
53+
'time_modified': datetime.fromtimestamp(params_fp.stat().st_mtime)}
4854

4955
@property
5056
def data(self):
@@ -59,7 +65,7 @@ def info(self):
5965
def _stat(self):
6066
self._data = {}
6167
for i in Kilosort.ks_files:
62-
f = self._dname / i
68+
f = self._ks_dir / i
6369

6470
if not f.exists():
6571
log.debug('skipping {} - doesnt exist'.format(f))
@@ -84,12 +90,12 @@ def _stat(self):
8490
self._data[base] = np.reshape(d, d.shape[0]) if d.ndim == 2 and d.shape[1] == 1 else d
8591

8692
# Read the Cluster Groups
87-
if (self._dname / 'cluster_groups.csv').exists():
88-
df = pd.read_csv(self._dname / 'cluster_groups.csv', delimiter='\t')
93+
if (self._ks_dir / 'cluster_groups.csv').exists():
94+
df = pd.read_csv(self._ks_dir / 'cluster_groups.csv', delimiter= '\t')
8995
self._data['cluster_groups'] = np.array(df['group'].values)
9096
self._data['cluster_ids'] = np.array(df['cluster_id'].values)
91-
elif (self._dname / 'cluster_KSLabel.tsv').exists():
92-
df = pd.read_csv(self._dname / 'cluster_KSLabel.tsv', sep = "\t", header = 0)
97+
elif (self._ks_dir / 'cluster_KSLabel.tsv').exists():
98+
df = pd.read_csv(self._ks_dir / 'cluster_KSLabel.tsv', sep = "\t", header = 0)
9399
self._data['cluster_groups'] = np.array(df['KSLabel'].values)
94100
self._data['cluster_ids'] = np.array(df['cluster_id'].values)
95101
else:

elements_ephys/readers/openephys.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def load_probe_data(self):
6969
oe_probe.recording_info['recording_datetimes'].append(rec.datetime)
7070
oe_probe.recording_info['recording_durations'].append(float(rec.duration))
7171
oe_probe.recording_info['recording_files'].append(
72-
rec.absolute_foldername / cont_info['folder_name'])
72+
rec.absolute_foldername / 'continuous' / cont_info['folder_name'])
7373

7474
elif cont_info['source_processor_sub_idx'] == 1: # lfp data
7575
assert cont_info['sample_rate'] == analog_signal.sample_rate == 2500

elements_ephys/readers/spikeglx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def get_original_chans(self):
303303
# a block of contiguous channels specified as chan or chan1:chan2 inclusive
304304
ix = [int(r) for r in channel_range.split(':')]
305305
assert len(ix) in (1, 2), f"Invalid channel range spec '{channel_range}'"
306-
channels = np.append(np.r_[ix[0]:ix[-1] + 1])
306+
channels = np.append(channels, np.r_[ix[0]:ix[-1] + 1])
307307
return channels
308308

309309

images/attached_ephys_element.svg

Lines changed: 1 addition & 1 deletion
Loading

0 commit comments

Comments (0)