Skip to content

Commit 6488fee

Browse files
author
Thinh Nguyen
committed
allow root_dir to be a list of potential directories - util function find_valid_full_path() for root and path searching
1 parent e51113b commit 6488fee

File tree

2 files changed

+77
-55
lines changed

2 files changed

+77
-55
lines changed

element_array_ephys/__init__.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,33 @@
11
import datajoint as dj
2+
import pathlib
3+
24

35
dj.config['enable_python_native_blobs'] = True
6+
7+
8+
def find_valid_full_path(potential_root_directories, path):
    """
    Given multiple potential root directories and a single path
    Search and return one directory that is the parent of the given path
        :param potential_root_directories: a single root directory (str or
            pathlib.Path) or an iterable of potential root directories
        :param path: the path to search the root directory for; either a full
            path located under one of the roots, or a path relative to one of them
        :return: (fullpath, root_directory), both as pathlib.Path
        :raises FileNotFoundError: if no root directory can be associated with path
    """
    path = pathlib.Path(path)

    # turn to list if only a single root directory is provided
    if isinstance(potential_root_directories, (str, pathlib.Path)):
        potential_root_directories = [potential_root_directories]

    # materialize once: the roots may be scanned twice below
    root_dirs = [pathlib.Path(root_dir) for root_dir in potential_root_directories]

    # case 1: path exists as given - find the root that is one of its parents
    if path.exists():
        parents = path.parents
        for root_dir in root_dirs:
            if root_dir in parents:
                return path, root_dir

    # case 2: path is relative to one of the roots.
    # This is also tried when the relative path happens to exist with respect
    # to the current working directory but none of the roots is its parent.
    # Absolute paths are skipped: joining a root with an absolute path yields
    # the absolute path itself, which was already handled in case 1.
    if not path.is_absolute():
        for root_dir in root_dirs:
            full_path = root_dir / path
            if full_path.exists():
                return full_path, root_dir

    raise FileNotFoundError('Unable to identify root-directory (from {})'
                            ' associated with {}'.format(potential_root_directories, path))

element_array_ephys/ephys.py

Lines changed: 47 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import importlib
99

1010
from .readers import spikeglx, kilosort, openephys
11-
from . import probe
11+
from . import probe, find_valid_full_path
1212

1313
schema = dj.schema()
1414

@@ -61,40 +61,23 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True,
6161

6262
# -------------- Functions required by the elements-ephys ---------------
6363

64-
def get_ephys_root_data_dir() -> list:
    """
    All data paths, directories in DataJoint Elements are recommended to be stored as
    relative paths, with respect to some user-configured "root" directory,
     which varies from machine to machine (e.g. different mounted drive locations)

    get_ephys_root_data_dir() -> list
        This user-provided function retrieves the possible root data directories
         containing the ephys data for all subjects/sessions
         (e.g. acquired SpikeGLX or Open Ephys raw files,
         output files from spike sorting routines, etc.)
        The value is passed through unchanged from the linking module, so
         despite the "list" annotation it may be a single string as well.
        :return: a string for the full path to the ephys root data directory,
         or a list of strings for the possible root data directories
    """
    return _linking_module.get_ephys_root_data_dir()
7779

7880

79-
def get_clustering_root_data_dir() -> str:
80-
"""
81-
All data paths, directories in DataJoint Elements are recommended to be stored as
82-
relative paths, with respect to some user-configured "root" directory,
83-
which varies from machine to machine (e.g. different mounted drive locations)
84-
85-
get_clustering_root_data_dir() -> str
86-
This user-provided function retrieves the root data directory
87-
containing clustering results for all subjects/sessions
88-
(e.g. output files from spike sorting routines)
89-
Note: if not provided, use "get_ephys_root_data_dir()"
90-
:return: a string for full path to the clustering root data directory
91-
"""
92-
if not hasattr(_linking_module, 'get_clustering_root_data_dir'):
93-
return get_ephys_root_data_dir()
94-
95-
return _linking_module.get_clustering_root_data_dir()
96-
97-
9881
def get_session_directory(session_key: dict) -> str:
9982
"""
10083
get_session_directory(session_key: dict) -> str
@@ -159,7 +142,6 @@ class EphysFile(dj.Part):
159142
"""
160143

161144
def make(self, key):
162-
root_dir = pathlib.Path(get_ephys_root_data_dir())
163145
sess_dir = pathlib.Path(get_session_directory(key))
164146

165147
inserted_probe_serial_number = (ProbeInsertion * probe.Probe & key).fetch1('probe')
@@ -207,6 +189,8 @@ def make(self, key):
207189
**generate_electrode_config(probe_type, electrode_group_members),
208190
'acq_software': acq_software,
209191
'sampling_rate': spikeglx_meta.meta['imSampRate']})
192+
193+
_, root_dir = find_valid_full_path(get_ephys_root_data_dir(), meta_filepath)
210194
self.EphysFile.insert1({
211195
**key,
212196
'file_path': meta_filepath.relative_to(root_dir).as_posix()})
@@ -240,6 +224,10 @@ def make(self, key):
240224
**generate_electrode_config(probe_type, electrode_group_members),
241225
'acq_software': acq_software,
242226
'sampling_rate': probe_data.ap_meta['sample_rate']})
227+
228+
_, root_dir = find_valid_full_path(
229+
get_ephys_root_data_dir(),
230+
probe_data.recording_info['recording_files'][0])
243231
self.EphysFile.insert([{**key,
244232
'file_path': fp.relative_to(root_dir).as_posix()}
245233
for fp in probe_data.recording_info['recording_files']])
@@ -431,10 +419,9 @@ class Clustering(dj.Imported):
431419
"""
432420

433421
def make(self, key):
434-
root_dir = pathlib.Path(get_clustering_root_data_dir())
435422
task_mode, output_dir = (ClusteringTask & key).fetch1(
436423
'task_mode', 'clustering_output_dir')
437-
kilosort_dir = root_dir / output_dir
424+
kilosort_dir, _ = find_valid_full_path(get_ephys_root_data_dir(), output_dir)
438425

439426
if task_mode == 'load':
440427
kilosort_dataset = kilosort.Kilosort(kilosort_dir) # check if the directory is a valid Kilosort output
@@ -470,10 +457,10 @@ def create1_from_clustering_task(self, key, curation_note=''):
470457
raise ValueError(f'No corresponding entry in Clustering available'
471458
f' for: {key}; do `Clustering.populate(key)`')
472459

473-
root_dir = pathlib.Path(get_clustering_root_data_dir())
474460
task_mode, output_dir = (ClusteringTask & key).fetch1(
475461
'task_mode', 'clustering_output_dir')
476-
kilosort_dir = root_dir / output_dir
462+
kilosort_dir, _ = find_valid_full_path(get_ephys_root_data_dir(), output_dir)
463+
477464
creation_time, is_curated, is_qc = kilosort.extract_clustering_info(kilosort_dir)
478465
# Synthesize curation_id
479466
curation_id = dj.U().aggr(self & key, n='ifnull(max(curation_id)+1,1)').fetch1('n')
@@ -503,8 +490,9 @@ class Unit(dj.Part):
503490
"""
504491

505492
def make(self, key):
506-
root_dir = pathlib.Path(get_clustering_root_data_dir())
507-
kilosort_dir = root_dir / (Curation & key).fetch1('curation_output_dir')
493+
output_dir = (Curation & key).fetch1('curation_output_dir')
494+
kilosort_dir, _ = find_valid_full_path(get_ephys_root_data_dir(), output_dir)
495+
508496
kilosort_dataset = kilosort.Kilosort(kilosort_dir)
509497
acq_software = (EphysRecording & key).fetch1('acq_software')
510498

@@ -521,7 +509,7 @@ def make(self, key):
521509
# spike_times_sec_adj > spike_times_sec > spike_times
522510
spike_time_key = ('spike_times_sec_adj' if 'spike_times_sec_adj' in kilosort_dataset.data
523511
else 'spike_times_sec' if 'spike_times_sec'
524-
in kilosort_dataset.data else 'spike_times')
512+
in kilosort_dataset.data else 'spike_times')
525513
spike_times = kilosort_dataset.data[spike_time_key]
526514
kilosort_dataset.extract_spike_depths()
527515

@@ -576,8 +564,9 @@ class UnitElectrode(dj.Part):
576564
"""
577565

578566
def make(self, key):
579-
root_dir = pathlib.Path(get_clustering_root_data_dir())
580-
kilosort_dir = root_dir / (Curation & key).fetch1('curation_output_dir')
567+
output_dir = (Curation & key).fetch1('curation_output_dir')
568+
kilosort_dir, _ = find_valid_full_path(get_ephys_root_data_dir(), output_dir)
569+
581570
kilosort_dataset = kilosort.Kilosort(kilosort_dir)
582571

583572
acq_software, probe_serial_number = (EphysRecording * ProbeInsertion & key).fetch1(
@@ -656,25 +645,28 @@ def yield_unit_waveforms():
656645

657646
def get_spikeglx_meta_filepath(ephys_recording_key):
    """
    Locate the SpikeGLX "*.ap.meta" file of an ephys recording.

    The file path stored in EphysRecording.EphysFile is first resolved against
    the root data directories from get_ephys_root_data_dir(). If that fails and
    the stored path does not exist as given, the session directory is searched
    recursively for an "*.ap.meta" file whose probe serial number matches the
    inserted probe.
        :param ephys_recording_key: key restricting EphysRecording.EphysFile
            and ProbeInsertion to a single recording
        :return: path to the SpikeGLX meta file (pathlib.Path)
        :raises FileNotFoundError: if no matching meta file is found
    """
    # attempt to retrieve from EphysRecording.EphysFile
    spikeglx_meta_filepath = (EphysRecording.EphysFile & ephys_recording_key
                              & 'file_path LIKE "%.ap.meta"').fetch1('file_path')

    try:
        spikeglx_meta_filepath, _ = find_valid_full_path(get_ephys_root_data_dir(),
                                                         spikeglx_meta_filepath)
    except FileNotFoundError:
        # The tuple assignment above did not happen, so the name still holds the
        # value fetched from the table (presumably a plain string - confirm
        # against the table definition). Convert it so that ".exists()" works.
        spikeglx_meta_filepath = pathlib.Path(spikeglx_meta_filepath)

        # if not found, search in session_dir again
        if not spikeglx_meta_filepath.exists():
            sess_dir = pathlib.Path(get_session_directory(ephys_recording_key))
            inserted_probe_serial_number = (ProbeInsertion * probe.Probe
                                            & ephys_recording_key).fetch1('probe')

            for meta_filepath in sess_dir.rglob('*.ap.meta'):
                spikeglx_meta = spikeglx.SpikeGLXMeta(meta_filepath)
                if str(spikeglx_meta.probe_SN) == inserted_probe_serial_number:
                    spikeglx_meta_filepath = meta_filepath
                    break
            else:
                # loop finished without a serial-number match
                raise FileNotFoundError(
                    'No SpikeGLX data found for probe insertion: {}'.format(ephys_recording_key))

    return spikeglx_meta_filepath
680672

0 commit comments

Comments
 (0)