-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Milestone
Description
We should define what the API should look like for reading BIDS and CAPS datasets.
Here are some thoughts to start the discussion:
class Visit:
    """A visit in a longitudinal dataset, identified by a subject and a session."""

    subject: Subject
    session: Session
class Dataset:
    """Abstract base for all datasets; declares the dataset root path."""

    root: Path
class LongitudinalDataset(Dataset):
    """Base class for longitudinal datasets with a subjects-and-sessions layout."""

    subjects: list[Subject]

    def get_subject_metadata(self, subject: Subject | None = None) -> pd.DataFrame:
        """Return the participants.tsv metadata for one subject, or for all of them.

        Args:
            subject: Subject to select. ``None`` (the default) selects every subject.
        """
        ...

    def get_sessions(self, subject: Subject) -> list[Session]:
        """Return the sessions of the given subject."""
        ...

    def get_visits(self) -> list[Visit]:
        """Return all the visits."""
        ...

    def get_session_metadata(
        self,
        subject: Subject | None = None,
        session: Session | None = None,
    ) -> pd.DataFrame:
        """Return the sessions.tsv metadata for the provided visit.

        Args:
            subject: Subject to select.
            session: Session to select. If ``None``, all sessions of the given
                subject are merged.

        NOTE(review): ``subject`` is made optional so that the no-argument call
        shown in the usage example is valid; presumably ``None`` then means
        "all subjects" -- to be confirmed in the discussion.
        """
        ...

    def get_modalities(self, visit: Visit | None = None) -> set[Modality]:
        """Return all the modalities for the given visit, or for the whole dataset.

        Args:
            visit: Visit to inspect. ``None`` (the default) covers the dataset.
        """
        ...

    def get_scans_metadata(self, visit: Visit) -> pd.DataFrame:
        """Return the scans.tsv metadata for the provided visit."""
        ...
class BIDSDataset(LongitudinalDataset):
    """Longitudinal dataset carrying a BIDS dataset description and its files."""

    description: BIDSDatasetDescription
    files: list[BIDSPath]

    def get_files(
        self,
        subject: Subject | None = None,
        session: Session | None = None,
        modality: Modality | None = None,
        entities: list[Entity] | None = None,
        extension: Extension | None = None,
    ) -> list[BIDSPath]:
        """Return all files matching the provided parameters.

        Every filter defaults to ``None`` so that callers can pass only the
        criteria they need as keyword arguments; presumably a filter left as
        ``None`` is simply not applied.

        Args:
            subject: Subject to match.
            session: Session to match.
            modality: Modality to match.
            entities: Entities to match.
            extension: File extension to match.
        """
        ...
class CAPSDataset(LongitudinalDataset):
    """Longitudinal dataset carrying a CAPS dataset description."""

    description: CAPSDatasetDescription
class CAPSGroupDataset(LongitudinalDataset):
    """Placeholder longitudinal dataset; no members are defined yet."""

    ...
# This is already implemented in Clinica:
class DatasetType(str, Enum):
    """Kind of dataset: raw or derivative. Members are also plain strings."""

    RAW = "raw"
    DERIVATIVE = "derivative"
class BIDSDatasetDescription:
    """Description of a BIDS dataset: its name, BIDS version and dataset type."""

    name: str
    bids_version: Version = BIDS_VERSION
    # Defaults to a raw dataset.
    dataset_type: DatasetType = DatasetType.RAW
class CAPSDatasetDescription:
    """Description of a CAPS dataset: name, versions, type and processing entries."""

    name: str
    bids_version: Version = BIDS_VERSION
    caps_version: Version = CAPS_VERSION
    # Defaults to a derivative dataset.
    dataset_type: DatasetType = DatasetType.DERIVATIVE
    # NOTE(review): a bare `[]` default is a single list object held by the
    # class; if this class is built with dataclasses/attrs, a list factory
    # would avoid sharing it between instances -- confirm against the
    # existing implementation.
    processing: MutableSequence[CAPSProcessingDescription] = []
class CAPSProcessingDescription:
    """One processing entry of a CAPS dataset description.

    Declares the processing name, date, author, machine, input path,
    Clinica version and software dependencies.
    """

    name: str
    date: IsoDate
    author: str
    machine: str
    input_path: str
    clinica_version: Version
    dependencies: List[SoftwareDependency]
Usage would look more or less like this:
dataset = BIDSDataset.from_path("./bids")
session_df = dataset.get_session_metadata()
dataset.get_files(modality="T1w", extension=".nii.gz")  # Get T1w NIfTI images for all visits
dataset.get_files(subject="sub-01", extension=".json")  # Get all JSON files for subject sub-01
dataset.get_files(
    subject="sub-01",
    session="ses-M000",
    modality="pet",
    # Not sure how to nicely provide these filters.
    # `Tracer.18FFDG` is not valid Python (an attribute name cannot start with
    # a digit), so the member is looked up by value instead -- this assumes
    # Tracer is an Enum with the value "18FFDG"; TODO confirm.
    entities=[{"prefix": "trc", "value": Tracer("18FFDG")}],
    extension=".nii.gz",
)  # Get the PET scan for subject sub-01 and session M000 with tracer 18FFDG
Metadata
Metadata
Assignees
Labels
No labels