import json
from pathlib import Path

from epilepsy2bids.annotations import Annotations
import numpy as np
from timescoring import scoring
from timescoring.annotations import Annotation


class Result(scoring._Scoring):
    """Helper class built on top of scoring._Scoring that implements the sum
    operator between two scoring objects. The sum corresponds to the
    concatenation of both objects.

    Args:
        score (scoring._Scoring, optional): if None, all counts are initialized
            to zero; otherwise they are copied from the given scoring._Scoring
            object.
    """

    def __init__(self, score: scoring._Scoring = None):
        if score is None:
            self.fs = 0
            self.duration = 0
            self.numSamples = 0
            self.tp = 0
            self.fp = 0
            self.refTrue = 0
        else:
            self.fs = score.ref.fs
            self.duration = len(score.ref.mask) / score.ref.fs
            self.numSamples = score.numSamples
            self.tp = score.tp
            self.fp = score.fp
            self.refTrue = score.refTrue

    def __add__(self, other_result: scoring._Scoring):
        new_result = Result()
        new_result.fs = other_result.fs
        new_result.duration = self.duration + other_result.duration
        new_result.numSamples = self.numSamples + other_result.numSamples
        new_result.tp = self.tp + other_result.tp
        new_result.fp = self.fp + other_result.fp
        new_result.refTrue = self.refTrue + other_result.refTrue

        return new_result

    def __iadd__(self, other_result: scoring._Scoring):
        self.fs = other_result.fs
        self.duration += other_result.duration
        self.numSamples += other_result.numSamples
        self.tp += other_result.tp
        self.fp += other_result.fp
        self.refTrue += other_result.refTrue

        return self


def evaluate_dataset(
    reference: Path, hypothesis: Path, outFile: Path, avg_per_subject=True
) -> dict:
    """
    Compares two sets of seizure annotations across a full dataset.

    Parameters:
        reference (Path): The path to the folder containing the reference TSV files.
        hypothesis (Path): The path to the folder containing the hypothesis TSV files.
        outFile (Path): The path to the output JSON file where the results are saved.
        avg_per_subject (bool): Whether to average scores per subject (True) or to
            pool counts across the full dataset (False).

    Returns:
        dict: The evaluation results. The dictionary contains the following keys:
            {'sample_results': {'sensitivity', 'precision', 'f1', 'fpRate',
                                'sensitivity_std', 'precision_std', 'f1_std',
                                'fpRate_std'},
             'event_results': {...}
            }
            The *_std keys are only present when avg_per_subject is True.
    """

    FS = 1
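    # FS = 1 Hz: reference and hypothesis are compared on per-second binary masks.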

    sample_results = dict()
    event_results = dict()
    for subject in Path(reference).glob("sub-*"):
        sample_results[subject.name] = Result()
        event_results[subject.name] = Result()

        for ref_tsv in subject.glob("**/*.tsv"):
            # Load reference
            ref = Annotations.loadTsv(ref_tsv)
            ref = Annotation(ref.getMask(FS), FS)

            # Load hypothesis
            hyp_tsv = Path(hypothesis) / ref_tsv.relative_to(reference)
            if hyp_tsv.exists():
                hyp = Annotations.loadTsv(hyp_tsv)
                hyp = Annotation(hyp.getMask(FS), FS)
            else:
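                # Missing hypothesis file: assume no seizures were predicted and
                # score against an all-zero mask of the reference's length.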
                hyp = Annotation(np.zeros_like(ref.mask), ref.fs)

            # Compute evaluation
            sample_score = scoring.SampleScoring(ref, hyp)
            event_score = scoring.EventScoring(ref, hyp)

            # Store results
            sample_results[subject.name] += Result(sample_score)
            event_results[subject.name] += Result(event_score)

        # Compute scores
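        # computeScores() (inherited from scoring._Scoring) derives sensitivity,
        # precision, f1 and fpRate from the accumulated tp, fp, refTrue and
        # duration counts.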
        sample_results[subject.name].computeScores()
        event_results[subject.name].computeScores()

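    # Aggregate the per-subject results: either report the mean and standard
    # deviation of the per-subject metrics, or pool all counts and compute a
    # single set of metrics over the whole dataset.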
    aggregated_sample_results = dict()
    aggregated_event_results = dict()
    if avg_per_subject:
        for result_builder, aggregated_result in zip(
            (sample_results, event_results),
            (aggregated_sample_results, aggregated_event_results),
        ):
            for metric in ["sensitivity", "precision", "f1", "fpRate"]:
                aggregated_result[metric] = np.mean(
                    [getattr(x, metric) for x in result_builder.values()]
                )
                aggregated_result[f"{metric}_std"] = np.std(
                    [getattr(x, metric) for x in result_builder.values()]
                )
    else:
        for result_builder, aggregated_result in zip(
            (sample_results, event_results),
            (aggregated_sample_results, aggregated_event_results),
        ):
            # Accumulate into a local Result first so that the "cumulated" entry
            # is not added to itself while iterating over the dictionary values.
            cumulated = Result()
            for result in result_builder.values():
                cumulated += result
            cumulated.computeScores()
            result_builder["cumulated"] = cumulated
            for metric in ["sensitivity", "precision", "f1", "fpRate"]:
                aggregated_result[metric] = getattr(cumulated, metric)

    output = {
        "sample_results": aggregated_sample_results,
        "event_results": aggregated_event_results,
    }
    with open(outFile, "w") as file:
        json.dump(output, file, indent=2, sort_keys=False)

    return output
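

# Minimal usage sketch (illustrative only; the folder and file names below are
# hypothetical). Both folders must share the same directory structure
# (sub-*/**/*.tsv), since each hypothesis file is looked up at the relative
# path of its reference counterpart.
if __name__ == "__main__":
    results = evaluate_dataset(
        reference=Path("reference_annotations"),
        hypothesis=Path("hypothesis_annotations"),
        outFile=Path("results.json"),
        avg_per_subject=True,
    )
    print(json.dumps(results["sample_results"], indent=2))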