From 39a8493bb14029dd17361df653828ca27b97f969 Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Mon, 23 Dec 2024 10:35:45 +0100
Subject: [PATCH 1/6] 1st implementation of semisupervised DA

---
 .../semisupervised_domainAdaptation.py        | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 scripts/portal/training/semisupervised_domainAdaptation.py

diff --git a/scripts/portal/training/semisupervised_domainAdaptation.py b/scripts/portal/training/semisupervised_domainAdaptation.py
new file mode 100644
index 0000000..d71cedf
--- /dev/null
+++ b/scripts/portal/training/semisupervised_domainAdaptation.py
@@ -0,0 +1,118 @@
+import os
+import argparse
+from glob import glob
+import json
+from sklearn.model_selection import train_test_split
+import sys
+sys.path.append('/user/muth9/u12095/synapse-net')
+from synapse_net.training.domain_adaptation import mean_teacher_adaptation
+
+OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/training/semisupervisedDA_cryo"
+
+def _require_train_val_test_split(datasets, train_root, extension = "mrc"):
+    train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1
+
+    def _train_val_test_split(names):
+        train, test = train_test_split(names, test_size=1 - train_ratio, shuffle=True)
+        _ratio = test_ratio / (test_ratio + val_ratio)
+        val, test = train_test_split(test, test_size=_ratio)
+        return train, val, test
+
+    for ds in datasets:
+        print(f"Processing dataset: {ds}")
+        split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json")
+        if os.path.exists(split_path):
+            print(f"Split file already exists: {split_path}")
+            continue
+
+        file_paths = sorted(glob(os.path.join(train_root, ds, f"*.{extension}")))
+        file_names = [os.path.basename(path) for path in file_paths]
+
+        train, val, test = _train_val_test_split(file_names)
+
+        with open(split_path, "w") as f:
+            json.dump({"train": train, "val": val, "test": test}, f)
+
+def _require_train_val_split(datasets, train_root, extension = "mrc"):
+    train_ratio, val_ratio = 0.8, 0.2
+
+    def _train_val_split(names):
+        train, val = train_test_split(names, test_size=1 - train_ratio, shuffle=True)
+        return train, val
+
+    for ds in datasets:
+        print(f"Processing dataset: {ds}")
+        split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json")
+        if os.path.exists(split_path):
+            print(f"Split file already exists: {split_path}")
+            continue
+
+        file_paths = sorted(glob(os.path.join(train_root, ds, f"*.{extension}")))
+        file_names = [os.path.basename(path) for path in file_paths]
+
+        train, val = _train_val_split(file_names)
+
+        with open(split_path, "w") as f:
+            json.dump({"train": train, "val": val}, f)
+
+def get_paths(split, datasets, train_root, testset=True, extension = "mrc"):
+    if testset:
+        _require_train_val_test_split(datasets, train_root, extension)
+    else:
+        _require_train_val_split(datasets, train_root, extension)
+
+    paths = []
+    for ds in datasets:
+        split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json")
+        with open(split_path) as f:
+            names = json.load(f)[split]
+        ds_paths = [os.path.join(train_root, ds, name) for name in names]
+        assert all(os.path.exists(path) for path in ds_paths), f"Some paths do not exist in {ds_paths}"
+        paths.extend(ds_paths)
+
+    return paths
+
+def vesicle_domain_adaptation(teacher_model, testset=True):
+    # Adjustable parameters
+    patch_shape = [48, 256, 256]
+    model_name = "vesicle-semisupervisedDA-cryo-v1"
+    model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/"
+    checkpoint_path = os.path.join(model_root, teacher_model)
+
+    unsupervised_train_root = "/mnt/lustre-emmy-hdd/usr/u12095/cryo-et"
+    supervised_train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/cryoVesNet"
+
+    unsupervised_datasets = ["from_portal"]
+    unsupervised_train_paths = get_paths("train", datasets=unsupervised_datasets, train_root=unsupervised_train_root, testset=testset)
+    unsupervised_val_paths = get_paths("val", datasets=unsupervised_datasets, train_root=unsupervised_train_root, testset=testset)
+
+    supervised_datasets = ["exported"]
+    supervised_train_paths = get_paths("train", datasets=supervised_datasets, train_root=supervised_train_root, testset=testset, extension = "h5")
+    supervised_val_paths = get_paths("val", datasets=supervised_datasets, train_root=supervised_train_root, testset=testset, extension = "h5")
+
+    mean_teacher_adaptation(
+        name=model_name,
+        unsupervised_train_paths=unsupervised_train_paths,
+        unsupervised_val_paths=unsupervised_val_paths,
+        raw_key="data",
+        supervised_train_paths=supervised_train_paths,
+        supervised_val_paths=supervised_val_paths,
+        raw_key_supervised = "raw",
+        label_key="/labels/vesicles",
+        patch_shape=patch_shape,
+        save_root="/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/models/DA",
+        source_checkpoint=checkpoint_path,
+        confidence_threshold=0.75,
+        n_iterations=int(1e3),
+    )
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--teacher_model", required=True, help="Name of teacher model")
+    parser.add_argument("-t", "--testset", action="store_false", help="Set to False if no testset should be created")
+    args = parser.parse_args()
+
+    vesicle_domain_adaptation(args.teacher_model, args.testset)
+
+if __name__ == "__main__":
+    main()

From 7ce27d803de6b00cdefb31cdfc5d93d1475b7c52 Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Mon, 24 Feb 2025 16:39:43 +0100
Subject: [PATCH 2/6] add evaluation stuff for revision

---
 .gitignore                            |  1 +
 scripts/cooper/training/evaluation.py | 33 ++++++++++++--------
 scripts/rizzoli/evaluation_2D.py      | 45 +++++++++++++++++++++------
 3 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore
index 262d4d7..e5a747b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ scripts/cooper/training/copy_testset.py
 scripts/rizzoli/upsample_data.py
 scripts/cooper/training/find_rec_testset.py
 synapse-net-models/
+scripts/portal/upscale_tomo.py
diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py
index d7aaf6e..55bac16 100644
--- a/scripts/cooper/training/evaluation.py
+++ b/scripts/cooper/training/evaluation.py
@@ -4,24 +4,25 @@
 import h5py
 import pandas as pd
 
-from elf.evaluation import matching
+from elf.evaluation import matching, symmetric_best_dice_score
 
 
 
 def evaluate(labels, vesicles):
     assert labels.shape == vesicles.shape
     stats = matching(vesicles, labels)
-    return [stats["f1"], stats["precision"], stats["recall"]]
+    sbd = symmetric_best_dice_score(vesicles, labels)
+    return [stats["f1"], stats["precision"], stats["recall"], sbd]
 
 
 def summarize_eval(results):
-    summary = results[["dataset", "f1-score", "precision", "recall"]].groupby("dataset").mean().reset_index("dataset")
-    total = results[["f1-score", "precision", "recall"]].mean().values.tolist()
+    summary = results[["dataset", "f1-score", "precision", "recall", "SBD score"]].groupby("dataset").mean().reset_index("dataset")
+    total = results[["f1-score", "precision", "recall", "SBD score"]].mean().values.tolist()
     summary.iloc[-1] = ["all"] + total
     table = summary.to_markdown(index=False)
     print(table)
 
-def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key):
+def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None):
     print(f"Evaluate labels {labels_path} and vesicles {vesicles_path}")
 
     ds_name = os.path.basename(os.path.dirname(labels_path))
@@ -33,16 +34,21 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
         #vesicles = labels["vesicles"]
         gt = labels[anno_key][:]
         
+        if mask_key is not None:
+            mask = labels[mask_key][:]
+        
     with h5py.File(vesicles_path) as seg_file:
         segmentation = seg_file["vesicles"]
         vesicles = segmentation[segment_key][:] 
     
-    
-    #evaluate the match of ground truth and vesicles
+    if mask_key is not None:
+        gt[mask == 0] = 0
+        vesicles[mask == 0] = 0
+     #evaluate the match of ground truth and vesicles
     scores = evaluate(gt, vesicles)
     
     #store results
-    result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results"
+    result_folder ="/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results"
     os.makedirs(result_folder, exist_ok=True)
     result_path=os.path.join(result_folder, f"evaluation_{model_name}.csv")
     print("Evaluation results are saved to:", result_path)
@@ -53,7 +59,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
         results = None
         
     res = pd.DataFrame(
-        [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall"]
+        [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall", "SBD score"]
     )
     if results is None:
         results = res
@@ -65,7 +71,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
     summarize_eval(results)
 
 
-def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key):
+def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None):
     print(f"Evaluating folder {vesicles_path}")
     print(f"Using labels stored in {labels_path}")
 
@@ -75,7 +81,7 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke
     for vesicle_file in vesicles_files:
         if vesicle_file in label_files:
 
-            evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key)
+            evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key)
 
 
 
@@ -87,13 +93,14 @@ def main():
     parser.add_argument("-n", "--model_name", required=True)
     parser.add_argument("-sk", "--segment_key", required=True)
     parser.add_argument("-ak", "--anno_key", required=True)
+    parser.add_argument("-m", "--mask_key")
     args = parser.parse_args()
 
     vesicles_path = args.vesicles_path
     if os.path.isdir(vesicles_path):
-        evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key)
+        evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
     else:
-        evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key)
+        evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
     
     
 
diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py
index 1cae666..8621490 100644
--- a/scripts/rizzoli/evaluation_2D.py
+++ b/scripts/rizzoli/evaluation_2D.py
@@ -5,20 +5,27 @@
 import pandas as pd
 import numpy as np
 
-from elf.evaluation import matching
-
+from elf.evaluation import matching, symmetric_best_dice_score
+from skimage.transform import rescale
 
+def transpose_tomo(tomogram):
+        data0 = np.swapaxes(tomogram, 0, -1)
+        data1 = np.fliplr(data0)
+        transposed_data = np.swapaxes(data1, 0, -1)
+        return transposed_data
 
 def evaluate(labels, vesicles):
     assert labels.shape == vesicles.shape
     stats = matching(vesicles, labels)
-    return [stats["f1"], stats["precision"], stats["recall"]]
+    sbd = symmetric_best_dice_score(vesicles, labels)
+    return [stats["f1"], stats["precision"], stats["recall"], sbd]
 
 def evaluate_slices(gt, vesicles):
     """Evaluate 2D model performance for each z-slice of the 3D volume."""
     f1_scores = []
     precision_scores = []
     recall_scores = []
+    sbd_scores = []
     
     # Iterate through each slice along the z-axis
     for z in range(gt.shape[0]):
@@ -27,24 +34,27 @@ def evaluate_slices(gt, vesicles):
         vesicles_slice = vesicles[z, :, :]
         
         # Evaluate the performance for the current slice
-        f1, precision, recall = evaluate(gt_slice, vesicles_slice)
+        f1, precision, recall, sbd = evaluate(gt_slice, vesicles_slice)
         
         f1_scores.append(f1)
         precision_scores.append(precision)
         recall_scores.append(recall)
+        sbd_scores.append(sbd)
     
     print(f"f1 scores to be averaged {f1_scores}")
+    print(f"sbd scores to be averaged {sbd_scores}")
 
     # Calculate the mean for each metric
     mean_f1 = np.mean(f1_scores)
     mean_precision = np.mean(precision_scores)
     mean_recall = np.mean(recall_scores)
+    mead_sbd = np.mean(sbd_scores)
     
-    return [mean_f1, mean_precision, mean_recall]
+    return [mean_f1, mean_precision, mean_recall, mead_sbd]
 
 def summarize_eval(results):
-    summary = results[["dataset", "f1-score", "precision", "recall"]].groupby("dataset").mean().reset_index("dataset")
-    total = results[["f1-score", "precision", "recall"]].mean().values.tolist()
+    summary = results[["dataset", "f1-score", "precision", "recall", "SBD score"]].groupby("dataset").mean().reset_index("dataset")
+    total = results[["f1-score", "precision", "recall", "SBD score"]].mean().values.tolist()
     summary.iloc[-1] = ["all"] + total
     table = summary.to_markdown(index=False)
     print(table)
@@ -55,22 +65,37 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
     ds_name = os.path.basename(os.path.dirname(labels_path))
     tomo = os.path.basename(labels_path)
 
+    use_mask=True #set to true for eg maus data
+
     #get the labels and vesicles
     with h5py.File(labels_path) as label_file:
         labels = label_file["labels"]
         #vesicles = labels["vesicles"]
         gt = labels[anno_key][:]
+        gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype)
+        gt = transpose_tomo(gt)
+
+        if use_mask:
+            mask = labels["mask"][:]
+            mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype)
+            mask = transpose_tomo(mask)
         
     with h5py.File(vesicles_path) as seg_file:
         segmentation = seg_file["vesicles"]
         vesicles = segmentation[segment_key][:] 
     
+    if use_mask:
+        gt[mask == 0] = 0
+        vesicles[mask == 0] = 0
     
     #evaluate the match of ground truth and vesicles
-    scores = evaluate_slices(gt, vesicles)
+    if len(vesicles.shape) == 3:
+        scores = evaluate_slices(gt, vesicles)
+    else:
+        scores = evaluate(gt,vesicles)
     
     #store results
-    result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results"
+    result_folder ="/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results"
     os.makedirs(result_folder, exist_ok=True)
     result_path=os.path.join(result_folder, f"2Devaluation_{model_name}.csv")
     print("Evaluation results are saved to:", result_path)
@@ -81,7 +106,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
         results = None
         
     res = pd.DataFrame(
-        [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall"]
+        [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall", "SBD score"]
     )
     if results is None:
         results = res

From a7a59a1b2ee97a1533d25946bb45fd7a7771476a Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Thu, 13 Mar 2025 11:52:01 +0100
Subject: [PATCH 3/6] add bb option for eval

---
 scripts/cooper/training/evaluation.py | 26 +++++++++++++++++++++++++-
 scripts/rizzoli/evaluation_2D.py      | 11 ++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py
index 55bac16..8ba996c 100644
--- a/scripts/cooper/training/evaluation.py
+++ b/scripts/cooper/training/evaluation.py
@@ -3,10 +3,24 @@
 
 import h5py
 import pandas as pd
+import numpy as np
 
 from elf.evaluation import matching, symmetric_best_dice_score
 
-
+def get_bounding_box(mask, halo=2):
+    """ Get bounding box coordinates around a mask with a halo."""
+    coords = np.argwhere(mask)
+    if coords.size == 0:
+        return None  # No labels found
+    
+    min_coords = coords.min(axis=0)
+    max_coords = coords.max(axis=0)
+    
+    min_coords = np.maximum(min_coords - halo, 0)
+    max_coords = np.minimum(max_coords + halo, mask.shape)
+    
+    slices = tuple(slice(min_c, max_c) for min_c, max_c in zip(min_coords, max_coords))
+    return slices
 
 def evaluate(labels, vesicles):
     assert labels.shape == vesicles.shape
@@ -44,6 +58,16 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key,
     if mask_key is not None:
         gt[mask == 0] = 0
         vesicles[mask == 0] = 0
+
+    bb= False
+    if bb:
+        # Get bounding box and crop
+        bb_slices = get_bounding_box(gt, halo=2)
+        gt = gt[bb_slices]
+        vesicles = vesicles[bb_slices]
+    else:
+        print("not using bb")
+
      #evaluate the match of ground truth and vesicles
     scores = evaluate(gt, vesicles)
     
diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py
index 8621490..e999203 100644
--- a/scripts/rizzoli/evaluation_2D.py
+++ b/scripts/rizzoli/evaluation_2D.py
@@ -65,17 +65,18 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
     ds_name = os.path.basename(os.path.dirname(labels_path))
     tomo = os.path.basename(labels_path)
 
-    use_mask=True #set to true for eg maus data
+    maus=False #set to true for eg maus data
 
     #get the labels and vesicles
     with h5py.File(labels_path) as label_file:
         labels = label_file["labels"]
         #vesicles = labels["vesicles"]
         gt = labels[anno_key][:]
-        gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype)
-        gt = transpose_tomo(gt)
+        if maus:
+            gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype)
+            gt = transpose_tomo(gt)
 
-        if use_mask:
+        if maus:
             mask = labels["mask"][:]
             mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype)
             mask = transpose_tomo(mask)
@@ -84,7 +85,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key)
         segmentation = seg_file["vesicles"]
         vesicles = segmentation[segment_key][:] 
     
-    if use_mask:
+    if maus:
         gt[mask == 0] = 0
         vesicles[mask == 0] = 0
     

From 17d26dff951683d5037c1fbb0e0858e9a6290d18 Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Thu, 22 May 2025 10:37:29 +0200
Subject: [PATCH 4/6] everything after 1st revision relating to evaluation

---
 scripts/cooper/ground_truth/az/evaluate_az.py | 124 +++++++++++++++++-
 .../cooper/ground_truth/az/postprocess_az.py  |  48 +++++++
 scripts/cooper/training/evaluation.py         |  17 +++
 3 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 scripts/cooper/ground_truth/az/postprocess_az.py

diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py
index 63ca765..60bfcd2 100644
--- a/scripts/cooper/ground_truth/az/evaluate_az.py
+++ b/scripts/cooper/ground_truth/az/evaluate_az.py
@@ -1,11 +1,14 @@
 import os
+import argparse
 
 import h5py
 import pandas as pd
+import numpy as np
 from elf.evaluation import dice_score
 
 from scipy.ndimage import binary_dilation, binary_closing
 from tqdm import tqdm
+from elf.evaluation import matching
 
 
 def _expand_AZ(az):
@@ -88,5 +91,124 @@ def main():
     print("Total:")
     print(scores["Dice"].mean(), "+-", scores["Dice"].std())
 
+def get_bounding_box(mask, halo=2):
+    """ Get bounding box coordinates around a mask with a halo. """
+    coords = np.argwhere(mask)
+    if coords.size == 0:
+        return None  # No labels found
 
-main()
+    min_coords = coords.min(axis=0)
+    max_coords = coords.max(axis=0)
+
+    min_coords = np.maximum(min_coords - halo, 0)
+    max_coords = np.minimum(max_coords + halo, mask.shape)
+
+    slices = tuple(slice(min_c, max_c) for min_c, max_c in zip(min_coords, max_coords))
+    return slices
+
+
+def evaluate(labels, seg):
+    assert labels.shape == seg.shape
+    stats = matching(seg, labels)
+    return [stats["f1"], stats["precision"], stats["recall"]]
+
+
+def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None):
+    print(f"Evaluating: {os.path.basename(labels_path)}")
+
+    ds_name = os.path.basename(os.path.dirname(labels_path))
+    tomo = os.path.basename(labels_path)
+
+    with h5py.File(labels_path) as label_file:
+        labels = label_file["labels"]
+        gt = labels[anno_key][:]
+        if mask_key is not None:
+            mask = labels[mask_key][:]
+
+    with h5py.File(seg_path) as seg_file:
+        az = seg_file["AZ"][segment_key][:]
+
+    if mask_key is not None:
+        gt[mask == 0] = 0
+        az[mask == 0] = 0
+
+    # Optionally crop to bounding box
+    use_bb = False
+    if use_bb:
+        bb_slices = get_bounding_box(gt, halo=2)
+        gt = gt[bb_slices]
+        az = az[bb_slices]
+
+    scores = evaluate(gt, az)
+    return pd.DataFrame([[ds_name, tomo] + scores],
+                        columns=["dataset", "tomogram", "f1-score", "precision", "recall"])
+
+
+def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None):
+    print(f"\nEvaluating folder: {seg_path}")
+    label_files = os.listdir(labels_path)
+    seg_files = os.listdir(seg_path)
+
+    all_results = []
+
+    for seg_file in seg_files:
+        if seg_file in label_files:
+            label_fp = os.path.join(labels_path, seg_file)
+            seg_fp = os.path.join(seg_path, seg_file)
+
+            res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key)
+            all_results.append(res)
+
+    if not all_results:
+        print("No matched files found for evaluation.")
+        return
+
+    results_df = pd.concat(all_results, ignore_index=True)
+
+    # Per-folder metrics
+    folder_metrics = results_df[["f1-score", "precision", "recall"]].mean().to_dict()
+    folder_name = os.path.basename(seg_path)
+    print(f"\n Folder-Level Metrics for '{folder_name}':")
+    print(f"Precision: {folder_metrics['precision']:.4f}")
+    print(f"Recall:    {folder_metrics['recall']:.4f}")
+    print(f"F1 Score:  {folder_metrics['f1-score']:.4f}\n")
+
+    # Save results
+    result_dir = "/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results"
+    os.makedirs(result_dir, exist_ok=True)
+
+    # Per-file CSV
+    file_results_path = os.path.join(result_dir, f"evaluation_{model_name}_per_file.csv")
+    if os.path.exists(file_results_path):
+        existing_df = pd.read_csv(file_results_path)
+        results_df = pd.concat([existing_df, results_df], ignore_index=True)
+    results_df.to_csv(file_results_path, index=False)
+    print(f"Per-file results saved to {file_results_path}")
+
+    # Append to per-folder summary
+    folder_summary_path = os.path.join(result_dir, f"evaluation_{model_name}_per_folder.csv")
+    with open(folder_summary_path, "a") as f:
+        f.write(f"{folder_name},{folder_metrics['f1-score']:.4f},{folder_metrics['precision']:.4f},{folder_metrics['recall']:.4f}\n")
+    print(f"Folder summary appended to {folder_summary_path}")
+
+
+def main_f1():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-l", "--labels_path", required=True)
+    parser.add_argument("-seg", "--seg_path", required=True)
+    parser.add_argument("-n", "--model_name", required=True)
+    parser.add_argument("-sk", "--segment_key", required=True)
+    parser.add_argument("-ak", "--anno_key", required=True)
+    parser.add_argument("-m", "--mask_key")
+
+    args = parser.parse_args()
+
+    if os.path.isdir(args.seg_path):
+        evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
+    else:
+        print("Please pass a folder to get folder-level metrics.")
+
+
+if __name__ == "__main__":
+    #main() #for dice score
+    main_f1()
\ No newline at end of file
diff --git a/scripts/cooper/ground_truth/az/postprocess_az.py b/scripts/cooper/ground_truth/az/postprocess_az.py
new file mode 100644
index 0000000..1bc296e
--- /dev/null
+++ b/scripts/cooper/ground_truth/az/postprocess_az.py
@@ -0,0 +1,48 @@
+import os
+import glob
+import h5py
+import argparse
+import sys
+sys.path.append('/user/muth9/u12095/synapse-net')
+from synapse_net.ground_truth.shape_refinement import edge_filter
+
+def postprocess_file(file_path):
+    """Processes a single .h5 file, applying an edge filter and saving the membrane mask."""#
+    print(f"processing file {file_path}")
+    with h5py.File(file_path, "a") as f:
+        raw = f["raw"][:]
+        print("applying the edge filter ...")
+        hmap = edge_filter(raw, sigma=1.0, method="sato", per_slice=True, n_threads=8)
+        membrane_mask = hmap > 0.5
+        print("saving results ....")
+        try:
+            f.create_dataset("labels/membrane_mask", data=membrane_mask, compression="gzip")
+        except:
+            print(f"membrane mask aleady saved for {file_path}")    
+    print("Done!")
+
+def postprocess_folder(folder_path):
+    """Processes all .h5 files in a given folder recursively."""
+    files = sorted(glob.glob(os.path.join(folder_path, '**', '*.h5'), recursive=True))
+    print("Processing files:", files)
+    
+    for file_path in files:
+        postprocess_file(file_path)
+
+def main():
+    #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete 
+    #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples 
+    #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm 
+    #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation 
+    
+    parser = argparse.ArgumentParser(description="Postprocess .h5 files by applying edge filtering.")
+    parser.add_argument("-p", "--data_path", required=True, help="Path to the .h5 file or folder.")
+    args = parser.parse_args()
+    
+    if os.path.isdir(args.data_path):
+        postprocess_folder(args.data_path)
+    else:
+        postprocess_file(args.data_path)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py
index 8ba996c..e39183c 100644
--- a/scripts/cooper/training/evaluation.py
+++ b/scripts/cooper/training/evaluation.py
@@ -107,7 +107,23 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke
 
             evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key)
 
+def evaluate_folder_onlyGTAnnotations(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key=None):
+    print(f"Evaluating folder {vesicles_path}")
+    print(f"Using labels stored in {labels_path}")
 
+    label_files = set(os.listdir(labels_path))
+    vesicles_files = os.listdir(vesicles_path)
+    
+    for vesicle_file in vesicles_files:
+        if vesicle_file.endswith('_processed.h5'):
+            # Remove '_processed' to get the corresponding label file name
+            label_file = vesicle_file.replace('_processed', '')
+            if label_file in label_files:
+                evaluate_file(
+                    os.path.join(labels_path, label_file),
+                    os.path.join(vesicles_path, vesicle_file),
+                    model_name, segment_key, anno_key, mask_key
+                )
 
 def main():
     
@@ -123,6 +139,7 @@ def main():
     vesicles_path = args.vesicles_path
     if os.path.isdir(vesicles_path):
         evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
+        #evaluate_folder_onlyGTAnnotations(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
     else:
         evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
     

From f1466d35e1eb0cc6198c92271b1b41d2dd240136 Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Thu, 22 May 2025 19:06:05 +0200
Subject: [PATCH 5/6] add connected components and TP, FN and FP to eval AZ

---
 scripts/cooper/ground_truth/az/evaluate_az.py | 62 +++++++++++++------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py
index 60bfcd2..ac4713a 100644
--- a/scripts/cooper/ground_truth/az/evaluate_az.py
+++ b/scripts/cooper/ground_truth/az/evaluate_az.py
@@ -5,10 +5,11 @@
 import pandas as pd
 import numpy as np
 from elf.evaluation import dice_score
-
+from skimage.measure import label as connected_components_label
 from scipy.ndimage import binary_dilation, binary_closing
 from tqdm import tqdm
 from elf.evaluation import matching
+from elf.evaluation.matching import _compute_scores, _compute_tps
 
 
 def _expand_AZ(az):
@@ -91,6 +92,7 @@ def main():
     print("Total:")
     print(scores["Dice"].mean(), "+-", scores["Dice"].std())
 
+
 def get_bounding_box(mask, halo=2):
     """ Get bounding box coordinates around a mask with a halo. """
     coords = np.argwhere(mask)
@@ -110,10 +112,14 @@ def get_bounding_box(mask, halo=2):
 def evaluate(labels, seg):
     assert labels.shape == seg.shape
     stats = matching(seg, labels)
-    return [stats["f1"], stats["precision"], stats["recall"]]
+    n_true, n_matched, n_pred, scores = _compute_scores(seg, labels, criterion= "iou", ignore_label=None)
+    tp = _compute_tps(scores, n_matched, threshold= 0.5)
+    fp = n_pred - tp
+    fn = n_true - tp
+    return stats["f1"], stats["precision"], stats["recall"], tp, fp, fn
 
 
-def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None):
+def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None, cc=False):
     print(f"Evaluating: {os.path.basename(labels_path)}")
 
     ds_name = os.path.basename(os.path.dirname(labels_path))
@@ -132,6 +138,11 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None):
         gt[mask == 0] = 0
         az[mask == 0] = 0
 
+    # Optionally apply connected components
+    if cc:
+        gt = connected_components_label(gt)
+        az = connected_components_label(az)
+
     # Optionally crop to bounding box
     use_bb = False
     if use_bb:
@@ -139,12 +150,13 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None):
         gt = gt[bb_slices]
         az = az[bb_slices]
 
-    scores = evaluate(gt, az)
-    return pd.DataFrame([[ds_name, tomo] + scores],
-                        columns=["dataset", "tomogram", "f1-score", "precision", "recall"])
+    f1, precision, recall, tp, fp, fn = evaluate(gt, az)
+
+    return pd.DataFrame([[ds_name, tomo, f1, precision, recall, tp, fp, fn]],
+                        columns=["dataset", "tomogram", "f1-score", "precision", "recall", "tp", "fp", "fn"])
 
 
-def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None):
+def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None, cc=False):
     print(f"\nEvaluating folder: {seg_path}")
     label_files = os.listdir(labels_path)
     seg_files = os.listdir(seg_path)
@@ -156,7 +168,7 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma
             label_fp = os.path.join(labels_path, seg_file)
             seg_fp = os.path.join(seg_path, seg_file)
 
-            res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key)
+            res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key, cc)
             all_results.append(res)
 
     if not all_results:
@@ -165,19 +177,29 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma
 
     results_df = pd.concat(all_results, ignore_index=True)
 
-    # Per-folder metrics
-    folder_metrics = results_df[["f1-score", "precision", "recall"]].mean().to_dict()
+    # Convert TP, FP, FN to integers
+    results_df[["tp", "fp", "fn"]] = results_df[["tp", "fp", "fn"]].astype(int)
+
+    # Compute folder-level TP/FP/FN and final metrics
+    total_tp = results_df["tp"].sum()
+    total_fp = results_df["fp"].sum()
+    total_fn = results_df["fn"].sum()
+
+    precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
+    recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
+    f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+
     folder_name = os.path.basename(seg_path)
     print(f"\n Folder-Level Metrics for '{folder_name}':")
-    print(f"Precision: {folder_metrics['precision']:.4f}")
-    print(f"Recall:    {folder_metrics['recall']:.4f}")
-    print(f"F1 Score:  {folder_metrics['f1-score']:.4f}\n")
+    print(f"Precision: {precision:.4f}")
+    print(f"Recall:    {recall:.4f}")
+    print(f"F1 Score:  {f1_score:.4f}\n")
 
     # Save results
     result_dir = "/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results"
     os.makedirs(result_dir, exist_ok=True)
 
-    # Per-file CSV
+    # Per-file results
     file_results_path = os.path.join(result_dir, f"evaluation_{model_name}_per_file.csv")
     if os.path.exists(file_results_path):
         existing_df = pd.read_csv(file_results_path)
@@ -185,10 +207,13 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma
     results_df.to_csv(file_results_path, index=False)
     print(f"Per-file results saved to {file_results_path}")
 
-    # Append to per-folder summary
+    # Folder-level summary
     folder_summary_path = os.path.join(result_dir, f"evaluation_{model_name}_per_folder.csv")
+    header_needed = not os.path.exists(folder_summary_path)
     with open(folder_summary_path, "a") as f:
-        f.write(f"{folder_name},{folder_metrics['f1-score']:.4f},{folder_metrics['precision']:.4f},{folder_metrics['recall']:.4f}\n")
+        if header_needed:
+            f.write("folder,f1-score,precision,recall,tp,fp,fn\n")
+        f.write(f"{folder_name},{f1_score:.4f},{precision:.4f},{recall:.4f},{total_tp},{total_fp},{total_fn}\n")
     print(f"Folder summary appended to {folder_summary_path}")
 
 
@@ -200,15 +225,16 @@ def main_f1():
     parser.add_argument("-sk", "--segment_key", required=True)
     parser.add_argument("-ak", "--anno_key", required=True)
     parser.add_argument("-m", "--mask_key")
+    parser.add_argument("--cc", "--connected_components", dest="cc", action="store_true",
+                        help="Apply connected components to ground truth and segmentation before evaluation")
 
     args = parser.parse_args()
 
     if os.path.isdir(args.seg_path):
-        evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key)
+        evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key, args.cc)
     else:
         print("Please pass a folder to get folder-level metrics.")
 
-
 if __name__ == "__main__":
     #main() #for dice score
     main_f1()
\ No newline at end of file

From 5dde196fc006931676a8aa51067f77f2c84829d6 Mon Sep 17 00:00:00 2001
From: SarahMuth <sarahmuth9@gmail.com>
Date: Sat, 24 May 2025 10:04:30 +0200
Subject: [PATCH 6/6] testing different connected components

---
 scripts/cooper/ground_truth/az/evaluate_az.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py
index ac4713a..0716a8f 100644
--- a/scripts/cooper/ground_truth/az/evaluate_az.py
+++ b/scripts/cooper/ground_truth/az/evaluate_az.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import numpy as np
 from elf.evaluation import dice_score
-from skimage.measure import label as connected_components_label
+from torch_em.transform.label import connected_components
 from scipy.ndimage import binary_dilation, binary_closing
 from tqdm import tqdm
 from elf.evaluation import matching
@@ -140,8 +140,8 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None, c
 
     # Optionally apply connected components
     if cc:
-        gt = connected_components_label(gt)
-        az = connected_components_label(az)
+        gt = connected_components(gt)
+        az = connected_components(az)
 
     # Optionally crop to bounding box
     use_bb = False