From 39a8493bb14029dd17361df653828ca27b97f969 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 23 Dec 2024 10:35:45 +0100 Subject: [PATCH 1/6] 1st implementation of semisupervised DA --- .../semisupervised_domainAdaptation.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 scripts/portal/training/semisupervised_domainAdaptation.py diff --git a/scripts/portal/training/semisupervised_domainAdaptation.py b/scripts/portal/training/semisupervised_domainAdaptation.py new file mode 100644 index 0000000..d71cedf --- /dev/null +++ b/scripts/portal/training/semisupervised_domainAdaptation.py @@ -0,0 +1,118 @@ +import os +import argparse +from glob import glob +import json +from sklearn.model_selection import train_test_split +import sys +sys.path.append('/user/muth9/u12095/synapse-net') +from synapse_net.training.domain_adaptation import mean_teacher_adaptation + +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/training/semisupervisedDA_cryo" + +def _require_train_val_test_split(datasets, train_root, extension = "mrc"): + train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1 + + def _train_val_test_split(names): + train, test = train_test_split(names, test_size=1 - train_ratio, shuffle=True) + _ratio = test_ratio / (test_ratio + val_ratio) + val, test = train_test_split(test, test_size=_ratio) + return train, val, test + + for ds in datasets: + print(f"Processing dataset: {ds}") + split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json") + if os.path.exists(split_path): + print(f"Split file already exists: {split_path}") + continue + + file_paths = sorted(glob(os.path.join(train_root, ds, f"*.{extension}"))) + file_names = [os.path.basename(path) for path in file_paths] + + train, val, test = _train_val_test_split(file_names) + + with open(split_path, "w") as f: + json.dump({"train": train, "val": val, "test": test}, f) + +def _require_train_val_split(datasets, train_root, extension = "mrc"): + train_ratio, val_ratio = 0.8, 0.2 + + def _train_val_split(names): + train, val = train_test_split(names, test_size=1 - train_ratio, shuffle=True) + return train, val + + for ds in datasets: + print(f"Processing dataset: {ds}") + split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json") + if os.path.exists(split_path): + print(f"Split file already exists: {split_path}") + continue + + file_paths = sorted(glob(os.path.join(train_root, ds, f"*.{extension}"))) + file_names = [os.path.basename(path) for path in file_paths] + + train, val = _train_val_split(file_names) + + with open(split_path, "w") as f: + json.dump({"train": train, "val": val}, f) + +def get_paths(split, datasets, train_root, testset=True, extension = "mrc"): + if testset: + _require_train_val_test_split(datasets, train_root, extension) + else: + _require_train_val_split(datasets, train_root, extension) + + paths = [] + for ds in datasets: + split_path = os.path.join(OUTPUT_ROOT, f"split-{ds}.json") + with open(split_path) as f: + names = json.load(f)[split] + ds_paths = [os.path.join(train_root, ds, name) for name in names] + assert all(os.path.exists(path) for path in ds_paths), f"Some paths do not exist in {ds_paths}" + paths.extend(ds_paths) + + return paths + +def vesicle_domain_adaptation(teacher_model, testset=True): + # Adjustable parameters + patch_shape = [48, 256, 256] + model_name = "vesicle-semisupervisedDA-cryo-v1" + model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" + checkpoint_path = os.path.join(model_root, teacher_model) + + unsupervised_train_root = "/mnt/lustre-emmy-hdd/usr/u12095/cryo-et" + supervised_train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/cryoVesNet" + + unsupervised_datasets = ["from_portal"] + unsupervised_train_paths = get_paths("train", datasets=unsupervised_datasets, train_root=unsupervised_train_root, testset=testset) + unsupervised_val_paths = get_paths("val", datasets=unsupervised_datasets, train_root=unsupervised_train_root, testset=testset) + + supervised_datasets = ["exported"] + supervised_train_paths = get_paths("train", datasets=supervised_datasets, train_root=supervised_train_root, testset=testset, extension = "h5") + supervised_val_paths = get_paths("val", datasets=supervised_datasets, train_root=supervised_train_root, testset=testset, extension = "h5") + + mean_teacher_adaptation( + name=model_name, + unsupervised_train_paths=unsupervised_train_paths, + unsupervised_val_paths=unsupervised_val_paths, + raw_key="data", + supervised_train_paths=supervised_train_paths, + supervised_val_paths=supervised_val_paths, + raw_key_supervised = "raw", + label_key="/labels/vesicles", + patch_shape=patch_shape, + save_root="/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/models/DA", + source_checkpoint=checkpoint_path, + confidence_threshold=0.75, + n_iterations=int(1e3), + ) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--teacher_model", required=True, help="Name of teacher model") + parser.add_argument("-t", "--testset", action="store_false", help="Set to False if no testset should be created") + args = parser.parse_args() + + vesicle_domain_adaptation(args.teacher_model, args.testset) + +if __name__ == "__main__": + main() From 7ce27d803de6b00cdefb31cdfc5d93d1475b7c52 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 24 Feb 2025 16:39:43 +0100 Subject: [PATCH 2/6] add evaluation stuff for revision --- .gitignore | 1 + scripts/cooper/training/evaluation.py | 33 ++++++++++++-------- scripts/rizzoli/evaluation_2D.py | 45 +++++++++++++++++++++------ 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 262d4d7..e5a747b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py synapse-net-models/ +scripts/portal/upscale_tomo.py diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py index d7aaf6e..55bac16 100644 --- a/scripts/cooper/training/evaluation.py +++ b/scripts/cooper/training/evaluation.py @@ -4,24 +4,25 @@ import h5py import pandas as pd -from elf.evaluation import matching +from elf.evaluation import matching, symmetric_best_dice_score def evaluate(labels, vesicles): assert labels.shape == vesicles.shape stats = matching(vesicles, labels) - return [stats["f1"], stats["precision"], stats["recall"]] + sbd = symmetric_best_dice_score(vesicles, labels) + return [stats["f1"], stats["precision"], stats["recall"], sbd] def summarize_eval(results): - summary = results[["dataset", "f1-score", "precision", "recall"]].groupby("dataset").mean().reset_index("dataset") - total = results[["f1-score", "precision", "recall"]].mean().values.tolist() + summary = results[["dataset", "f1-score", "precision", "recall", "SBD score"]].groupby("dataset").mean().reset_index("dataset") + total = results[["f1-score", "precision", "recall", "SBD score"]].mean().values.tolist() summary.iloc[-1] = ["all"] + total table = summary.to_markdown(index=False) print(table) -def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluate labels {labels_path} and vesicles {vesicles_path}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -33,16 +34,21 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) #vesicles = labels["vesicles"] gt = labels[anno_key][:] + if mask_key is not None: + mask = labels[mask_key][:] + with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] - - #evaluate the match of ground truth and vesicles + if mask_key is not None: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 + #evaluate the match of ground truth and vesicles scores = evaluate(gt, vesicles) #store results - result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" + result_folder ="/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) result_path=os.path.join(result_folder, f"evaluation_{model_name}.csv") print("Evaluation results are saved to:", result_path) @@ -53,7 +59,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) results = None res = pd.DataFrame( - [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall"] + [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall", "SBD score"] ) if results is None: results = res @@ -65,7 +71,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) summarize_eval(results) -def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluating folder {vesicles_path}") print(f"Using labels stored in {labels_path}") @@ -75,7 +81,7 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke for vesicle_file in vesicles_files: if vesicle_file in label_files: - evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key) + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key) @@ -87,13 +93,14 @@ def main(): parser.add_argument("-n", "--model_name", required=True) parser.add_argument("-sk", "--segment_key", required=True) parser.add_argument("-ak", "--anno_key", required=True) + parser.add_argument("-m", "--mask_key") args = parser.parse_args() vesicles_path = args.vesicles_path if os.path.isdir(vesicles_path): - evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) else: - evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 1cae666..8621490 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -5,20 +5,27 @@ import pandas as pd import numpy as np -from elf.evaluation import matching - +from elf.evaluation import matching, symmetric_best_dice_score +from skimage.transform import rescale +def transpose_tomo(tomogram): + data0 = np.swapaxes(tomogram, 0, -1) + data1 = np.fliplr(data0) + transposed_data = np.swapaxes(data1, 0, -1) + return transposed_data def evaluate(labels, vesicles): assert labels.shape == vesicles.shape stats = matching(vesicles, labels) - return [stats["f1"], stats["precision"], stats["recall"]] + sbd = symmetric_best_dice_score(vesicles, labels) + return [stats["f1"], stats["precision"], stats["recall"], sbd] def evaluate_slices(gt, vesicles): """Evaluate 2D model performance for each z-slice of the 3D volume.""" f1_scores = [] precision_scores = [] recall_scores = [] + sbd_scores = [] # Iterate through each slice along the z-axis for z in range(gt.shape[0]): @@ -27,24 +34,27 @@ def evaluate_slices(gt, vesicles): vesicles_slice = vesicles[z, :, :] # Evaluate the performance for the current slice - f1, precision, recall = evaluate(gt_slice, vesicles_slice) + f1, precision, recall, sbd = evaluate(gt_slice, vesicles_slice) f1_scores.append(f1) precision_scores.append(precision) recall_scores.append(recall) + sbd_scores.append(sbd) print(f"f1 scores to be averaged {f1_scores}") + print(f"sbd scores to be averaged {sbd_scores}") # Calculate the mean for each metric mean_f1 = np.mean(f1_scores) mean_precision = np.mean(precision_scores) mean_recall = np.mean(recall_scores) + mead_sbd = np.mean(sbd_scores) - return [mean_f1, mean_precision, mean_recall] + return [mean_f1, mean_precision, mean_recall, mead_sbd] def summarize_eval(results): - summary = results[["dataset", "f1-score", "precision", "recall"]].groupby("dataset").mean().reset_index("dataset") - total = results[["f1-score", "precision", "recall"]].mean().values.tolist() + summary = results[["dataset", "f1-score", "precision", "recall", "SBD score"]].groupby("dataset").mean().reset_index("dataset") + total = results[["f1-score", "precision", "recall", "SBD score"]].mean().values.tolist() summary.iloc[-1] = ["all"] + total table = summary.to_markdown(index=False) print(table) @@ -55,22 +65,37 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) + use_mask=True #set to true for eg maus data + #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] #vesicles = labels["vesicles"] gt = labels[anno_key][:] + gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + gt = transpose_tomo(gt) + + if use_mask: + mask = labels["mask"][:] + mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype) + mask = transpose_tomo(mask) with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] + if use_mask: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 #evaluate the match of ground truth and vesicles - scores = evaluate_slices(gt, vesicles) + if len(vesicles.shape) == 3: + scores = evaluate_slices(gt, vesicles) + else: + scores = evaluate(gt,vesicles) #store results - result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" + result_folder ="/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) result_path=os.path.join(result_folder, f"2Devaluation_{model_name}.csv") print("Evaluation results are saved to:", result_path) @@ -81,7 +106,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) results = None res = pd.DataFrame( - [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall"] + [[ds_name, tomo] + scores], columns=["dataset", "tomogram", "f1-score", "precision", "recall", "SBD score"] ) if results is None: results = res From a7a59a1b2ee97a1533d25946bb45fd7a7771476a Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 13 Mar 2025 11:52:01 +0100 Subject: [PATCH 3/6] add bb option for eval --- scripts/cooper/training/evaluation.py | 26 +++++++++++++++++++++++++- scripts/rizzoli/evaluation_2D.py | 11 ++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py index 55bac16..8ba996c 100644 --- a/scripts/cooper/training/evaluation.py +++ b/scripts/cooper/training/evaluation.py @@ -3,10 +3,24 @@ import h5py import pandas as pd +import numpy as np from elf.evaluation import matching, symmetric_best_dice_score - +def get_bounding_box(mask, halo=2): + """ Get bounding box coordinates around a mask with a halo.""" + coords = np.argwhere(mask) + if coords.size == 0: + return None # No labels found + + min_coords = coords.min(axis=0) + max_coords = coords.max(axis=0) + + min_coords = np.maximum(min_coords - halo, 0) + max_coords = np.minimum(max_coords + halo, mask.shape) + + slices = tuple(slice(min_c, max_c) for min_c, max_c in zip(min_coords, max_coords)) + return slices def evaluate(labels, vesicles): assert labels.shape == vesicles.shape @@ -44,6 +58,16 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key, if mask_key is not None: gt[mask == 0] = 0 vesicles[mask == 0] = 0 + + bb= False + if bb: + # Get bounding box and crop + bb_slices = get_bounding_box(gt, halo=2) + gt = gt[bb_slices] + vesicles = vesicles[bb_slices] + else: + print("not using bb") + #evaluate the match of ground truth and vesicles scores = evaluate(gt, vesicles) diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 8621490..e999203 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -65,17 +65,18 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) - use_mask=True #set to true for eg maus data + maus=False #set to true for eg maus data #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] #vesicles = labels["vesicles"] gt = labels[anno_key][:] - gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) - gt = transpose_tomo(gt) + if maus: + gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + gt = transpose_tomo(gt) - if use_mask: + if maus: mask = labels["mask"][:] mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype) mask = transpose_tomo(mask) @@ -84,7 +85,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] - if use_mask: + if maus: gt[mask == 0] = 0 vesicles[mask == 0] = 0 From 17d26dff951683d5037c1fbb0e0858e9a6290d18 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 22 May 2025 10:37:29 +0200 Subject: [PATCH 4/6] everything after 1st revision relating to evaluation --- scripts/cooper/ground_truth/az/evaluate_az.py | 124 +++++++++++++++++- .../cooper/ground_truth/az/postprocess_az.py | 48 +++++++ scripts/cooper/training/evaluation.py | 17 +++ 3 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 scripts/cooper/ground_truth/az/postprocess_az.py diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py index 63ca765..60bfcd2 100644 --- a/scripts/cooper/ground_truth/az/evaluate_az.py +++ b/scripts/cooper/ground_truth/az/evaluate_az.py @@ -1,11 +1,14 @@ import os +import argparse import h5py import pandas as pd +import numpy as np from elf.evaluation import dice_score from scipy.ndimage import binary_dilation, binary_closing from tqdm import tqdm +from elf.evaluation import matching def _expand_AZ(az): @@ -88,5 +91,124 @@ def main(): print("Total:") print(scores["Dice"].mean(), "+-", scores["Dice"].std()) +def get_bounding_box(mask, halo=2): + """ Get bounding box coordinates around a mask with a halo. """ + coords = np.argwhere(mask) + if coords.size == 0: + return None # No labels found -main() + min_coords = coords.min(axis=0) + max_coords = coords.max(axis=0) + + min_coords = np.maximum(min_coords - halo, 0) + max_coords = np.minimum(max_coords + halo, mask.shape) + + slices = tuple(slice(min_c, max_c) for min_c, max_c in zip(min_coords, max_coords)) + return slices + + +def evaluate(labels, seg): + assert labels.shape == seg.shape + stats = matching(seg, labels) + return [stats["f1"], stats["precision"], stats["recall"]] + + +def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None): + print(f"Evaluating: {os.path.basename(labels_path)}") + + ds_name = os.path.basename(os.path.dirname(labels_path)) + tomo = os.path.basename(labels_path) + + with h5py.File(labels_path) as label_file: + labels = label_file["labels"] + gt = labels[anno_key][:] + if mask_key is not None: + mask = labels[mask_key][:] + + with h5py.File(seg_path) as seg_file: + az = seg_file["AZ"][segment_key][:] + + if mask_key is not None: + gt[mask == 0] = 0 + az[mask == 0] = 0 + + # Optionally crop to bounding box + use_bb = False + if use_bb: + bb_slices = get_bounding_box(gt, halo=2) + gt = gt[bb_slices] + az = az[bb_slices] + + scores = evaluate(gt, az) + return pd.DataFrame([[ds_name, tomo] + scores], + columns=["dataset", "tomogram", "f1-score", "precision", "recall"]) + + +def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None): + print(f"\nEvaluating folder: {seg_path}") + label_files = os.listdir(labels_path) + seg_files = os.listdir(seg_path) + + all_results = [] + + for seg_file in seg_files: + if seg_file in label_files: + label_fp = os.path.join(labels_path, seg_file) + seg_fp = os.path.join(seg_path, seg_file) + + res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key) + all_results.append(res) + + if not all_results: + print("No matched files found for evaluation.") + return + + results_df = pd.concat(all_results, ignore_index=True) + + # Per-folder metrics + folder_metrics = results_df[["f1-score", "precision", "recall"]].mean().to_dict() + folder_name = os.path.basename(seg_path) + print(f"\n Folder-Level Metrics for '{folder_name}':") + print(f"Precision: {folder_metrics['precision']:.4f}") + print(f"Recall: {folder_metrics['recall']:.4f}") + print(f"F1 Score: {folder_metrics['f1-score']:.4f}\n") + + # Save results + result_dir = "/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results" + os.makedirs(result_dir, exist_ok=True) + + # Per-file CSV + file_results_path = os.path.join(result_dir, f"evaluation_{model_name}_per_file.csv") + if os.path.exists(file_results_path): + existing_df = pd.read_csv(file_results_path) + results_df = pd.concat([existing_df, results_df], ignore_index=True) + results_df.to_csv(file_results_path, index=False) + print(f"Per-file results saved to {file_results_path}") + + # Append to per-folder summary + folder_summary_path = os.path.join(result_dir, f"evaluation_{model_name}_per_folder.csv") + with open(folder_summary_path, "a") as f: + f.write(f"{folder_name},{folder_metrics['f1-score']:.4f},{folder_metrics['precision']:.4f},{folder_metrics['recall']:.4f}\n") + print(f"Folder summary appended to {folder_summary_path}") + + +def main_f1(): + parser = argparse.ArgumentParser() + parser.add_argument("-l", "--labels_path", required=True) + parser.add_argument("-seg", "--seg_path", required=True) + parser.add_argument("-n", "--model_name", required=True) + parser.add_argument("-sk", "--segment_key", required=True) + parser.add_argument("-ak", "--anno_key", required=True) + parser.add_argument("-m", "--mask_key") + + args = parser.parse_args() + + if os.path.isdir(args.seg_path): + evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) + else: + print("Please pass a folder to get folder-level metrics.") + + +if __name__ == "__main__": + #main() #for dice score + main_f1() \ No newline at end of file diff --git a/scripts/cooper/ground_truth/az/postprocess_az.py b/scripts/cooper/ground_truth/az/postprocess_az.py new file mode 100644 index 0000000..1bc296e --- /dev/null +++ b/scripts/cooper/ground_truth/az/postprocess_az.py @@ -0,0 +1,48 @@ +import os +import glob +import h5py +import argparse +import sys +sys.path.append('/user/muth9/u12095/synapse-net') +from synapse_net.ground_truth.shape_refinement import edge_filter + +def postprocess_file(file_path): + """Processes a single .h5 file, applying an edge filter and saving the membrane mask."""# + print(f"processing file {file_path}") + with h5py.File(file_path, "a") as f: + raw = f["raw"][:] + print("applying the edge filter ...") + hmap = edge_filter(raw, sigma=1.0, method="sato", per_slice=True, n_threads=8) + membrane_mask = hmap > 0.5 + print("saving results ....") + try: + f.create_dataset("labels/membrane_mask", data=membrane_mask, compression="gzip") + except: + print(f"membrane mask aleady saved for {file_path}") + print("Done!") + +def postprocess_folder(folder_path): + """Processes all .h5 files in a given folder recursively.""" + files = sorted(glob.glob(os.path.join(folder_path, '**', '*.h5'), recursive=True)) + print("Processing files:", files) + + for file_path in files: + postprocess_file(file_path) + +def main(): + #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete + #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples + #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm + #/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation + + parser = argparse.ArgumentParser(description="Postprocess .h5 files by applying edge filtering.") + parser.add_argument("-p", "--data_path", required=True, help="Path to the .h5 file or folder.") + args = parser.parse_args() + + if os.path.isdir(args.data_path): + postprocess_folder(args.data_path) + else: + postprocess_file(args.data_path) + +if __name__ == "__main__": + main() diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py index 8ba996c..e39183c 100644 --- a/scripts/cooper/training/evaluation.py +++ b/scripts/cooper/training/evaluation.py @@ -107,7 +107,23 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key) +def evaluate_folder_onlyGTAnnotations(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key=None): + print(f"Evaluating folder {vesicles_path}") + print(f"Using labels stored in {labels_path}") + label_files = set(os.listdir(labels_path)) + vesicles_files = os.listdir(vesicles_path) + + for vesicle_file in vesicles_files: + if vesicle_file.endswith('_processed.h5'): + # Remove '_processed' to get the corresponding label file name + label_file = vesicle_file.replace('_processed', '') + if label_file in label_files: + evaluate_file( + os.path.join(labels_path, label_file), + os.path.join(vesicles_path, vesicle_file), + model_name, segment_key, anno_key, mask_key + ) def main(): @@ -123,6 +139,7 @@ def main(): vesicles_path = args.vesicles_path if os.path.isdir(vesicles_path): evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) + #evaluate_folder_onlyGTAnnotations(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) else: evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) From f1466d35e1eb0cc6198c92271b1b41d2dd240136 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 22 May 2025 19:06:05 +0200 Subject: [PATCH 5/6] add connected components and TP, FN and FP to eval AZ --- scripts/cooper/ground_truth/az/evaluate_az.py | 62 +++++++++++++------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py index 60bfcd2..ac4713a 100644 --- a/scripts/cooper/ground_truth/az/evaluate_az.py +++ b/scripts/cooper/ground_truth/az/evaluate_az.py @@ -5,10 +5,11 @@ import pandas as pd import numpy as np from elf.evaluation import dice_score - +from skimage.measure import label as connected_components_label from scipy.ndimage import binary_dilation, binary_closing from tqdm import tqdm from elf.evaluation import matching +from elf.evaluation.matching import _compute_scores, _compute_tps def _expand_AZ(az): @@ -91,6 +92,7 @@ def main(): print("Total:") print(scores["Dice"].mean(), "+-", scores["Dice"].std()) + def get_bounding_box(mask, halo=2): """ Get bounding box coordinates around a mask with a halo. """ coords = np.argwhere(mask) @@ -110,10 +112,14 @@ def get_bounding_box(mask, halo=2): def evaluate(labels, seg): assert labels.shape == seg.shape stats = matching(seg, labels) - return [stats["f1"], stats["precision"], stats["recall"]] + n_true, n_matched, n_pred, scores = _compute_scores(seg, labels, criterion= "iou", ignore_label=None) + tp = _compute_tps(scores, n_matched, threshold= 0.5) + fp = n_pred - tp + fn = n_true - tp + return stats["f1"], stats["precision"], stats["recall"], tp, fp, fn -def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None): +def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None, cc=False): print(f"Evaluating: {os.path.basename(labels_path)}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -132,6 +138,11 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None): gt[mask == 0] = 0 az[mask == 0] = 0 + # Optionally apply connected components + if cc: + gt = connected_components_label(gt) + az = connected_components_label(az) + # Optionally crop to bounding box use_bb = False if use_bb: @@ -139,12 +150,13 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None): gt = gt[bb_slices] az = az[bb_slices] - scores = evaluate(gt, az) - return pd.DataFrame([[ds_name, tomo] + scores], - columns=["dataset", "tomogram", "f1-score", "precision", "recall"]) + f1, precision, recall, tp, fp, fn = evaluate(gt, az) + + return pd.DataFrame([[ds_name, tomo, f1, precision, recall, tp, fp, fn]], + columns=["dataset", "tomogram", "f1-score", "precision", "recall", "tp", "fp", "fn"]) -def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None): +def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, mask_key=None, cc=False): print(f"\nEvaluating folder: {seg_path}") label_files = os.listdir(labels_path) seg_files = os.listdir(seg_path) @@ -156,7 +168,7 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma label_fp = os.path.join(labels_path, seg_file) seg_fp = os.path.join(seg_path, seg_file) - res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key) + res = evaluate_file(label_fp, seg_fp, segment_key, anno_key, mask_key, cc) all_results.append(res) if not all_results: @@ -165,19 +177,29 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma results_df = pd.concat(all_results, ignore_index=True) - # Per-folder metrics - folder_metrics = results_df[["f1-score", "precision", "recall"]].mean().to_dict() + # Convert TP, FP, FN to integers + results_df[["tp", "fp", "fn"]] = results_df[["tp", "fp", "fn"]].astype(int) + + # Compute folder-level TP/FP/FN and final metrics + total_tp = results_df["tp"].sum() + total_fp = results_df["fp"].sum() + total_fn = results_df["fn"].sum() + + precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0 + recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0 + f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0 + folder_name = os.path.basename(seg_path) print(f"\n Folder-Level Metrics for '{folder_name}':") - print(f"Precision: {folder_metrics['precision']:.4f}") - print(f"Recall: {folder_metrics['recall']:.4f}") - print(f"F1 Score: {folder_metrics['f1-score']:.4f}\n") + print(f"Precision: {precision:.4f}") + print(f"Recall: {recall:.4f}") + print(f"F1 Score: {f1_score:.4f}\n") # Save results result_dir = "/user/muth9/u12095/synapse-net/scripts/cooper/evaluation_results" os.makedirs(result_dir, exist_ok=True) - # Per-file CSV + # Per-file results file_results_path = os.path.join(result_dir, f"evaluation_{model_name}_per_file.csv") if os.path.exists(file_results_path): existing_df = pd.read_csv(file_results_path) @@ -185,10 +207,13 @@ def evaluate_folder(labels_path, seg_path, model_name, segment_key, anno_key, ma results_df.to_csv(file_results_path, index=False) print(f"Per-file results saved to {file_results_path}") - # Append to per-folder summary + # Folder-level summary folder_summary_path = os.path.join(result_dir, f"evaluation_{model_name}_per_folder.csv") + header_needed = not os.path.exists(folder_summary_path) with open(folder_summary_path, "a") as f: - f.write(f"{folder_name},{folder_metrics['f1-score']:.4f},{folder_metrics['precision']:.4f},{folder_metrics['recall']:.4f}\n") + if header_needed: + f.write("folder,f1-score,precision,recall,tp,fp,fn\n") + f.write(f"{folder_name},{f1_score:.4f},{precision:.4f},{recall:.4f},{total_tp},{total_fp},{total_fn}\n") print(f"Folder summary appended to {folder_summary_path}") @@ -200,15 +225,16 @@ def main_f1(): parser.add_argument("-sk", "--segment_key", required=True) parser.add_argument("-ak", "--anno_key", required=True) parser.add_argument("-m", "--mask_key") + parser.add_argument("--cc", "--connected_components", dest="cc", action="store_true", + help="Apply connected components to ground truth and segmentation before evaluation") args = parser.parse_args() if os.path.isdir(args.seg_path): - evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) + evaluate_folder(args.labels_path, args.seg_path, args.model_name, args.segment_key, args.anno_key, args.mask_key, args.cc) else: print("Please pass a folder to get folder-level metrics.") - if __name__ == "__main__": #main() #for dice score main_f1() \ No newline at end of file From 5dde196fc006931676a8aa51067f77f2c84829d6 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 24 May 2025 10:04:30 +0200 Subject: [PATCH 6/6] testing different connected components --- scripts/cooper/ground_truth/az/evaluate_az.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cooper/ground_truth/az/evaluate_az.py b/scripts/cooper/ground_truth/az/evaluate_az.py index ac4713a..0716a8f 100644 --- a/scripts/cooper/ground_truth/az/evaluate_az.py +++ b/scripts/cooper/ground_truth/az/evaluate_az.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np from elf.evaluation import dice_score -from skimage.measure import label as connected_components_label +from torch_em.transform.label import connected_components from scipy.ndimage import binary_dilation, binary_closing from tqdm import tqdm from elf.evaluation import matching @@ -140,8 +140,8 @@ def evaluate_file(labels_path, seg_path, segment_key, anno_key, mask_key=None, c # Optionally apply connected components if cc: - gt = connected_components_label(gt) - az = connected_components_label(az) + gt = connected_components(gt) + az = connected_components(az) # Optionally crop to bounding box use_bb = False