
Commit aad578c

Update compartment model training
Parent: 6011472

File tree: 4 files changed, +56 -27 lines changed


scripts/cooper/ground_truth/compartments/run_prediction_04.py

Lines changed: 24 additions & 9 deletions

@@ -8,8 +8,13 @@
 from synaptic_reconstruction.inference.compartments import segment_compartments

 INPUT_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval"  # noqa
-MODEL_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/compartment_models/compartment_model_3d.pt"
-OUTPUT = "./predictions"
+# MODEL_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/compartment_models/compartment_model_3d.pt"
+MODEL_PATH = "/user/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/cooper/training/checkpoints/compartment_model_3d/v2"  # noqa
+OUTPUT = "/mnt/lustre-emmy-hdd/projects/nim00007/compartment_predictions"
+
+
+def label_transform_3d():
+    pass


 def segment_volume(input_path, model_path):
@@ -20,29 +25,39 @@ def segment_volume(input_path, model_path):
     scaler = _Scaler(scale, verbose=False)
     raw = scaler.scale_input(raw)

-    n_slices_exclude = 4
-    seg = segment_compartments(raw, model_path, verbose=False, n_slices_exclude=n_slices_exclude)
-    raw, seg = raw[n_slices_exclude:-n_slices_exclude], seg[n_slices_exclude:-n_slices_exclude]
+    n_slices_exclude = 2
+    seg, pred = segment_compartments(
+        raw, model_path, verbose=False, n_slices_exclude=n_slices_exclude, return_predictions=True
+    )
+    # raw, seg = raw[n_slices_exclude:-n_slices_exclude], seg[n_slices_exclude:-n_slices_exclude]

-    return raw, seg
+    return raw, seg, pred


 def main():
     inputs = sorted(glob(os.path.join(INPUT_ROOT, "**/*.h5"), recursive=True))
-    for input_path in tqdm(inputs):
+    inputs = [inp for inp in inputs if "cropped_for_2D" not in inp]
+
+    for input_path in tqdm(inputs, desc="Run prediction for 04."):
         ds_name, fname = os.path.split(input_path)
         ds_name = os.path.split(ds_name)[1]
-        output_folder = os.path.join(OUTPUT, ds_name)
+        output_folder = os.path.join(OUTPUT, "segmentation", ds_name)
         output_path = os.path.join(output_folder, fname)

         if os.path.exists(output_path):
             continue

-        raw, seg = segment_volume(input_path, MODEL_PATH)
+        pred_folder = os.path.join(OUTPUT, "prediction", ds_name)
+        os.makedirs(pred_folder, exist_ok=True)
+        pred_path = os.path.join(pred_folder, fname)
+
+        raw, seg, pred = segment_volume(input_path, MODEL_PATH)
         os.makedirs(output_folder, exist_ok=True)
         with h5py.File(output_path, "a") as f:
             f.create_dataset("raw", data=raw, compression="gzip")
             f.create_dataset("labels/compartments", data=seg, compression="gzip")
+        with h5py.File(pred_path, "a") as f:
+            f.create_dataset("prediction", data=pred, compression="gzip")


 if __name__ == "__main__":
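
Note on the output layout (not part of the commit itself): the script now writes segmentations and network predictions to parallel "segmentation" and "prediction" folder trees under OUTPUT. A minimal sketch for reading the results back; the dataset and file names below are hypothetical placeholders, only the folder layout and HDF5 keys come from the script above.

import os
import h5py

OUTPUT = "/mnt/lustre-emmy-hdd/projects/nim00007/compartment_predictions"
ds_name, fname = "some_dataset", "some_tomogram.h5"  # hypothetical example names

seg_path = os.path.join(OUTPUT, "segmentation", ds_name, fname)
pred_path = os.path.join(OUTPUT, "prediction", ds_name, fname)

# Raw data and compartment segmentation are stored together ...
with h5py.File(seg_path, "r") as f:
    print("raw:", f["raw"].shape, "segmentation:", f["labels/compartments"].shape)

# ... while the network prediction lives in the parallel prediction tree.
with h5py.File(pred_path, "r") as f:
    print("prediction:", f["prediction"].shape)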

scripts/cooper/training/train_compartments.py

Lines changed: 28 additions & 15 deletions

@@ -2,13 +2,14 @@
 from glob import glob

 import numpy as np
-from sklearn.model_selection import train_test_split
+import torch_em

+from sklearn.model_selection import train_test_split
 from skimage import img_as_ubyte
 from skimage.segmentation import find_boundaries
 from skimage.filters import gaussian, rank
 from skimage.morphology import disk
-from scipy.ndimage import binary_dilation
+from scipy.ndimage import binary_dilation, distance_transform_edt

 from synaptic_reconstruction.training import supervised_training

@@ -23,19 +24,26 @@ def get_paths_2d():

 def get_paths_3d():
     paths = sorted(glob(os.path.join(TRAIN_ROOT, "v2", "**", "*.h5"), recursive=True))
+    paths += sorted(glob(os.path.join(TRAIN_ROOT, "v3", "**", "*.h5"), recursive=True))
     return paths


 def label_transform_2d(seg):
-    boundaries = find_boundaries(seg).astype("float32")
-    boundaries = gaussian(boundaries, sigma=1.0)
+    boundaries = find_boundaries(seg)
+    distances = distance_transform_edt(~seg).astype("float32")
+    distances /= distances.max()
+
+    boundaries = gaussian(boundaries.astype("float32"), sigma=1.0)
     boundaries = rank.autolevel(img_as_ubyte(boundaries), disk(8)).astype("float") / 255
-    mask = binary_dilation(seg != 0, iterations=8)
-    return np.stack([boundaries, mask])
+
+    distance_mask = seg != 0
+    boundary_mask = binary_dilation(distance_mask, iterations=8)
+
+    return np.stack([boundaries, distances, boundary_mask, distance_mask])


 def label_transform_3d(seg):
-    output = np.zeros((2,) + seg.shape, dtype="float32")
+    output = np.zeros((4,) + seg.shape, dtype="float32")
     for z in range(seg.shape[0]):
         out = label_transform_2d(seg[z])
         output[:, z] = out
@@ -70,18 +78,21 @@ def train_compartments_2d_v1():
     )


-def train_compartments_3d_v1():
+def train_compartments_3d_v2():
     paths = get_paths_3d()
-    train_paths, val_paths = train_test_split(paths, test_size=0.15, random_state=42)
+    train_paths, val_paths = train_test_split(paths, test_size=0.10, random_state=42)
+    print("Number of train paths:", len(train_paths))
+    print("Number of val paths:", len(val_paths))

     patch_shape = (64, 384, 384)
     batch_size = 1

     check = False
+    sampler = torch_em.data.sampler.MinInstanceSampler(min_num_instances=2)

     save_root = "."
     supervised_training(
-        name="compartment_model_3d/v1",
+        name="compartment_model_3d/v2",
         train_paths=train_paths,
         val_paths=val_paths,
         label_key="/labels/compartments",
@@ -90,16 +101,18 @@ def train_compartments_3d_v1():
         save_root=save_root,
         label_transform=label_transform_3d,
         mask_channel=True,
-        n_samples_train=100,
-        n_samples_val=10,
-        n_iterations=int(2e4),
-        out_channels=1,
+        n_samples_train=250,
+        n_samples_val=25,
+        n_iterations=int(5e4),
+        out_channels=2,
+        sampler=sampler,
+        num_workers=8,
     )


 def main():
     # train_compartments_2d_v1()
-    train_compartments_3d_v1()
+    train_compartments_3d_v2()


 if __name__ == "__main__":
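
For reference, a standalone sketch (not taken from the repository) of the four-channel target that the updated label_transform_2d produces per slice. It mirrors the steps above but omits the rank.autolevel smoothing and assumes that label 0 is background.

import numpy as np
from scipy.ndimage import binary_dilation, distance_transform_edt
from skimage.filters import gaussian
from skimage.segmentation import find_boundaries

# Toy 2d segmentation with two compartment instances.
seg = np.zeros((64, 64), dtype="uint32")
seg[8:24, 8:24] = 1
seg[40:56, 40:56] = 2

# Channel 1: smoothed boundary target.
boundaries = gaussian(find_boundaries(seg).astype("float32"), sigma=1.0)

# Channel 2: distance to the nearest compartment, normalized to [0, 1].
distances = distance_transform_edt(seg == 0).astype("float32")
distances /= distances.max()

# Channels 3 and 4: dilated boundary mask and foreground (distance) mask.
distance_mask = seg != 0
boundary_mask = binary_dilation(distance_mask, iterations=8)

target = np.stack([boundaries, distances, boundary_mask, distance_mask])
print(target.shape)  # (4, 64, 64)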

synaptic_reconstruction/inference/compartments.py

Lines changed: 2 additions & 2 deletions

@@ -143,13 +143,13 @@ def segment_compartments(
     scaler = _Scaler(scale, verbose)
     input_volume = scaler.scale_input(input_volume)

-    # Run prediction.
+    # Run prediction. Support models with a single or multiple channels,
+    # assuming that the first channel is the boundary prediction.
     pred = get_prediction(input_volume, tiling=tiling, model_path=model_path, model=model, verbose=verbose)

     # Remove channel axis if necessary.
     if pred.ndim != input_volume.ndim:
         assert pred.ndim == input_volume.ndim + 1
-        assert pred.shape[0] == 1
         pred = pred[0]

     # Run the compartment segmentation.
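
With the shape assertion removed, multi-channel predictions are accepted here; only the first channel is passed on to the compartment segmentation. A minimal sketch of that channel handling (not the library code itself):

import numpy as np


def select_boundary_channel(pred, input_ndim):
    # Single-channel models already return a volume matching the input dimensionality.
    if pred.ndim == input_ndim:
        return pred
    # Multi-channel models return (channels, z, y, x); the first channel is assumed
    # to hold the boundary prediction, any additional channels are ignored here.
    assert pred.ndim == input_ndim + 1
    return pred[0]


pred = np.random.rand(2, 8, 64, 64).astype("float32")  # e.g. boundary + distance channel
boundaries = select_boundary_channel(pred, input_ndim=3)
print(boundaries.shape)  # (8, 64, 64)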

synaptic_reconstruction/training/supervised_training.py

Lines changed: 2 additions & 1 deletion

@@ -266,7 +266,8 @@ def supervised_training(
     elif mask_channel:
         loss = torch_em.loss.LossWrapper(
             loss=torch_em.loss.DiceLoss(),
-            transform=torch_em.loss.wrapper.ApplyAndRemoveMask()
+            transform=torch_em.loss.wrapper.ApplyAndRemoveMask(
+                masking_method="crop" if out_channels == 1 else "multiply")
         )
         metric = loss
     else:
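
Pulled out of supervised_training for readability, a sketch of the loss this now constructs when mask_channel is set (the wrapper classes are from torch_em, as in the diff; the out_channels value is just an example matching train_compartments_3d_v2 above):

import torch_em

out_channels = 2  # example value; "crop" is used for single-channel targets

loss = torch_em.loss.LossWrapper(
    loss=torch_em.loss.DiceLoss(),
    transform=torch_em.loss.wrapper.ApplyAndRemoveMask(
        masking_method="crop" if out_channels == 1 else "multiply"
    ),
)
metric = loss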
