Sm dev #62

Open. Wants to merge 47 commits into base: main.

Changes from 23 commits (47 commits in total):
e2c4f4b  AZ segmentation (SarahMuth, Oct 23, 2024)
a0f713f  updates (SarahMuth, Oct 28, 2024)
94f9121  Merge branch 'main' of https://github.com/computational-cell-analytic… (SarahMuth, Oct 28, 2024)
ac1ac00  update 2D DA (SarahMuth, Oct 28, 2024)
37de75d  Merge branch 'main' of https://github.com/computational-cell-analytic… (SarahMuth, Oct 29, 2024)
61c57fa  small updates, compartment segmentation (SarahMuth, Nov 7, 2024)
40e965e  Implement code for first analysis (constantinpape, Nov 7, 2024)
7be9ee8  2D seg with mask (SarahMuth, Nov 11, 2024)
b1bef7e  Merge branch 'analysis' of https://github.com/computational-cell-anal… (SarahMuth, Nov 11, 2024)
f85e445  spatial distribution analysis (SarahMuth, Nov 11, 2024)
8ef16bc  intersection between compartment boundary and AZ segmentaiton (SarahMuth, Nov 12, 2024)
e625ef7  Merge branch 'main' of https://github.com/computational-cell-analytic… (SarahMuth, Nov 12, 2024)
09f6c84  Update compartment postprocessing (constantinpape, Nov 12, 2024)
d7dbb39  Merge branch 'more-comp-seg-updates' of https://github.com/computatio… (SarahMuth, Nov 12, 2024)
f893d23  updating data analysis on smaller details (SarahMuth, Nov 13, 2024)
08c56b9  minor updates data analysis (SarahMuth, Nov 13, 2024)
36d834f  Implement inner ear analysis WIP (constantinpape, Nov 14, 2024)
49d1b7c  calculation of AZ area (SarahMuth, Nov 14, 2024)
8a515d1  corrected radius factor (SarahMuth, Nov 14, 2024)
0f40d3c  Update inner ear analysis (constantinpape, Nov 15, 2024)
ad4741b  Update inner ear analysis (constantinpape, Nov 17, 2024)
305a80b  Updates to inner ear training and eval (constantinpape, Nov 17, 2024)
903e59e  Update inner ear analysis (constantinpape, Nov 18, 2024)
b1449d2  minor changes (SarahMuth, Nov 19, 2024)
0b7884d  Merge branch 'main' of https://github.com/computational-cell-analytic… (constantinpape, Nov 19, 2024)
186c92d  Update inner ear analysis scripts (constantinpape, Nov 20, 2024)
186df5b  Merge branch 'more-inner-ear-analysis' of https://github.com/computat… (constantinpape, Nov 20, 2024)
2ccf340  Add script to extract vesicle diameters for inner ear data (constantinpape, Nov 20, 2024)
5feff6a  Update active zone analysis for SNAP/MUNC data (constantinpape, Nov 21, 2024)
9b8c7a2  Add more inner ear analysis code (constantinpape, Nov 21, 2024)
db89b44  evaluation of AZ seg (SarahMuth, Nov 23, 2024)
51165a5  Fix issues with the segmentation export to IMOD (constantinpape, Nov 23, 2024)
aa5d78e  clean up (SarahMuth, Nov 23, 2024)
20e429b  clean up (SarahMuth, Nov 23, 2024)
19f618e  clean up (SarahMuth, Nov 23, 2024)
cb693b1  Update data summaries (constantinpape, Nov 24, 2024)
a0c31a8  Fix issue in data aggregation (constantinpape, Nov 24, 2024)
93a66c1  Update data summary (constantinpape, Nov 24, 2024)
e0dfda6  Merge branch 'main' into more-inner-ear-analysis (constantinpape, Nov 24, 2024)
59a38db  Update all measurements for the inner ear analysis (constantinpape, Nov 24, 2024)
9728951  Update vesicle diameter analysis (constantinpape, Nov 24, 2024)
84d3ec7  Merge branch 'more-inner-ear-analysis' of https://github.com/computat… (SarahMuth, Nov 25, 2024)
622da1e  update AZ evaluation (SarahMuth, Nov 27, 2024)
686b018  erosion dilation filtering of AZ (SarahMuth, Nov 28, 2024)
6b54e4a  stuff for revision (SarahMuth, Mar 31, 2025)
7d675ab  everything after 1st revision relating to training, inference, postpr… (SarahMuth, May 22, 2025)
f052b98  minor things (SarahMuth, May 25, 2025)
7 changes: 6 additions & 1 deletion .gitignore
@@ -8,6 +8,11 @@ models/*/
run_sbatch.sbatch
slurm/
scripts/cooper/evaluation_results/
analysis_results/
scripts/cooper/training/copy_testset.py
scripts/rizzoli/upsample_data.py
scripts/cooper/training/find_rec_testset.py
scripts/cooper/training/find_rec_testset.py
scripts/rizzoli/combine_2D_slices.py
scripts/rizzoli/combine_2D_slices_raw.py
scripts/cooper/remove_h5key.py
scripts/cooper/analysis/calc_AZ_area.py
173 changes: 173 additions & 0 deletions scripts/cooper/AZ_segmentation_h5.py
@@ -0,0 +1,173 @@
import argparse
import h5py
import os
from pathlib import Path

from tqdm import tqdm
from elf.io import open_file

from synaptic_reconstruction.inference.AZ import segment_AZ
from synaptic_reconstruction.inference.util import parse_tiling

def _require_output_folders(output_folder):
    seg_output = output_folder
    os.makedirs(seg_output, exist_ok=True)
    return seg_output


def get_volume(input_path):
    with open_file(input_path, "r") as f:
        # Try to automatically derive the key that holds the raw data.
        keys = list(f.keys())
        if len(keys) == 1:
            key = keys[0]
        elif "data" in keys:
            key = "data"
        elif "raw" in keys:
            key = "raw"
        else:
            raise ValueError(f"Could not derive the raw data key for {input_path}; found the keys {keys}.")

        input_volume = f[key][:]
    return input_volume

def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key, tile_shape, halo, key_label, compartment_seg):
    tiling = parse_tiling(tile_shape, halo)
    print(f"Using tiling {tiling}")
    input_volume = get_volume(input_path)

    # Check if we have a mask that restricts the segmentation.
    if mask_path is not None:
        with open_file(mask_path, "r") as f:
            mask = f[mask_key][:]
    else:
        mask = None

    # Check if the intersection with the compartment segmentation should be computed.
    if compartment_seg is None:
        foreground = segment_AZ(input_volume=input_volume, model_path=model_path, verbose=False, tiling=tiling, mask=mask)
        intersection = None
    else:
        with open_file(compartment_seg, "r") as f:
            compartment = f["/labels/compartment"][:]
        foreground, intersection = segment_AZ(
            input_volume=input_volume, model_path=model_path, verbose=False, tiling=tiling, mask=mask, compartment=compartment
        )

    seg_output = _require_output_folders(output_path)
    file_name = Path(input_path).stem
    seg_path = os.path.join(seg_output, f"{file_name}.h5")
    os.makedirs(Path(seg_path).parent, exist_ok=True)

    print(f"Saving results in {seg_path}")
    with h5py.File(seg_path, "a") as f:
        if "raw" in f:
            print("The raw image is already saved.")
        else:
            f.create_dataset("raw", data=input_volume, compression="gzip")

        key = f"AZ/segment_from_{key_label}"
        if key in f:
            print("Skipping", input_path, "because", key, "exists")
        else:
            f.create_dataset(key, data=foreground, compression="gzip")

        if mask is not None:
            if mask_key in f:
                print("The mask is already saved.")
            else:
                f.create_dataset(mask_key, data=mask, compression="gzip")

        if intersection is not None:
            intersection_key = "AZ/compartment_AZ_intersection"
            if intersection_key in f:
                print("The intersection is already saved.")
            else:
                f.create_dataset(intersection_key, data=intersection, compression="gzip")
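
For reference, the datasets written by run_AZ_segmentation can be inspected after a run. A minimal sketch, assuming the default key_label and a hypothetical output file name:

import h5py

# Hypothetical output file; the name mirrors the input tomogram's stem.
with h5py.File("az_segmentations/tomo_01.h5", "r") as f:
    f.visit(print)  # e.g. raw, AZ/segment_from_combined_vesicles, AZ/compartment_AZ_intersection
    az_seg = f["AZ/segment_from_combined_vesicles"][:]
print(az_seg.shape, az_seg.dtype)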




def segment_folder(args):
    input_files = []
    for root, dirs, files in os.walk(args.input_path):
        input_files.extend(
            os.path.join(root, name) for name in files if name.endswith(args.data_ext)
        )
    print(input_files)
    pbar = tqdm(input_files, desc="Run segmentation")
    for input_path in pbar:
        filename = os.path.basename(input_path)

        # Derive the mask path for this tomogram, if a mask folder was given.
        mask_path = None
        if args.mask_path is not None:
            mask_path = os.path.join(args.mask_path, filename)
            if not os.path.exists(mask_path):
                print(f"Mask file not found for {input_path}")
                mask_path = None

        # Derive the compartment segmentation path, if a compartment folder was given.
        compartment_seg = None
        if args.compartment_seg is not None:
            compartment_seg = os.path.join(args.compartment_seg, os.path.splitext(filename)[0] + ".h5")
            if not os.path.exists(compartment_seg):
                print(f"Compartment file not found for {input_path}")
                compartment_seg = None

        run_AZ_segmentation(
            input_path, args.output_path, args.model_path, mask_path, args.mask_key,
            args.tile_shape, args.halo, args.key_label, compartment_seg
        )

def main():
    parser = argparse.ArgumentParser(description="Segment the active zone (AZ) in EM tomograms.")
    parser.add_argument(
        "--input_path", "-i", required=True,
        help="The filepath to the mrc file or the directory containing the tomogram data."
    )
    parser.add_argument(
        "--output_path", "-o", required=True,
        help="The filepath to the directory where the segmentations will be saved."
    )
    parser.add_argument(
        "--model_path", "-m", required=True, help="The filepath to the AZ model."
    )
    parser.add_argument(
        "--mask_path", help="The filepath to an h5 file with a mask that will be used to restrict the segmentation. Has to be used in combination with mask_key."
    )
    parser.add_argument(
        "--mask_key", help="The key that holds the mask segmentation."
    )
    parser.add_argument(
        "--tile_shape", type=int, nargs=3,
        help="The tile shape for prediction. Lower the tile shape if GPU memory is insufficient."
    )
    parser.add_argument(
        "--halo", type=int, nargs=3,
        help="The halo for prediction. Increase the halo to minimize boundary artifacts."
    )
    parser.add_argument(
        "--key_label", "-k", default="combined_vesicles",
        help="The key name under which the segmentation is saved in the h5 output."
    )
    parser.add_argument(
        "--data_ext", "-d", default=".h5",
        help="The file extension of the data to be segmented. The default is .h5."
    )
    parser.add_argument(
        "--compartment_seg", "-c",
        help="Path to the compartment segmentation. "
        "If a compartment segmentation was computed before, this adds a key to the output file that stores the intersection between the compartment boundary and the AZ. "
        "You may need to adjust the key under which the compartment segmentation is stored."
    )
    args = parser.parse_args()

    input_ = args.input_path

    if os.path.isdir(input_):
        segment_folder(args)
    else:
        run_AZ_segmentation(
            input_, args.output_path, args.model_path, args.mask_path, args.mask_key,
            args.tile_shape, args.halo, args.key_label, args.compartment_seg
        )

    print("Finished segmenting!")


if __name__ == "__main__":
    main()
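
For reference, a typical invocation of the script might look like the following; all paths and the tile/halo values are hypothetical and need to be adapted:

python scripts/cooper/AZ_segmentation_h5.py \
    -i /data/tomograms \
    -o /data/az_segmentations \
    -m /models/AZ_model.pt \
    --tile_shape 128 256 256 --halo 32 32 32 \
    -c /data/compartment_segmentations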
174 changes: 174 additions & 0 deletions scripts/cooper/analysis/run_size_analysis.py
@@ -0,0 +1,174 @@
import os
from glob import glob

import numpy as np
import pandas as pd
import h5py
from tqdm import tqdm
from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres

DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa
PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa
RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp"

def get_compartment_with_max_overlap(compartments, vesicles):
    """Given 3D numpy arrays of compartments and vesicles, return a binary mask
    of the compartment that has the largest overlap with the vesicles,
    measured as the number of overlapping voxels.

    Parameters:
        compartments (numpy.ndarray): 3D array of compartment labels.
        vesicles (numpy.ndarray): 3D array of vesicle labels or a binary mask.

    Returns:
        numpy.ndarray: Binary mask of the compartment with the most overlap with vesicles.
    """
    unique_compartments = np.unique(compartments)
    unique_compartments = unique_compartments[unique_compartments != 0]

    max_overlap_count = 0
    best_compartment = None
    vesicle_mask = vesicles > 0

    # Iterate over the compartments and compute the overlap with the vesicles.
    for compartment_label in unique_compartments:
        compartment_mask = compartments == compartment_label
        overlap_count = np.sum(np.logical_and(compartment_mask, vesicle_mask))

        # Track the compartment with the most overlapping voxels.
        if overlap_count > max_overlap_count:
            max_overlap_count = overlap_count
            best_compartment = compartment_label

    final_mask = compartments == best_compartment
    return final_mask
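
A minimal sketch of the helper's behavior on toy arrays (shapes and labels invented for illustration):

import numpy as np

compartments = np.zeros((1, 4, 4), dtype=int)
compartments[0, :2] = 1  # compartment 1 fills the top half
compartments[0, 2:] = 2  # compartment 2 fills the bottom half

vesicles = np.zeros((1, 4, 4), dtype=int)
vesicles[0, 2:, :2] = 1  # all vesicle voxels fall inside compartment 2

mask = get_compartment_with_max_overlap(compartments, vesicles)
assert mask.sum() == 8  # the full extent of compartment 2 is returned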

# We compute the sizes for all vesicles in the MANUALLY ANNOTATED compartment masks.
# We use the same logic for the size computation as for the vesicle extraction to IMOD.
# The radius correction factor is not needed here, so it is set to 1.
# The number of vesicles is computed as the length of the size list.
def compute_sizes_for_all_tomorams_manComp():
    os.makedirs(RESULT_FOLDER, exist_ok=True)

    resolution = (1.554,) * 3  # Adjust per dataset: 1.554 for Munc and SNAP, 0.8681 for the 04 dataset.
    radius_factor = 1
    estimate_radius_2d = True
    dataset_results = {}

    tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True))
    for tomo in tqdm(tomograms):
        ds_name, fname = os.path.split(tomo)
        ds_name = os.path.split(ds_name)[1]
        fname = os.path.splitext(fname)[0]

        # Determine if the tomogram is 'CTRL' or 'DKO'.
        category = "CTRL" if "CTRL" in fname else "DKO"

        if ds_name not in dataset_results:
            dataset_results[ds_name] = {"CTRL": {}, "DKO": {}}

        if fname in dataset_results[ds_name][category]:
            continue

        # Load the vesicle segmentation from the predictions.
        with h5py.File(tomo, "r") as f:
            segmentation = f["/vesicles/segment_from_combined_vesicles"][:]

        input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5")
        assert os.path.exists(input_path), input_path

        # Load the manually annotated compartment mask and restrict the segmentation to it.
        with h5py.File(input_path, "r") as f:
            mask = f["labels/compartment"][:]
        segmentation[mask == 0] = 0

        _, sizes = convert_segmentation_to_spheres(
            segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d
        )
        dataset_results[ds_name][category][fname] = sizes

    # Save each dataset's results in separate CSV files for the CTRL and DKO tomograms.
    for ds_name, categories in dataset_results.items():
        for category, tomogram_data in categories.items():
            sorted_data = dict(sorted(tomogram_data.items()))  # Sort by tomogram name.
            result_df = pd.DataFrame.from_dict(sorted_data, orient="index").transpose()
            output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv")
            result_df.to_csv(output_path, index=False)

# We compute the sizes for all vesicles in the AUTOMATICALLY SEGMENTED compartment masks.
# We use the same logic for the size computation as for the vesicle extraction to IMOD.
# The radius correction factor is not needed here, so it is set to 1.
# The number of vesicles is computed as the length of the size list.
def compute_sizes_for_all_tomorams_autoComp():
    os.makedirs(RESULT_FOLDER, exist_ok=True)

    resolution = (1.554,) * 3  # Adjust per dataset: 1.554 for Munc and SNAP, 0.8681 for the 04 dataset.
    radius_factor = 1
    estimate_radius_2d = True
    dataset_results = {}

    tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True))
    for tomo in tqdm(tomograms):
        ds_name, fname = os.path.split(tomo)
        ds_name = os.path.split(ds_name)[1]
        fname = os.path.splitext(fname)[0]

        # Determine if the tomogram is 'CTRL' or 'DKO'.
        category = "CTRL" if "CTRL" in fname else "DKO"

        if ds_name not in dataset_results:
            dataset_results[ds_name] = {"CTRL": {}, "DKO": {}}

        if fname in dataset_results[ds_name][category]:
            continue

        # Load the vesicle segmentation from the predictions.
        with h5py.File(tomo, "r") as f:
            segmentation = f["/vesicles/segment_from_combined_vesicles"][:]

        input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5")
        assert os.path.exists(input_path), input_path

        # Load the automatic compartment segmentation and keep the compartment
        # with the largest vesicle overlap.
        with h5py.File(input_path, "r") as f:
            compartments = f["/compartments/segment_from_3Dmodel_v2"][:]
        mask = get_compartment_with_max_overlap(compartments, segmentation)

        # If the mask would remove more than half of the segmented voxels
        # (an approximation, since this counts voxels rather than vesicle labels),
        # we assume the compartment segmentation failed and don't apply the mask.
        # See the standalone sketch of this check after the function.
        if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)):
            print(f"using no mask for {tomo}")
        else:
            segmentation[mask == 0] = 0

        _, sizes = convert_segmentation_to_spheres(
            segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d
        )
        dataset_results[ds_name][category][fname] = sizes

    # Save each dataset's results in separate CSV files for the CTRL and DKO tomograms.
    for ds_name, categories in dataset_results.items():
        for category, tomogram_data in categories.items():
            sorted_data = dict(sorted(tomogram_data.items()))  # Sort by tomogram name.
            result_df = pd.DataFrame.from_dict(sorted_data, orient="index").transpose()
            output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv")
            result_df.to_csv(output_path, index=False)
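
The 50% check above compares voxel counts, not vesicle labels. A standalone sketch of the same heuristic on a toy array (values invented for illustration):

import numpy as np

segmentation = np.array([[1, 1, 2, 2, 0, 0]])
mask = np.array([[1, 0, 0, 0, 0, 0]], dtype=bool)

removed = np.sum(segmentation[mask == 0] > 0)  # 3 segmented voxels lie outside the mask
total = np.sum(segmentation > 0)               # 4 segmented voxels in total
apply_mask = removed <= 0.5 * total            # False here, so the mask would be rejected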

def main():
    # compute_sizes_for_all_tomorams_manComp()
    compute_sizes_for_all_tomorams_autoComp()


if __name__ == "__main__":
    main()