
Commit c75abd9

Update the example scripts and add downloads for data from zenodo
1 parent ba56b57 commit c75abd9

File tree

6 files changed: +94 -38 lines changed

examples/.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+data/
+set_up_pool.py
+*.h5
+*.tif
+*.mrc

examples/analysis_pipeline.py

Lines changed: 23 additions & 14 deletions
@@ -6,10 +6,11 @@
 from skimage.measure import regionprops
 from skimage.segmentation import find_boundaries
 
-from synapse_net.file_utils import read_mrc
-from synapse_net.sample_data import get_sample_data
 from synapse_net.distance_measurements import measure_segmentation_to_object_distances
+from synapse_net.file_utils import read_mrc
+from synapse_net.imod.to_imod import convert_segmentation_to_spheres
 from synapse_net.inference import compute_scale_from_voxel_size, get_model, run_segmentation
+from synapse_net.sample_data import get_sample_data
 
 
 def segment_structures(tomogram, voxel_size):
@@ -72,15 +73,23 @@ def postprocess_segmentation(segmentations):
 
 
 def measure_distances(segmentations, voxel_size):
+    # Here, we measure the distances from each vesicle to the active zone.
+    # We use the function 'measure_segmentation_to_object_distances' for this,
+    # which uses an euclidean distance transform scaled with the voxel size
+    # to determine distances.
     vesicles, active_zone = segmentations["vesicles"], segmentations["active_zone"]
     voxel_size = tuple(voxel_size[ax] for ax in "zyx")
    distances, _, _, vesicle_ids = measure_segmentation_to_object_distances(
        vesicles, active_zone, resolution=voxel_size
    )
+    # We convert the result to a pandas data frame.
     return pd.DataFrame({"vesicle_id": vesicle_ids, "distance": distances})
 
 
 def assign_vesicle_pools(vesicle_attributes):
+    # We assign the vesicles to their respective pool, 'docked' and 'non-attached',
+    # based on the criterion of being within 2 nm from the active zone.
+    # We add the pool assignment as a new column to the dataframe with vesicle attributes.
     docked_vesicle_distance = 2  # nm
     vesicle_attributes["pool"] = vesicle_attributes["distance"].apply(
         lambda x: "docked" if x < docked_vesicle_distance else "non-attached"
@@ -89,6 +98,7 @@ def assign_vesicle_pools(vesicle_attributes):
 
 
 def visualize_results(tomogram, segmentations, vesicle_attributes):
+    # Here, we visualize the segmentation and pool assignment result in napari.
 
     # Create a segmentation to visualize the vesicle pools.
     docked_ids = vesicle_attributes[vesicle_attributes.pool == "docked"].vesicle_id
@@ -97,6 +107,7 @@ def visualize_results(tomogram, segmentations, vesicle_attributes):
     vesicle_pools = np.isin(vesicles, docked_ids).astype("uint8")
     vesicle_pools[np.isin(vesicles, non_attached_ids)] = 2
 
+    # Create a napari viewer, add the tomogram data and the segmentation results.
     viewer = napari.Viewer()
     viewer.add_image(tomogram)
     for name, segmentation in segmentations.items():
@@ -105,9 +116,16 @@ def visualize_results(tomogram, segmentations, vesicle_attributes):
     napari.run()
 
 
-# TODO compute the vesicle radii and other features and then save the attributes.
 def save_analysis(segmentations, vesicle_attributes, save_path):
-    pass
+    # Here, we compute the radii and centroid positions of the vesicles,
+    # add them to the vesicle attributes and then save all vesicle attributes to
+    # an excel table. You can use this table for evaluation of the analysis.
+    vesicles = segmentations["vesicles"]
+    coordinates, radii = convert_segmentation_to_spheres(vesicles, radius_factor=0.7)
+    vesicle_attributes["radius"] = radii
+    for ax_id, ax_name in enumerate("zyx"):
+        vesicle_attributes[f"center-{ax_name}"] = coordinates[:, ax_id]
+    vesicle_attributes.to_excel(save_path, index=False)
 
 
 def main():
@@ -119,16 +137,7 @@ def main():
     tomogram, voxel_size = read_mrc(mrc_path)
 
     # Segment synaptic vesicles, the active zone, and the synaptic compartment.
-    # segmentations = segment_structures(tomogram, voxel_size)
-
-    # Load saved segmentations for development.
-    import h5py
-    segmentations = {}
-    with h5py.File("seg.h5", "r") as f:
-        for name, ds in f.items():
-            # f.create_dataset(name, data=seg, compression="gzip")
-            seg = ds[:]
-            segmentations[name] = seg
+    segmentations = segment_structures(tomogram, voxel_size)
 
     # Post-process the segmentations, to find the presynaptic terminal,
     # filter out vesicles not in the terminal, and to 'snap' the AZ to the presynaptic boundary.
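
Note: the pool assignment added above only needs pandas. The following minimal sketch, with made-up distance values in nm, shows how the 'docked' / 'non-attached' labels end up as a new column of the attribute table:

import pandas as pd

# Toy vesicle attributes; the real values come from measure_distances.
vesicle_attributes = pd.DataFrame({"vesicle_id": [1, 2, 3], "distance": [1.2, 5.0, 0.8]})

docked_vesicle_distance = 2  # nm
vesicle_attributes["pool"] = vesicle_attributes["distance"].apply(
    lambda x: "docked" if x < docked_vesicle_distance else "non-attached"
)
print(vesicle_attributes)  # vesicles 1 and 3 are 'docked', vesicle 2 is 'non-attached'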

examples/domain_adaptation.py

Lines changed: 21 additions & 15 deletions
@@ -4,35 +4,41 @@
 a different electron tomogram with different specimen and sample preparation.
 You don't need any annotations in the new domain to run this script.
 
-You can download example data for this script from:
-- Adaptation to 2d TEM data: TODO zenodo link
-- Adaptation to different tomography data: TODO zenodo link
+We use data from the SynapseNet publication for this example:
+- Adaptation to 2d TEM data: https://doi.org/10.5281/zenodo.14236381
+- Adaptation to different tomography data (3d data): https://doi.org/10.5281/zenodo.14232606
+
+It is of course possible to adapt it to your own data.
 """
 
 import os
 from glob import glob
 
 from sklearn.model_selection import train_test_split
+from synapse_net.inference.inference import get_model_path
+from synapse_net.sample_data import download_data_from_zenodo
 from synapse_net.training import mean_teacher_adaptation
-from synapse_net.tools.util import get_model_path
 
 
 def main():
     # Choose whether to adapt the model to 2D or to 3D data.
-    train_2d_model = True
-
-    # TODO adjust to zenodo downloads
-    # These are the data folders for the example data downloaded from zenodo.
-    # Update these paths to apply the script to your own data.
-    # Check out the example data to see the data format for training.
-    data_root_folder_2d = "./data/2d_tem/train_unlabeled"
-    data_root_folder_3d = "./data/..."
+    train_2d_model = False
 
-    # Choose the correct data folder depending on 2d/3d training.
-    data_root_folder = data_root_folder_2d if train_2d_model else data_root_folder_3d
+    # Download the training data from zenodo.
+    # You have to replace this if you want to train on your own data.
+    # The training data should be stored in an hdf5 file per tomogram,
+    # with tomogram data stored in the internal dataset 'raw'.
+    if train_2d_model:
+        data_root = "./data/2d_tem"
+        download_data_from_zenodo(data_root, "2d_tem")
+        train_root_folder = os.path.join(data_root, "train_unlabeled")
+    else:
+        data_root = "./data/inner_ear_ribbon_synapse"
+        download_data_from_zenodo(data_root, "inner_ear_ribbon_synapse")
+        train_root_folder = data_root
 
     # Get all files with ending .h5 in the training folder.
-    files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
+    files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))
 
     # Create a train / val split.
     train_ratio = 0.85
examples/network_training.py

Lines changed: 14 additions & 8 deletions
@@ -5,30 +5,36 @@
 to adapt an already trained network to your data without the need for
 additional annotations then check out `domain_adaptation.py`.
 
-You can download example data for this script from:
-TODO zenodo link to Single-Ax / Chemical Fix data.
+We will use the data from our manuscript here:
+https://doi.org/10.5281/zenodo.14330011
+
+You can also use your own data, if you prepare it in the same format.
 """
 import os
 from glob import glob
 
 from sklearn.model_selection import train_test_split
+from synapse_net.sample_data import download_data_from_zenodo
 from synapse_net.training import supervised_training
 
 
 def main():
-    # This is the folder that contains your training data.
-    # The example was designed so that it runs for the sample data downloaded to './data'.
-    # If you want to train on your own data than change this filepath accordingly.
-    # TODO update to match zenodo download
-    data_root_folder = "./data/vesicles/train"
+    # Download the training data from zenodo.
+    # You have to replace this if you want to train on your own data.
+    # The training data should be stored in an hdf5 file per tomogram,
+    # with tomogram data stored in the internal dataset 'raw'
+    # and the vesicle annotations stored in the internal dataset 'labels/vesicles'.
+    data_root = "./data/training_data"
+    download_data_from_zenodo(data_root, "training_data")
+    train_root_folder = os.path.join(data_root, "vesicles/train")
 
     # The training data should be saved as .h5 files, with:
     # an internal dataset called 'raw' that contains the image data
     # and another dataset that contains the training annotations.
     label_key = "labels/vesicles"
 
     # Get all files with the ending .h5 in the training folder.
-    files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
+    files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))
 
     # Create a train / val split.
     train_ratio = 0.85
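
For supervised training the files additionally need the vesicle annotations under 'labels/vesicles'. A minimal sketch, again with made-up arrays and file name:

import h5py
import numpy as np

# Hypothetical tomogram and vesicle instance labels with matching shapes (0 = background).
tomogram = np.random.rand(64, 256, 256).astype("float32")
vesicle_labels = np.zeros((64, 256, 256), dtype="uint32")

with h5py.File("my_training_tomogram.h5", "w") as f:
    f.create_dataset("raw", data=tomogram, compression="gzip")
    f.create_dataset("labels/vesicles", data=vesicle_labels, compression="gzip")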

scripts/prepare_zenodo_uploads.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def _export_az(train_root, test_tomos, name):
 
     for tomo in tqdm(tomograms):
         fname = os.path.basename(tomo)
-        if tomo in test_tomos:
+        if fname in test_tomos:
             out_path = os.path.join(test_out, fname)
         else:
             out_path = os.path.join(train_out, fname)
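
The one-line fix compares the file name rather than the full path against test_tomos. A small sketch with hypothetical values, assuming test_tomos holds plain file names, illustrates why the old check never matched:

import os

test_tomos = ["tomo_01.mrc"]           # hypothetical list of file names
tomo = "/data/tomograms/tomo_01.mrc"   # iteration yields full paths

fname = os.path.basename(tomo)
print(tomo in test_tomos)   # False: a full path is never equal to a bare file name
print(fname in test_tomos)  # True: the basename matches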

synapse_net/sample_data.py

Lines changed: 30 additions & 0 deletions
@@ -1,4 +1,5 @@
 import os
+import tempfile
 import pooch
 
 from .file_utils import read_mrc, get_cache_dir
@@ -52,3 +53,32 @@ def sample_data_tem_2d():
 
 def sample_data_tem_tomo():
     return _sample_data("tem_tomo")
+
+
+def download_data_from_zenodo(path: str, name: str):
+    """Download data uploaded for the SynapseNet manuscript from zenodo.
+
+    Args:
+        path: The path where the downloaded data will be saved.
+        name: The name of the zenodo dataset.
+    """
+    from torch_em.data.datasets.util import download_source, unzip
+
+    urls = {
+        "2d_tem": "https://zenodo.org/records/14236382/files/tem_2d.zip?download=1",
+        "inner_ear_ribbon_synapse": "https://zenodo.org/records/14232607/files/inner-ear-ribbon-synapse-tomgrams.zip?download=1",  # noqa
+        "training_data": "https://zenodo.org/records/14330011/files/synapse-net.zip?download=1"
+    }
+    assert name in urls
+    url = urls[name]
+
+    # May need to adapt this for other datasets.
+    # Check if the download already exists.
+    dl_path = path
+    if os.path.exists(dl_path):
+        return
+
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_path = os.path.join(tmp, f"{name}.zip")
+        download_source(tmp_path, url, download=True, checksum=None)
+        unzip(tmp_path, path, remove=False)
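
Example usage of the new helper; the dataset name must be one of the keys of the urls dict above and the target folder is arbitrary:

from synapse_net.sample_data import download_data_from_zenodo

# Download the 2d TEM data into a local folder (skipped if the folder already exists).
download_data_from_zenodo("./data/2d_tem", "2d_tem")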
