Skip to content

Commit e2565f6

Browse files
Merge branch 'main' into doc-updates
2 parents a65affb + ae2a472 commit e2565f6

File tree

8 files changed

+474
-12
lines changed

8 files changed

+474
-12
lines changed

scripts/aggregate_data_information.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def active_zone_train_data():
200200
"01": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete", # noqa
201201
"04": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples", # noqa
202202
"06": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm", # noqa
203-
"12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/20241021_imig_2014_data_transfer_exported_grouped", # noqa
203+
"12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation", # noqa
204204
}
205205

206206
test_tomograms = {
@@ -467,11 +467,58 @@ def get_image_sizes_tem_2d():
467467
print(f["raw"].shape)
468468

469469

470+
def mito_train_data():
    """Write an overview table of the mitochondria training tomograms.

    Every ``*.h5`` file in the training folder contributes one row holding its
    path, imaging condition, resolution, split assignment and the number of
    distinct mitochondria labels. The table is saved to
    ``./data_summary/mitochondria.xlsx``.
    """
    train_root = "/scratch-grete/projects/nim00007/data/mitochondria/cooper/fidi_down_s2"
    # File names that are reserved for testing; all other files count as train/val.
    test_tomograms = [
        "36859_J1_66K_TS_CA3_MF_18_rec_2Kb1dawbp_crop_downscaled.h5",
        "3.2_downscaled.h5",
    ]
    # File name fragments that mark a tomogram as part of the STEM dataset.
    stem_markers = ("36859", "37371")

    table = {
        "tomogram": [],
        "condition": [],
        "resolution": [],
        "used_for": [],
        "mito_count_all": [],
    }

    for tomo_path in sorted(glob(os.path.join(train_root, "*.h5"))):
        file_name = os.path.basename(tomo_path)
        used_for = "test" if file_name in test_tomograms else "train/val"

        if any(marker in file_name for marker in stem_markers):
            # This is from the STEM dataset.
            condition, resolution = stem, 2 * 0.868
        else:
            # This is from the TEM Single-Axis Dataset.
            # These were scaled, despite the resolution mismatch.
            condition, resolution = single_ax_tem, 2 * 1.554

        with h5py.File(tomo_path, "r") as f:
            seg = f["labels/mitochondria"][:]
        # One label value is discounted; presumably this is the background id (TODO confirm).
        n_mitos = np.unique(seg).size - 1

        table["tomogram"].append(tomo_path)
        table["condition"].append(condition)
        table["resolution"].append(resolution)
        table["used_for"].append(used_for)
        table["mito_count_all"].append(n_mitos)

    os.makedirs("data_summary", exist_ok=True)
    pd.DataFrame(table).to_excel("./data_summary/mitochondria.xlsx", index=False)
515+
516+
470517
def main():
471518
# active_zone_train_data()
472519
# compartment_train_data()
473-
# mito_train_data()
474-
vesicle_train_data()
520+
mito_train_data()
521+
# vesicle_train_data()
475522

476523
# vesicle_domain_adaptation_data()
477524
# get_n_images_frog()

scripts/cooper/full_reconstruction/segment_mitochondria.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,53 @@
88
ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04_full_reconstruction" # noqa
99
MODEL_PATH = "/scratch-grete/projects/nim00007/models/exports_for_cooper/mito_model_s2.pt" # noqa
1010

11+
# MODEL_PATH = "/scratch-grete/projects/nim00007/models/luca/mito/source_domain"
12+
1113

1214
def run_seg(path):
    """Segment the mitochondria of one tomogram and store them below ./mito_seg.

    The result is written to ``./mito_seg/<parent folder name>/<file name>``
    as the dataset ``labels/mitochondria``. Nothing is done when that output
    file already exists.

    Args:
        path: Path of the input HDF5 file; its ``raw`` dataset is segmented.
    """
    parent_dir, file_name = os.path.split(path)
    dataset_name = os.path.basename(parent_dir)

    target_dir = os.path.join("./mito_seg", dataset_name)
    os.makedirs(target_dir, exist_ok=True)

    out_path = os.path.join(target_dir, file_name)
    if os.path.exists(out_path):
        return

    with h5py.File(path, "r") as f_in:
        raw = f_in["raw"][:]

    seg = segment_mitochondria(raw, model_path=MODEL_PATH, scale=(0.5, 0.5, 0.5), verbose=False)

    with h5py.File(out_path, "a") as f_out:
        f_out.create_dataset("labels/mitochondria", data=seg, compression="gzip")
32+
33+
34+
def run_seg_and_pred(path):
    """Segment one tomogram and save raw data, segmentation and network prediction.

    The result is written to ``./mito_pred/<file name>`` with the datasets
    ``raw``, ``labels/mitochondria`` and ``pred``. Inputs whose output file
    already exists are skipped.

    Args:
        path: Path of the input HDF5 file; its ``raw`` dataset is segmented.
    """
    out_folder = "./mito_pred"
    out_path = os.path.join(out_folder, os.path.basename(path))
    # Skip finished files, like run_seg does. Without this check a rerun would
    # compute the segmentation and then fail: the output is opened in mode "a"
    # and create_dataset does not overwrite an existing dataset.
    if os.path.exists(out_path):
        return

    with h5py.File(path, "r") as f:
        raw = f["raw"][:]

    scale = (0.5, 0.5, 0.5)
    seg, pred = segment_mitochondria(
        raw, model_path=MODEL_PATH, scale=scale, verbose=False, return_predictions=True
    )

    os.makedirs(out_folder, exist_ok=True)
    with h5py.File(out_path, "a") as f:
        # Only every second voxel per axis is stored; presumably this matches
        # the 0.5 scale used for the segmentation (TODO confirm).
        f.create_dataset("raw", data=raw[::2, ::2, ::2])
        f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
        f.create_dataset("pred", data=pred, compression="gzip")
2251

2352

2453
def main():
    """Run the mitochondria segmentation for every .h5 file found below ROOT."""
    pattern = os.path.join(ROOT, "**/*.h5")
    for tomo_path in tqdm(sorted(glob(pattern, recursive=True))):
        run_seg(tomo_path)
        # run_seg_and_pred(tomo_path)
2858

2959

3060
main()
-9.57 KB
Binary file not shown.

scripts/prepare_zenodo_uploads.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
import os
2+
from glob import glob
3+
from shutil import copyfile
4+
5+
import h5py
6+
from tqdm import tqdm
7+
8+
OUTPUT_ROOT = "./data_summary/for_zenodo"
9+
10+
11+
def _copy_vesicles(tomos, out_folder):
    """Export raw data and combined vesicle labels of each tomogram to out_folder.

    Each output file keeps the input file name and contains the datasets
    ``raw`` and ``labels/vesicles``. The ``filename`` attribute of the input
    is carried over when it is present. Existing output files are skipped.

    Args:
        tomos: Paths of the input HDF5 files.
        out_folder: Folder for the exported files; created if it is missing.
    """
    os.makedirs(out_folder, exist_ok=True)

    for src_path in tqdm(tomos, desc="Export tomos"):
        dst_path = os.path.join(out_folder, os.path.basename(src_path))
        if os.path.exists(dst_path):
            continue

        with h5py.File(src_path, "r") as src:
            raw = src["raw"][:]
            vesicles = src["labels/vesicles/combined_vesicles"][:]
            # Not every input file carries the "filename" attribute.
            source_name = src.attrs["filename"] if "filename" in src.attrs else None

        with h5py.File(dst_path, "a") as dst:
            dst.create_dataset("raw", data=raw, compression="gzip")
            dst.create_dataset("labels/vesicles", data=vesicles, compression="gzip")
            if source_name is not None:
                dst.attrs["filename"] = source_name
32+
33+
34+
def _export_vesicles(train_root, test_root, name):
    """Export the vesicle train and test tomograms of one dataset.

    Args:
        train_root: Folder with the training ``*.h5`` files.
        test_root: Folder with the test ``*.h5`` files.
        name: Dataset name; used as the last component of the output folders.
    """
    splits = {
        "train": sorted(glob(os.path.join(train_root, "*.h5"))),
        "test": sorted(glob(os.path.join(test_root, "*.h5"))),
    }
    n_train, n_test = len(splits["train"]), len(splits["test"])

    print(f"Vesicle data for {name}:")
    print(n_train, n_test, n_train + n_test)

    # The train split is exported first, then the test split.
    for split, tomos in splits.items():
        _copy_vesicles(tomos, os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", split, name))
45+
46+
47+
def _export_az(train_root, test_tomos, name):
    """Export raw data and active zone labels, split into train and test folders.

    Every ``*.h5`` file in ``train_root`` is written either to the test or to
    the train output folder, with the datasets ``raw`` and ``labels/AZ``.
    Existing output files are skipped.

    Args:
        train_root: Folder containing the input ``*.h5`` files.
        test_tomos: Tomograms that belong to the test split, given as file
            names (full paths are accepted too; only the base name is compared).
        name: Dataset name; used as the last component of the output folders.
    """
    tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
    print(f"AZ data for {name}:")

    train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "train", name)
    test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "test", name)

    os.makedirs(train_out, exist_ok=True)
    os.makedirs(test_out, exist_ok=True)

    # The glob results are full paths while the test split is specified by
    # file name, so the split has to be decided on the base name. Comparing
    # the full path never matched and sent every tomogram to the train folder.
    test_names = {os.path.basename(test_tomo) for test_tomo in test_tomos}

    for tomo in tqdm(tomograms):
        fname = os.path.basename(tomo)
        out_folder = test_out if fname in test_names else train_out
        out_path = os.path.join(out_folder, fname)
        if os.path.exists(out_path):
            continue

        with h5py.File(tomo, "r") as f:
            raw = f["raw"][:]
            az = f["labels/AZ"][:]

        with h5py.File(out_path, "a") as f:
            f.create_dataset("raw", data=raw, compression="gzip")
            f.create_dataset("labels/AZ", data=az, compression="gzip")
73+
74+
75+
# NOTE: we have very few mito annotations from 01, so we don't include them in here.
76+
def prepare_single_ax_stem_chemical_fix():
    """Export vesicle and active zone data for two datasets.

    The datasets are exported under the names ``single_axis_tem`` and
    ``chemical_fixation``; for each one the vesicles are exported first and
    the active zones second.
    """
    # Input folders of the single-axis TEM data.
    tem_vesicle_train = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete"  # noqa
    tem_vesicle_test = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete"  # noqa
    tem_az_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete"  # noqa
    # Tomograms of the single-axis TEM data that belong to the active zone test split.
    tem_az_test = [
        "WT_MF_DIV28_01_MS_09204_F1.h5",
        "WT_MF_DIV14_01_MS_B2_09175_CA3.h5",
        "M13_CTRL_22723_O2_05_DIV29_5.2.h5",
        "WT_Unt_SC_09175_D4_05_DIV14_mtk_05.h5",
        "20190805_09002_B4_SC_11_SP.h5",
        "20190807_23032_D4_SC_01_SP.h5",
        "M13_DKO_22723_A1_03_DIV29_03_MS.h5",
        "WT_MF_DIV28_05_MS_09204_F1.h5",
        "M13_CTRL_09201_S2_06_DIV31_06_MS.h5",
        "WT_MF_DIV28_1.2_MS_09002_B1.h5",
        "WT_Unt_SC_09175_C4_04_DIV15_mtk_04.h5",
        "M13_DKO_22723_A4_10_DIV29_10_MS.h5",
        "WT_MF_DIV14_3.2_MS_D2_09175_CA3.h5",
        "20190805_09002_B4_SC_10_SP.h5",
        "M13_CTRL_09201_S2_02_DIV31_02_MS.h5",
        "WT_MF_DIV14_04_MS_E1_09175_CA3.h5",
        "WT_MF_DIV28_10_MS_09002_B3.h5",
        "WT_Unt_SC_05646_D4_02_DIV16_mtk_02.h5",
        "M13_DKO_22723_A4_08_DIV29_08_MS.h5",
        "WT_MF_DIV28_04_MS_09204_M1.h5",
        "WT_MF_DIV28_03_MS_09204_F1.h5",
        "M13_DKO_22723_A1_05_DIV29_05_MS.h5",
        "WT_Unt_SC_09175_C4_06_DIV15_mtk_06.h5",
        "WT_MF_DIV28_09_MS_09002_B3.h5",
        "20190524_09204_F4_SC_07_SP.h5",
        "WT_MF_DIV14_02_MS_C2_09175_CA3.h5",
        "M13_DKO_23037_K1_01_DIV29_01_MS.h5",
        "WT_Unt_SC_09175_E2_01_DIV14_mtk_01.h5",
        "20190807_23032_D4_SC_05_SP.h5",
        "WT_MF_DIV14_01_MS_E2_09175_CA3.h5",
        "WT_MF_DIV14_03_MS_B2_09175_CA3.h5",
        "M13_DKO_09201_O1_01_DIV31_01_MS.h5",
        "M13_DKO_09201_U1_04_DIV31_04_MS.h5",
        "WT_MF_DIV14_04_MS_E2_09175_CA3_2.h5",
        "WT_Unt_SC_09175_D5_01_DIV14_mtk_01.h5",
        "M13_CTRL_22723_O2_05_DIV29_05_MS_.h5",
        "WT_MF_DIV14_02_MS_B2_09175_CA3.h5",
        "WT_MF_DIV14_01.2_MS_D1_09175_CA3.h5",
    ]

    # Input folders of the chemical fixation data.
    chem_vesicle_train = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation"  # noqa
    chem_vesicle_test = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation"  # noqa
    chem_az_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation"  # noqa
    # Tomograms of the chemical fixation data that belong to the active zone test split.
    chem_az_test = [
        "20180305_09_MS.h5",
        "20180305_04_MS.h5",
        "20180305_08_MS.h5",
        "20171113_04_MS.h5",
        "20171006_05_MS.h5",
        "20180305_01_MS.h5",
    ]

    # single-axis-tem: vesicles, then active zones
    _export_vesicles(tem_vesicle_train, tem_vesicle_test, name="single_axis_tem")
    _export_az(tem_az_root, tem_az_test, name="single_axis_tem")

    # chemical-fixation: vesicles, then active zones
    _export_vesicles(chem_vesicle_train, chem_vesicle_test, name="chemical_fixation")
    _export_az(chem_az_root, chem_az_test, name="chemical_fixation")
106+
107+
108+
def prepare_ier():
    """Export the tomograms and labels of three source folders to the "IER" output folder.

    Each exported file keeps the input file name and contains the ``raw``
    dataset, every available label under its export name, and a ``filename``
    attribute derived from the attributes of the input file. Existing output
    files are skipped.
    """
    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser/other_tomograms"
    # Source sub-folder -> name of the exported dataset.
    dataset_folders = {
        "01_vesicle_pools": "vesicle_pools",
        "02_tether": "tether",
        "03_ratten_tomos": "rat",
    }
    # Label name in the input file -> label name in the exported file.
    label_names = {
        "ribbons": "ribbon",
        "membrane": "membrane",
        "presynapse": "PD",
        "postsynapse": "PSD",
        "vesicles": "vesicles",
    }
    output_folder = os.path.join(OUTPUT_ROOT, "IER")

    for source_name, output_name in dataset_folders.items():
        out_set = os.path.join(output_folder, output_name)
        os.makedirs(out_set, exist_ok=True)
        tomos = sorted(glob(os.path.join(root, source_name, "*.h5")))

        print("Export", output_name)
        for tomo in tqdm(tomos):
            with h5py.File(tomo, "r") as src:
                if "filename" in src.attrs:
                    # File name without its folder and without the last four characters.
                    fname = os.path.split(src.attrs["filename"])[1][:-4]
                else:
                    # Fall back to the last two components of the second "path" entry.
                    fname = "_".join(src.attrs["path"][1].split("/")[-2:])

                out_path = os.path.join(out_set, os.path.basename(tomo))
                if os.path.exists(out_path):
                    continue

                raw = src["raw"][:]
                # Only the labels that exist in this file are exported.
                labels = {
                    out_name: src[f"labels/{label_name}"][:]
                    for label_name, out_name in label_names.items()
                    if f"labels/{label_name}" in src
                }

            with h5py.File(out_path, "a") as dst:
                dst.attrs["filename"] = fname
                dst.create_dataset("raw", data=raw, compression="gzip")
                for out_name, seg in labels.items():
                    dst.create_dataset(f"labels/{out_name}", data=seg, compression="gzip")
156+
157+
158+
def prepare_frog():
    """Copy the listed tomograms into the "frog" output folder.

    The train files go to the sub-folder ``train_unlabeled`` and the test
    files to ``test``. Files that already exist at the destination are not
    copied again.
    """
    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted"
    train_tomograms = [
        "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
        "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
        "block30UB_one.h5", "block10U3A_five.h5",
    ]
    test_tomograms = ["block10U3A_four.h5", "block30UB_five.h5"]

    output_folder = os.path.join(OUTPUT_ROOT, "frog")
    # The train split is handled first, then the test split.
    splits = (("train_unlabeled", train_tomograms), ("test", test_tomograms))

    for folder_name, file_names in splits:
        target_dir = os.path.join(output_folder, folder_name)
        os.makedirs(target_dir, exist_ok=True)
        for file_name in file_names:
            destination = os.path.join(target_dir, file_name)
            if not os.path.exists(destination):
                copyfile(os.path.join(root, file_name), destination)
186+
187+
188+
def prepare_2d_tem():
    """Export the listed 2D TEM images to the "2d_tem" output folder.

    Train images are exported with their ``raw`` dataset only, into
    ``train_unlabeled``. Test images are exported into ``test`` with the
    datasets ``raw``, ``labels/vesicles`` and ``labels/mask``. Existing
    output files are skipped.
    """
    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa
    test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicle_gt_2d/maus_2020_tem2d"  # noqa
    train_images = [
        "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
        "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
        "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
    ]
    test_images = [
        "MF_05649_G-09175-C_04B.h5", "MF_05646_C-09175-B_000B.h5",
        "MF_05649_G-09175-C_03.h5", "MF_05649_G-09175-C_02.h5"
    ]
    print(len(train_images) + len(test_images))

    output_folder = os.path.join(OUTPUT_ROOT, "2d_tem")

    # Train images: only the image data is exported.
    output_train = os.path.join(output_folder, "train_unlabeled")
    os.makedirs(output_train, exist_ok=True)
    for image_name in tqdm(train_images, desc="Export train images"):
        destination = os.path.join(output_train, image_name)
        if os.path.exists(destination):
            continue
        with h5py.File(os.path.join(train_root, image_name), "r") as src:
            raw = src["raw"][:]
        with h5py.File(destination, "a") as dst:
            dst.create_dataset("raw", data=raw, compression="gzip")

    # Test images: the image data is stored under "data" in the input files
    # and is exported as "raw", together with the vesicle labels and the mask.
    output_test = os.path.join(output_folder, "test")
    os.makedirs(output_test, exist_ok=True)
    for image_name in tqdm(test_images, desc="Export test images"):
        destination = os.path.join(output_test, image_name)
        if os.path.exists(destination):
            continue
        with h5py.File(os.path.join(test_root, image_name), "r") as src:
            raw = src["data"][:]
            vesicles = src["labels/vesicles"][:]
            mask = src["labels/mask"][:]
        with h5py.File(destination, "a") as dst:
            dst.create_dataset("raw", data=raw, compression="gzip")
            dst.create_dataset("labels/vesicles", data=vesicles, compression="gzip")
            dst.create_dataset("labels/mask", data=mask, compression="gzip")
231+
232+
233+
def prepare_munc_snap():
    """Placeholder without any export logic; does nothing."""
    return None
235+
236+
237+
def main():
    """Run the enabled export steps; the commented entries are the other available steps."""
    enabled_steps = (
        prepare_single_ax_stem_chemical_fix,
        # prepare_2d_tem,
        # prepare_frog,
        # prepare_ier,
        # prepare_munc_snap,
    )
    for step in enabled_steps:
        step()
243+
244+
245+
if __name__ == "__main__":
246+
main()

0 commit comments

Comments
 (0)