Skip to content

Commit d723d5c

Browse files
Merge pull request #52 from computational-cell-analytics/more-inner-ear-analysis
Analysis experiments and updates to imod export
2 parents 8dbe7d3 + 2cc8f92 commit d723d5c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2938
-126
lines changed

scripts/aggregate_data_information.py

Lines changed: 76 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -12,55 +12,64 @@
1212
stem = "STEM"
1313

1414

15-
def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions):
15+
def aggregate_vesicle_train_data(roots, conditions, resolutions):
1616
tomo_names = []
17-
tomo_vesicles = []
17+
tomo_vesicles_all, tomo_vesicles_imod = [], []
1818
tomo_condition = []
1919
tomo_resolution = []
2020
tomo_train = []
2121

22-
for ds, root in roots.items():
23-
print("Aggregate data for", ds)
24-
train_root = root["train"]
25-
if train_root == "":
26-
test_root = root["test"]
27-
tomograms = sorted(glob(os.path.join(test_root, "2024**", "*.h5"), recursive=True))
28-
this_test_tomograms = [os.path.basename(tomo) for tomo in tomograms]
22+
def aggregate_split(ds, split_root, split):
23+
if ds.startswith("04"):
24+
tomograms = sorted(glob(os.path.join(split_root, "2024**", "*.h5"), recursive=True))
2925
else:
30-
# This is only the case for 04, which is also nested
31-
tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
32-
this_test_tomograms = test_tomograms[ds]
26+
tomograms = sorted(glob(os.path.join(split_root, "*.h5")))
3327

3428
assert len(tomograms) > 0, ds
3529
this_condition = conditions[ds]
3630
this_resolution = resolutions[ds][0]
3731

38-
for tomo_path in tqdm(tomograms):
32+
for tomo_path in tqdm(tomograms, desc=f"Aggregate {split}"):
3933
fname = os.path.basename(tomo_path)
4034
with h5py.File(tomo_path, "r") as f:
4135
try:
4236
tomo_name = f.attrs["filename"]
4337
except KeyError:
4438
tomo_name = fname
4539

46-
n_label_sets = len(f["labels"])
47-
if n_label_sets > 2:
48-
print(tomo_path, "contains the following labels:", list(f["labels"].keys()))
49-
seg = f["labels/vesicles"][:]
50-
n_vesicles = len(np.unique(seg)) - 1
40+
if "labels/vesicles/combined_vesicles" in f:
41+
all_vesicles = f["labels/vesicles/combined_vesicles"][:]
42+
imod_vesicles = f["labels/vesicles/masked_vesicles"][:]
43+
n_vesicles_all = len(np.unique(all_vesicles)) - 1
44+
n_vesicles_imod = len(np.unique(imod_vesicles)) - 2
45+
else:
46+
vesicles = f["labels/vesicles"][:]
47+
n_vesicles_all = len(np.unique(vesicles)) - 1
48+
n_vesicles_imod = n_vesicles_all
5149

5250
tomo_names.append(tomo_name)
53-
tomo_vesicles.append(n_vesicles)
51+
tomo_vesicles_all.append(n_vesicles_all)
52+
tomo_vesicles_imod.append(n_vesicles_imod)
5453
tomo_condition.append(this_condition)
5554
tomo_resolution.append(this_resolution)
56-
tomo_train.append("test" if fname in this_test_tomograms else "train/val")
55+
tomo_train.append(split)
56+
57+
for ds, root in roots.items():
58+
print("Aggregate data for", ds)
59+
train_root = root["train"]
60+
if train_root != "":
61+
aggregate_split(ds, train_root, "train/val")
62+
test_root = root["test"]
63+
if test_root != "":
64+
aggregate_split(ds, test_root, "test")
5765

5866
df = pd.DataFrame({
5967
"tomogram": tomo_names,
6068
"condition": tomo_condition,
6169
"resolution": tomo_resolution,
6270
"used_for": tomo_train,
63-
"vesicle_count": tomo_vesicles,
71+
"vesicle_count_all": tomo_vesicles_all,
72+
"vesicle_count_imod": tomo_vesicles_imod,
6473
})
6574

6675
os.makedirs("data_summary", exist_ok=True)
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
7079
def vesicle_train_data():
7180
roots = {
7281
"01": {
73-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/01_hoi_maus_2020_incomplete", # noqa
82+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete", # noqa
7483
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete", # noqa
7584
},
7685
"02": {
77-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/02_hcc_nanogold", # noqa
86+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/02_hcc_nanogold", # noqa
7887
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold", # noqa
7988
},
8089
"03": {
81-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/03_hog_cs1sy7", # noqa
90+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/03_hog_cs1sy7", # noqa
8291
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7", # noqa
8392
},
8493
"04": {
8594
"train": "",
8695
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/", # noqa
8796
},
8897
"05": {
89-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/05_stem750_sv_training", # noqa
98+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/05_stem750_sv_training", # noqa
9099
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training", # noqa
91100
},
92101
"07": {
93-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/07_hoi_s1sy7_tem250_ihgp", # noqa
102+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/07_hoi_s1sy7_tem250_ihgp", # noqa
94103
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp", # noqa
95104
},
96105
"09": {
97-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/09_stem750_66k", # noqa
106+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/09_stem750_66k", # noqa
98107
"test": "",
99108
},
100109
"10": {
101-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/10_tem_single_release", # noqa
110+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/10_tem_single_release", # noqa
102111
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release", # noqa
103112
},
104113
"11": {
105-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/11_tem_multiple_release", # noqa
114+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/11_tem_multiple_release", # noqa
106115
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release", # noqa
107116
},
108117
"12": {
109-
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/12_chemical_fix_cryopreparation", # noqa
118+
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation", # noqa
110119
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation", # noqa
111120
},
112121
}
113122

114-
test_tomograms = {
115-
"01": ["tomogram-009.h5", "tomogram-038.h5", "tomogram-049.h5", "tomogram-052.h5", "tomogram-057.h5", "tomogram-060.h5", "tomogram-067.h5", "tomogram-074.h5", "tomogram-076.h5", "tomogram-083.h5", "tomogram-133.h5", "tomogram-136.h5", "tomogram-145.h5", "tomogram-149.h5", "tomogram-150.h5"], # noqa
116-
"02": ["tomogram-004.h5", "tomogram-008.h5"],
117-
"03": ["tomogram-003.h5", "tomogram-004.h5", "tomogram-008.h5",],
118-
"04": [], # all used for test
119-
"05": ["tomogram-003.h5", "tomogram-005.h5",],
120-
"07": ["tomogram-006.h5", "tomogram-017.h5",],
121-
"09": [], # no test data
122-
"10": ["tomogram-001.h5", "tomogram-002.h5", "tomogram-007.h5"],
123-
"11": ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5"],
124-
"12": ["tomogram-004.h5", "tomogram-021.h5", "tomogram-022.h5",],
125-
}
126-
127123
conditions = {
128124
"01": single_ax_tem,
129125
"02": dual_ax_tem,
@@ -150,7 +146,7 @@ def vesicle_train_data():
150146
"12": (1.554, 1.554, 1.554)
151147
}
152148

153-
aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
149+
aggregate_vesicle_train_data(roots, conditions, resolutions)
154150

155151

156152
def aggregate_az_train_data(roots, test_tomograms, conditions, resolutions):
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
397393
"MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
398394
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
399395
"MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
396+
],
397+
"frog": [
398+
"block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
399+
"block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
400+
"block30UB_one.h5", "block10U3A_five.h5",
400401
]
401402
}
402403

@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
439440
aggregate_da(roots, train_tomograms, test_tomograms, resolutions)
440441

441442

443+
def get_n_images_frog():
    """Print the summed first-axis length of the "raw" dataset over the frog files.

    Each listed HDF5 file is opened read-only from the hard-coded root folder,
    the size of axis 0 of its "raw" dataset is added to a running total, and
    the total is printed once at the end.
    """
    # NOTE(review): presumably axis 0 of "raw" indexes the individual images - confirm.
    data_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"
    file_names = [
        "block10U3A_three.h5",
        "block30UB_one_two.h5",
        "block30UB_two.h5",
        "block10U3A_one.h5",
        "block184B_one.h5",
        "block30UB_three.h5",
        "block10U3A_two.h5",
        "block30UB_four.h5",
        "block30UB_one.h5",
        "block10U3A_five.h5",
    ]

    total = 0
    for file_name in file_names:
        with h5py.File(os.path.join(data_root, file_name), "r") as handle:
            raw_shape = handle["raw"].shape
            total = total + raw_shape[0]
    print(total)
455+
456+
457+
def get_image_sizes_tem_2d():
    """Print the shape of the "raw" dataset of every listed 2D TEM file.

    The files are opened read-only from the hard-coded root folder, in the
    order given below, and one shape is printed per file.
    """
    data_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa
    file_names = [
        "MF_05649_P-09175-E_06.h5",
        "MF_05646_C-09175-B_001B.h5",
        "MF_05649_P-09175-E_07.h5",
        "MF_05649_G-09175-C_001.h5",
        "MF_05646_C-09175-B_002.h5",
        "MF_05649_G-09175-C_04.h5",
        "MF_05649_P-09175-E_05.h5",
        "MF_05646_C-09175-B_000.h5",
        "MF_05646_C-09175-B_001.h5",
    ]

    for file_name in file_names:
        file_path = os.path.join(data_root, file_name)
        with h5py.File(file_path, "r") as handle:
            raw_shape = handle["raw"].shape
            print(raw_shape)
468+
469+
442470
def main():
443471
# active_zone_train_data()
444472
# compartment_train_data()
445473
# mito_train_data()
446-
# vesicle_train_data()
474+
vesicle_train_data()
447475

448-
vesicle_domain_adaptation_data()
476+
# vesicle_domain_adaptation_data()
477+
# get_n_images_frog()
478+
# get_image_sizes_tem_2d()
449479

450480

451481
main()

scripts/cooper/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,4 @@
11
pwd.txt
2+
debug/
3+
mito/
4+
synapse-examples/

scripts/cooper/analysis/.gitignore

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
screenshots/
2+
20241108_3D_Imig_DATA_2014/
3+
*az*/
4+
mrc_files/
5+
imig_data/
6+
results/
7+
*.xlsx
8+
*.tsv

0 commit comments

Comments
 (0)