12
12
stem = "STEM"
13
13
14
14
15
- def aggregate_vesicle_train_data (roots , test_tomograms , conditions , resolutions ):
15
+ def aggregate_vesicle_train_data (roots , conditions , resolutions ):
16
16
tomo_names = []
17
- tomo_vesicles = []
17
+ tomo_vesicles_all , tomo_vesicles_imod = [], []
18
18
tomo_condition = []
19
19
tomo_resolution = []
20
20
tomo_train = []
21
21
22
- for ds , root in roots .items ():
23
- print ("Aggregate data for" , ds )
24
- train_root = root ["train" ]
25
- if train_root == "" :
26
- test_root = root ["test" ]
27
- tomograms = sorted (glob (os .path .join (test_root , "2024**" , "*.h5" ), recursive = True ))
28
- this_test_tomograms = [os .path .basename (tomo ) for tomo in tomograms ]
22
+ def aggregate_split (ds , split_root , split ):
23
+ if ds .startswith ("04" ):
24
+ tomograms = sorted (glob (os .path .join (split_root , "2024**" , "*.h5" ), recursive = True ))
29
25
else :
30
- # This is only the case for 04, which is also nested
31
- tomograms = sorted (glob (os .path .join (train_root , "*.h5" )))
32
- this_test_tomograms = test_tomograms [ds ]
26
+ tomograms = sorted (glob (os .path .join (split_root , "*.h5" )))
33
27
34
28
assert len (tomograms ) > 0 , ds
35
29
this_condition = conditions [ds ]
36
30
this_resolution = resolutions [ds ][0 ]
37
31
38
- for tomo_path in tqdm (tomograms ):
32
+ for tomo_path in tqdm (tomograms , desc = f"Aggregate { split } " ):
39
33
fname = os .path .basename (tomo_path )
40
34
with h5py .File (tomo_path , "r" ) as f :
41
35
try :
42
36
tomo_name = f .attrs ["filename" ]
43
37
except KeyError :
44
38
tomo_name = fname
45
39
46
- n_label_sets = len (f ["labels" ])
47
- if n_label_sets > 2 :
48
- print (tomo_path , "contains the following labels:" , list (f ["labels" ].keys ()))
49
- seg = f ["labels/vesicles" ][:]
50
- n_vesicles = len (np .unique (seg )) - 1
40
+ if "labels/vesicles/combined_vesicles" in f :
41
+ all_vesicles = f ["labels/vesicles/combined_vesicles" ][:]
42
+ imod_vesicles = f ["labels/vesicles/masked_vesicles" ][:]
43
+ n_vesicles_all = len (np .unique (all_vesicles )) - 1
44
+ n_vesicles_imod = len (np .unique (imod_vesicles )) - 2
45
+ else :
46
+ vesicles = f ["labels/vesicles" ][:]
47
+ n_vesicles_all = len (np .unique (vesicles )) - 1
48
+ n_vesicles_imod = n_vesicles_all
51
49
52
50
tomo_names .append (tomo_name )
53
- tomo_vesicles .append (n_vesicles )
51
+ tomo_vesicles_all .append (n_vesicles_all )
52
+ tomo_vesicles_imod .append (n_vesicles_imod )
54
53
tomo_condition .append (this_condition )
55
54
tomo_resolution .append (this_resolution )
56
- tomo_train .append ("test" if fname in this_test_tomograms else "train/val" )
55
+ tomo_train .append (split )
56
+
57
+ for ds , root in roots .items ():
58
+ print ("Aggregate data for" , ds )
59
+ train_root = root ["train" ]
60
+ if train_root != "" :
61
+ aggregate_split (ds , train_root , "train/val" )
62
+ test_root = root ["test" ]
63
+ if test_root != "" :
64
+ aggregate_split (ds , test_root , "test" )
57
65
58
66
df = pd .DataFrame ({
59
67
"tomogram" : tomo_names ,
60
68
"condition" : tomo_condition ,
61
69
"resolution" : tomo_resolution ,
62
70
"used_for" : tomo_train ,
63
- "vesicle_count" : tomo_vesicles ,
71
+ "vesicle_count_all" : tomo_vesicles_all ,
72
+ "vesicle_count_imod" : tomo_vesicles_imod ,
64
73
})
65
74
66
75
os .makedirs ("data_summary" , exist_ok = True )
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
70
79
def vesicle_train_data ():
71
80
roots = {
72
81
"01" : {
73
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /01_hoi_maus_2020_incomplete" , # noqa
82
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /01_hoi_maus_2020_incomplete" , # noqa
74
83
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" , # noqa
75
84
},
76
85
"02" : {
77
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /02_hcc_nanogold" , # noqa
86
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /02_hcc_nanogold" , # noqa
78
87
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold" , # noqa
79
88
},
80
89
"03" : {
81
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /03_hog_cs1sy7" , # noqa
90
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /03_hog_cs1sy7" , # noqa
82
91
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7" , # noqa
83
92
},
84
93
"04" : {
85
94
"train" : "" ,
86
95
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/" , # noqa
87
96
},
88
97
"05" : {
89
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /05_stem750_sv_training" , # noqa
98
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /05_stem750_sv_training" , # noqa
90
99
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training" , # noqa
91
100
},
92
101
"07" : {
93
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /07_hoi_s1sy7_tem250_ihgp" , # noqa
102
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /07_hoi_s1sy7_tem250_ihgp" , # noqa
94
103
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp" , # noqa
95
104
},
96
105
"09" : {
97
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /09_stem750_66k" , # noqa
106
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /09_stem750_66k" , # noqa
98
107
"test" : "" ,
99
108
},
100
109
"10" : {
101
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /10_tem_single_release" , # noqa
110
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /10_tem_single_release" , # noqa
102
111
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release" , # noqa
103
112
},
104
113
"11" : {
105
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /11_tem_multiple_release" , # noqa
114
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /11_tem_multiple_release" , # noqa
106
115
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release" , # noqa
107
116
},
108
117
"12" : {
109
- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /12_chemical_fix_cryopreparation" , # noqa
118
+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /12_chemical_fix_cryopreparation" , # noqa
110
119
"test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" , # noqa
111
120
},
112
121
}
113
122
114
- test_tomograms = {
115
- "01" : ["tomogram-009.h5" , "tomogram-038.h5" , "tomogram-049.h5" , "tomogram-052.h5" , "tomogram-057.h5" , "tomogram-060.h5" , "tomogram-067.h5" , "tomogram-074.h5" , "tomogram-076.h5" , "tomogram-083.h5" , "tomogram-133.h5" , "tomogram-136.h5" , "tomogram-145.h5" , "tomogram-149.h5" , "tomogram-150.h5" ], # noqa
116
- "02" : ["tomogram-004.h5" , "tomogram-008.h5" ],
117
- "03" : ["tomogram-003.h5" , "tomogram-004.h5" , "tomogram-008.h5" ,],
118
- "04" : [], # all used for test
119
- "05" : ["tomogram-003.h5" , "tomogram-005.h5" ,],
120
- "07" : ["tomogram-006.h5" , "tomogram-017.h5" ,],
121
- "09" : [], # no test data
122
- "10" : ["tomogram-001.h5" , "tomogram-002.h5" , "tomogram-007.h5" ],
123
- "11" : ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5" ],
124
- "12" : ["tomogram-004.h5" , "tomogram-021.h5" , "tomogram-022.h5" ,],
125
- }
126
-
127
123
conditions = {
128
124
"01" : single_ax_tem ,
129
125
"02" : dual_ax_tem ,
@@ -150,7 +146,7 @@ def vesicle_train_data():
150
146
"12" : (1.554 , 1.554 , 1.554 )
151
147
}
152
148
153
- aggregate_vesicle_train_data (roots , test_tomograms , conditions , resolutions )
149
+ aggregate_vesicle_train_data (roots , conditions , resolutions )
154
150
155
151
156
152
def aggregate_az_train_data (roots , test_tomograms , conditions , resolutions ):
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
397
393
"MF_05649_P-09175-E_06.h5" , "MF_05646_C-09175-B_001B.h5" , "MF_05649_P-09175-E_07.h5" ,
398
394
"MF_05649_G-09175-C_001.h5" , "MF_05646_C-09175-B_002.h5" , "MF_05649_G-09175-C_04.h5" ,
399
395
"MF_05649_P-09175-E_05.h5" , "MF_05646_C-09175-B_000.h5" , "MF_05646_C-09175-B_001.h5"
396
+ ],
397
+ "frog" : [
398
+ "block10U3A_three.h5" , "block30UB_one_two.h5" , "block30UB_two.h5" , "block10U3A_one.h5" ,
399
+ "block184B_one.h5" , "block30UB_three.h5" , "block10U3A_two.h5" , "block30UB_four.h5" ,
400
+ "block30UB_one.h5" , "block10U3A_five.h5" ,
400
401
]
401
402
}
402
403
@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
439
440
aggregate_da (roots , train_tomograms , test_tomograms , resolutions )
440
441
441
442
443
def get_n_images_frog():
    """Print the summed first-axis length of the "raw" dataset over the frog files.

    Each listed HDF5 file is opened read-only from the hard-coded root folder
    and the size of axis 0 of its "raw" dataset is added to a running total.
    The total is printed once at the end; nothing is returned.

    NOTE(review): axis 0 is presumably the image / slice axis - confirm.
    """
    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"  # noqa
    file_names = [
        "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
        "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
        "block30UB_one.h5", "block10U3A_five.h5",
    ]

    def _first_axis_length(file_name):
        # Only the dataset's shape metadata is read, not the data itself.
        with h5py.File(os.path.join(root, file_name), "r") as handle:
            return handle["raw"].shape[0]

    print(sum(_first_axis_length(file_name) for file_name in file_names))
455
+
456
+
457
def get_image_sizes_tem_2d():
    """Print the shape of the "raw" dataset of every listed 2D TEM file.

    The files are opened read-only from the hard-coded root folder, one after
    the other, and one shape is printed per file. Nothing is returned.
    """
    data_dir = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa
    file_names = (
        "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
        "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
        "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5",
    )
    for file_name in file_names:
        # The shape has to be read while the file handle is still open.
        with h5py.File(os.path.join(data_dir, file_name), "r") as handle:
            raw_shape = handle["raw"].shape
            print(raw_shape)
468
+
469
+
442
470
def main():
    """Run the currently selected data summary.

    Exactly one summary is enabled at a time; the other entry points are kept
    as commented-out toggles so they can be switched on by hand.
    """
    # active_zone_train_data()
    # compartment_train_data()
    # mito_train_data()
    vesicle_train_data()

    # vesicle_domain_adaptation_data()
    # get_n_images_frog()
    # get_image_sizes_tem_2d()
449
479
450
480
451
481
# Only run the summary when this file is executed as a script; importing the
# module (e.g. to reuse one of its functions) must not start the aggregation.
if __name__ == "__main__":
    main()
0 commit comments