@@ -86,62 +86,61 @@ def is_tree(obj):
86
86
# Return str in an array
87
87
return ak .Array ([final_str ])
88
88
89
-
90
def print_structure_from_str(deliver_dict, filter_branch="", save_to_txt=False, do_print=False):
    """
    Convert the file structure of all delivered samples to a formatted string.

    For every sample, the structure string is read from the first delivered
    file at ``["servicex"]["branch"]`` (first array entry). Each non-empty
    line of that string describes one tree; the expected format is:

        Tree: TreeName1; TBranch: Branchname1 ; dtype: BranchType1, TBranch: Branchname2 ; dtype: BranchType2, ...
        Tree: TreeName2; TBranch: Branchname1 ; dtype: BranchType1, ...

    Parameters:
        deliver_dict (dict): ServiceX deliver output (keys: sample names,
            values: sequences of file paths or URLs; only the first entry
            of each sequence is opened).
        filter_branch (str): If provided, only branch entries containing this
            string are included. The test is applied to the whole entry
            (including the "TBranch:" prefix and the dtype part).
        save_to_txt (bool): If True, writes the formatted structure to
            "samples_structure.txt" in the current working directory.
            Takes precedence over do_print.
        do_print (bool): If True (and save_to_txt is False), prints the
            formatted structure instead of returning it.

    Returns:
        str or None:
            - a confirmation message if save_to_txt is True,
            - None if do_print is True (the structure is printed),
            - the formatted file structure otherwise.
    """
    branch_prefix = "TBranch:"

    output_lines = [
        f"\n File structure of all samples with branch filter '{filter_branch} ':"
    ]

    for sample_name, path in deliver_dict.items():
        # Sample name framed by separator bands
        output_lines.append(
            f"\n ---------------------------\n "
            f"\U0001F4C1 Sample: {sample_name} \n "
            f"---------------------------"
        )

        with uproot.open(path[0]) as root_file:
            # Expected position of the structure string in the delivered file
            structure_str = root_file["servicex"]["branch"].array()[0]

            for line in structure_str.split("\n "):
                if not line.strip():
                    continue  # Skip empty lines

                # Everything before the first ';' is the tree header,
                # the remainder (if any) is the comma-separated branch list.
                parts = line.split(";", 1)
                output_lines.append(f"\n \U0001F333 {parts[0]} ")

                if len(parts) < 2:
                    continue

                output_lines.append(" ├── Branches:")
                for branch_info in parts[1].split(","):
                    branch_line = branch_info.strip()
                    if filter_branch not in branch_line:
                        continue
                    # Only entries that start with the branch prefix are listed
                    if branch_line.startswith(branch_prefix):
                        output_lines.append(
                            f" │ ├── {branch_line[len(branch_prefix):]} "
                        )

    result_str = "\n ".join(output_lines)

    if save_to_txt:
        # The output contains emoji and box-drawing characters, so the
        # encoding is fixed to UTF-8 rather than left to the platform default.
        with open("samples_structure.txt", "w", encoding="utf-8") as out_file:
            out_file.write(result_str)
        return "File structure saved to 'samples_structure.txt'."

    if do_print:
        print(result_str)
        return None

    return result_str
145
144
146
145
147
146
def get_structure (dataset , ** kwargs ):
@@ -171,24 +170,21 @@ def get_structure(dataset, **kwargs):
171
170
dataset_dict = dataset
172
171
else :
173
172
raise ValueError (f"Unsupported dataset input type: { user_in } .\n Input must be dict ('sample_name':'dataset_id'), str or list of str" )
173
+ return 0
174
174
175
- sample_list = []
176
-
177
- for name , did in dataset_dict .items ():
178
- tmp_dict = {
179
- "NFiles" :1 ,
175
+ sample_list = [
176
+ {
177
+ "NFiles" : 1 ,
180
178
"Name" : name ,
181
179
"Dataset" : servicex .dataset .Rucio (did ),
182
180
"Query" : query_PythonFunction ,
183
181
}
184
- sample_list .append (tmp_dict )
182
+ for name , did in dataset_dict .items ()
183
+ ]
185
184
186
- spec_python = {
187
- "Sample" : sample_list
188
- }
185
+ spec_python = {"Sample" : sample_list }
189
186
190
187
output = servicex .deliver (spec_python )
191
188
192
- print_structure_from_str (output ,** kwargs )
193
-
189
+ return print_structure_from_str (output , ** kwargs )
194
190
0 commit comments