1
+ # Copyright (c) 2025, IRIS-HEP
2
+ # All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # * Redistributions of source code must retain the above copyright notice, this
8
+ # list of conditions and the following disclaimer.
9
+ #
10
+ # * Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ #
14
+ # * Neither the name of the copyright holder nor the names of its
15
+ # contributors may be used to endorse or promote products derived from
16
+ # this software without specific prior written permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ import servicex
30
+ import uproot
31
+
32
def run_query(input_filenames=None):
    """
    Helper. Open a ROOT file and return a one-element array holding a single
    string that describes the TTree/branch structure of the file.
    Sent to ServiceX python transformers.

    The imports and the ``is_tree`` helper are kept inside the body so the
    function stays self-contained when it is shipped to the transformer.

    The string will be formatted like:
    "Tree: TreeName1; TBranch: Branchname1 ; dtype: BranchType1, TBranch: Branchname2 ; dtype: BranchType2, ...
    Tree: TreeName2; TBranch: Branchname1 ; dtype: BranchType1, ..."

    Parameters:
        input_filenames: Passed straight to ``uproot.open``.

    Returns:
        ak.Array: Array with exactly one element, the structure string
        (one entry per TTree, entries separated by a newline).
    """
    import uproot
    import awkward as ak

    def is_tree(obj):
        """
        Helper to check if a root file item is TTree. Different object types use .classname or .classnames
        """
        # Check for 'classname'
        if hasattr(obj, "classname"):
            cls_attr = obj.classname
            # Call if it's callable
            cls_value = cls_attr() if callable(cls_attr) else cls_attr
            return "TTree" in cls_value
        # Check for 'classnames'
        if hasattr(obj, "classnames"):
            cls_attr = obj.classnames
            cls_values = cls_attr() if callable(cls_attr) else cls_attr
            return any("TTree" in cls for cls in cls_values)
        return False

    trees_info = []  # list of str info for each tree

    with uproot.open(input_filenames) as file:
        for tree_name in file.keys():
            tree = file[tree_name]

            # Only TTrees
            if not is_tree(tree):
                continue

            # Remove only the trailing ";<cycle>" suffix. str.rstrip(";1")
            # strips a *set of characters*, so it would also eat a trailing
            # "1" that belongs to the name ("tree1;1" -> "tree") and would
            # leave any cycle other than 1 untouched.
            tree_name_clean = tree_name.rsplit(";", 1)[0]

            # Gather branch info, using the uproot type interpretation
            branch_info_list = [
                f"TBranch: {branch_name} ; dtype: {branch.interpretation!s} "
                for branch_name, branch in tree.items()
            ]

            # Join branch info & separate by ,
            tree_info = f"Tree: {tree_name_clean} ; " + ", ".join(branch_info_list)
            trees_info.append(tree_info)

    # Join all trees, one tree per line
    final_str = "\n ".join(trees_info)

    # Return str in an array
    return ak.Array([final_str])
88
+
89
+
90
def print_structure_from_str(deliver_dict, filter_branch="", save_to_txt=False):
    """
    Helper. Takes the structure strings for all samples from servicex.deliver output
    and prints them in a friendly formatted view.

    The expected structure string format is:

    Tree: TreeName1; TBranch: Branchname1 ; dtype: BranchType1, TBranch: Branchname2 ; dtype: BranchType2, ...
    Tree: TreeName2; TBranch: Branchname1 ; dtype: BranchType1, ...

    Parameters:
        deliver_dict (dict): The return dictionary of servicex.deliver
                            (keys are sample names, values are indexable
                            collections of file paths or URLs; only the
                            first file of each sample is opened)
        filter_branch (str): Optional. Only branch entries whose text
                            contains it are printed. The test is a plain
                            substring match on the whole entry, i.e. the
                            branch name and its dtype description.
        save_to_txt (bool): Optional. When True the report is written to
                            "samples_structure.txt" in the current working
                            directory (overwriting it) instead of being
                            printed to stdout.
    """
    if save_to_txt:
        # Explicit utf-8: the report contains emoji and box-drawing
        # characters that a platform default encoding may not be able to encode.
        with open("samples_structure.txt", "w", encoding="utf-8") as out:
            _write_structure(deliver_dict, filter_branch, out)
    else:
        # stream=None makes print() fall back to sys.stdout.
        _write_structure(deliver_dict, filter_branch, None)


def _split_branch_records(branch_info_str):
    """Split the branch part of a tree line into one string per branch.

    A plain split(",") is not enough because a dtype description may itself
    contain commas; fragments that do not start a new "TBranch:" entry are
    glued back onto the entry they were cut from.
    """
    records = []
    for fragment in branch_info_str.split(","):
        if not records or fragment.strip().startswith("TBranch:"):
            records.append(fragment)
        else:
            records[-1] += "," + fragment
    return records


def _write_structure(deliver_dict, filter_branch, stream):
    """Write the formatted structure report to *stream* (None means stdout)."""
    print(
        f"File structure of all samples with branch filter {filter_branch} :",
        file=stream,
    )

    for sample_name, path in deliver_dict.items():
        # Sample name with icon and bands
        print(
            f"\n ---------------------------\n "
            f"\U0001F4C1 Sample: {sample_name} \n "
            f"---------------------------",
            file=stream,
        )

        with uproot.open(path[0]) as f:
            # Expected position of structure_str from servicex.deliver
            structure_str = f["servicex"]["branch"].array()[0]

        # Each line of the structure string represents one tree.
        for line in structure_str.split("\n "):
            if not line.strip():
                continue  # Skip empty lines

            # First part before ';' is the tree header.
            parts = line.split(";", 1)
            print(f"\n \U0001F333 {parts[0]} ", file=stream)

            # Check for branches
            if len(parts) > 1:
                print(" ├── Branches:", file=stream)
                for record in _split_branch_records(parts[1]):
                    branch_line = record.strip()
                    if filter_branch not in branch_line:
                        continue
                    # Only print entries that start with "TBranch:";
                    # the slice drops that 8-character prefix.
                    if branch_line.startswith("TBranch:"):
                        print(f" │ ├── {branch_line[8:]} ", file=stream)
145
+
146
+
147
def get_structure(dataset_dict, **kwargs):
    """
    Utility function.
    Builds one ServiceX sample per dataset, submits the request and hands the
    delivered result to print_structure_from_str().

    Parameters:
        dataset_dict (dict): Maps a readable sample name to the Rucio dataset
                            identifier whose structure should be printed.
                            TODO: add default sample names and accept a bare
                            did or a list of dids.
        kwargs : Forwarded unchanged to print_structure_from_str
    """
    # One shared query: run_query executed through the PythonFunction backend
    uproot_query = servicex.query.PythonFunction().with_uproot_function(run_query)

    # A single file per dataset is requested
    samples = [
        {
            "NFiles": 1,
            "Name": sample_name,
            "Dataset": servicex.dataset.Rucio(dataset_id),
            "Query": uproot_query,
        }
        for sample_name, dataset_id in dataset_dict.items()
    ]

    delivered = servicex.deliver({"Sample": samples})

    print_structure_from_str(delivered, **kwargs)
178
+
179
+
0 commit comments