Skip to content

Commit 7d87a47

Browse files
committed
docstrings & error msg improvements
1 parent e71a01e commit 7d87a47

File tree

3 files changed

+25
-18
lines changed

3 files changed

+25
-18
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ dist/
77

88
#ServiceX
99
servicex.yaml
10+
11+
#Testing
12+
samples_structure.txt

servicex_analysis_utils/file_peeking.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ def run_query(input_filenames=None):
3333
import uproot
3434
import awkward as ak
3535
"""
36-
Helper. Open a file and return one array containing a single string that describes the DataSet root file structure.
3736
Sent to ServiceX python transformers.
38-
37+
Open a file and return one array containing a single string that describes the DataSet root file structure.
38+
3939
The string will be formatted like:
4040
"Tree: TreeName1; TBranch: Branchname1 ; dtype: BranchType1, TBranch: Branchname2 ; dtype: BranchType2, ...
4141
Tree: TreeName2; TBranch: Branchname1 ; dtype: BranchType1, ..."
@@ -92,11 +92,12 @@ def print_structure_from_str(deliver_dict, filter_branch="", save_to_txt=False,
9292
9393
Parameters:
9494
deliver_dict (dict): ServiceX deliver output (keys: sample names, values: file paths or URLs).
95-
filter_branch (str): If provided, only branches containing this string are included.
95+
filter_branch (str): If provided, only branches containing this string are included in the output.
9696
save_to_txt (bool): If True, saves output to a text file instead of returning it.
97+
do_print (bool): If True, prints the output to the terminal and returns None. Ignored if save_to_txt is True.
9798
9899
Returns:
99-
str: The formatted file structure.
100+
result_str (str): The formatted file structure.
100101
"""
101102
output_lines = []
102103
output_lines.append(f"\nFile structure of all samples with branch filter '{filter_branch}':")
@@ -139,13 +140,24 @@ def print_structure_from_str(deliver_dict, filter_branch="", save_to_txt=False,
139140
with open("samples_structure.txt", "w") as f:
140141
f.write(result_str)
141142
return "File structure saved to 'samples_structure.txt'."
142-
if do_print:
143+
elif do_print:
143144
print(result_str)
144145
return
145146
else:
146147
return result_str
147148

148149
def build_deliver_spec(dataset):
150+
"""
151+
Helper to build the servicex.deliver dict configuration.
152+
Supports multiple inputs for multiple sample queries.
153+
154+
Parameters:
155+
dataset (str, [str], or dict): Rucio DIDs to be checked by the servicex workers.
156+
If dict, custom sample names can be provided.
157+
158+
Returns:
159+
spec_python (dict): The specification for the python function query containing Name, Query, Dataset, NFiles
160+
"""
149161
#Servicex query using the PythonFunction backend
150162
query_PythonFunction = servicex.query.PythonFunction().with_uproot_function(run_query)
151163

@@ -175,14 +187,13 @@ def build_deliver_spec(dataset):
175187
]
176188
spec_python = {"Sample": sample_list}
177189

178-
return spec_python
179-
190+
return spec_python
180191

181192
def get_structure(dataset, **kwargs, raw=False):
182193
"""
183194
Utility function.
184195
Creates and sends the ServiceX request from user-provided datasets to retrieve the file structure.
185-
Calls print_structure_from_str()
196+
Calls print_structure_from_str() to get the structure in a user-friendly format
186197
187198
Parameters:
188199
dataset (dict,str,[str]): The datasets from which to print the file structures.
@@ -193,7 +204,4 @@ def get_structure(dataset, **kwargs, raw=False):
193204

194205
output=servicex.deliver(spec_python)
195206

196-
if raw:
197-
return array_from_str(output)
198-
199207
return print_structure_from_str(output, **kwargs)

tests/test_file_peeking.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def test_encoding(build_test_samples,tmp_path, capsys):
8686
# save_to_txt
8787
file_peeking.print_structure_from_str(deliver_dict,save_to_txt=True)
8888
out_txt="samples_structure.txt"
89-
assert os.path.exists(out_txt), "save_to_txt arg not producing files"
89+
assert os.path.exists(out_txt), f"save_to_txt arg not producing {out_txt}"
9090

9191
with open(out_txt, "r", encoding="utf-8") as f:
9292
written_str = f.read()
@@ -99,7 +99,7 @@ def test_encoding(build_test_samples,tmp_path, capsys):
9999
captured = capsys.readouterr()
100100

101101
# Check if all returns match
102-
assert captured.out[0:-1] == written_str == output_str , "saved, printed and direct return formats are different"
102+
assert captured.out[0:-1] == written_str == output_str , "saved, printed and direct return formats should not be different"
103103

104104
# Compare with expected return
105105
test_txt="tests/data/expected_structure.txt"
@@ -139,7 +139,7 @@ def test_spec_builder():
139139
assert isinstance(first_entry["Query"], PythonFunction), "'Query' should be a PythonFunction"
140140

141141
##Different input types
142-
#list with two DID
142+
#list with two DIDs
143143
test_did_list= [test_did_str, test_did_str+"2"]
144144
spec_from_list=file_peeking.build_deliver_spec(test_did_list)
145145
assert len(spec_from_list["Sample"])==2, "Wrong number of samples in deliver configuration"
@@ -153,7 +153,3 @@ def test_spec_builder():
153153
wrong_did=1234
154154
with pytest.raises(ValueError, match=re.escape(f"Unsupported dataset input type: {type(wrong_did)}.\nInput must be dict ('sample_name':'dataset_id'), str or list of str")):
155155
file_peeking.build_deliver_spec(wrong_did)
156-
157-
158-
159-

0 commit comments

Comments
 (0)