Skip to content

Commit d5e37ca

Browse files
committed
Add CLI entry point, plus save_to_txt and do_print flags
1 parent b8d737f commit d5e37ca

File tree

3 files changed

+88
-44
lines changed

3 files changed

+88
-44
lines changed

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,9 @@ test = [
3636
"pandas"
3737
]
3838

39+
[project.scripts]
40+
servicex-get-structure = "servicex_analysis_utils.cli:run_from_command"
41+
42+
3943
[tool.hatch.build.targets.wheel]
4044
packages = ["servicex_analysis_utils"]

servicex_analysis_utils/cli.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import argparse
2+
import sys
3+
import json
4+
import os
5+
from .file_peeking import get_structure
6+
7+
def run_from_command(argv=None):
    """
    Command-line entry point: retrieve the structure of ROOT datasets.

    The positional arguments are either one or more dataset identifiers, or a
    single path ending in ".json" whose content is a dictionary of datasets.
    The result of get_structure is printed, unless --save-to-txt is given, in
    which case only a confirmation message is printed.

    Parameters:
        argv (list of str): Optional. Arguments to parse. When None (default),
            argparse reads them from sys.argv[1:], which is what the installed
            console script relies on.

    Exits with status 1 and a message on stderr when the JSON file is missing,
    cannot be read, is not valid JSON, or does not contain a dictionary.
    """
    parser = argparse.ArgumentParser(description="CLI tool for retrieving ROOT file structures.")
    parser.add_argument("dataset", nargs='+', help="Input datasets (Rucio DID) or a JSON file containing datasets in a dict.")
    parser.add_argument("--filter-branch", default="", help="Only display branches containing this string.")
    parser.add_argument("--save-to-txt", action="store_true", help="Save output to a text file instead of printing.")
    args = parser.parse_args(argv)

    if len(args.dataset) == 1 and args.dataset[0].endswith(".json"):
        dataset = _load_dataset_json(args.dataset[0])
    else:
        # Datasets given directly on the command line are passed on as a list
        dataset = args.dataset

    # do_print=False: printing is handled here so the output is not duplicated
    result = get_structure(dataset, filter_branch=args.filter_branch, save_to_txt=args.save_to_txt, do_print=False)

    if args.save_to_txt:
        print("Saved to samples_structure.txt")
    else:
        print(result)


def _exit_with_error(message):
    """Helper. Print message in red on stderr and exit with status 1."""
    print(f"\033[91mError: {message}\033[0m", file=sys.stderr)
    sys.exit(1)


def _load_dataset_json(dataset_file):
    """Helper. Load the dataset dictionary from a JSON file, exiting with status 1 on any failure."""
    if not os.path.isfile(dataset_file):
        _exit_with_error(f"JSON file '{dataset_file}' not found.")

    try:
        # JSON is read as UTF-8 explicitly rather than with the platform default
        with open(dataset_file, "r", encoding="utf-8") as f:
            dataset = json.load(f)
    except json.JSONDecodeError:
        _exit_with_error(f"'{dataset_file}' is not a valid JSON file.")
    except (OSError, UnicodeDecodeError) as err:
        # e.g. permission denied or undecodable bytes: report instead of a traceback
        _exit_with_error(f"Could not read '{dataset_file}': {err}")

    if not isinstance(dataset, dict):
        _exit_with_error("The JSON file must contain a dictionary.")

    return dataset

servicex_analysis_utils/file_peeking.py

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -86,62 +86,61 @@ def is_tree(obj):
8686
# Return str in an array
8787
return ak.Array([final_str])
8888

89-
90-
def print_structure_from_str(deliver_dict, filter_branch="", save_to_txt=False, do_print=False):
    """
    Converts dataset file structures to a formatted string.

    For every sample, the first file is opened with uproot and the structure
    string stored under ["servicex"]["branch"] is read and formatted.

    The expected structure string format is one line per tree:

    Tree: TreeName1; TBranch: Branchname1 ; dtype: BranchType1, TBranch: Branchname2 ; dtype: BranchType2, ...
    Tree: TreeName2; TBranch: Branchname1 ; dtype: BranchType1, ...

    Parameters:
        deliver_dict (dict): ServiceX deliver output (keys: sample names, values: file paths or URLs).
        filter_branch (str): If provided, only branches containing this string are included.
        save_to_txt (bool): If True, writes the output to 'samples_structure.txt' in the
            current working directory. Takes precedence over do_print.
        do_print (bool): If True (and save_to_txt is False), prints the output instead of returning it.

    Returns:
        str or None: A confirmation message if save_to_txt is True, None if the
        output was printed, otherwise the formatted file structure.
    """
    output_lines = [f"\nFile structure of all samples with branch filter '{filter_branch}':"]

    for sample_name, path in deliver_dict.items():
        output_lines.append(
            f"\n---------------------------\n"
            f"\U0001F4C1 Sample: {sample_name}\n"
            f"---------------------------"
        )

        with uproot.open(path[0]) as f:
            # Position of the structure string in the delivered file
            structure_str = f["servicex"]["branch"].array()[0]

        output_lines.extend(_format_tree_lines(structure_str, filter_branch))

    result_str = "\n".join(output_lines)

    if save_to_txt:
        # The output contains emoji and box-drawing characters, so the encoding
        # must be explicit: the platform default may not be able to encode them.
        with open("samples_structure.txt", "w", encoding="utf-8") as f:
            f.write(result_str)
        return "File structure saved to 'samples_structure.txt'."

    if do_print:
        print(result_str)
        return None

    return result_str


def _format_tree_lines(structure_str, filter_branch):
    """Helper. Format one structure string (one tree per line) into display lines."""
    lines = []
    for line in structure_str.split("\n"):
        if not line.strip():
            continue  # Skip empty lines

        # Text before the first ';' is the tree header, the rest is branch info
        tree_header, separator, branch_part = line.partition(";")
        lines.append(f"\n\U0001F333 {tree_header}")

        if not separator:
            continue  # Tree line without any branch info

        lines.append(" ├── Branches:")
        for branch_info in branch_part.split(","):
            branch_line = branch_info.strip()
            # Keep only "TBranch:" entries that contain the filter string
            if filter_branch in branch_line and branch_line.startswith("TBranch:"):
                lines.append(f" │ ├── {branch_line[8:]}")
    return lines
145144

146145

147146
def get_structure(dataset, **kwargs):
@@ -171,24 +170,21 @@ def get_structure(dataset, **kwargs):
171170
dataset_dict=dataset
172171
else:
173172
raise ValueError(f"Unsupported dataset input type: {user_in}.\nInput must be dict ('sample_name':'dataset_id'), str or list of str")
173+
return 0
174174

175-
sample_list=[]
176-
177-
for name, did in dataset_dict.items():
178-
tmp_dict={
179-
"NFiles":1,
175+
sample_list = [
176+
{
177+
"NFiles": 1,
180178
"Name": name,
181179
"Dataset": servicex.dataset.Rucio(did),
182180
"Query": query_PythonFunction,
183181
}
184-
sample_list.append(tmp_dict)
182+
for name, did in dataset_dict.items()
183+
]
185184

186-
spec_python = {
187-
"Sample": sample_list
188-
}
185+
spec_python = {"Sample": sample_list}
189186

190187
output=servicex.deliver(spec_python)
191188

192-
print_structure_from_str(output,**kwargs)
193-
189+
return print_structure_from_str(output, **kwargs)
194190

0 commit comments

Comments
 (0)