Skip to content

Commit c87f48c

Browse files
authored
Merge pull request #4 from ssl-hep/fille_peek_dev
- Dataset file structure inspection with servicex - CLI implementation `servicex-get-structure` - Black Python formatter added to GHA workflows
2 parents 805fa35 + 9b55e7f commit c87f48c

File tree

11 files changed

+752
-138
lines changed

11 files changed

+752
-138
lines changed

.github/workflows/CI.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,26 @@ on:
66
workflow_dispatch:
77

88
jobs:
9+
10+
black-format:
11+
runs-on: ubuntu-latest
12+
13+
steps:
14+
- uses: actions/checkout@v4
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: "3.x"
20+
21+
- name: Run Black
22+
run: |
23+
pipx run black --check .
24+
925
test:
26+
needs:
27+
- black-format
28+
1029
runs-on: ubuntu-latest
1130

1231
steps:

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,10 @@ servicex.yaml
77

88
#Distribution
99
dist/
10+
11+
#ServiceX
12+
servicex.yaml
13+
14+
#Testing
15+
samples_structure.txt
16+

pyproject.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ readme = "README.md"
1111
license = { text = "BSD-3-Clause" }
1212
requires-python = ">=3.9"
1313
dependencies = [
14+
"servicex",
1415
"uproot>=5.0",
1516
"awkward>=2.6",
1617
"dask-awkward>=2024.12.2",
@@ -32,8 +33,13 @@ test = [
3233
"pytest>=7.2.0",
3334
"numpy>=1.21",
3435
"pyarrow>=8.0.0",
35-
"pandas"
36+
"pandas",
37+
"miniopy-async==1.21.1"
3638
]
3739

40+
[project.scripts]
41+
servicex-get-structure = "servicex_analysis_utils.cli:app"
42+
43+
3844
[tool.hatch.build.targets.wheel]
3945
packages = ["servicex_analysis_utils"]

servicex_analysis_utils/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2626
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28-
from .materialization import to_awk
28+
from .materialization import to_awk
29+
from .file_peeking import get_structure
2930

3031
__version__ = "1.0.0"
31-
__all__ = ['to_awk']
32+
__all__ = ["to_awk", "get_structure"]

servicex_analysis_utils/cli.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import sys
2+
import json
3+
import os
4+
import logging
5+
from .file_peeking import get_structure
6+
import typer
7+
from typing import List
8+
9+
app = typer.Typer()
10+
11+
12+
def make_dataset_list(dataset_arg):
    """
    Helper to handle the user input dataset argument.

    Loads the argument into a dict when it designates a single .json file,
    otherwise returns the input unchanged. The output is given to
    get_structure().

    Parameters:
        dataset_arg (str, [str]): Single DS identifier, list of multiple
            identifiers, or path/to/.json containing identifiers and sample
            names. The .json path may be given as a bare string or as a
            one-element list.

    Returns:
        dataset (str, [str], dict): dictionary loaded from the json, or
            dataset_arg itself when it is not a single .json path.

    Exits:
        Calls sys.exit(1) after logging an error when the .json file does not
        exist, cannot be parsed as JSON, or does not contain a dictionary.
    """
    # Treat a bare string like a one-element list; otherwise len() and [0]
    # would inspect the characters of the string and a lone .json path would
    # never be recognised.
    candidates = [dataset_arg] if isinstance(dataset_arg, str) else dataset_arg

    if len(candidates) != 1 or not candidates[0].endswith(".json"):
        # If DS is provided in CLI instead of json, use it as given (default)
        return dataset_arg

    dataset_file = candidates[0]

    if not os.path.isfile(dataset_file):
        logging.error(f"Error: JSON file '{dataset_file}' not found.")
        sys.exit(1)

    try:
        # JSON text is UTF-8; do not depend on the platform locale encoding.
        with open(dataset_file, "r", encoding="utf-8") as f:
            dataset = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        logging.error(
            f"Error: '{dataset_file}' is not a valid JSON file.", exc_info=True
        )
        sys.exit(1)

    if not isinstance(dataset, dict):
        logging.error("Error: The JSON file must contain a dictionary.")
        sys.exit(1)

    return dataset
50+
51+
52+
@app.command()
def run_from_command(
    dataset: List[str] = typer.Argument(
        ...,
        help="Input datasets (Rucio DID) or path to JSON file containing datasets in a dict.",
    ),
    filter_branch: str = typer.Option(
        "", "--filter-branch", help="Only display branches containing this string."
    ),
):
    """
    Calls the get_structure function and sends results to stdout.

    To run on command line: servicex-get-structure DATASET... [--filter-branch TEXT]
    """
    # A single path/to/file.json is expanded to its dict; anything else is
    # forwarded as the list of identifiers received from the command line.
    ds_format = make_dataset_list(dataset)

    # NOTE(review): do_print=False presumably makes get_structure return the
    # result instead of printing it itself - confirm against file_peeking.
    result = get_structure(ds_format, filter_branch=filter_branch, do_print=False)

    print(result)
70+
71+
72+
if __name__ == "__main__":
73+
app()

0 commit comments

Comments
 (0)