Skip to content

Commit a224561

Browse files
committed
Original package commit on personal repo
0 parents  commit a224561

File tree

6 files changed

+305
-0
lines changed

6 files changed

+305
-0
lines changed

LICENSE

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
BSD 3-Clause License
2+
3+
Copyright (c) 2025, IRIS-HEP
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
11+
2. Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
3. Neither the name of the copyright holder nor the names of its
16+
contributors may be used to endorse or promote products derived from
17+
this software without specific prior written permission.
18+
19+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# ServiceX analysis utils
2+
This repository provides analysis tools to be used with the [ServiceX Client](https://github.com/ssl-hep/ServiceX_frontend/tree/master)
3+
4+
### To install
5+
```
6+
pip install servicex-analysis-utils
7+
```
8+
9+
## Documentation
10+
The different functions are documented in [ServiceX Documentation](https://servicex-frontend.readthedocs.io)

pyproject.toml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "servicex_analysis_utils"
7+
version = "1.0.b1"
8+
description = "A package with analysis tools for ServiceX."
9+
authors = [{name = "Artur Cordeiro Oudot Choi", email = "acordeir@cern.ch"}]
10+
readme = "README.md"
11+
license = { text = "BSD-3-Clause" }
12+
requires-python = ">=3.9"
13+
dependencies = [
14+
"uproot>=5.0",
15+
"awkward>=2.6",
16+
"dask-awkward>=2024.12.2",
17+
]
18+
19+
[project.urls]
20+
"Source Code" = "https://github.com/ArturU043/ServiceX_analysis_utils"
21+
"Documentation" = "https://servicex.readthedocs.io/"
22+
"Issue Tracker" = "https://github.com/ArturU043/ServiceX_analysis_utils/issues"
23+
24+
[tool.hatch.build.targets.sdist]
25+
include = [
26+
"/servicex_analysis_utils",
27+
"/tests/"
28+
]
29+
30+
[project.optional-dependencies]
31+
32+
# Developer extras
33+
test = [
34+
"pytest>=7.2.0",
35+
"numpy>=1.21",
36+
]
37+
38+
[tool.hatch.build.targets.wheel]
39+
packages = ["servicex_analysis_utils"]

servicex_analysis_utils/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
from .materialization import to_awk
29+
30+
__version__ = "1.0.b1"
31+
__all__ = ['to_awk']
32+
#__version__ = importlib.metadata.version("servicex")
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import uproot
29+
import awkward as ak
30+
import dask_awkward as dak
31+
import logging
32+
33+
def to_awk(deliver_dict, dask=False, **uproot_kwargs):
    """
    Turn the output of servicex.deliver() into awkward arrays, one per sample.

    Parameters:
        deliver_dict (dict): Returned dictionary from servicex.deliver()
                            (keys are sample names, values are file paths or URLs).
        dask (bool): Optional. When truthy, each sample is opened with uproot.dask
                    instead of being read eagerly with uproot.iterate. Default is False.
        **uproot_kwargs : Optional. Extra keyword arguments forwarded unchanged to
                    uproot.dask (dask=True) or uproot.iterate (dask=False).

    Returns:
        dict: keys are sample names; values are awkward arrays (or the objects
              returned by uproot.dask). A sample whose loading raised an
              exception is logged and mapped to None instead of aborting the
              whole call.
    """
    loaded = {}

    for name, files in deliver_dict.items():
        try:
            if not dask:
                # Eager path: read the files chunk by chunk, then stitch the
                # chunks back together into a single array.
                chunks = list(uproot.iterate(files, library="ak", **uproot_kwargs))
                loaded[name] = ak.concatenate(chunks)
            else:
                # Lazy path: let uproot.dask open the local paths / URLs.
                loaded[name] = uproot.dask(files, library="ak", **uproot_kwargs)
        except Exception:
            # Best effort: one broken sample must not prevent the others from
            # loading. stacklevel=2 attributes the log record to the caller of
            # to_awk() rather than to this function.
            logging.error(
                f"\nError loading sample: {name}", exc_info=True, stacklevel=2
            )
            loaded[name] = None

    return loaded

tests/test_materialization.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import pytest
29+
import uproot
30+
import awkward as ak
31+
import dask_awkward as dak
32+
import logging
33+
import os
34+
import sys
35+
import numpy as np
36+
37+
#Setting rpath
38+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
39+
from servicex_analysis_utils.materialization import to_awk
40+
41+
def build_test_samples():
    """
    Write the two small ROOT files used as test inputs.

    The destinations are read from the module-level globals ``test_path1`` and
    ``test_path2``. Each file receives one tree named "Tree":
    the first with two branches of 100 entries, the second with a single
    branch of 10 entries.
    """
    # Two branches, 100 entries each
    two_branch_tree = {"branch1": np.ones(100), "branch2": np.zeros(100)}
    # One branch, 10 entries
    one_branch_tree = {"branch1": np.ones(10)}

    outputs = ((test_path1, two_branch_tree), (test_path2, one_branch_tree))
    for destination, tree_contents in outputs:
        with uproot.create(destination) as root_file:
            root_file["Tree"] = tree_contents
56+
57+
#Initial test configuration
58+
@pytest.fixture(scope="function", autouse=True)
def init(tmp_path):
    """
    Prepare the shared state for every test in this module.

    Runs automatically before each test function: it points the global file
    paths into pytest's ``tmp_path`` directory, creates the dummy ROOT files
    when they are missing, and stores the outputs of three to_awk() calls
    (eager, dask, and branch-filtered) in module-level globals.
    """
    global test_path1, test_path2
    global result, result_da, result_filtered

    test_path1 = tmp_path / "test_file1.root"
    test_path2 = tmp_path / "test_file2.root"

    # Build the dummy test files unless both are already on disk
    both_present = os.path.exists(test_path1) and os.path.exists(test_path2)
    if not both_present:
        build_test_samples()

    # Dictionary shaped like the servicex.deliver() output
    delivered = {"Test-Sample1": test_path1, "Test-Sample2": test_path2}

    # Run to_awk() once per configuration and keep the results for the tests
    result = to_awk(delivered)
    # step_size is an uproot.dask keyword argument
    result_da = to_awk(delivered, dask=True, step_size=10)
    # expressions is an uproot.iterate keyword argument
    result_filtered = to_awk(delivered, expressions="branch1")
78+
79+
#Test functions
80+
def test_to_awk_instances():
    """Check the container type returned for the eager and the dask modes."""
    eager_sample = result["Test-Sample1"]
    lazy_sample = result_da["Test-Sample1"]

    # Eager loading must give an awkward array
    assert isinstance(eager_sample, ak.Array), "to_awk() does not produce an awkward.Array instance"
    # dask=True must give a dask-awkward array
    assert isinstance(lazy_sample, dak.Array), "to_awk(dask=True) does not produce a dask_awkward.Array instance"
87+
88+
def test_to_awk_collection():
    """Check that every sample, branch and entry is collected by to_awk()."""
    first_sample = result["Test-Sample1"]
    second_sample = result["Test-Sample2"]

    # All samples are present, in the order they were delivered
    assert list(result.keys()) == ["Test-Sample1", "Test-Sample2"]

    # All branches of each tree are present
    assert ak.fields(first_sample) == ["branch1", "branch2"]
    assert ak.fields(second_sample) == ["branch1"]

    # All entries of a branch are present and carry the written values
    expected_zeros = ak.from_numpy(np.zeros(100))
    expected_ones = ak.from_numpy(np.ones(10))
    assert ak.all(first_sample["branch2"] == expected_zeros)
    assert ak.all(second_sample["branch1"] == expected_ones)
102+
103+
def test_to_awk_dask():
    """Check the dask results against the eager ones and verify partitioning."""
    lazy_first = result_da["Test-Sample1"]
    lazy_second = result_da["Test-Sample2"]

    # Computing the dask arrays must reproduce the eagerly loaded arrays
    for lazy_array, sample_name in (
        (lazy_first, "Test-Sample1"),
        (lazy_second, "Test-Sample2"),
    ):
        assert ak.almost_equal(lazy_array.compute(), result[sample_name])

    # The partitioning keyword argument must have been applied by uproot.dask
    assert lazy_first.npartitions == 10
    assert lazy_second.npartitions == 1
114+
115+
def test_to_awk_filter():
    """Check that filtering keyword arguments reach uproot.iterate()."""
    filtered_first = result_filtered["Test-Sample1"]
    filtered_second = result_filtered["Test-Sample2"]

    # Only branch1 may remain; branch2 of the first sample must be filtered out
    kept_branches = ["branch1"]
    assert ak.fields(filtered_first) == kept_branches
    assert ak.fields(filtered_second) == kept_branches
122+
123+
124+
125+
126+
127+
128+

0 commit comments

Comments
 (0)