Skip to content

Commit f21e92e

Browse files
committed
Pipeline fails if flake8 fails - issue 5
1 parent ae4d039 commit f21e92e

File tree

5 files changed

+155
-138
lines changed

5 files changed

+155
-138
lines changed

.github/workflows/CI.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ jobs:
2121
- name: Lint with Flake8
2222
run: |
2323
pipx run flake8
24-
24+
2525
test:
26+
needs:
27+
- flake8
28+
2629
runs-on: ubuntu-latest
2730

2831
steps:

servicex_analysis_utils/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2626
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28-
from .materialization import to_awk
28+
from .materialization import to_awk
2929
from .file_peeking import get_structure
3030

3131
__version__ = "1.0.b1"
32-
__all__ = ['to_awk']
32+
__all__ = ["to_awk"]

servicex_analysis_utils/materialization.py

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
import uproot
2929
import awkward as ak
30-
import dask_awkward as dak
31-
import logging
30+
import dask_awkward as dak
31+
import logging
32+
3233

3334
def to_awk(deliver_dict, dask=False, iterator=False, **kwargs):
3435
"""
@@ -41,51 +42,54 @@ def to_awk(deliver_dict, dask=False, iterator=False, **kwargs):
4142
iterator(bool): Optional. Flag to materialize the data into arrays or to return iterables with uproot.iterate
4243
**kwargs : Optional. Additional keyword arguments passed to uproot.dask, uproot.iterate and from_parquet
4344
44-
45+
4546
Returns:
4647
dict: keys are sample names and values are awkward arrays, uproot generator objects or dask-awkward arrays.
4748
"""
48-
49+
4950
awk_arrays = {}
5051

5152
for sample, paths in deliver_dict.items():
52-
#Check file type
53-
f_type=str(paths[0])
53+
# Check file type
54+
f_type = str(paths[0])
5455
if f_type.endswith(".root"):
55-
is_root=True
56+
is_root = True
5657
elif f_type.endswith(".parquet") or f_type.endswith(".pq"):
57-
is_root=False
58+
is_root = False
5859
# ServiceX supports only root/parquet in transformed files
5960
else:
60-
raise ValueError(f"Unsupported file format: '{paths[0]}'. Files must be ROOT (.root) or Parquet (.parquet, .pq)")
61-
61+
raise ValueError(
62+
f"Unsupported file format: '{paths[0]}'. Files must be ROOT (.root) or Parquet (.parquet, .pq)"
63+
)
64+
6265
try:
6366
if dask:
64-
if is_root==True:
65-
# Use uproot.dask to handle URLs and local paths lazily
67+
if is_root == True:
68+
# Use uproot.dask to handle URLs and local paths lazily
6669
awk_arrays[sample] = uproot.dask(paths, library="ak", **kwargs)
6770
else:
68-
#file is parquet
71+
# file is parquet
6972
awk_arrays[sample] = dak.from_parquet(paths, **kwargs)
7073
else:
71-
if is_root==True:
74+
if is_root == True:
7275
# Use uproot.iterate to handle URLs and local paths files in chunks
73-
iterators=uproot.iterate(paths, library="ak", **kwargs)
74-
if iterator==True:
75-
awk_arrays[sample]= iterators #return iterators
76-
else :
77-
awk_arrays[sample]=ak.concatenate(list(iterators)) #return array
78-
76+
iterators = uproot.iterate(paths, library="ak", **kwargs)
77+
if iterator == True:
78+
awk_arrays[sample] = iterators # return iterators
79+
else:
80+
awk_arrays[sample] = ak.concatenate(
81+
list(iterators)
82+
) # return array
83+
7984
else:
80-
#file is parquet
85+
# file is parquet
8186
awk_arrays[sample] = ak.from_parquet(paths, **kwargs)
8287

83-
8488
except Exception as e:
85-
# Log the exception pointing at the user's code
86-
msg=f"\nError loading sample: {sample}"
89+
# Log the exception pointing at the user's code
90+
msg = f"\nError loading sample: {sample}"
8791
logging.error(msg, exc_info=True, stacklevel=2)
8892
# Mark the sample as failed
8993
awk_arrays[sample] = None
9094

91-
return awk_arrays
95+
return awk_arrays

tests/test_materialization.py

Lines changed: 80 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
import pytest
29-
import uproot
29+
import uproot
3030
import awkward as ak
31-
import dask_awkward as dak
32-
import logging
31+
import dask_awkward as dak
32+
import logging
3333
import os
3434
import sys
3535
import numpy as np
@@ -43,113 +43,119 @@ def build_test_samples(tmp_path):
4343
test_path1 = str(tmp_path / "test_file1.root")
4444
test_path2 = str(tmp_path / "test_file2.root")
4545
# example data for two branches
46-
tree_data1 = {
47-
"branch1": np.ones(100),
48-
"branch2": np.zeros(100)
49-
}
46+
tree_data1 = {"branch1": np.ones(100), "branch2": np.zeros(100)}
5047
# example data for one branch
51-
tree_data2 = {"branch1": np.ones(10)}
48+
tree_data2 = {"branch1": np.ones(10)}
5249

5350
# Create tmp .root files
5451
with uproot.create(test_path1) as file:
5552
file["Tree"] = tree_data1
56-
53+
5754
with uproot.create(test_path2) as file:
5855
file["Tree"] = tree_data2
5956

60-
#Dict like servicex.deliver() output
57+
# Dict like servicex.deliver() output
6158
sx_dict = {"Test-Sample1": [test_path1], "Test-Sample2": [test_path2]}
6259

6360
return sx_dict
6461

6562

66-
#Test functions
63+
# Test functions
6764
def test_to_awk(build_test_samples):
6865
sx_dict = build_test_samples
69-
result = to_awk(sx_dict) #uproot.iterate expressions kwarg
66+
result = to_awk(sx_dict) # uproot.iterate expressions kwarg
7067

71-
#Collecting all samples
72-
assert list(result.keys())==["Test-Sample1", "Test-Sample2"]
68+
# Collecting all samples
69+
assert list(result.keys()) == ["Test-Sample1", "Test-Sample2"]
7370
arr1 = result["Test-Sample1"]
7471
arr2 = result["Test-Sample2"]
75-
76-
77-
78-
#Collecting all branches
79-
assert ak.fields(arr1) == ['branch1', 'branch2']
80-
assert ak.fields(arr2) == ['branch1']
81-
82-
assert isinstance(arr1, ak.Array), "to_awk() does not produce an awkward.Array instance"
83-
assert isinstance(arr2, ak.Array), "to_awk() does not produce an awkward.Array instance"
84-
85-
#Collecting all elements per branch
86-
assert ak.all(arr1['branch2'] == ak.from_numpy(np.zeros(100)))
87-
assert ak.all(arr2['branch1'] == ak.from_numpy(np.ones(10)))
88-
89-
#Checking kwargs
90-
result_filtered = to_awk(sx_dict, expressions="branch1") #uproot.iterate expressions kwarg
91-
arr1_filtered=result_filtered["Test-Sample1"]
92-
assert ak.fields(arr1_filtered) == ['branch1'] #branch2 should be filtered out
72+
73+
# Collecting all branches
74+
assert ak.fields(arr1) == ["branch1", "branch2"]
75+
assert ak.fields(arr2) == ["branch1"]
76+
77+
assert isinstance(
78+
arr1, ak.Array
79+
), "to_awk() does not produce an awkward.Array instance"
80+
assert isinstance(
81+
arr2, ak.Array
82+
), "to_awk() does not produce an awkward.Array instance"
83+
84+
# Collecting all elements per branch
85+
assert ak.all(arr1["branch2"] == ak.from_numpy(np.zeros(100)))
86+
assert ak.all(arr2["branch1"] == ak.from_numpy(np.ones(10)))
87+
88+
# Checking kwargs
89+
result_filtered = to_awk(
90+
sx_dict, expressions="branch1"
91+
) # uproot.iterate expressions kwarg
92+
arr1_filtered = result_filtered["Test-Sample1"]
93+
assert ak.fields(arr1_filtered) == ["branch1"] # branch2 should be filtered out
9394

9495

9596
def test_to_awk_dask(build_test_samples):
9697
sx_dict = build_test_samples
97-
result_da = to_awk(sx_dict, dask=True, step_size=10) #uproot.dask step_size kwarg
98-
99-
#Collecting all samples
100-
assert list(result_da.keys())==["Test-Sample1", "Test-Sample2"]
101-
arr1=result_da["Test-Sample1"]
102-
arr2=result_da["Test-Sample2"]
103-
104-
#Checking instance
105-
assert isinstance(arr1, dak.Array), "to_awk(dask=True) does not produce an dak.Array instance"
106-
assert isinstance(arr2, dak.Array), "to_awk(dask=True) does not produce an dak.Array instance"
107-
108-
#Testing partitionning kwarg
98+
result_da = to_awk(sx_dict, dask=True, step_size=10) # uproot.dask step_size kwarg
99+
100+
# Collecting all samples
101+
assert list(result_da.keys()) == ["Test-Sample1", "Test-Sample2"]
102+
arr1 = result_da["Test-Sample1"]
103+
arr2 = result_da["Test-Sample2"]
104+
105+
# Checking instance
106+
assert isinstance(
107+
arr1, dak.Array
108+
), "to_awk(dask=True) does not produce an dak.Array instance"
109+
assert isinstance(
110+
arr2, dak.Array
111+
), "to_awk(dask=True) does not produce an dak.Array instance"
112+
113+
# Testing partitioning kwarg
109114
assert arr1.npartitions == 10
110115
assert arr2.npartitions == 1
111116

112-
#Collecting all branches
113-
assert ak.fields(arr1) == ['branch1', 'branch2']
114-
assert ak.fields(arr2) == ['branch1']
117+
# Collecting all branches
118+
assert ak.fields(arr1) == ["branch1", "branch2"]
119+
assert ak.fields(arr2) == ["branch1"]
120+
121+
# Collecting all elements per branch
122+
assert ak.all(arr1["branch2"].compute() == ak.from_numpy(np.zeros(100)))
123+
assert ak.all(arr2["branch1"].compute() == ak.from_numpy(np.ones(10)))
115124

116-
#Collecting all elements per branch
117-
assert ak.all(arr1['branch2'].compute() == ak.from_numpy(np.zeros(100)))
118-
assert ak.all(arr2['branch1'].compute() == ak.from_numpy(np.ones(10)))
119125

120126
def test_to_awk_delayed_and_kwargs(build_test_samples):
121127
sx_dict = build_test_samples
122-
result_delay = to_awk(sx_dict, iterator=True, expressions="branch1") #return iterable + selection kwarg
123-
124-
#Checking iterator return type
125-
assert isinstance(result_delay["Test-Sample1"], types.GeneratorType)
126-
assert isinstance(result_delay["Test-Sample2"], types.GeneratorType)
128+
result_delay = to_awk(
129+
sx_dict, iterator=True, expressions="branch1"
130+
) # return iterable + selection kwarg
127131

132+
# Checking iterator return type
133+
assert isinstance(result_delay["Test-Sample1"], types.GeneratorType)
134+
assert isinstance(result_delay["Test-Sample2"], types.GeneratorType)
128135

129-
arr1 = ak.concatenate(list(result_delay["Test-Sample1"])) # Materialize the generator from uproot.iterate
136+
arr1 = ak.concatenate(
137+
list(result_delay["Test-Sample1"])
138+
) # Materialize the generator from uproot.iterate
130139
arr2 = ak.concatenate(list(result_delay["Test-Sample2"]))
131140

132-
#Checking materialization
133-
assert isinstance(arr1, ak.Array), "to_awk(dask=True) does not produce an ak.Array instance"
134-
assert isinstance(arr2, ak.Array), "to_awk(dask=True) does not produce an ak.Array instance"
141+
# Checking materialization
142+
assert isinstance(
143+
arr1, ak.Array
144+
), "to_awk(dask=True) does not produce an ak.Array instance"
145+
assert isinstance(
146+
arr2, ak.Array
147+
), "to_awk(dask=True) does not produce an ak.Array instance"
148+
149+
# Checking only 1 branch selected
150+
assert ak.fields(arr1) == ["branch1"]
151+
assert ak.fields(arr2) == ["branch1"]
135152

136-
#Checking only 1 branch selected
137-
assert ak.fields(arr1) == ['branch1']
138-
assert ak.fields(arr2) == ['branch1']
139153

140154
def test_unsupported_file_format():
141155
fake_paths = {"fake-Sample": ["invalid_file.txt"]}
142156
# match is regex-level
143-
with pytest.raises(ValueError, match=r"Unsupported file format: 'invalid_file.txt'\. Files must be ROOT \(.*\) or Parquet \(.*\)"):
157+
with pytest.raises(
158+
ValueError,
159+
match=r"Unsupported file format: 'invalid_file.txt'\. Files must be ROOT \(.*\) or Parquet \(.*\)",
160+
):
144161
to_awk(fake_paths)
145-
146-
147-
148-
149-
150-
151-
152-
153-
154-
155-

0 commit comments

Comments
 (0)