Skip to content

Commit fa712f8

Browse files
committed
iterator flag feature to determine return type
1 parent 3030e7c commit fa712f8

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

servicex_analysis_utils/materialization.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,15 @@
3030
import dask_awkward as dak
3131
import logging
3232

33-
def to_awk(deliver_dict, dask=False, **kwargs):
33+
def to_awk(deliver_dict, dask=False, iterator=False, **kwargs):
3434
"""
3535
Load an awkward array from the deliver() output with uproot or uproot.dask.
3636
3737
Parameters:
3838
deliver_dict (dict): Returned dictionary from servicex.deliver()
3939
(keys are sample names, values are file paths or URLs).
4040
dask (bool): Optional. Flag to load as dask-awkward array. Default is False
41+
iterator(bool): Optional. Flag to materialize the data into arrays or to return iterables with uproot.iterate
4142
**kwargs : Optional. Additional keyword arguments passed to uproot.dask, uproot.iterate and from_parquet
4243
4344
@@ -69,7 +70,12 @@ def to_awk(deliver_dict, dask=False, **kwargs):
6970
else:
7071
if is_root==True:
7172
# Use uproot.iterate to handle URLs and local paths files in chunks
72-
awk_arrays[sample]=uproot.iterate(paths, library="ak", **kwargs) # not an ak array but a generator
73+
iterators=uproot.iterate(paths, library="ak", **kwargs)
74+
if iterator==True:
75+
awk_arrays[sample]= iterators #return iterators
76+
else :
77+
awk_arrays[sample]=ak.concatenate(list(iterators)) #return array
78+
7379
else:
7480
#file is parquet
7581
awk_arrays[sample] = ak.from_parquet(paths, **kwargs)

tests/test_materialization.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import sys
3535
import numpy as np
3636
from servicex_analysis_utils.materialization import to_awk
37+
import types
3738

3839

3940
@pytest.fixture
@@ -69,8 +70,10 @@ def test_to_awk(build_test_samples):
6970

7071
#Collecting all samples
7172
assert list(result.keys())==["Test-Sample1", "Test-Sample2"]
72-
arr1 = ak.concatenate(list(result["Test-Sample1"])) # Materialize the generator from uproot.iterate
73-
arr2 = ak.concatenate(list(result["Test-Sample2"]))
73+
arr1 = result["Test-Sample1"]
74+
arr2 = result["Test-Sample2"]
75+
76+
7477

7578
#Collecting all branches
7679
assert ak.fields(arr1) == ['branch1', 'branch2']
@@ -114,6 +117,25 @@ def test_to_awk_dask(build_test_samples):
114117
assert ak.all(arr1['branch2'].compute() == ak.from_numpy(np.zeros(100)))
115118
assert ak.all(arr2['branch1'].compute() == ak.from_numpy(np.ones(10)))
116119

120+
def test_to_awk_delayed_and_kwargs(build_test_samples):
121+
sx_dict = build_test_samples
122+
result_delay = to_awk(sx_dict, iterator=True, expressions="branch1") #return iterable + selection kwarg
123+
124+
#Checking iterator return type
125+
assert isinstance(result_delay["Test-Sample1"], types.GeneratorType)
126+
assert isinstance(result_delay["Test-Sample2"], types.GeneratorType)
127+
128+
129+
arr1 = ak.concatenate(list(result_delay["Test-Sample1"])) # Materialize the generator from uproot.iterate
130+
arr2 = ak.concatenate(list(result_delay["Test-Sample2"]))
131+
132+
#Checking materialization
133+
assert isinstance(arr1, ak.Array), "to_awk(dask=True) does not produce an ak.Array instance"
134+
assert isinstance(arr2, ak.Array), "to_awk(dask=True) does not produce an ak.Array instance"
135+
136+
#Checking only 1 branch selected
137+
assert ak.fields(arr1) == ['branch1']
138+
assert ak.fields(arr2) == ['branch1']
117139

118140
def test_unsupported_file_format():
119141
fake_paths = {"fake-Sample": ["invalid_file.txt"]}

0 commit comments

Comments
 (0)