26
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
import pytest
29
- import uproot
29
+ import uproot
30
30
import awkward as ak
31
- import dask_awkward as dak
32
- import logging
31
+ import dask_awkward as dak
32
+ import logging
33
33
import os
34
34
import sys
35
35
import numpy as np
@@ -43,113 +43,119 @@ def build_test_samples(tmp_path):
43
43
test_path1 = str (tmp_path / "test_file1.root" )
44
44
test_path2 = str (tmp_path / "test_file2.root" )
45
45
# example data for two branches
46
- tree_data1 = {
47
- "branch1" : np .ones (100 ),
48
- "branch2" : np .zeros (100 )
49
- }
46
+ tree_data1 = {"branch1" : np .ones (100 ), "branch2" : np .zeros (100 )}
50
47
# example data for one branch
51
- tree_data2 = {"branch1" : np .ones (10 )}
48
+ tree_data2 = {"branch1" : np .ones (10 )}
52
49
53
50
# Create tmp .root files
54
51
with uproot .create (test_path1 ) as file :
55
52
file ["Tree" ] = tree_data1
56
-
53
+
57
54
with uproot .create (test_path2 ) as file :
58
55
file ["Tree" ] = tree_data2
59
56
60
- #Dict like servicex.deliver() output
57
+ # Dict like servicex.deliver() output
61
58
sx_dict = {"Test-Sample1" : [test_path1 ], "Test-Sample2" : [test_path2 ]}
62
59
63
60
return sx_dict
64
61
65
62
66
- #Test functions
63
+ # Test functions
67
64
def test_to_awk(build_test_samples):
    """Check the default to_awk() call on the two fixture samples.

    Verifies the returned sample keys, the branch names of each array, the
    array type, the stored values, and that the ``expressions`` keyword
    restricts which branches are returned.
    """
    sx_dict = build_test_samples
    arrays = to_awk(sx_dict)

    # One entry per sample, in the same order as the input dict
    assert list(arrays.keys()) == ["Test-Sample1", "Test-Sample2"]
    sample1 = arrays["Test-Sample1"]
    sample2 = arrays["Test-Sample2"]

    # Every branch written by the fixture must be present
    assert ak.fields(sample1) == ["branch1", "branch2"]
    assert ak.fields(sample2) == ["branch1"]

    type_error = "to_awk() does not produce an awkward.Array instance"
    assert isinstance(sample1, ak.Array), type_error
    assert isinstance(sample2, ak.Array), type_error

    # Branch contents must match what the fixture wrote to the files
    zeros = ak.from_numpy(np.zeros(100))
    ones = ak.from_numpy(np.ones(10))
    assert ak.all(sample1["branch2"] == zeros)
    assert ak.all(sample2["branch1"] == ones)

    # expressions is an uproot.iterate kwarg: only branch1 should survive
    only_branch1 = to_awk(sx_dict, expressions="branch1")
    filtered_sample1 = only_branch1["Test-Sample1"]
    assert ak.fields(filtered_sample1) == ["branch1"]  # branch2 filtered out
93
94
94
95
95
96
def test_to_awk_dask(build_test_samples):
    """Check to_awk(dask=True) on the two fixture samples.

    Verifies the returned sample keys, that each value is a dask-awkward
    array, the partition counts obtained with ``step_size=10``, the branch
    names, and the computed branch contents.
    """
    sx_dict = build_test_samples
    # step_size is an uproot.dask kwarg
    lazy_arrays = to_awk(sx_dict, dask=True, step_size=10)

    # One entry per sample, in the same order as the input dict
    assert list(lazy_arrays.keys()) == ["Test-Sample1", "Test-Sample2"]
    sample1 = lazy_arrays["Test-Sample1"]
    sample2 = lazy_arrays["Test-Sample2"]

    # Both samples must be dask-awkward collections
    type_error = "to_awk(dask=True) does not produce an dak.Array instance"
    for sample in (sample1, sample2):
        assert isinstance(sample, dak.Array), type_error

    # Partitioning kwarg: 100 entries in steps of 10 -> 10 partitions,
    # 10 entries in steps of 10 -> 1 partition
    assert sample1.npartitions == 10
    assert sample2.npartitions == 1

    # Every branch written by the fixture must be present
    assert ak.fields(sample1) == ["branch1", "branch2"]
    assert ak.fields(sample2) == ["branch1"]

    # Branch contents, compared after computing the dask graph
    computed_branch2 = sample1["branch2"].compute()
    computed_branch1 = sample2["branch1"].compute()
    assert ak.all(computed_branch2 == ak.from_numpy(np.zeros(100)))
    assert ak.all(computed_branch1 == ak.from_numpy(np.ones(10)))
119
125
120
126
def test_to_awk_delayed_and_kwargs(build_test_samples):
    """Check to_awk(iterator=True) together with a branch selection.

    Verifies that each sample is returned as a generator, that the chunks it
    yields concatenate into an awkward array, and that only the branch named
    in ``expressions`` is present.
    """
    sx_dict = build_test_samples
    # Return iterables and select a single branch at the same time
    result_delay = to_awk(sx_dict, iterator=True, expressions="branch1")

    # Checking iterator return type
    assert isinstance(result_delay["Test-Sample1"], types.GeneratorType)
    assert isinstance(result_delay["Test-Sample2"], types.GeneratorType)

    # Materialize the generators (described in the original comments as
    # coming from uproot.iterate)
    arr1 = ak.concatenate(list(result_delay["Test-Sample1"]))
    arr2 = ak.concatenate(list(result_delay["Test-Sample2"]))

    # Checking materialization. The message names iterator=True because that
    # is the mode exercised here; it previously said dask=True.
    type_error = "to_awk(iterator=True) does not materialize into an ak.Array instance"
    assert isinstance(arr1, ak.Array), type_error
    assert isinstance(arr2, ak.Array), type_error

    # Checking only 1 branch selected
    assert ak.fields(arr1) == ["branch1"]
    assert ak.fields(arr2) == ["branch1"]
139
153
140
154
def test_unsupported_file_format():
    """Check that to_awk() raises ValueError for a non-ROOT, non-Parquet path."""
    fake_paths = {"fake-Sample": ["invalid_file.txt"]}
    # match is regex-level, so characters meant literally (the dots and the
    # parentheses) are escaped; the dot in the file name was previously
    # unescaped and matched any character.
    expected_message = (
        r"Unsupported file format: 'invalid_file\.txt'\. "
        r"Files must be ROOT \(.*\) or Parquet \(.*\)"
    )
    with pytest.raises(ValueError, match=expected_message):
        to_awk(fake_paths)
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
0 commit comments