Skip to content

Commit 65e4c16

Browse files
author
acordeir
committed
dask-parquet tests
1 parent e57e588 commit 65e4c16

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

tests/test_materialization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def build_test_samples(tmp_path):
6363

6464

6565
#Test functions
66-
def test_to_awk_collection(build_test_samples):
66+
def test_to_awk(build_test_samples):
6767
sx_dict = build_test_samples
6868
result = to_awk(sx_dict) #uproot.iterate expressions kwarg
6969

tests/test_materialization_parquet.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def build_test_samples(tmp_path):
2929
})
3030

3131
# Write to Parquet files
32-
ak.to_parquet(data1, test_path1)
32+
ak.to_parquet(data1, test_path1, row_group_size=10000) #partitions
3333
ak.to_parquet(data2, test_path2)
3434

3535
# Dict simulating servicex.deliver() output
@@ -39,7 +39,7 @@ def build_test_samples(tmp_path):
3939

4040

4141
# Test function for to_awk with Parquet files
42-
def test_to_awk_collection(build_test_samples):
42+
def test_to_awk_parquet(build_test_samples):
4343
sx_dict = build_test_samples
4444
result = to_awk(sx_dict) # Using ak.from_parquet internally
4545

@@ -63,3 +63,28 @@ def test_to_awk_collection(build_test_samples):
6363
result_filtered = to_awk(sx_dict, columns="branch1")
6464
arr1_filtered = result_filtered["Test-Sample1"]
6565
assert ak.fields(arr1_filtered) == ['branch1'] # branch2 should be filtered out
66+
67+
def test_to_awk_dask_parquet(build_test_samples):
68+
sx_dict = build_test_samples
69+
result_da = to_awk(sx_dict, dask=True, split_row_groups=True) #split in partitions
70+
71+
#Collecting all samples
72+
assert list(result_da.keys())==["Test-Sample1", "Test-Sample2"]
73+
arr1=result_da["Test-Sample1"]
74+
arr2=result_da["Test-Sample2"]
75+
76+
#Checking instance
77+
assert isinstance(arr1, dak.Array), "to_awk(dask=True) does not produce an dak.Array instance"
78+
assert isinstance(arr2, dak.Array), "to_awk(dask=True) does not produce an dak.Array instance"
79+
80+
#Testing partitioning kwarg
81+
assert arr1.npartitions == 10
82+
assert arr2.npartitions == 1
83+
84+
#Collecting all branches
85+
assert ak.fields(arr1) == ['branch1', 'branch2']
86+
assert ak.fields(arr2) == ['branch1']
87+
88+
#Collecting all elements per branch
89+
assert ak.all(arr1['branch2'].compute() == ak.from_numpy(np.zeros(100)))
90+
assert ak.all(arr2['branch1'].compute() == ak.from_numpy(np.ones(10)))

0 commit comments

Comments
 (0)