@@ -29,7 +29,7 @@ def build_test_samples(tmp_path):
     })

     # Write to Parquet files
-    ak.to_parquet(data1, test_path1)
+    ak.to_parquet(data1, test_path1, row_group_size=10)  # partition into 10 row groups
     ak.to_parquet(data2, test_path2)

     # Dict simulating servicex.deliver() output
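(Aside: the `row_group_size` kwarg added above is what drives the partition counts asserted in the new Dask test below; with `split_row_groups=True`, `dask_awkward` maps each Parquet row group to one partition. A standalone sketch of that relationship, assuming a hypothetical `demo.parquet` path:)

import awkward as ak
import dask_awkward as dak
import numpy as np

arr = ak.Array({"branch1": np.ones(100)})
ak.to_parquet(arr, "demo.parquet", row_group_size=10)           # writes 10 row groups
lazy = dak.from_parquet("demo.parquet", split_row_groups=True)  # one partition per row group
assert lazy.npartitions == 10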
@@ -39,7 +39,7 @@ def build_test_samples(tmp_path):


 # Test function for to_awk with Parquet files
-def test_to_awk_collection(build_test_samples):
+def test_to_awk_parquet(build_test_samples):
     sx_dict = build_test_samples
     result = to_awk(sx_dict)  # Using ak.from_parquet internally

@@ -63,3 +63,28 @@ def test_to_awk_collection(build_test_samples):
     result_filtered = to_awk(sx_dict, columns="branch1")
     arr1_filtered = result_filtered["Test-Sample1"]
     assert ak.fields(arr1_filtered) == ['branch1']  # branch2 should be filtered out
+
+def test_to_awk_dask_parquet(build_test_samples):
+    sx_dict = build_test_samples
+    result_da = to_awk(sx_dict, dask=True, split_row_groups=True)  # split into partitions
+
+    # Collecting all samples
+    assert list(result_da.keys()) == ["Test-Sample1", "Test-Sample2"]
+    arr1 = result_da["Test-Sample1"]
+    arr2 = result_da["Test-Sample2"]
+
+    # Checking instances
+    assert isinstance(arr1, dak.Array), "to_awk(dask=True) does not produce a dak.Array instance"
+    assert isinstance(arr2, dak.Array), "to_awk(dask=True) does not produce a dak.Array instance"
+
+    # Testing the partitioning kwarg
+    assert arr1.npartitions == 10
+    assert arr2.npartitions == 1
+
+    # Collecting all branches
+    assert ak.fields(arr1) == ['branch1', 'branch2']
+    assert ak.fields(arr2) == ['branch1']
+
+    # Collecting all elements per branch
+    assert ak.all(arr1['branch2'].compute() == ak.from_numpy(np.zeros(100)))
+    assert ak.all(arr2['branch1'].compute() == ak.from_numpy(np.ones(10)))
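(For context, a minimal sketch of the `build_test_samples` fixture these tests appear to assume, reconstructed from the shapes asserted above; the file names and the exact return value are assumptions, since the fixture body is only partially visible in this diff:)

import awkward as ak
import numpy as np
import pytest

@pytest.fixture
def build_test_samples(tmp_path):
    test_path1 = str(tmp_path / "test_file1.parquet")  # hypothetical file names
    test_path2 = str(tmp_path / "test_file2.parquet")

    # Sample 1: 100 rows, two branches -> 10 row groups of 10 rows each
    data1 = ak.Array({"branch1": np.ones(100), "branch2": np.zeros(100)})
    # Sample 2: 10 rows, one branch -> a single row group
    data2 = ak.Array({"branch1": np.ones(10)})

    ak.to_parquet(data1, test_path1, row_group_size=10)
    ak.to_parquet(data2, test_path2)

    # Dict simulating servicex.deliver() output
    return {"Test-Sample1": [test_path1], "Test-Sample2": [test_path2]}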