Skip to content

Commit 6ccfc7d

Browse files
author
acordeir
committed
parquet test
1 parent df59038 commit 6ccfc7d

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

tests/test_parquet.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
import pytest
4+
import awkward as ak
5+
import dask_awkward as dak
6+
import logging
7+
import numpy as np
8+
import pyarrow.parquet as pq
9+
from servicex_analysis_utils.materialization import to_awk
10+
11+
12+
@pytest.fixture
13+
def build_test_samples(tmp_path):
14+
"""
15+
Creates two Parquet files with sample data for testing.
16+
"""
17+
test_path1 = tmp_path / "test_file1.parquet"
18+
test_path2 = tmp_path / "test_file2.parquet"
19+
20+
# Example data for two branches
21+
data1 = ak.Array({
22+
"branch1": np.ones(100),
23+
"branch2": np.zeros(100)
24+
})
25+
26+
# Example data for one branch
27+
data2 = ak.Array({
28+
"branch1": np.ones(10)
29+
})
30+
31+
# Write to Parquet files
32+
ak.to_parquet(data1, test_path1)
33+
ak.to_parquet(data2, test_path2)
34+
35+
# Dict simulating servicex.deliver() output
36+
sx_dict = {"Test-Sample1": test_path1, "Test-Sample2": test_path2}
37+
38+
return sx_dict
39+
40+
41+
# Test function for to_awk with Parquet files
42+
def test_to_awk_collection(build_test_samples):
43+
sx_dict = build_test_samples
44+
result = to_awk(sx_dict) # Using ak.from_parquet internally
45+
46+
# Collecting all samples
47+
assert list(result.keys()) == ["Test-Sample1", "Test-Sample2"]
48+
arr1 = result["Test-Sample1"]
49+
arr2 = result["Test-Sample2"]
50+
51+
# Collecting all branches
52+
assert ak.fields(arr1) == ['branch1', 'branch2']
53+
assert ak.fields(arr2) == ['branch1']
54+
55+
assert isinstance(arr1, ak.Array), "to_awk() does not produce an awkward.Array instance"
56+
assert isinstance(arr2, ak.Array), "to_awk() does not produce an awkward.Array instance"
57+
58+
# Collecting all elements per branch
59+
assert ak.all(arr1['branch2'] == ak.from_numpy(np.zeros(100)))
60+
assert ak.all(arr2['branch1'] == ak.from_numpy(np.ones(10)))
61+
62+
# Checking kwargs (Expressions equivalent in Parquet)
63+
result_filtered = to_awk(sx_dict, filter_name="branch1") # Now using filter_name instead of expressions
64+
arr1_filtered = result_filtered["Test-Sample1"]
65+
assert ak.fields(arr1_filtered) == ['branch1'] # branch2 should be filtered out

0 commit comments

Comments
 (0)