|
| 1 | +# Copyright (c) 2025, IRIS-HEP |
| 2 | +# All rights reserved. |
| 3 | +import pytest |
| 4 | +import awkward as ak |
| 5 | +import dask_awkward as dak |
| 6 | +import logging |
| 7 | +import numpy as np |
| 8 | +import pyarrow.parquet as pq |
| 9 | +from servicex_analysis_utils.materialization import to_awk |
| 10 | + |
| 11 | + |
| 12 | +@pytest.fixture |
| 13 | +def build_test_samples(tmp_path): |
| 14 | + """ |
| 15 | + Creates two Parquet files with sample data for testing. |
| 16 | + """ |
| 17 | + test_path1 = tmp_path / "test_file1.parquet" |
| 18 | + test_path2 = tmp_path / "test_file2.parquet" |
| 19 | + |
| 20 | + # Example data for two branches |
| 21 | + data1 = ak.Array({ |
| 22 | + "branch1": np.ones(100), |
| 23 | + "branch2": np.zeros(100) |
| 24 | + }) |
| 25 | + |
| 26 | + # Example data for one branch |
| 27 | + data2 = ak.Array({ |
| 28 | + "branch1": np.ones(10) |
| 29 | + }) |
| 30 | + |
| 31 | + # Write to Parquet files |
| 32 | + ak.to_parquet(data1, test_path1) |
| 33 | + ak.to_parquet(data2, test_path2) |
| 34 | + |
| 35 | + # Dict simulating servicex.deliver() output |
| 36 | + sx_dict = {"Test-Sample1": test_path1, "Test-Sample2": test_path2} |
| 37 | + |
| 38 | + return sx_dict |
| 39 | + |
| 40 | + |
| 41 | +# Test function for to_awk with Parquet files |
| 42 | +def test_to_awk_collection(build_test_samples): |
| 43 | + sx_dict = build_test_samples |
| 44 | + result = to_awk(sx_dict) # Using ak.from_parquet internally |
| 45 | + |
| 46 | + # Collecting all samples |
| 47 | + assert list(result.keys()) == ["Test-Sample1", "Test-Sample2"] |
| 48 | + arr1 = result["Test-Sample1"] |
| 49 | + arr2 = result["Test-Sample2"] |
| 50 | + |
| 51 | + # Collecting all branches |
| 52 | + assert ak.fields(arr1) == ['branch1', 'branch2'] |
| 53 | + assert ak.fields(arr2) == ['branch1'] |
| 54 | + |
| 55 | + assert isinstance(arr1, ak.Array), "to_awk() does not produce an awkward.Array instance" |
| 56 | + assert isinstance(arr2, ak.Array), "to_awk() does not produce an awkward.Array instance" |
| 57 | + |
| 58 | + # Collecting all elements per branch |
| 59 | + assert ak.all(arr1['branch2'] == ak.from_numpy(np.zeros(100))) |
| 60 | + assert ak.all(arr2['branch1'] == ak.from_numpy(np.ones(10))) |
| 61 | + |
| 62 | + # Checking kwargs (Expressions equivalent in Parquet) |
| 63 | + result_filtered = to_awk(sx_dict, filter_name="branch1") # Now using filter_name instead of expressions |
| 64 | + arr1_filtered = result_filtered["Test-Sample1"] |
| 65 | + assert ak.fields(arr1_filtered) == ['branch1'] # branch2 should be filtered out |
0 commit comments