Skip to content

Commit 2c88a67

Browse files
committed
helper test for raw decoding into array - should be in file_peeking.py later
1 parent 7d87a47 commit 2c88a67

File tree

1 file changed

+119
-0
lines changed

1 file changed

+119
-0
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import awkward as ak
2+
import numpy as np
3+
4+
def parse_jagged_depth_and_dtype(dtype_str):
    """
    Decode the uproot-style interpretation string of a single branch.

    The recognised shape is a base ``AsDtype('<numpy-format>')`` wrapped in
    zero or more ``AsJagged(...)`` layers, for example:
      - "AsDtype('>f8')"
      - "AsJagged(AsJagged(AsDtype('>f4')))"
      - "AsJagged(AsDtype('>f4'), header_bytes=10)"

    A trailing ``header_bytes=<n>`` argument on an AsJagged layer is ignored;
    it does not affect the nesting depth or the base dtype.

    Parameters:
        dtype_str (str): A string representing the uproot interpretation of a branch.

    Returns:
        Tuple[int, Optional[str]]: Always a 2-tuple (jagged_depth, base_dtype_str).
        jagged_depth is the number of well-formed AsJagged(...) layers that were
        unwrapped. base_dtype_str is the text between the quotes of
        AsDtype('...'), suitable for np.dtype, or None when the innermost
        expression is not of that form.
    """
    jagged_prefix = "AsJagged("
    dtype_prefix = "AsDtype('"
    dtype_suffix = "')"

    depth = 0
    current = dtype_str.strip()

    # Peel one AsJagged(...) wrapper per iteration. Requiring the closing
    # parenthesis keeps a malformed string from having an arbitrary last
    # character silently chopped off.
    while current.startswith(jagged_prefix) and current.endswith(")"):
        depth += 1
        inner = current[len(jagged_prefix):-1]

        # Drop a trailing ", header_bytes=N" keyword argument of this layer.
        # The last comma is the only candidate: any comma belonging to the
        # wrapped content sits before it.
        content, comma, tail = inner.rpartition(",")
        if comma and tail.split("=", 1)[0].strip() == "header_bytes":
            inner = content

        current = inner.strip()

    # Extract the base dtype string from AsDtype('<np-format>')
    if current.startswith(dtype_prefix) and current.endswith(dtype_suffix):
        return depth, current[len(dtype_prefix):-len(dtype_suffix)]
    return depth, None
34+
35+
def _split_top_level_commas(text):
    """Split text on commas that are not enclosed in parentheses."""
    pieces = []
    nesting = 0
    start = 0
    for index, char in enumerate(text):
        if char == "(":
            nesting += 1
        elif char == ")":
            # Clamp at zero so a stray ")" cannot disable splitting for the rest
            nesting = max(nesting - 1, 0)
        elif char == "," and nesting == 0:
            pieces.append(text[start:index])
            start = index + 1
    pieces.append(text[start:])
    return pieces


def decode_ak_array(encoded_str):
    """
    Helper
    Decodes the structured string and builds one placeholder ak.Array per tree.

    Expected layout, one tree per line:
        "Tree: <name>; TBranch: <branch> ; dtype: <interpretation>, TBranch: ..."

    Each branch whose interpretation is understood by
    parse_jagged_depth_and_dtype becomes a length-1 ak.Array holding a zero
    value of the base dtype, nested in as many lists as there are AsJagged
    layers. Branches whose interpretation or dtype is not understood are kept
    with the value None. Trees without any decodable branch entry are omitted.

    Parameters:
        encoded_str (str): The encoded string from run_query.

    Returns:
        dict[str, ak.Array]: Dictionary where keys are tree names and values are
        single-entry record ak.Arrays with one field per branch.
    """
    tree_prefix = "Tree: "
    branch_separator = " ; dtype: "
    reconstructed_data = {}

    for tree_section in encoded_str.strip().split("\n"):
        tree_section = tree_section.strip()
        if not tree_section:
            continue

        # Everything before the first ";" is the tree header, the rest lists branches
        tree_header, _, branches_str = tree_section.partition(";")
        tree_header = tree_header.strip()

        # Only strip the prefix when it is really there, so an unexpected
        # header is kept intact instead of losing its first characters.
        if tree_header.startswith(tree_prefix):
            treename = tree_header[len(tree_prefix):]
        else:
            treename = tree_header

        branches = {}

        # Interpretation strings may contain commas of their own, e.g.
        # "AsJagged(AsDtype('>f4'), header_bytes=10)", so only commas outside
        # parentheses separate branches.
        # NOTE(review): assumes branch names contain no unbalanced "(" - confirm
        # against the producer of encoded_str.
        for branch in _split_top_level_commas(branches_str):
            branch = branch.strip()
            if branch_separator not in branch:
                continue  # not a branch description

            name_str, dtype_str = branch.split(branch_separator, 1)
            branch_name = name_str.replace("TBranch: ", "").strip()

            # Determine nesting depth and base dtype from interpretation string
            depth, base_dtype_str = parse_jagged_depth_and_dtype(dtype_str.strip())
            if base_dtype_str is None:
                branches[branch_name] = None
                continue

            try:
                np_dtype = np.dtype(base_dtype_str)
            except (TypeError, ValueError):
                # np.dtype reports unparseable strings with either exception type
                branches[branch_name] = None
                continue

            dummy = np.zeros(1, dtype=np_dtype)[0]  # Typed placeholder value

            # Simulate jagged structure by nesting the value in lists
            for _ in range(depth):
                dummy = [dummy]  # one level of jaggedness

            # Wrap dummy in a length-1 ak.Array
            # NOTE(review): confirm that ak.Array keeps the NumPy scalar's dtype
            # when it is built from a Python list like this.
            branches[branch_name] = ak.Array([dummy])

        if branches:
            # Each tree becomes a record array with 1 entry (dict of branch arrays)
            reconstructed_data[treename] = ak.Array([branches])

    return reconstructed_data
103+
104+
# Demo payload: two trees covering flat, singly jagged and doubly jagged
# numeric branches, plus a fixed-width bytes branch ('>S10').
encoded_str = (
    "Tree: testTree1; "
    "TBranch: a ; dtype: AsDtype('>f4'), "
    "TBranch: b ; dtype: AsJagged(AsDtype('>i4'))\n"
    "Tree: testTree2; "
    "TBranch: x ; dtype: AsJagged(AsJagged(AsDtype('>u4'))), "
    "TBranch: y ; dtype: AsDtype('>f8'), "
    "TBranch: label ; dtype: AsDtype('>S10')"
)

# Decode the payload and show the awkward type reconstructed for every tree.
result = decode_ak_array(encoded_str)
for tree in result:
    array = result[tree]
    print(f"\nTree: {tree}")
    # Only the type is reported; the values are placeholders.
    print(array.type)

0 commit comments

Comments
 (0)