Skip to content

Commit e0edfd7

Browse files
committed
updates for lgdo 1.12
1 parent a4658ec commit e0edfd7

File tree

9 files changed

+49
-61
lines changed

9 files changed

+49
-61
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ dependencies = [
3636
"h5py>=3.2",
3737
"iminuit",
3838
"legend-daq2lh5>=1.2.1",
39-
"legend-pydataobj>=1.7,<1.12",
39+
"legend-pydataobj>=1.12.0a1",
4040
"pylegendmeta>=0.9",
4141
"matplotlib",
4242
"numba!=0.53.*,!=0.54.*,!=0.57",

src/pygama/evt/build_evt.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def build_evt_cols(
268268
if Path(datainfo.evt.file).exists():
269269
Path(datainfo.evt.file).unlink()
270270

271-
for tcm_lh5, _, n_rows in lh5.LH5Iterator(
271+
for tcm_lh5 in lh5.LH5Iterator(
272272
datainfo.tcm.file,
273273
datainfo.tcm.group,
274274
buffer_len=buffer_len,
@@ -281,7 +281,7 @@ def build_evt_cols(
281281
)
282282

283283
# get number of events in file (ask the TCM)
284-
table = Table(size=n_rows)
284+
table = Table(size=len(tcm_lh5))
285285

286286
# now loop over operations (columns in evt table)
287287
for field, v in config["operations"].items():
@@ -335,7 +335,7 @@ def build_evt_cols(
335335
channels_skip=channels_skip,
336336
mode=v["aggregation_mode"],
337337
expr=v["expression"],
338-
n_rows=n_rows,
338+
n_rows=len(tcm_lh5),
339339
table=table,
340340
parameters=v.get("parameters", None),
341341
query=v.get("query", None),
@@ -366,7 +366,7 @@ def build_evt_cols(
366366
table.add_field(field, obj)
367367

368368
# might need to re-organize fields in subtables, create a new object for that
369-
nested_tbl = Table(size=n_rows)
369+
nested_tbl = Table(size=len(tcm_lh5))
370370
output_fields = config.get("outputs", table.keys())
371371

372372
for field, obj in table.items():
@@ -389,7 +389,7 @@ def build_evt_cols(
389389

390390
# otherwise, increase nesting
391391
if level not in lvl_ptr:
392-
lvl_ptr.add_field(level, Table(size=n_rows))
392+
lvl_ptr.add_field(level, Table(size=len(tcm_lh5)))
393393
lvl_ptr = lvl_ptr[level]
394394

395395
# write output fields into outfile

src/pygama/evt/tcm.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ def generate_tcm_cols(
100100
for _ii, it in enumerate(iterators[curr_mask]):
101101
ii = np.where(curr_mask)[0][_ii]
102102
try:
103-
buffer, start, buf_len = it.__next__()
103+
buffer = it.__next__()
104+
buf_len = len(buffer)
105+
start = it.current_i_entry
104106
except StopIteration:
105107
at_end[ii] = True
106108
continue

src/pygama/flow/data_loader.py

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
from keyword import iskeyword
1212
from typing import Iterator
1313

14+
import lgdo.lh5 as lh5
1415
import numpy as np
1516
import pandas as pd
1617
from awkward_pandas import AwkwardDtype
1718
from dspeed.vis import WaveformBrowser
18-
from lgdo.lh5 import LH5Iterator, LH5Store
19+
from lgdo.lh5 import LH5Iterator
1920
from lgdo.lh5.utils import expand_vars
2021
from lgdo.types import Array, Struct, Table
2122
from lgdo.types.vovutils import build_cl, explode_arrays
@@ -537,8 +538,6 @@ def build_entry_list(
537538
if save_output_columns:
538539
entry_cols += for_output
539540

540-
sto = LH5Store()
541-
542541
if log.getEffectiveLevel() >= logging.INFO:
543542
progress_bar = tqdm(
544543
desc="Building entry list",
@@ -585,7 +584,7 @@ def build_entry_list(
585584

586585
tcm_table_name = self.filedb.get_table_name(tcm_tier, tcm_tb)
587586
try:
588-
tcm_lgdo, _ = sto.read(tcm_table_name, tcm_path)
587+
tcm_lgdo = lh5.read(tcm_table_name, tcm_path)
589588
except KeyError:
590589
log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}")
591590
continue
@@ -651,7 +650,7 @@ def build_entry_list(
651650
if tb in col_tiers[file]["tables"][tier]:
652651
table_name = self.filedb.get_table_name(tier, tb)
653652
try:
654-
tier_table, _ = sto.read(
653+
tier_table = lh5.read(
655654
table_name,
656655
tier_path,
657656
field_mask=cut_cols[level],
@@ -710,9 +709,9 @@ def build_entry_list(
710709
f_dict = f_entries.to_dict("list")
711710
f_struct = Struct(f_dict)
712711
if self.merge_files:
713-
sto.write(f_struct, "entries", output_file, wo_mode="a")
712+
lh5.write(f_struct, "entries", output_file, wo_mode="a")
714713
else:
715-
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
714+
lh5.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
716715

717716
if log.getEffectiveLevel() >= logging.INFO:
718717
progress_bar.close()
@@ -782,8 +781,6 @@ def build_hit_entries(
782781
log.debug(f"need to load {cut_cols} columns for applying cuts")
783782
col_tiers = self.get_tiers_for_col(cut_cols, merge_files=False)
784783

785-
sto = LH5Store()
786-
787784
if log.getEffectiveLevel() >= logging.INFO:
788785
progress_bar = tqdm(
789786
desc="Building entry list",
@@ -832,7 +829,7 @@ def build_hit_entries(
832829
# now read how many rows are there in the file
833830
table_name = self.filedb.get_table_name(tier, tb)
834831
try:
835-
n_rows = sto.read_n_rows(table_name, tier_path)
832+
n_rows = lh5.read_n_rows(table_name, tier_path)
836833
except KeyError:
837834
log.warning(f"Cannot find {table_name} in file {tier_path}")
838835
continue
@@ -862,7 +859,7 @@ def build_hit_entries(
862859
# load the data from the tier file, just the columns needed for the cut
863860
table_name = self.filedb.get_table_name(tier, tb)
864861
try:
865-
tier_tb, _ = sto.read(
862+
tier_tb = lh5.read(
866863
table_name, tier_path, field_mask=cut_cols
867864
)
868865
except KeyError:
@@ -902,9 +899,9 @@ def build_hit_entries(
902899
f_dict = f_entries.to_dict("list")
903900
f_struct = Struct(f_dict)
904901
if self.merge_files:
905-
sto.write(f_struct, "entries", output_file, wo_mode="a")
902+
lh5.write(f_struct, "entries", output_file, wo_mode="a")
906903
else:
907-
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
904+
lh5.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
908905

909906
if log.getEffectiveLevel() >= logging.INFO:
910907
progress_bar.close()
@@ -1063,8 +1060,6 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
10631060
tier_table.update(zip(tier_table.keys(), exp_cols))
10641061
return tier_table
10651062

1066-
sto = LH5Store()
1067-
10681063
if self.merge_files:
10691064
tables = entry_list[f"{parent}_table"].unique()
10701065
field_mask = []
@@ -1115,7 +1110,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
11151110
for file in files
11161111
]
11171112

1118-
tier_table, _ = sto.read(
1113+
tier_table = lh5.read(
11191114
name=tb_name,
11201115
lh5_file=tier_paths,
11211116
idx=idx_mask,
@@ -1141,7 +1136,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
11411136
f_table = utils.dict_to_table(col_dict=col_dict, attr_dict=attr_dict)
11421137

11431138
if output_file:
1144-
sto.write(f_table, "merged_data", output_file, wo_mode="o")
1139+
lh5.write(f_table, "merged_data", output_file, wo_mode="o")
11451140
if in_memory:
11461141
if self.output_format == "lgdo.Table":
11471142
return f_table
@@ -1218,7 +1213,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
12181213
raise FileNotFoundError(tier_path)
12191214

12201215
table_name = self.filedb.get_table_name(tier, tb)
1221-
tier_table, _ = sto.read(
1216+
tier_table = lh5.read(
12221217
table_name,
12231218
tier_path,
12241219
idx=idx_mask,
@@ -1244,7 +1239,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
12441239
if in_memory:
12451240
load_out.add_field(name=file, obj=f_table)
12461241
if output_file:
1247-
sto.write(f_table, f"{file}", output_file, wo_mode="o")
1242+
lh5.write(f_table, f"{file}", output_file, wo_mode="o")
12481243
# end file loop
12491244

12501245
if log.getEffectiveLevel() >= logging.INFO:
@@ -1278,8 +1273,6 @@ def load_evts(
12781273
child = self.tcms[tcm_level]["child"]
12791274
load_levels = [parent, child]
12801275

1281-
sto = LH5Store()
1282-
12831276
if self.merge_files: # Try to load all information at once
12841277
raise NotImplementedError
12851278
else: # Not merge_files
@@ -1316,7 +1309,7 @@ def load_evts(
13161309
)
13171310
if os.path.exists(tier_path):
13181311
table_name = self.filedb.get_table_name(tier, tb)
1319-
tier_table, _ = sto.read(
1312+
tier_table = lh5.read(
13201313
table_name,
13211314
tier_path,
13221315
idx=idx_mask,
@@ -1330,7 +1323,7 @@ def load_evts(
13301323
if in_memory:
13311324
load_out[file] = f_table
13321325
if output_file:
1333-
sto.write(f_table, f"file{file}", output_file, wo_mode="o")
1326+
lh5.write(f_table, f"file{file}", output_file, wo_mode="o")
13341327
# end file loop
13351328

13361329
if in_memory:

src/pygama/flow/file_db.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
import warnings
1111

1212
import h5py
13+
import lgdo.lh5 as lh5
1314
import numpy as np
1415
import pandas as pd
1516
from lgdo.lh5 import ls
16-
from lgdo.lh5.store import LH5Store
1717
from lgdo.lh5.utils import expand_path, expand_vars
1818
from lgdo.types import Array, Scalar, VectorOfVectors
1919
from parse import parse
@@ -480,8 +480,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
480480
columns_vov = VectorOfVectors(
481481
flattened_data=flattened, cumulative_length=length
482482
)
483-
sto = LH5Store()
484-
sto.write(columns_vov, "unique_columns", to_file)
483+
lh5.write(columns_vov, "unique_columns", to_file)
485484

486485
return self.columns
487486

@@ -509,7 +508,6 @@ def from_disk(self, path: str | list[str]) -> None:
509508
if not paths:
510509
raise FileNotFoundError(path)
511510

512-
sto = LH5Store()
513511
# objects/accumulators that will be used to configure the FileDB at the end
514512
_cfg = None
515513
_df = None
@@ -531,7 +529,7 @@ def _replace_idx(row, trans, tier):
531529

532530
# loop over the files
533531
for p in paths:
534-
cfg, _ = sto.read("config", p)
532+
cfg = lh5.read("config", p)
535533
cfg = json.loads(cfg.value.decode())
536534

537535
# make sure configurations are all the same
@@ -543,7 +541,7 @@ def _replace_idx(row, trans, tier):
543541
)
544542

545543
# read in unique columns
546-
vov, _ = sto.read("columns", p)
544+
vov = lh5.read("columns", p)
547545
# Convert back from VoV of UTF-8 bytestrings to a list of lists of strings
548546
columns = [[v.decode("utf-8") for v in ov] for ov in list(vov)]
549547

@@ -606,8 +604,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
606604
"""
607605
log.debug(f"writing database to {filename}")
608606

609-
sto = LH5Store()
610-
sto.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode)
607+
lh5.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode)
611608

612609
if wo_mode in ["write_safe", "w", "overwrite_file", "of"]:
613610
wo_mode = "a"
@@ -624,7 +621,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
624621
flattened_data=Array(nda=np.array(flat).astype("S")),
625622
cumulative_length=Array(nda=np.array(cum_l)),
626623
)
627-
sto.write(col_vov, "columns", filename, wo_mode=wo_mode)
624+
lh5.write(col_vov, "columns", filename, wo_mode=wo_mode)
628625

629626
# FIXME: to_hdf() throws this:
630627
#

src/pygama/hit/build_hit.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
from typing import Iterable, Mapping
1111

1212
import lgdo
13+
import lgdo.lh5 as lh5
1314
import numpy as np
14-
from lgdo.lh5 import LH5Iterator, LH5Store, ls
15+
from lgdo.lh5 import LH5Iterator, ls
1516

1617
from .. import utils
1718

@@ -75,13 +76,12 @@ def build_hit(
7576
n_max
7677
maximum number of rows to process
7778
wo_mode
78-
forwarded to :meth:`lgdo.lh5.store.LH5Store.write`.
79+
forwarded to :meth:`lgdo.lh5.write`.
7980
8081
See Also
8182
--------
8283
lgdo.types.table.Table.eval
8384
"""
84-
store = LH5Store()
8585

8686
if lh5_tables_config is None and hit_config is None:
8787
raise ValueError("either lh5_tables_config or hit_config must be specified")
@@ -134,13 +134,12 @@ def build_hit(
134134
first_done = False
135135
for tbl, cfg in lh5_tables_config.items():
136136
lh5_it = LH5Iterator(infile, tbl, buffer_len=buffer_len)
137-
tot_n_rows = store.read_n_rows(tbl, infile)
138137
write_offset = 0
139138

140139
log.info(f"Processing table '{tbl}' in file {infile}")
141140

142-
for tbl_obj, start_row, n_rows in lh5_it:
143-
n_rows = min(tot_n_rows - start_row, n_rows)
141+
for tbl_obj in lh5_it:
142+
start_row = lh5_it.current_i_entry
144143

145144
# create a new table object that links all the columns in the
146145
# current table (i.e. no copy)
@@ -193,11 +192,11 @@ def build_hit(
193192
if col not in cfg["outputs"]:
194193
outtbl_obj.remove_column(col, delete=True)
195194

196-
store.write(
195+
lh5.write(
197196
obj=outtbl_obj,
198197
name=tbl.replace("/dsp", "/hit"),
199198
lh5_file=outfile,
200-
n_rows=n_rows,
199+
n_rows=len(tbl_obj),
201200
wo_mode=wo_mode if first_done is False else "append",
202201
write_start=write_offset + start_row,
203202
)

src/pygama/pargen/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,14 @@ def load_data(
123123
)
124124
df = pd.DataFrame(columns=list(df_fields))
125125

126-
for table, entry, n_rows in lh5_it:
126+
for table in lh5_it:
127127
# Evaluate all provided expressions and add to table
128128
for outname, info in cal_dict.items():
129129
table[outname] = table.eval(
130130
info["expression"], info.get("parameters", None)
131131
)
132+
entry = lh5_it.current_global_entries[0]
133+
n_rows = len(table)
132134

133135
# Copy params in table into dataframe
134136
for par in df:

0 commit comments

Comments
 (0)