
Commit 6ea4adf

tcm handle multiple cols, change to vov (#599)
* tcm to vov and use iterators so it can handle large files
* update evt for new tcm, and cleanup
* update legendtestdata commit
* handle multiple instances of a channel in an evt/tcm entry
* use pytest tmpdir
* fix naming to table_key and row_in_table
1 parent a772619 commit 6ea4adf

21 files changed (+1030 / -600 lines)
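
The change running through the whole diff is the new TCM layout: instead of flat id/idx arrays, the coincidence map now carries one list per event in the fields table_key and row_in_table, so a channel can contribute more than one hit to the same event. A minimal sketch of the indexing idiom the patch uses; the toy arrays and variable names here are illustrative, not pygama API:

import awkward as ak
import numpy as np

# Toy TCM in the new vector-of-vectors layout: one list per event.
# table_key holds the table (channel) id of each hit, row_in_table the row of
# that hit inside the channel's hit table; a channel may occur more than once
# in the same event.
tcm_table_key = ak.Array([[1, 2], [2, 2], [1], [1, 2, 2]])
tcm_row_in_table = ak.Array([[0, 0], [1, 2], [1], [2, 3, 4]])

table_id = 2  # channel to aggregate

# rows to load from this channel's hit table (same idiom as in the diff)
chan_tcm_indexs = ak.flatten(tcm_table_key) == table_id
idx_ch = ak.flatten(tcm_row_in_table)[chan_tcm_indexs].to_numpy()

# event index of each of those hits: repeat each event number by how many
# hits this channel contributes to it
evt_ids_ch = np.repeat(
    np.arange(0, len(tcm_table_key)),
    ak.sum(tcm_table_key == table_id, axis=1).to_numpy(),
)

print(idx_ch)      # [0 1 2 3 4]
print(evt_ids_ch)  # [0 1 1 3 3]

For each channel, idx_ch says which rows of that channel's hit table to read, and evt_ids_ch says which event each of those rows belongs to.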

src/pygama/evt/aggregators.py

Lines changed: 97 additions & 102 deletions
@@ -6,8 +6,8 @@
 
 import awkward as ak
 import numpy as np
+import pandas as pd
 from lgdo import lh5, types
-from lgdo.lh5 import LH5Store
 
 from . import utils
 
@@ -58,15 +58,14 @@ def evaluate_to_first_or_last(
     """
     f = utils.make_files_config(datainfo)
 
-    out = None
-    outt = None
-    store = LH5Store(keep_open=True)
+    df = None
 
     for ch in channels:
         table_id = utils.get_tcm_id_by_pattern(f.hit.table_fmt, ch)
 
         # get index list for this channel to be loaded
-        idx_ch = tcm.idx[tcm.id == table_id]
+        chan_tcm_indexs = ak.flatten(tcm.table_key) == table_id
+        idx_ch = ak.flatten(tcm.row_in_table)[chan_tcm_indexs].to_numpy()
 
         # evaluate at channel
         if ch not in channels_skip:
@@ -79,58 +78,52 @@ def evaluate_to_first_or_last(
                 pars_dict=pars_dict,
             )
 
-            if out is None:
+            if df is None:
                 # define dimension of output array
                 out = utils.make_numpy_full(n_rows, default_value, res.dtype)
-                outt = np.zeros(len(out))
-        else:
-            res = np.full(len(idx_ch), default_value)
-
-        # get mask from query
-        limarr = utils.get_mask_from_query(
-            datainfo=datainfo,
-            query=query,
-            length=len(res),
-            ch=ch,
-            idx_ch=idx_ch,
-        )
+                df = pd.DataFrame({"sort_field": np.zeros(len(out)), "res": out})
 
-        # find if sorter is in hit or dsp
-        t0 = store.read(
-            f"{ch}/{sorter[0]}/{sorter[1]}",
-            f.hit.file if f"{f.hit.group}" == sorter[0] else f.dsp.file,
-            idx=idx_ch,
-        )[0].view_as("np")
+            # get mask from query
+            limarr = utils.get_mask_from_query(
+                datainfo=datainfo,
+                query=query,
+                length=len(res),
+                ch=ch,
+                idx_ch=idx_ch,
+            )
 
-        if t0.ndim > 1:
-            raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array")
+            # find if sorter is in hit or dsp
+            sort_field = lh5.read_as(
+                f"{ch}/{sorter[0]}/{sorter[1]}",
+                f.hit.file if f"{f.hit.group}" == sorter[0] else f.dsp.file,
+                idx=idx_ch,
+                library="np",
+            )
 
-        evt_ids_ch = np.searchsorted(
-            tcm.cumulative_length,
-            np.where(tcm.id == table_id)[0],
-            "right",
-        )
+            if sort_field.ndim > 1:
+                raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array")
 
-        if is_first:
-            if ch == channels[0]:
-                outt[:] = np.inf
+            ch_df = pd.DataFrame({"sort_field": sort_field, "res": res})
 
-            out[evt_ids_ch] = np.where(
-                (t0 < outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch]
-            )
-            outt[evt_ids_ch] = np.where(
-                (t0 < outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch]
+            evt_ids_ch = np.repeat(
+                np.arange(0, len(tcm.table_key)),
+                ak.sum(tcm.table_key == table_id, axis=1),
             )
 
-        else:
-            out[evt_ids_ch] = np.where(
-                (t0 > outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch]
-            )
-            outt[evt_ids_ch] = np.where(
-                (t0 > outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch]
-            )
+            if is_first:
+                if ch == channels[0]:
+                    df["sort_field"] = np.inf
+                ids = (
+                    ch_df.sort_field.to_numpy() < df.sort_field[evt_ids_ch].to_numpy()
+                ) & (limarr)
+            else:
+                ids = (
+                    ch_df.sort_field.to_numpy() > df.sort_field[evt_ids_ch].to_numpy()
+                ) & (limarr)
 
-    return types.Array(nda=out)
+            df.loc[evt_ids_ch[ids], list(df.columns)] = ch_df.loc[ids, list(df.columns)]
+
+    return types.Array(nda=df.res.to_numpy())
 
 
 def evaluate_to_scalar(
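
evaluate_to_first_or_last now keeps a running pandas DataFrame with the best (sort_field, res) pair per event and lets each channel's hits challenge it, replacing the outt buffer and the searchsorted lookup. The following is a deliberately simplified, loop-based sketch of what that per-event "earliest hit wins" update computes; it is not the vectorized DataFrame code above, and the data is made up:

import numpy as np

n_events = 4
best_sort = np.full(n_events, np.inf)  # best sort_field (e.g. timestamp) per event
best_res = np.zeros(n_events)          # value carried along with it

# one channel's hits: the event each hit belongs to, its sort field and value
evt_ids_ch = np.array([0, 1, 1, 3])
sort_field = np.array([5.0, 2.0, 7.0, 1.0])
res = np.array([10.0, 20.0, 30.0, 40.0])

# "is_first" behaviour: a hit replaces the current best of its event if it is
# earlier; doing it hit by hit also copes with two hits of the same channel
# falling into the same event
for evt, t, r in zip(evt_ids_ch, sort_field, res):
    if t < best_sort[evt]:
        best_sort[evt], best_res[evt] = t, r

print(best_res)  # [10. 20.  0. 40.]
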
@@ -180,7 +173,8 @@ def evaluate_to_scalar(
         table_id = utils.get_tcm_id_by_pattern(f.hit.table_fmt, ch)
 
         # get index list for this channel to be loaded
-        idx_ch = tcm.idx[tcm.id == table_id]
+        chan_tcm_indexs = ak.flatten(tcm.table_key) == table_id
+        idx_ch = ak.flatten(tcm.row_in_table)[chan_tcm_indexs].to_numpy()
 
         if ch not in channels_skip:
             res = utils.get_data_at_channel(
@@ -195,42 +189,37 @@
             if out is None:
                 # define dimension of output array
                 out = utils.make_numpy_full(n_rows, default_value, res.dtype)
-        else:
-            res = np.full(len(idx_ch), default_value)
-
-        # get mask from query
-        limarr = utils.get_mask_from_query(
-            datainfo=datainfo,
-            query=query,
-            length=len(res),
-            ch=ch,
-            idx_ch=idx_ch,
-        )
-
-        evt_ids_ch = np.searchsorted(
-            tcm.cumulative_length,
-            np.where(tcm.id == table_id)[0],
-            side="right",
-        )
-
-        # switch through modes
-        if "sum" == mode:
-            if res.dtype == bool:
-                res = res.astype(int)
 
-            out[evt_ids_ch] = np.where(limarr, res + out[evt_ids_ch], out[evt_ids_ch])
+            # get mask from query
+            limarr = utils.get_mask_from_query(
+                datainfo=datainfo,
+                query=query,
+                length=len(res),
+                ch=ch,
+                idx_ch=idx_ch,
+            )
 
-        if "any" == mode:
-            if res.dtype != bool:
-                res = res.astype(bool)
+            evt_ids_ch = np.repeat(
+                np.arange(0, len(tcm.table_key)),
+                ak.sum(tcm.table_key == table_id, axis=1),
+            )
 
-            out[evt_ids_ch] = out[evt_ids_ch] | (res & limarr)
+            # switch through modes
+            if mode == "sum":
+                if res.dtype == bool:
+                    res = res.astype(int)
+                if out.dtype == bool:
+                    out = out.astype(int)
+                out[evt_ids_ch[limarr]] += res[limarr]
+            else:
+                if res.dtype != bool:
+                    res = res.astype(bool)
 
-        if "all" == mode:
-            if res.dtype != bool:
-                res = res.astype(bool)
+                if mode == "any":
+                    out[evt_ids_ch] |= res & limarr
 
-            out[evt_ids_ch] = out[evt_ids_ch] & res & limarr
+                if mode == "all":
+                    out[evt_ids_ch] &= res & limarr
 
     return types.Array(nda=out)
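
In evaluate_to_scalar the evt_ids_ch array built with np.repeat maps every hit of a channel to its event, and the sum/any/all modes write through it. A small sketch of the "sum" mode using np.add.at, which accumulates repeated event indices (a channel firing twice in one event); this is a stand-in for the in-place update in the patch, with made-up data:

import numpy as np

n_rows = 4                                    # number of events
out = np.zeros(n_rows)

evt_ids_ch = np.array([0, 1, 1, 3])           # event of each hit of this channel
res = np.array([1.0, 2.0, 3.0, 4.0])          # evaluated expression per hit
limarr = np.array([True, True, True, False])  # query mask

# unbuffered add: both hits falling into event 1 contribute
np.add.at(out, evt_ids_ch[limarr], res[limarr])
print(out)  # [1. 5. 0. 0.]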

@@ -274,16 +263,20 @@ def evaluate_at_channel(
 
     out = None
 
-    for ch in np.unique(ch_comp.nda.astype(int)):
-        table_name = utils.get_table_name_by_pattern(table_id_fmt, ch)
+    for table_id in np.unique(ch_comp.nda.astype(int)):
+        table_name = utils.get_table_name_by_pattern(table_id_fmt, table_id)
         # skip default value
         if table_name not in lh5.ls(f.hit.file):
             continue
 
-        idx_ch = tcm.idx[tcm.id == ch]
-        evt_ids_ch = np.searchsorted(
-            tcm.cumulative_length, np.where(tcm.id == ch)[0], "right"
+        # get index list for this channel to be loaded
+        chan_tcm_indexs = ak.flatten(tcm.table_key) == table_id
+        idx_ch = ak.flatten(tcm.row_in_table)[chan_tcm_indexs].to_numpy()
+
+        evt_ids_ch = np.repeat(
+            np.arange(0, len(tcm.table_key)), ak.sum(tcm.table_key == table_id, axis=1)
         )
+
         if (table_name in channels) and (table_name not in channels_skip):
             res = utils.get_data_at_channel(
                 datainfo=datainfo,
@@ -299,7 +292,9 @@
         if out is None:
             out = utils.make_numpy_full(len(ch_comp.nda), default_value, res.dtype)
 
-        out[evt_ids_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[evt_ids_ch])
+        out[evt_ids_ch] = np.where(
+            table_id == ch_comp.nda[idx_ch], res, out[evt_ids_ch]
+        )
 
     return types.Array(nda=out)
 
@@ -348,10 +343,10 @@ def evaluate_at_channel_vov(
     )
 
     type_name = None
-    for ch in ch_comp_channels:
-        table_name = utils.get_table_name_by_pattern(f.hit.table_fmt, ch)
-        evt_ids_ch = np.searchsorted(
-            tcm.cumulative_length, np.where(tcm.id == ch)[0], "right"
+    for table_id in ch_comp_channels:
+        table_name = utils.get_table_name_by_pattern(f.hit.table_fmt, table_id)
+        evt_ids_ch = np.repeat(
+            np.arange(0, len(tcm.table_key)), ak.sum(tcm.table_key == table_id, axis=1)
         )
         if (table_name in channels) and (table_name not in channels_skip):
             res = utils.get_data_at_channel(
@@ -362,20 +357,19 @@
                 field_list=field_list,
                 pars_dict=pars_dict,
             )
-            new_evt_ids_ch = np.searchsorted(
-                ch_comp.cumulative_length,
-                np.where(ch_comp.flattened_data.nda == ch)[0],
-                "right",
+            new_evt_ids_ch = np.repeat(
+                np.arange(0, len(ch_comp)),
+                ak.sum(ch_comp.view_as("ak") == table_id, axis=1),
             )
             matches = np.isin(evt_ids_ch, new_evt_ids_ch)
-            out[ch_comp.flattened_data.nda == ch] = res[matches]
+            out[ch_comp.flattened_data.nda == table_id] = res[matches]
 
         else:
-            length = len(np.where(ch_comp.flattened_data.nda == ch)[0])
+            length = len(np.where(ch_comp.flattened_data.nda == table_id)[0])
             res = np.full(length, default_value)
-            out[ch_comp.flattened_data.nda == ch] = res
+            out[ch_comp.flattened_data.nda == table_id] = res
 
-        if ch == ch_comp_channels[0]:
+        if table_id == ch_comp_channels[0]:
             out = out.astype(res.dtype)
             type_name = res.dtype
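
In evaluate_at_channel_vov, np.isin compares the two event-index maps to pick out the hits whose event actually requests this channel in ch_comp. Stripped-down illustration with made-up arrays:

import numpy as np

evt_ids_ch = np.array([0, 1, 2, 3])       # events in which this channel has a hit
new_evt_ids_ch = np.array([1, 3])         # events whose ch_comp entry selects this channel
res = np.array([10.0, 11.0, 12.0, 13.0])  # evaluated expression per hit

# keep only the hits whose event is actually asking for this channel
matches = np.isin(evt_ids_ch, new_evt_ids_ch)
print(res[matches])  # [11. 13.]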

@@ -438,12 +432,13 @@ def evaluate_to_aoesa(
 
     for i, ch in enumerate(channels):
         table_id = utils.get_tcm_id_by_pattern(f.hit.table_fmt, ch)
-        idx_ch = tcm.idx[tcm.id == table_id]
 
-        evt_ids_ch = np.searchsorted(
-            tcm.cumulative_length,
-            np.where(tcm.id == table_id)[0],
-            "right",
+        # get index list for this channel to be loaded
+        chan_tcm_indexs = ak.flatten(tcm.table_key) == table_id
+        idx_ch = ak.flatten(tcm.row_in_table)[chan_tcm_indexs].to_numpy()
+
+        evt_ids_ch = np.repeat(
+            np.arange(0, len(tcm.table_key)), ak.sum(tcm.table_key == table_id, axis=1)
        )
 
         if ch not in channels_skip: