Skip to content

Commit 6529e29

Browse files
authored
refactor: RNTuple writing improvements (#1431)
* Safer check for string arrays
* RBlob tweaks
* A bit of cleanup
* Switched to IntFlag
* Added support for awkward __doc__ parameter
* Fixed docstring
* Support inputting data as dict
* Make logic for writing page list a bit cleaner
1 parent 218b620 commit 6529e29

File tree

7 files changed

+322
-177
lines changed

7 files changed

+322
-177
lines changed

src/uproot/behaviors/RNTuple.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def iterate(
3838
step_size="100 MB",
3939
decompression_executor=None, # TODO: Not implemented yet
4040
library="ak", # TODO: Not implemented yet
41-
ak_add_doc=False, # TODO: Not implemented yet
41+
ak_add_doc=False,
4242
how=None,
4343
report=False, # TODO: Not implemented yet
4444
allow_missing=False, # TODO: Not implemented yet
@@ -81,7 +81,7 @@ def iterate(
8181
library (str or :doc:`uproot.interpretation.library.Library`): The library
8282
that is used to represent arrays. Options are ``"np"`` for NumPy,
8383
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas. (Not implemented yet.)
84-
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``name``
84+
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``description``
8585
to the Awkward ``__doc__`` parameter of the array.
8686
If a dict ``{key: value}`` and ``library="ak"``, add the RField attribute named ``value`` to the
8787
Awkward ``key`` parameter of the array.
@@ -207,7 +207,7 @@ def concatenate(
207207
entry_stop=None,
208208
decompression_executor=None, # TODO: Not implemented yet
209209
library="ak", # TODO: Not implemented yet
210-
ak_add_doc=False, # TODO: Not implemented yet
210+
ak_add_doc=False,
211211
how=None,
212212
allow_missing=False,
213213
# For compatibility reasons we also accept kwargs meant for TTrees
@@ -251,7 +251,7 @@ def concatenate(
251251
library (str or :doc:`uproot.interpretation.library.Library`): The library
252252
that is used to represent arrays. Options are ``"np"`` for NumPy,
253253
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas. (Not implemented yet.)
254-
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``name``
254+
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``description``
255255
to the Awkward ``__doc__`` parameter of the array.
256256
If a dict ``{key: value}`` and ``library="ak"``, add the RField attribute named ``value`` to the
257257
Awkward ``key`` parameter of the array.
@@ -488,6 +488,7 @@ def to_akform(
488488
filter_name=no_filter,
489489
filter_typename=no_filter,
490490
filter_field=no_filter,
491+
ak_add_doc=False,
491492
# For compatibility reasons we also accept kwargs meant for TTrees
492493
filter_branch=unset,
493494
):
@@ -501,6 +502,10 @@ def to_akform(
501502
filter to select ``RFields`` using the full
502503
:doc:`uproot.models.RNTuple.RField` object. The ``RField`` is
503504
included if the function returns True, excluded if it returns False.
505+
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``description``
506+
to the Awkward ``__doc__`` parameter of the array.
507+
If a dict ``{key: value}`` and ``library="ak"``, add the RField attribute named ``value`` to the
508+
Awkward ``key`` parameter of the array.
504509
filter_branch (None or function of :doc:`uproot.models.RNTuple.RField` → bool): An alias for ``filter_field`` included
505510
for compatibility with software that was used for :doc:`uproot.behaviors.TBranch.TBranch`. This argument should not be used
506511
and will be removed in a future version.
@@ -524,17 +529,31 @@ def to_akform(
524529
# the field needs to be in the keys or be a parent of a field in the keys
525530
if any(key.startswith(field.name) for key in keys):
526531
top_names.append(field.name)
527-
record_list.append(rntuple.field_form(field.field_id, keys))
532+
record_list.append(
533+
rntuple.field_form(field.field_id, keys, ak_add_doc=ak_add_doc)
534+
)
528535
else:
529536
# Always use the full path for keys
530537
# Also include the field itself
531538
keys = [self.path] + [f"{self.path}.{k}" for k in keys]
532539
# The field needs to be in the keys or be a parent of a field in the keys
533540
if any(key.startswith(self.path) for key in keys):
534541
top_names.append(self.name)
535-
record_list.append(rntuple.field_form(self.field_id, keys))
542+
record_list.append(
543+
rntuple.field_form(self.field_id, keys, ak_add_doc=ak_add_doc)
544+
)
545+
546+
parameters = None
547+
if isinstance(ak_add_doc, bool) and ak_add_doc and self.description != "":
548+
parameters = {"__doc__": self.description}
549+
elif isinstance(ak_add_doc, dict) and self is not rntuple:
550+
parameters = {
551+
key: self.__getattribute__(value) for key, value in ak_add_doc.items()
552+
}
536553

537-
form = ak.forms.RecordForm(record_list, top_names, form_key="toplevel")
554+
form = ak.forms.RecordForm(
555+
record_list, top_names, form_key="toplevel", parameters=parameters
556+
)
538557
return form
539558

540559
def arrays(
@@ -603,7 +622,7 @@ def arrays(
603622
to CuFile to provide direct memory access (DMA) transfers between GPU
604623
memory and storage. KvikIO bindings to nvcomp decompress data
605624
buffers.
606-
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``name``
625+
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``description``
607626
to the Awkward ``__doc__`` parameter of the array.
608627
If a dict ``{key: value}`` and ``library="ak"``, add the RField attribute named ``value`` to the
609628
Awkward ``key`` parameter of the array.
@@ -752,6 +771,7 @@ def _arrays(
752771
filter_typename=filter_typename,
753772
filter_field=filter_field,
754773
filter_branch=filter_branch,
774+
ak_add_doc=ak_add_doc,
755775
)
756776

757777
# only read columns mentioned in the awkward form
@@ -949,7 +969,7 @@ def iterate(
949969
step_size="100 MB",
950970
decompression_executor=None, # TODO: Not implemented yet
951971
library="ak", # TODO: Not implemented yet
952-
ak_add_doc=False, # TODO: Not implemented yet
972+
ak_add_doc=False,
953973
how=None,
954974
report=False, # TODO: Not implemented yet
955975
# For compatibility reasons we also accept kwargs meant for TTrees
@@ -995,7 +1015,7 @@ def iterate(
9951015
library (str or :doc:`uproot.interpretation.library.Library`): The library
9961016
that is used to represent arrays. Options are ``"np"`` for NumPy,
9971017
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas. (Not implemented yet.)
998-
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``name``
1018+
ak_add_doc (bool | dict ): If True and ``library="ak"``, add the RField ``description``
9991019
to the Awkward ``__doc__`` parameter of the array.
10001020
If a dict ``{key: value}`` and ``library="ak"``, add the RField attribute named ``value`` to the
10011021
Awkward ``key`` parameter of the array.
@@ -1051,6 +1071,7 @@ def iterate(
10511071
filter_typename=filter_typename,
10521072
filter_field=filter_field,
10531073
filter_branch=filter_branch,
1074+
ak_add_doc=ak_add_doc,
10541075
)
10551076

10561077
step_size = _regularize_step_size(
@@ -1766,6 +1787,13 @@ def name(self):
17661787
"""
17671788
return self.header.ntuple_name
17681789

1790+
@property
1791+
def description(self):
1792+
"""
1793+
Description of the ``RNTuple``.
1794+
"""
1795+
return self.header.ntuple_description
1796+
17691797
@property
17701798
def object_path(self):
17711799
"""

src/uproot/const.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66
from __future__ import annotations
77

8-
from enum import IntEnum
8+
from enum import IntFlag
99

1010
import numpy
1111

@@ -184,8 +184,8 @@
184184
0x19: 64,
185185
0x1A: 32,
186186
0x1B: 64,
187-
0x1C: 31, # variable from 10 to 31
188-
0x1D: 32, # variable from 1 to 32
187+
0x1C: 32, # from 10 to 31 in storage, but 32 in memory
188+
0x1D: 32, # from 1 to 32 in storage, but 32 in memory
189189
}
190190
rntuple_col_type_to_num_dict = {
191191
"bit": 0x00,
@@ -253,40 +253,43 @@
253253
)
254254

255255

256-
class RNTupleLocatorType(IntEnum):
256+
class RNTupleLocatorType(IntFlag):
257257
STANDARD = 0x00
258258
LARGE = 0x01
259259

260260

261-
class RNTupleEnvelopeType(IntEnum):
261+
class RNTupleEnvelopeType(IntFlag):
262262
RESERVED = 0x00
263263
HEADER = 0x01
264264
FOOTER = 0x02
265265
PAGELIST = 0x03
266266

267267

268-
class RNTupleFieldRole(IntEnum):
268+
class RNTupleFieldRole(IntFlag):
269269
LEAF = 0x00
270270
COLLECTION = 0x01
271271
RECORD = 0x02
272272
VARIANT = 0x03
273273
STREAMER = 0x04
274274

275275

276-
class RNTupleFieldFlag(IntEnum):
276+
class RNTupleFieldFlags(IntFlag):
277+
NOFLAG = 0x00
277278
REPETITIVE = 0x01
278279
PROJECTED = 0x02
279280
CHECKSUM = 0x04
280281

281282

282-
class RNTupleColumnFlag(IntEnum):
283+
class RNTupleColumnFlags(IntFlag):
284+
NOFLAG = 0x00
283285
DEFERRED = 0x01
284286
RANGE = 0x02
285287

286288

287-
class RNTupleExtraTypeIdentifier(IntEnum):
289+
class RNTupleExtraTypeIdentifier(IntFlag):
288290
ROOT = 0x00
289291

290292

291-
class RNTupleClusterFlag(IntEnum):
293+
class RNTupleClusterFlags(IntFlag):
294+
NOFLAG = 0x00
292295
SHARDED = 0x01

0 commit comments

Comments
 (0)