diff --git a/src/uproot/behaviors/RNTuple.py b/src/uproot/behaviors/RNTuple.py index ed688f703..334c17f18 100644 --- a/src/uproot/behaviors/RNTuple.py +++ b/src/uproot/behaviors/RNTuple.py @@ -38,7 +38,7 @@ def iterate( step_size="100 MB", decompression_executor=None, # TODO: Not implemented yet library="ak", # TODO: Not implemented yet - ak_add_doc=False, # TODO: Not implemented yet + ak_add_doc=False, how=None, report=False, # TODO: Not implemented yet allow_missing=False, # TODO: Not implemented yet @@ -207,7 +207,7 @@ def concatenate( entry_stop=None, decompression_executor=None, # TODO: Not implemented yet library="ak", # TODO: Not implemented yet - ak_add_doc=False, # TODO: Not implemented yet + ak_add_doc=False, how=None, allow_missing=False, # For compatibility reasons we also accepts kwargs meant for TTrees @@ -488,6 +488,7 @@ def to_akform( filter_name=no_filter, filter_typename=no_filter, filter_field=no_filter, + ak_add_doc=False, # For compatibility reasons we also accepts kwargs meant for TTrees filter_branch=unset, ): @@ -501,6 +502,10 @@ def to_akform( filter to select ``RFields`` using the full :doc:`uproot.models.RNTuple.RField` object. The ``RField`` is included if the function returns True, excluded if it returns False. + ak_add_doc (bool | dict): If True and ``library="ak"``, add the RField ``description`` + to the Awkward ``__doc__`` parameter of the array. + If dict = {key:value} and ``library="ak"``, add the RField attribute named ``value`` to the + Awkward ``key`` parameter of the array. filter_branch (None or function of :doc:`uproot.models.RNTuple.RField` \u2192 bool): An alias for ``filter_field`` included for compatibility with software that was used for :doc:`uproot.behaviors.TBranch.TBranch`. This argument should not be used and will be removed in a future version. 
@@ -524,7 +529,9 @@ def to_akform( # the field needs to be in the keys or be a parent of a field in the keys if any(key.startswith(field.name) for key in keys): top_names.append(field.name) - record_list.append(rntuple.field_form(field.field_id, keys)) + record_list.append( + rntuple.field_form(field.field_id, keys, ak_add_doc=ak_add_doc) + ) else: # Always use the full path for keys # Also include the field itself @@ -532,9 +539,21 @@ def to_akform( # The field needs to be in the keys or be a parent of a field in the keys if any(key.startswith(self.path) for key in keys): top_names.append(self.name) - record_list.append(rntuple.field_form(self.field_id, keys)) + record_list.append( + rntuple.field_form(self.field_id, keys, ak_add_doc=ak_add_doc) + ) - form = ak.forms.RecordForm(record_list, top_names, form_key="toplevel") + parameters = None + if isinstance(ak_add_doc, bool) and ak_add_doc and self.description != "": + parameters = {"__doc__": self.description} + elif isinstance(ak_add_doc, dict) and self is not rntuple: + parameters = { + key: self.__getattribute__(value) for key, value in ak_add_doc.items() + } + + form = ak.forms.RecordForm( + record_list, top_names, form_key="toplevel", parameters=parameters + ) return form def arrays( @@ -658,6 +677,7 @@ def arrays( filter_typename=filter_typename, filter_field=filter_field, filter_branch=filter_branch, + ak_add_doc=ak_add_doc, ) # only read columns mentioned in the awkward form @@ -750,7 +770,7 @@ def iterate( step_size="100 MB", decompression_executor=None, # TODO: Not implemented yet library="ak", # TODO: Not implemented yet - ak_add_doc=False, # TODO: Not implemented yet + ak_add_doc=False, how=None, report=False, # TODO: Not implemented yet # For compatibility reasons we also accepts kwargs meant for TTrees @@ -852,6 +872,7 @@ def iterate( filter_typename=filter_typename, filter_field=filter_field, filter_branch=filter_branch, + ak_add_doc=ak_add_doc, ) step_size = _regularize_step_size( @@ -1567,6 
+1588,13 @@ def name(self): """ return self.header.ntuple_name + @property + def description(self): + """ + Description of the ``RNTuple``. + """ + return self.header.ntuple_description + @property def object_path(self): """ diff --git a/src/uproot/const.py b/src/uproot/const.py index 798c03fd5..80af0e62e 100644 --- a/src/uproot/const.py +++ b/src/uproot/const.py @@ -5,7 +5,7 @@ """ from __future__ import annotations -from enum import IntEnum +from enum import IntFlag import numpy @@ -184,8 +184,8 @@ 0x19: 64, 0x1A: 32, 0x1B: 64, - 0x1C: 31, # variable from 10 to 31 - 0x1D: 32, # variable from 1 to 32 + 0x1C: 32, # from 10 to 31 in storage, but 32 in memory + 0x1D: 32, # from 1 to 32 in storage, but 32 in memory } rntuple_col_type_to_num_dict = { "bit": 0x00, @@ -253,19 +253,19 @@ ) -class RNTupleLocatorType(IntEnum): +class RNTupleLocatorType(IntFlag): STANDARD = 0x00 LARGE = 0x01 -class RNTupleEnvelopeType(IntEnum): +class RNTupleEnvelopeType(IntFlag): RESERVED = 0x00 HEADER = 0x01 FOOTER = 0x02 PAGELIST = 0x03 -class RNTupleFieldRole(IntEnum): +class RNTupleFieldRole(IntFlag): LEAF = 0x00 COLLECTION = 0x01 RECORD = 0x02 @@ -273,20 +273,23 @@ class RNTupleFieldRole(IntEnum): STREAMER = 0x04 -class RNTupleFieldFlag(IntEnum): +class RNTupleFieldFlags(IntFlag): + NOFLAG = 0x00 REPETITIVE = 0x01 PROJECTED = 0x02 CHECKSUM = 0x04 -class RNTupleColumnFlag(IntEnum): +class RNTupleColumnFlags(IntFlag): + NOFLAG = 0x00 DEFERRED = 0x01 RANGE = 0x02 -class RNTupleExtraTypeIdentifier(IntEnum): +class RNTupleExtraTypeIdentifier(IntFlag): ROOT = 0x00 -class RNTupleClusterFlag(IntEnum): +class RNTupleClusterFlags(IntFlag): + NOFLAG = 0x00 SHARDED = 0x01 diff --git a/src/uproot/models/RNTuple.py b/src/uproot/models/RNTuple.py index 005274a29..1bb7af305 100644 --- a/src/uproot/models/RNTuple.py +++ b/src/uproot/models/RNTuple.py @@ -61,7 +61,7 @@ def _from_zigzag(n): def _envelop_header(chunk, cursor, context): env_data = cursor.field(chunk, _rntuple_env_header_format, context) - 
env_type_id = env_data & 0xFFFF + env_type_id = uproot.const.RNTupleEnvelopeType(env_data & 0xFFFF) env_length = env_data >> 16 return {"env_type_id": env_type_id, "env_length": env_length} @@ -400,7 +400,7 @@ def base_col_form(self, cr, col_id, parameters=None, cardinality=False): parameters=parameters, ) - def col_form(self, field_id): + def col_form(self, field_id, extra_parameters=None): """ Args: field_id (int): The field id. @@ -428,7 +428,10 @@ def col_form(self, field_id): if len(rel_crs) == 1: # base case cardinality = "RNTupleCardinality" in self.field_records[field_id].type_name return self.base_col_form( - rel_crs[0], rel_crs[0].idx, cardinality=cardinality + rel_crs[0], + rel_crs[0].idx, + parameters=extra_parameters, + cardinality=cardinality, ) elif ( len(rel_crs) == 2 @@ -439,17 +442,24 @@ def col_form(self, field_id): rel_crs[1], rel_crs[1].idx, parameters={"__array__": "char"} ) form_key = f"column-{rel_crs[0].idx}" + parameters = {"__array__": "string"} + if extra_parameters is not None: + parameters.update(extra_parameters) return ak.forms.ListOffsetForm( - "i64", inner, form_key=form_key, parameters={"__array__": "string"} + "i64", inner, form_key=form_key, parameters=parameters ) else: raise (RuntimeError(f"Missing special case: {field_id}")) - def field_form(self, this_id, keys): + def field_form(self, this_id, keys, ak_add_doc=False): """ Args: this_id (int): The field id. keys (list): The list of keys to search for. + ak_add_doc (bool | dict): If True and ``library="ak"``, add the RField ``description`` + to the Awkward ``__doc__`` parameter of the array. + If dict = {key:value} and ``library="ak"``, add the RField attribute named ``value`` to the + Awkward ``key`` parameter of the array. Returns an Awkward Form describing the field. 
""" @@ -458,6 +468,15 @@ def field_form(self, this_id, keys): field_records = self.field_records this_record = field_records[this_id] structural_role = this_record.struct_role + + parameters = None + if isinstance(ak_add_doc, bool) and ak_add_doc and this_record.field_desc != "": + parameters = {"__doc__": this_record.field_desc} + elif isinstance(ak_add_doc, dict): + parameters = { + key: self.ntuple.all_fields[this_id].__getattribute__(value) + for key, value in ak_add_doc.items() + } if ( structural_role == uproot.const.RNTupleFieldRole.LEAF and this_record.repetition == 0 @@ -472,17 +491,19 @@ def field_form(self, this_id, keys): this_id = self._related_ids[tmp_id][0] # base case of recursion # n.b. the split may happen in column - return self.col_form(this_id) + return self.col_form(this_id, extra_parameters=parameters) elif structural_role == uproot.const.RNTupleFieldRole.LEAF: if this_id in self._related_ids: # std::array has only one subfield child_id = self._related_ids[this_id][0] - inner = self.field_form(child_id, keys) + inner = self.field_form(child_id, keys, ak_add_doc=ak_add_doc) else: # std::bitset has no subfields, so we use it directly inner = self.col_form(this_id) keyname = f"RegularForm-{this_id}" - return ak.forms.RegularForm(inner, this_record.repetition, form_key=keyname) + return ak.forms.RegularForm( + inner, this_record.repetition, form_key=keyname, parameters=parameters + ) elif structural_role == uproot.const.RNTupleFieldRole.COLLECTION: if this_id not in self._related_ids or len(self._related_ids[this_id]) != 1: keyname = f"vector-{this_id}" @@ -492,11 +513,15 @@ def field_form(self, this_id, keys): namelist = [] for i in newids: if any(key.startswith(self.all_fields[i].path) for key in keys): - recordlist.append(self.field_form(i, keys)) + recordlist.append( + self.field_form(i, keys, ak_add_doc=ak_add_doc) + ) namelist.append(field_records[i].field_name) if all(name == f"_{i}" for i, name in enumerate(namelist)): namelist = None - 
return ak.forms.RecordForm(recordlist, namelist, form_key="whatever") + return ak.forms.RecordForm( + recordlist, namelist, form_key="whatever", parameters=parameters + ) cfid = this_id if self.field_records[cfid].source_field_id is not None: cfid = self.field_records[cfid].source_field_id @@ -513,8 +538,10 @@ def field_form(self, this_id, keys): # this only has one child if this_id in self._related_ids: child_id = self._related_ids[this_id][0] - inner = self.field_form(child_id, keys) - return ak.forms.ListOffsetForm("i64", inner, form_key=keyname) + inner = self.field_form(child_id, keys, ak_add_doc=ak_add_doc) + return ak.forms.ListOffsetForm( + "i64", inner, form_key=keyname, parameters=parameters + ) elif structural_role == uproot.const.RNTupleFieldRole.RECORD: newids = [] if this_id in self._related_ids: @@ -524,21 +551,27 @@ def field_form(self, this_id, keys): namelist = [] for i in newids: if any(key.startswith(self.all_fields[i].path) for key in keys): - recordlist.append(self.field_form(i, keys)) + recordlist.append(self.field_form(i, keys, ak_add_doc=ak_add_doc)) namelist.append(field_records[i].field_name) if all(name == f"_{i}" for i, name in enumerate(namelist)): namelist = None - return ak.forms.RecordForm(recordlist, namelist, form_key="whatever") + return ak.forms.RecordForm( + recordlist, namelist, form_key="whatever", parameters=parameters + ) elif structural_role == uproot.const.RNTupleFieldRole.VARIANT: keyname = self.col_form(this_id) newids = [] if this_id in self._related_ids: newids = self._related_ids[this_id] - recordlist = [self.field_form(i, keys) for i in newids] + recordlist = [ + self.field_form(i, keys, ak_add_doc=ak_add_doc) for i in newids + ] inner = ak.forms.UnionForm( "i8", "i64", recordlist, form_key=keyname + "-union" ) - return ak.forms.IndexedOptionForm("i64", inner, form_key=keyname) + return ak.forms.IndexedOptionForm( + "i64", inner, form_key=keyname, parameters=parameters + ) elif structural_role == 
uproot.const.RNTupleFieldRole.STREAMER: raise NotImplementedError( f"Unsplit fields are not supported. {this_record}" @@ -811,7 +844,7 @@ def read(self, chunk, cursor, context): out = MetaData("Locator") out.num_bytes = cursor.field(chunk, _rntuple_locator_size_format, context) if out.num_bytes < 0: - out.type = -out.num_bytes >> 24 + out.type = uproot.const.RNTupleLocatorType(-out.num_bytes >> 24) if out.type == uproot.const.RNTupleLocatorType.LARGE: out.num_bytes = cursor.field( chunk, _rntuple_large_locator_size_format, context @@ -899,23 +932,25 @@ def read(self, chunk, cursor, context): out.struct_role, out.flags, ) = cursor.fields(chunk, _rntuple_field_description_format, context) + out.struct_role = uproot.const.RNTupleFieldRole(out.struct_role) + out.flags = uproot.const.RNTupleFieldFlags(out.flags) out.field_name, out.type_name, out.type_alias, out.field_desc = ( cursor.rntuple_string(chunk, context) for _ in range(4) ) - if out.flags & uproot.const.RNTupleFieldFlag.REPETITIVE: + if out.flags & uproot.const.RNTupleFieldFlags.REPETITIVE: out.repetition = cursor.field(chunk, _rntuple_repetition_format, context) else: out.repetition = 0 - if out.flags & uproot.const.RNTupleFieldFlag.PROJECTED: + if out.flags & uproot.const.RNTupleFieldFlags.PROJECTED: out.source_field_id = cursor.field( chunk, _rntuple_source_field_id_format, context ) else: out.source_field_id = None - if out.flags & uproot.const.RNTupleFieldFlag.CHECKSUM: + if out.flags & uproot.const.RNTupleFieldFlags.CHECKSUM: out.checksum = cursor.field( chunk, _rntuple_root_streamer_checksum_format, context ) @@ -932,13 +967,14 @@ def read(self, chunk, cursor, context): out.type, out.nbits, out.field_id, out.flags, out.repr_idx = cursor.fields( chunk, _rntuple_column_record_format, context ) - if out.flags & uproot.const.RNTupleColumnFlag.DEFERRED: + out.flags = uproot.const.RNTupleColumnFlags(out.flags) + if out.flags & uproot.const.RNTupleColumnFlags.DEFERRED: out.first_element_index = cursor.field( 
chunk, _rntuple_first_element_index_format, context ) else: out.first_element_index = 0 - if out.flags & uproot.const.RNTupleColumnFlag.RANGE: + if out.flags & uproot.const.RNTupleColumnFlags.RANGE: out.min_value, out.max_value = cursor.fields( chunk, _rntuple_column_range_format, context ) @@ -966,6 +1002,7 @@ def read(self, chunk, cursor, context): out.content_id, out.type_ver = cursor.fields( chunk, _rntuple_extra_type_info_format, context ) + out.content_id = uproot.const.RNTupleExtraTypeIdentifier(out.content_id) out.type_name = cursor.rntuple_string(chunk, context) return out @@ -1017,9 +1054,9 @@ def read(self, chunk, cursor, context): out.num_first_entry, out.num_entries = cursor.fields( chunk, _rntuple_cluster_summary_format, context ) - out.flags = out.num_entries >> 56 + out.flags = uproot.const.RNTupleClusterFlags(out.num_entries >> 56) out.num_entries &= 0xFFFFFFFFFFFFFF - if out.flags & uproot.const.RNTupleClusterFlag.SHARDED: + if out.flags & uproot.const.RNTupleClusterFlags.SHARDED: raise NotImplementedError("Sharded clusters are not supported.") return out @@ -1102,6 +1139,13 @@ def name(self): """ return self._ntuple.field_records[self._fid].field_name + @property + def description(self): + """ + Description of the ``RField``. 
+ """ + return self._ntuple.field_records[self._fid].field_desc + @property def typename(self): """ diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 8dc5e481b..35f142101 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -1727,7 +1727,7 @@ def add_tree( tree.write_anew(sink) return tree - def add_rntuple(self, sink, name, title, akform, description=""): + def add_rntuple(self, sink, name, description, akform): import uproot.writing._cascadentuple anchor = uproot.writing._cascadentuple.NTuple_Anchor( diff --git a/src/uproot/writing/_cascadentuple.py b/src/uproot/writing/_cascadentuple.py index 1ae2cac81..e37409940 100644 --- a/src/uproot/writing/_cascadentuple.py +++ b/src/uproot/writing/_cascadentuple.py @@ -93,11 +93,9 @@ def _cpp_typename(akform, subcall=False): elif isinstance(akform, (awkward.forms.ListOffsetForm, awkward.forms.ListForm)): content_typename = _cpp_typename(akform.content, subcall=True) typename = f"std::vector<{content_typename}>" - override_typename = akform.parameters.get("__array__", "") - if override_typename != "": - typename = ( - f"std::{override_typename}" # TODO: check if this could cause issues - ) + # Check if it contains strings and fix the type + if akform.parameters.get("__array__", "") == "string": + typename = "std::string" elif isinstance(akform, awkward.forms.RecordForm): if akform.is_tuple: field_typenames = [_cpp_typename(t, subcall=True) for t in akform.contents] @@ -139,7 +137,7 @@ def __init__( String(None, ""), String(None, ""), 0, - 13, + 0, location, created_on=created_on, big=big, @@ -160,7 +158,6 @@ def _record_frame_wrap(payload, includeself=True): def _serialize_rntuple_list_frame(items, wrap=True, rawinput=False, extra_payload=None): - # when items is [], b'\xf4\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00' n_items = len(items) if wrap and rawinput: payload_bytes = b"".join([_record_frame_wrap(x) for x in items]) @@ -196,15 +193,15 @@ def 
_serialize_envelope_header(type, length): class NTuple_Field_Description: def __init__( self, - field_version, - type_version, parent_field_id, struct_role, - flags, field_name, type_name, - type_alias, - field_description, + field_version=0, + type_version=0, + flags=uproot.const.RNTupleFieldFlags.NOFLAG, + type_alias="", + field_description="", repetition=None, ): self.field_version = field_version @@ -241,7 +238,7 @@ def serialize(self): ] ) additional_bytes = b"" - if self.flags & uproot.const.RNTupleFieldFlag.REPETITIVE: + if self.flags & uproot.const.RNTupleFieldFlags.REPETITIVE: additional_bytes += _rntuple_repetition_format.pack(self.repetition) return b"".join([header_bytes, string_bytes, additional_bytes]) @@ -290,7 +287,7 @@ def __repr__(self): ) def _build_field_col_records( - self, akform, field_name=None, parent_fid=None, add_field=True + self, akform, field_name=None, parent_fid=None, add_field=True, description="" ): field_id = len(self._field_records) if parent_fid is None: @@ -298,18 +295,16 @@ def _build_field_col_records( if field_name is None: field_name = f"_{field_id}" self._ak_node_count += 1 + if "__doc__" in akform.parameters: + description = akform.parameters["__doc__"] if isinstance(akform, awkward.forms.NumpyForm) and akform.inner_shape == (): type_name = _cpp_typename(akform) field = NTuple_Field_Description( - 0, - 0, parent_fid, uproot.const.RNTupleFieldRole.LEAF, - 0, field_name, type_name, - "", - "", + field_description=description, ) if add_field: self._field_records.append(field) @@ -331,20 +326,19 @@ def _build_field_col_records( field_name = "_0" reg_akform = reg_akform.content repetitive_flag = ( - 0 if arr_size is None else uproot.const.RNTupleFieldFlag.REPETITIVE + uproot.const.RNTupleFieldFlags.NOFLAG + if arr_size is None + else uproot.const.RNTupleFieldFlags.REPETITIVE ) type_name = _cpp_typename(reg_akform) field = NTuple_Field_Description( - 0, - 0, parent_fid, uproot.const.RNTupleFieldRole.LEAF, - repetitive_flag, 
field_name, type_name, - "", - "", + flags=repetitive_flag, repetition=arr_size, + field_description=description, ) self._field_records.append(field) ak_primitive = akform.primitive @@ -360,15 +354,11 @@ def _build_field_col_records( type_name = "std::string" field_role = uproot.const.RNTupleFieldRole.LEAF field = NTuple_Field_Description( - 0, - 0, parent_fid, field_role, - 0, field_name, type_name, - "", - "", + field_description=description, ) self._field_records.append(field) ak_offset = akform.offsets @@ -383,6 +373,7 @@ def _build_field_col_records( parent_fid=field_id, add_field=field_role == uproot.const.RNTupleFieldRole.COLLECTION, field_name="_0", + description=description, ) elif isinstance(akform, awkward.forms.ListForm): type_name = _cpp_typename(akform) @@ -391,15 +382,11 @@ def _build_field_col_records( type_name = "std::string" field_role = uproot.const.RNTupleFieldRole.LEAF field = NTuple_Field_Description( - 0, - 0, parent_fid, field_role, - 0, field_name, type_name, - "", - "", + field_description=description, ) self._field_records.append(field) # They are always converted to ListOffsetArrays with Int64 offsets @@ -414,19 +401,16 @@ def _build_field_col_records( parent_fid=field_id, add_field=field_role == uproot.const.RNTupleFieldRole.COLLECTION, field_name="_0", + description=description, ) elif isinstance(akform, awkward.forms.RecordForm): type_name = _cpp_typename(akform) field = NTuple_Field_Description( - 0, - 0, parent_fid, uproot.const.RNTupleFieldRole.RECORD, - 0, field_name, type_name, - "", - "", + field_description=description, ) self._field_records.append(field) for i, subakform in enumerate(akform.contents): @@ -440,35 +424,29 @@ def _build_field_col_records( type_name = _cpp_typename(akform) field_role = uproot.const.RNTupleFieldRole.LEAF field = NTuple_Field_Description( - 0, - 0, parent_fid, field_role, - uproot.const.RNTupleFieldFlag.REPETITIVE, field_name, type_name, - "", - "", + 
flags=uproot.const.RNTupleFieldFlags.REPETITIVE, repetition=akform.size, + field_description=description, ) self._field_records.append(field) self._build_field_col_records( akform.content, parent_fid=field_id, field_name="_0", + description=description, ) elif isinstance(akform, awkward.forms.IndexedOptionForm): type_name = _cpp_typename(akform) field = NTuple_Field_Description( - 0, - 0, parent_fid, uproot.const.RNTupleFieldRole.COLLECTION, - 0, field_name, type_name, - "", - "", + field_description=description, ) self._field_records.append(field) ak_index = akform.index @@ -482,19 +460,16 @@ def _build_field_col_records( akform.content, parent_fid=field_id, field_name="_0", + description=description, ) elif isinstance(akform, awkward.forms.UnionForm): type_name = _cpp_typename(akform) field = NTuple_Field_Description( - 0, - 0, parent_fid, uproot.const.RNTupleFieldRole.VARIANT, - 0, field_name, type_name, - "", - "", + field_description=description, ) self._field_records.append(field) type_num = uproot.const.rntuple_col_type_to_num_dict["switch"] @@ -515,6 +490,7 @@ def _build_field_col_records( akform.content, parent_fid=parent_fid, field_name=field_name, + description=description, ) else: raise NotImplementedError(f"Form type {type(akform)} cannot be written yet") @@ -1057,7 +1033,8 @@ def add_rblob( raw_data, uncompressed_bytes, ): - strings_size = 8 # TODO: What is this? + # The strings included with the RBlob ("RBlob", "", "") + strings_size = 8 # Always use big files requested_bytes = ( diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 9977a0ce7..7a0dde098 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1348,7 +1348,7 @@ def mkrntuple( self, name, ak_form_or_data, - title="", + description="", ): """ Args: @@ -1357,7 +1357,7 @@ def mkrntuple( and type specification for the fields. If a RecordForm is provided, the RNTuple will be empty. 
If a RecordArray is provided, the RNTuple will be initialized with the input data. - title (str): Title for the new RNTuple. + description (str): Description for the new RNTuple. Creates an empty RNTuple in this directory. """ @@ -1366,12 +1366,15 @@ def mkrntuple( # TODO: Think of a better alternative to this if isinstance(ak_form_or_data, uproot.extras.awkward().Array): - ntuple = self.mkrntuple(name, ak_form_or_data.layout.form, title) + ntuple = self.mkrntuple(name, ak_form_or_data.layout.form, description) ntuple.extend(ak_form_or_data) return ntuple # The rest assumes that ak_form_or_data is a RecordForm + if description == "" and "__doc__" in ak_form_or_data.parameters: + description = ak_form_or_data.parameters["__doc__"] + try: at = name.rindex("/") except ValueError: @@ -1389,7 +1392,7 @@ def mkrntuple( directory._cascading.add_rntuple( directory._file.sink, treename, - title, + description, ak_form_or_data, ), ) diff --git a/tests/test_1395_rntuple_writing_lists_and_structs.py b/tests/test_1395_rntuple_writing_lists_and_structs.py index 2decc5e91..ab2fa61dc 100644 --- a/tests/test_1395_rntuple_writing_lists_and_structs.py +++ b/tests/test_1395_rntuple_writing_lists_and_structs.py @@ -47,6 +47,13 @@ } ) +data.layout.parameters["__doc__"] = "This is the top record array" +data.layout.contents[0].parameters["__doc__"] = "This is a boolean" +data.layout.contents[10].content.parameters["__doc__"] = "This is an struct record" +data.layout.contents[10].content.contents[0].parameters[ + "__doc__" +] = "This is a subfield" + def test_writing_and_reading(tmp_path): filepath = os.path.join(tmp_path, "test.root") @@ -131,3 +138,34 @@ def test_writing_then_reading_with_ROOT(tmp_path, capfd): in out ) assert "* Field 17 : list_array (std::vector)" in out + + +def test_field_descriptions(tmp_path): + filepath = os.path.join(tmp_path, "test.root") + + with uproot.recreate(filepath) as file: + obj = file.mkrntuple("ntuple", data) # test inputting the data directly + 
obj.extend(data) + + with uproot.recreate(filepath) as file: + obj = file.mkrntuple("ntuple", data.layout.form) + obj.extend(data) + obj.extend(data) # test multiple cluster groups + + obj = uproot.open(filepath)["ntuple"] + arrays = obj.arrays(ak_add_doc=True) + + assert arrays.layout.parameters["__doc__"] == "This is the top record array" + assert arrays.layout.contents[0].parameters["__doc__"] == "This is a boolean" + assert ( + arrays.layout.contents[10].content.parameters["__doc__"] + == "This is an struct record" + ) + assert ( + arrays.layout.contents[10].content.contents[0].parameters["__doc__"] + == "This is a subfield" + ) + + arrays = obj.arrays(ak_add_doc={"typename": "typename"}) + + assert arrays.layout.contents[0].parameters["typename"] == "bool"