From e390940b1cb6432008181bb13f8d93625b13c42b Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 28 Feb 2025 14:09:38 +0100 Subject: [PATCH 1/6] Add test for `std::set` --- .gitignore | 4 ++ types/README.md | 1 + types/set/README.md | 4 ++ types/set/fundamental/README.md | 15 ++++++ types/set/fundamental/read.C | 68 +++++++++++++++++++++++++ types/set/fundamental/write.C | 67 ++++++++++++++++++++++++ types/set/nested/LinkDef.h | 6 +++ types/set/nested/Makefile | 20 ++++++++ types/set/nested/NestedSet.hxx | 4 ++ types/set/nested/README.md | 18 +++++++ types/set/nested/read.C | 90 +++++++++++++++++++++++++++++++++ types/set/nested/write.C | 78 ++++++++++++++++++++++++++++ 12 files changed, 375 insertions(+) create mode 100644 types/set/README.md create mode 100644 types/set/fundamental/README.md create mode 100644 types/set/fundamental/read.C create mode 100644 types/set/fundamental/write.C create mode 100644 types/set/nested/LinkDef.h create mode 100644 types/set/nested/Makefile create mode 100644 types/set/nested/NestedSet.hxx create mode 100644 types/set/nested/README.md create mode 100644 types/set/nested/read.C create mode 100644 types/set/nested/write.C diff --git a/.gitignore b/.gitignore index bbc0145..71659f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ *.json *.root + +# Ignore files related to dictionary generation +*.pcm +Nested*.cxx diff --git a/types/README.md b/types/README.md index 27dd4a7..1a8d1dd 100644 --- a/types/README.md +++ b/types/README.md @@ -2,6 +2,7 @@ * [`fundamental`](fundamental): fundamental column types * [`optional`](optional): `std::optional` with different element types + * [`set`](set): `std::set` with all `[Split]Index{32,64}` column types * [`string`](string): `std::string` with all `[Split]Index{32,64}` column types * [`unique_ptr`](unique_ptr): `std::unique_ptr` with different element types * [`variant`](variant): `std::variant` with `Switch` column type diff --git a/types/set/README.md b/types/set/README.md 
new file mode 100644 index 0000000..59141d6 --- /dev/null +++ b/types/set/README.md @@ -0,0 +1,4 @@ +# `std::set` + + * [`fundamental`](fundamental): `std::set` + * [`nested`](nested): `std::set>` diff --git a/types/set/fundamental/README.md b/types/set/fundamental/README.md new file mode 100644 index 0000000..7356dad --- /dev/null +++ b/types/set/fundamental/README.md @@ -0,0 +1,15 @@ +# `std::set` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. diff --git a/types/set/fundamental/read.C b/types/set/fundamental/read.C new file mode 100644 index 0000000..e9da3a7 --- /dev/null +++ b/types/set/fundamental/read.C @@ -0,0 +1,68 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using Set = std::set; + +static void PrintSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Set &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.set.fundamental.root", + std::string_view output = "types.set.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + 
} else { + os << ",\n"; + } + os << " {\n"; + + PrintSetValue(entry, "Index32", os); + PrintSetValue(entry, "Index64", os); + PrintSetValue(entry, "SplitIndex32", os); + PrintSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/set/fundamental/write.C b/types/set/fundamental/write.C new file mode 100644 index 0000000..662d354 --- /dev/null +++ b/types/set/fundamental/write.C @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using Set = std::set; + +static std::shared_ptr MakeSetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.set.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second 
entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); +} diff --git a/types/set/nested/LinkDef.h b/types/set/nested/LinkDef.h new file mode 100644 index 0000000..232c236 --- /dev/null +++ b/types/set/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::set>+; +#endif diff --git a/types/set/nested/Makefile b/types/set/nested/Makefile new file mode 100644 index 0000000..836e1a2 --- /dev/null +++ b/types/set/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedSet.cxx libNestedSet.so + +NestedSet.cxx: NestedSet.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedSet.so: NestedSet.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedSet.cxx NestedSet_rdict.pcm libNestedSet.so diff --git a/types/set/nested/NestedSet.hxx b/types/set/nested/NestedSet.hxx new file mode 100644 index 0000000..b52b7e0 --- /dev/null +++ b/types/set/nested/NestedSet.hxx @@ -0,0 +1,4 @@ +#pragma once + +#include +#include diff --git a/types/set/nested/README.md b/types/set/nested/README.md new file mode 100644 index 0000000..a3b50f0 --- /dev/null +++ b/types/set/nested/README.md @@ -0,0 +1,18 @@ +# `std::set>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. 
Increasing number of elements in the outer set, with arbitrary lengths of the inner sets + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedSet` shared object. diff --git a/types/set/nested/read.C b/types/set/nested/read.C new file mode 100644 index 0000000..6b87962 --- /dev/null +++ b/types/set/nested/read.C @@ -0,0 +1,90 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using Set = std::set>; + +static void PrintNestedSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Set &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (auto inner : value) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.set.nested.root", + std::string_view output = "types.set.nested.json") { + if (!std::filesystem::exists("libNestedSet.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + + gSystem->Load("libNestedSet"); + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedSetValue(entry, "Index32", 
os); + PrintNestedSetValue(entry, "Index64", os); + PrintNestedSetValue(entry, "SplitIndex32", os); + PrintNestedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/set/nested/write.C b/types/set/nested/write.C new file mode 100644 index 0000000..069421e --- /dev/null +++ b/types/set/nested/write.C @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +using Set = std::set>; + +static std::shared_ptr MakeSetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.set.nested.root") { + if (!std::filesystem::exists("libNestedSet.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + + gSystem->Load("libNestedSet"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + 
RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{}, {4}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {10}, {11, 12}}; + writer->Fill(); +} From 8e2efd40dba6f1a94fa3c7e5d36131fa02db2c38 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 28 Feb 2025 14:34:29 +0100 Subject: [PATCH 2/6] Add test for `std::unordered_set` --- types/README.md | 1 + types/unordered_set/README.md | 4 + types/unordered_set/fundamental/README.md | 15 +++ types/unordered_set/fundamental/read.C | 72 ++++++++++++ types/unordered_set/fundamental/write.C | 69 ++++++++++++ types/unordered_set/nested/LinkDef.h | 6 + types/unordered_set/nested/Makefile | 20 ++++ .../nested/NestedUnorderedSet.hxx | 15 +++ types/unordered_set/nested/README.md | 18 +++ types/unordered_set/nested/read.C | 104 ++++++++++++++++++ types/unordered_set/nested/write.C | 81 ++++++++++++++ 11 files changed, 405 insertions(+) create mode 100644 types/unordered_set/README.md create mode 100644 types/unordered_set/fundamental/README.md create mode 100644 types/unordered_set/fundamental/read.C create mode 100644 types/unordered_set/fundamental/write.C create mode 100644 types/unordered_set/nested/LinkDef.h create mode 100644 types/unordered_set/nested/Makefile create mode 100644 types/unordered_set/nested/NestedUnorderedSet.hxx create mode 100644 types/unordered_set/nested/README.md create mode 100644 types/unordered_set/nested/read.C create mode 100644 types/unordered_set/nested/write.C diff --git a/types/README.md b/types/README.md index 1a8d1dd..8cc4ba6 100644 --- a/types/README.md 
+++ b/types/README.md @@ -5,5 +5,6 @@ * [`set`](set): `std::set` with all `[Split]Index{32,64}` column types * [`string`](string): `std::string` with all `[Split]Index{32,64}` column types * [`unique_ptr`](unique_ptr): `std::unique_ptr` with different element types + * [`unordered_set`](unordered_set): `std::unordered_set` with all `[Split]Index{32,64}` column types * [`variant`](variant): `std::variant` with `Switch` column type * [`vector`](vector): `std::vector` with all `[Split]Index{32,64}` column types diff --git a/types/unordered_set/README.md b/types/unordered_set/README.md new file mode 100644 index 0000000..3a8ff4b --- /dev/null +++ b/types/unordered_set/README.md @@ -0,0 +1,4 @@ +# `std::unordered_set` + + * [`fundamental`](fundamental): `std::unordered_set` + * [`nested`](nested): `std::unordered_set>` diff --git a/types/unordered_set/fundamental/README.md b/types/unordered_set/fundamental/README.md new file mode 100644 index 0000000..13ecc45 --- /dev/null +++ b/types/unordered_set/fundamental/README.md @@ -0,0 +1,15 @@ +# `std::unordered_set` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. 
diff --git a/types/unordered_set/fundamental/read.C b/types/unordered_set/fundamental/read.C new file mode 100644 index 0000000..daf2502 --- /dev/null +++ b/types/unordered_set/fundamental/read.C @@ -0,0 +1,72 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using UnorderedSet = std::unordered_set; + +static void PrintUnorderedSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + UnorderedSet &value = *entry.GetPtr(name); + + std::vector valueSorted(value.begin(), value.end()); + std::sort(valueSorted.begin(), valueSorted.end()); + + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : valueSorted) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!valueSorted.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_set.fundamental.root", + std::string_view output = "types.unordered_set.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintUnorderedSetValue(entry, "Index32", os); + PrintUnorderedSetValue(entry, "Index64", os); + PrintUnorderedSetValue(entry, "SplitIndex32", os); + PrintUnorderedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_set/fundamental/write.C b/types/unordered_set/fundamental/write.C new file mode 100644 index 0000000..1233983 --- /dev/null +++ b/types/unordered_set/fundamental/write.C @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using UnorderedSet = std::unordered_set; + +static std::shared_ptr +MakeUnorderedSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.unordered_set.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeUnorderedSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeUnorderedSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = 
{4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); +} diff --git a/types/unordered_set/nested/LinkDef.h b/types/unordered_set/nested/LinkDef.h new file mode 100644 index 0000000..74a49be --- /dev/null +++ b/types/unordered_set/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::unordered_set>+; +#endif diff --git a/types/unordered_set/nested/Makefile b/types/unordered_set/nested/Makefile new file mode 100644 index 0000000..66c4e16 --- /dev/null +++ b/types/unordered_set/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedUnorderedSet.cxx libNestedUnorderedSet.so + +NestedUnorderedSet.cxx: NestedUnorderedSet.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedUnorderedSet.so: NestedUnorderedSet.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedUnorderedSet.cxx NestedUnorderedSet_rdict.pcm libNestedUnorderedSet.so diff --git a/types/unordered_set/nested/NestedUnorderedSet.hxx b/types/unordered_set/nested/NestedUnorderedSet.hxx new file mode 100644 index 0000000..9629461 --- /dev/null +++ b/types/unordered_set/nested/NestedUnorderedSet.hxx @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +template <> struct std::hash> { + std::size_t + operator()(const std::unordered_set &s) const noexcept { + std::size_t h = 0; + for (const auto &el : s) { + h ^= std::hash{}(el); + } + return h; + } +}; diff --git a/types/unordered_set/nested/README.md b/types/unordered_set/nested/README.md new file mode 100644 index 0000000..6511f6b --- /dev/null +++ b/types/unordered_set/nested/README.md @@ -0,0 +1,18 @@ +# `std::unordered_set>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection 
parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the outer set, with arbitrary lengths of the inner sets + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedUnorderedSet` shared object. diff --git a/types/unordered_set/nested/read.C b/types/unordered_set/nested/read.C new file mode 100644 index 0000000..43ead18 --- /dev/null +++ b/types/unordered_set/nested/read.C @@ -0,0 +1,104 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using UnorderedSet = std::unordered_set>; + +static void PrintNestedUnorderedSetValue(const REntry &entry, + std::string_view name, + std::ostream &os, bool last = false) { + UnorderedSet &value = *entry.GetPtr(name); + + // Unordered sets have no defined iteration order: copy the elements into + // sorted vectors so that the JSON output is deterministic. + std::vector> valueSorted; + + for (const auto &inner : value) { + std::vector innerSorted(inner.begin(), inner.end()); + std::sort(innerSorted.begin(), innerSorted.end()); + valueSorted.push_back(innerSorted); + } + + std::sort(valueSorted.begin(), valueSorted.end()); + + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (const auto &inner : valueSorted) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!valueSorted.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_set.nested.root", + std::string_view output = 
"types.unordered_set.nested.json") { + if (!std::filesystem::exists("libNestedUnorderedSet.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + gSystem->Load("libNestedUnorderedSet"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedUnorderedSetValue(entry, "Index32", os); + PrintNestedUnorderedSetValue(entry, "Index64", os); + PrintNestedUnorderedSetValue(entry, "SplitIndex32", os); + PrintNestedUnorderedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_set/nested/write.C b/types/unordered_set/nested/write.C new file mode 100644 index 0000000..f222c61 --- /dev/null +++ b/types/unordered_set/nested/write.C @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +#include "NestedUnorderedSet.hxx" + +using UnorderedSet = std::unordered_set>; + +static std::shared_ptr +MakeUnorderedSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = 
"types.unordered_set.nested.root") { + if (!std::filesystem::exists("libNestedUnorderedSet.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + gSystem->Load("libNestedUnorderedSet"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeUnorderedSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeUnorderedSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{}, {4}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {10}, {11, 12}}; + writer->Fill(); +} From fcb348b8e8e32e4054175a7818ddec089207c2a1 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 28 Feb 2025 15:17:47 +0100 Subject: [PATCH 3/6] Add test for `std::multiset` --- types/README.md | 1 + types/multiset/README.md | 4 ++ types/multiset/fundamental/README.md | 15 ++++ types/multiset/fundamental/read.C | 68 ++++++++++++++++++ types/multiset/fundamental/write.C | 67 ++++++++++++++++++ types/multiset/nested/LinkDef.h | 6 ++ types/multiset/nested/Makefile | 20 ++++++ types/multiset/nested/NestedMultiset.hxx | 4 ++ types/multiset/nested/README.md | 18 +++++ 
types/multiset/nested/read.C | 90 ++++++++++++++++++++++++ types/multiset/nested/write.C | 78 ++++++++++++++++++++ 11 files changed, 371 insertions(+) create mode 100644 types/multiset/README.md create mode 100644 types/multiset/fundamental/README.md create mode 100644 types/multiset/fundamental/read.C create mode 100644 types/multiset/fundamental/write.C create mode 100644 types/multiset/nested/LinkDef.h create mode 100644 types/multiset/nested/Makefile create mode 100644 types/multiset/nested/NestedMultiset.hxx create mode 100644 types/multiset/nested/README.md create mode 100644 types/multiset/nested/read.C create mode 100644 types/multiset/nested/write.C diff --git a/types/README.md b/types/README.md index 8cc4ba6..6067dc6 100644 --- a/types/README.md +++ b/types/README.md @@ -2,6 +2,7 @@ * [`fundamental`](fundamental): fundamental column types * [`optional`](optional): `std::optional` with different element types + * [`multiset`](multiset): `std::multiset` with all `[Split]Index{32,64}` column types * [`set`](set): `std::set` with all `[Split]Index{32,64}` column types * [`string`](string): `std::string` with all `[Split]Index{32,64}` column types * [`unique_ptr`](unique_ptr): `std::unique_ptr` with different element types diff --git a/types/multiset/README.md b/types/multiset/README.md new file mode 100644 index 0000000..637bdcf --- /dev/null +++ b/types/multiset/README.md @@ -0,0 +1,4 @@ +# `std::multiset` + + * [`fundamental`](fundamental): `std::multiset` + * [`nested`](nested): `std::multiset>` diff --git a/types/multiset/fundamental/README.md b/types/multiset/fundamental/README.md new file mode 100644 index 0000000..ed57daa --- /dev/null +++ b/types/multiset/fundamental/README.md @@ -0,0 +1,15 @@ +# `std::multiset` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. 
Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. diff --git a/types/multiset/fundamental/read.C b/types/multiset/fundamental/read.C new file mode 100644 index 0000000..9e24295 --- /dev/null +++ b/types/multiset/fundamental/read.C @@ -0,0 +1,68 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using Multiset = std::multiset; + +static void PrintMultisetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Multiset &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.multiset.fundamental.root", + std::string_view output = "types.multiset.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintMultisetValue(entry, "Index32", os); + PrintMultisetValue(entry, "Index64", os); + PrintMultisetValue(entry, "SplitIndex32", os); + PrintMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/multiset/fundamental/write.C b/types/multiset/fundamental/write.C new file mode 100644 index 0000000..0654e7f --- /dev/null +++ b/types/multiset/fundamental/write.C @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using Multiset = std::multiset; + +static std::shared_ptr MakeMultisetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.multiset.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeMultisetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeMultisetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + 
writer->Fill(); +} diff --git a/types/multiset/nested/LinkDef.h b/types/multiset/nested/LinkDef.h new file mode 100644 index 0000000..c5651ce --- /dev/null +++ b/types/multiset/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::multiset>+; +#endif diff --git a/types/multiset/nested/Makefile b/types/multiset/nested/Makefile new file mode 100644 index 0000000..9cce17d --- /dev/null +++ b/types/multiset/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedMultiset.cxx libNestedMultiset.so + +NestedMultiset.cxx: NestedMultiset.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedMultiset.so: NestedMultiset.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedMultiset.cxx NestedMultiset_rdict.pcm libNestedMultiset.so diff --git a/types/multiset/nested/NestedMultiset.hxx b/types/multiset/nested/NestedMultiset.hxx new file mode 100644 index 0000000..b52b7e0 --- /dev/null +++ b/types/multiset/nested/NestedMultiset.hxx @@ -0,0 +1,4 @@ +#pragma once + +#include +#include diff --git a/types/multiset/nested/README.md b/types/multiset/nested/README.md new file mode 100644 index 0000000..ca161ee --- /dev/null +++ b/types/multiset/nested/README.md @@ -0,0 +1,18 @@ +# `std::multiset>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. 
Increasing number of elements in the outer set, with arbitrary lengths of the inner sets + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedMultiset` shared object. diff --git a/types/multiset/nested/read.C b/types/multiset/nested/read.C new file mode 100644 index 0000000..04153a8 --- /dev/null +++ b/types/multiset/nested/read.C @@ -0,0 +1,90 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using Multiset = std::multiset>; + +static void PrintNestedMultisetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Multiset &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (auto inner : value) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.multiset.nested.root", + std::string_view output = "types.multiset.nested.json") { + if (!std::filesystem::exists("libNestedMultiset.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + + gSystem->Load("libNestedMultiset"); + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + 
os << " {\n"; + + PrintNestedMultisetValue(entry, "Index32", os); + PrintNestedMultisetValue(entry, "Index64", os); + PrintNestedMultisetValue(entry, "SplitIndex32", os); + PrintNestedMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/multiset/nested/write.C b/types/multiset/nested/write.C new file mode 100644 index 0000000..f951526 --- /dev/null +++ b/types/multiset/nested/write.C @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +using Multiset = std::multiset>; + +static std::shared_ptr MakeMultisetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.multiset.nested.root") { + if (!std::filesystem::exists("libNestedMultiset.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + + gSystem->Load("libNestedMultiset"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeMultisetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeMultisetField(*model, 
"SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{}, {4}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {10}, {11, 12}}; + writer->Fill(); +} From e9ee6aaee64b713037602d747386f81e5e5c6975 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 28 Feb 2025 15:05:25 +0100 Subject: [PATCH 4/6] Add test for `std::unordered_multiset` --- types/README.md | 1 + types/unordered_multiset/README.md | 4 + .../unordered_multiset/fundamental/README.md | 15 +++ types/unordered_multiset/fundamental/read.C | 74 ++++++++++++ types/unordered_multiset/fundamental/write.C | 70 ++++++++++++ types/unordered_multiset/nested/LinkDef.h | 6 + types/unordered_multiset/nested/Makefile | 20 ++++ .../nested/NestedUnorderedMultiset.hxx | 16 +++ types/unordered_multiset/nested/README.md | 18 +++ types/unordered_multiset/nested/read.C | 105 ++++++++++++++++++ types/unordered_multiset/nested/write.C | 82 ++++++++++++++ 11 files changed, 411 insertions(+) create mode 100644 types/unordered_multiset/README.md create mode 100644 types/unordered_multiset/fundamental/README.md create mode 100644 types/unordered_multiset/fundamental/read.C create mode 100644 types/unordered_multiset/fundamental/write.C create mode 100644 types/unordered_multiset/nested/LinkDef.h create mode 100644 types/unordered_multiset/nested/Makefile create mode 100644 types/unordered_multiset/nested/NestedUnorderedMultiset.hxx create mode 100644 
types/unordered_multiset/nested/README.md create mode 100644 types/unordered_multiset/nested/read.C create mode 100644 types/unordered_multiset/nested/write.C diff --git a/types/README.md b/types/README.md index 6067dc6..0d8d046 100644 --- a/types/README.md +++ b/types/README.md @@ -6,6 +6,7 @@ * [`set`](set): `std::set` with all `[Split]Index{32,64}` column types * [`string`](string): `std::string` with all `[Split]Index{32,64}` column types * [`unique_ptr`](unique_ptr): `std::unique_ptr` with different element types + * [`unordered_multiset`](unordered_multiset): `std::unordered_multiset` with all `[Split]Index{32,64}` column types * [`unordered_set`](unordered_set): `std::unordered_set` with all `[Split]Index{32,64}` column types * [`variant`](variant): `std::variant` with `Switch` column type * [`vector`](vector): `std::vector` with all `[Split]Index{32,64}` column types diff --git a/types/unordered_multiset/README.md b/types/unordered_multiset/README.md new file mode 100644 index 0000000..21eb0a2 --- /dev/null +++ b/types/unordered_multiset/README.md @@ -0,0 +1,4 @@ +# `std::unordered_multiset` + + * [`fundamental`](fundamental): `std::unordered_multiset` + * [`nested`](nested): `std::unordered_multiset>` diff --git a/types/unordered_multiset/fundamental/README.md b/types/unordered_multiset/fundamental/README.md new file mode 100644 index 0000000..e5404f5 --- /dev/null +++ b/types/unordered_multiset/fundamental/README.md @@ -0,0 +1,15 @@ +# `std::unordered_multiset` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. 
diff --git a/types/unordered_multiset/fundamental/read.C b/types/unordered_multiset/fundamental/read.C new file mode 100644 index 0000000..8b4b422 --- /dev/null +++ b/types/unordered_multiset/fundamental/read.C @@ -0,0 +1,74 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using UnorderedMultiSet = std::unordered_multiset; + +static void PrintUnorderedMultiSetValue(const REntry &entry, + std::string_view name, std::ostream &os, + bool last = false) { + UnorderedMultiSet &value = *entry.GetPtr(name); + + std::vector valueSorted(value.begin(), value.end()); + std::sort(valueSorted.begin(), valueSorted.end()); + + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : valueSorted) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!valueSorted.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read( + std::string_view input = "types.unordered_multiset.fundamental.root", + std::string_view output = "types.unordered_multiset.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintUnorderedMultiSetValue(entry, "Index32", os); + PrintUnorderedMultiSetValue(entry, "Index64", os); + PrintUnorderedMultiSetValue(entry, "SplitIndex32", os); + PrintUnorderedMultiSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_multiset/fundamental/write.C b/types/unordered_multiset/fundamental/write.C new file mode 100644 index 0000000..2363242 --- /dev/null +++ b/types/unordered_multiset/fundamental/write.C @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using UnorderedMultiSet = std::unordered_multiset; + +static std::shared_ptr +MakeUnorderedMultiSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write( + std::string_view filename = "types.unordered_multiset.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedMultiSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedMultiSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = MakeUnorderedMultiSetField(*model, "SplitIndex32", + EColumnType::kSplitIndex32); + auto SplitIndex64 = MakeUnorderedMultiSetField(*model, "SplitIndex64", + EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the 
set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); +} diff --git a/types/unordered_multiset/nested/LinkDef.h b/types/unordered_multiset/nested/LinkDef.h new file mode 100644 index 0000000..0f5799a --- /dev/null +++ b/types/unordered_multiset/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::unordered_multiset>+; +#endif diff --git a/types/unordered_multiset/nested/Makefile b/types/unordered_multiset/nested/Makefile new file mode 100644 index 0000000..2eadb02 --- /dev/null +++ b/types/unordered_multiset/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedUnorderedMultiset.cxx libNestedUnorderedMultiset.so + +NestedUnorderedMultiset.cxx: NestedUnorderedMultiset.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedUnorderedMultiset.so: NestedUnorderedMultiset.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedUnorderedMultiset.cxx NestedUnorderedMultiset_rdict.pcm libNestedUnorderedMultiset.so diff --git a/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx b/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx new file mode 100644 index 0000000..d4b10d8 --- /dev/null +++ b/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +template<> +struct std::hash> { + std::size_t + operator()(const std::unordered_multiset &s) const noexcept { + std::size_t h = 0; + for (const auto &el : s) { + h ^= std::hash{}(el); + } + return h; + } +}; diff --git a/types/unordered_multiset/nested/README.md b/types/unordered_multiset/nested/README.md new file mode 100644 index 0000000..5413f61 --- /dev/null +++ 
b/types/unordered_multiset/nested/README.md @@ -0,0 +1,18 @@ +# `std::unordered_multiset>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the outer set, with arbitrary lengths of the inner sets + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedUnorderedMultiset` shared object. diff --git a/types/unordered_multiset/nested/read.C b/types/unordered_multiset/nested/read.C new file mode 100644 index 0000000..229bc00 --- /dev/null +++ b/types/unordered_multiset/nested/read.C @@ -0,0 +1,105 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using UnorderedMultiset = + std::unordered_multiset>; + +static void PrintNestedUnorderedMultisetValue(const REntry &entry, + std::string_view name, + std::ostream &os, + bool last = false) { + UnorderedMultiset &value = *entry.GetPtr(name); + + std::vector valueInnerSorted(value.begin(), + value.end()); + std::vector> + valueSorted; + + for (auto inner : value) { + std::vector innerSorted(inner.begin(), inner.end()); + std::sort(innerSorted.begin(), innerSorted.end()); + valueSorted.push_back(innerSorted); +} + + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (auto inner : valueSorted) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!valueSorted.empty()) { 
+ os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_multiset.nested.root", + std::string_view output = "types.unordered_multiset.nested.json") { + if (!std::filesystem::exists("libNestedUnorderedMultiset.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + gSystem->Load("libNestedUnorderedMultiset"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedUnorderedMultisetValue(entry, "Index32", os); + PrintNestedUnorderedMultisetValue(entry, "Index64", os); + PrintNestedUnorderedMultisetValue(entry, "SplitIndex32", os); + PrintNestedUnorderedMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_multiset/nested/write.C b/types/unordered_multiset/nested/write.C new file mode 100644 index 0000000..1fae99b --- /dev/null +++ b/types/unordered_multiset/nested/write.C @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +#include "NestedUnorderedMultiset.hxx" + +using UnorderedMultiset = + std::unordered_multiset>; + +static std::shared_ptr +MakeUnorderedMultisetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.unordered_multiset.nested.root") { + if (!std::filesystem::exists("libNestedUnorderedMultiset.so")) { + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + } + gSystem->Load("libNestedUnorderedMultiset"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = MakeUnorderedMultisetField(*model, "SplitIndex32", + EColumnType::kSplitIndex32); + auto SplitIndex64 = MakeUnorderedMultisetField(*model, "SplitIndex64", + EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, 
options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{}, {4}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {10}, {11, 12}}; + writer->Fill(); +} From 5e573e3a4fcea42f29746c0c1cf6d86e38b5c8f1 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Mon, 17 Mar 2025 11:15:16 +0100 Subject: [PATCH 5/6] Automatically create and load dictionaries ...for the tests that need them (currently, all nested `std::set` and friends). --- Makefile | 10 ++++++++-- types/multiset/nested/read.C | 4 +--- types/multiset/nested/write.C | 5 +---- types/set/nested/read.C | 4 +--- types/set/nested/write.C | 5 +---- types/unordered_multiset/nested/read.C | 4 +--- types/unordered_multiset/nested/write.C | 4 +--- types/unordered_set/nested/read.C | 4 +--- types/unordered_set/nested/write.C | 4 +--- 9 files changed, 16 insertions(+), 28 deletions(-) diff --git a/Makefile b/Makefile index c220bce..cadf867 100644 --- a/Makefile +++ b/Makefile @@ -5,17 +5,23 @@ endif .PHONY: all all: + $(MAKE) dict $(MAKE) write $(MAKE) read # This assumes there is no whitespace in any of the paths... +DICT_MAKEFILE_DIR := $(sort $(shell find */ -name Makefile -printf "%h\n")) WRITE_C := $(sort $(shell find . -name write.C)) READ_C := $(sort $(shell find . 
-name read.C)) +.PHONY: dict +dict: + @$(foreach d,$(DICT_MAKEFILE_DIR),make -C $(d) &&) true + .PHONY: write write: - @$(foreach c,$(WRITE_C),$(ROOT_EXE) -q -l $(c) &&) true + @$(foreach c,$(WRITE_C),LD_LIBRARY_PATH=$(shell dirname $(c)) $(ROOT_EXE) -q -l $(c) &&) true .PHONY: read read: - @$(foreach c,$(READ_C),$(ROOT_EXE) -q -l $(c) &&) true + @$(foreach c,$(READ_C),LD_LIBRARY_PATH=$(shell dirname $(c)) $(ROOT_EXE) -q -l $(c) &&) true diff --git a/types/multiset/nested/read.C b/types/multiset/nested/read.C index 04153a8..b19ffb7 100644 --- a/types/multiset/nested/read.C +++ b/types/multiset/nested/read.C @@ -54,12 +54,10 @@ static void PrintNestedMultisetValue(const REntry &entry, std::string_view name, void read(std::string_view input = "types.multiset.nested.root", std::string_view output = "types.multiset.nested.json") { - if (!std::filesystem::exists("libNestedMultiset.so")) { + if (gSystem->Load("libNestedMultiset") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedMultiset"); std::ofstream os(std::string{output}); os << "[\n"; diff --git a/types/multiset/nested/write.C b/types/multiset/nested/write.C index f951526..710787b 100644 --- a/types/multiset/nested/write.C +++ b/types/multiset/nested/write.C @@ -31,12 +31,9 @@ static std::shared_ptr MakeMultisetField(RNTupleModel &model, } void write(std::string_view filename = "types.multiset.nested.root") { - if (!std::filesystem::exists("libNestedMultiset.so")) { + if (gSystem->Load("libNestedMultiset") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - - gSystem->Load("libNestedMultiset"); auto model = RNTupleModel::Create(); diff --git a/types/set/nested/read.C b/types/set/nested/read.C index 6b87962..30ba4d2 100644 --- a/types/set/nested/read.C +++ b/types/set/nested/read.C @@ -54,12 +54,10 @@ static void PrintNestedSetValue(const 
REntry &entry, std::string_view name, void read(std::string_view input = "types.set.nested.root", std::string_view output = "types.set.nested.json") { - if (!std::filesystem::exists("libNestedSet.so")) { + if (gSystem->Load("libNestedSet") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedSet"); std::ofstream os(std::string{output}); os << "[\n"; diff --git a/types/set/nested/write.C b/types/set/nested/write.C index 069421e..f93b38b 100644 --- a/types/set/nested/write.C +++ b/types/set/nested/write.C @@ -31,12 +31,9 @@ static std::shared_ptr MakeSetField(RNTupleModel &model, } void write(std::string_view filename = "types.set.nested.root") { - if (!std::filesystem::exists("libNestedSet.so")) { + if (gSystem->Load("libNestedSet") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - - gSystem->Load("libNestedSet"); auto model = RNTupleModel::Create(); diff --git a/types/unordered_multiset/nested/read.C b/types/unordered_multiset/nested/read.C index 229bc00..1180744 100644 --- a/types/unordered_multiset/nested/read.C +++ b/types/unordered_multiset/nested/read.C @@ -69,11 +69,9 @@ static void PrintNestedUnorderedMultisetValue(const REntry &entry, void read(std::string_view input = "types.unordered_multiset.nested.root", std::string_view output = "types.unordered_multiset.nested.json") { - if (!std::filesystem::exists("libNestedUnorderedMultiset.so")) { + if (gSystem->Load("libNestedUnorderedMultiset") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedUnorderedMultiset"); std::ofstream os(std::string{output}); os << "[\n"; diff --git a/types/unordered_multiset/nested/write.C b/types/unordered_multiset/nested/write.C index 1fae99b..62e5866 100644 --- a/types/unordered_multiset/nested/write.C 
+++ b/types/unordered_multiset/nested/write.C @@ -34,11 +34,9 @@ MakeUnorderedMultisetField(RNTupleModel &model, std::string_view name, } void write(std::string_view filename = "types.unordered_multiset.nested.root") { - if (!std::filesystem::exists("libNestedUnorderedMultiset.so")) { + if (gSystem->Load("libNestedUnorderedMultiset") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedUnorderedMultiset"); auto model = RNTupleModel::Create(); diff --git a/types/unordered_set/nested/read.C b/types/unordered_set/nested/read.C index 43ead18..82ebc43 100644 --- a/types/unordered_set/nested/read.C +++ b/types/unordered_set/nested/read.C @@ -68,11 +68,9 @@ static void PrintNestedUnorderedSetValue(const REntry &entry, void read(std::string_view input = "types.unordered_set.nested.root", std::string_view output = "types.unordered_set.nested.json") { - if (!std::filesystem::exists("libNestedUnorderedSet.so")) { + if (gSystem->Load("libNestedUnorderedSet") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedUnorderedSet"); std::ofstream os(std::string{output}); os << "[\n"; diff --git a/types/unordered_set/nested/write.C b/types/unordered_set/nested/write.C index f222c61..e0afbcb 100644 --- a/types/unordered_set/nested/write.C +++ b/types/unordered_set/nested/write.C @@ -33,11 +33,9 @@ MakeUnorderedSetField(RNTupleModel &model, std::string_view name, } void write(std::string_view filename = "types.unordered_set.nested.root") { - if (!std::filesystem::exists("libNestedUnorderedSet.so")) { + if (gSystem->Load("libNestedUnorderedSet") == -1) throw std::runtime_error("could not find the required ROOT dictionaries, " "please make sure to run `make` first"); - } - gSystem->Load("libNestedUnorderedSet"); auto model = RNTupleModel::Create(); From 
016171c45f1b650aa49cf5509d9d1bd1451fc616 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Thu, 8 May 2025 08:53:42 +0200 Subject: [PATCH 6/6] Slightly reword and fix grammar --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9cdad0f..1e90883 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ To that end, each test comes with a written description of the schema and the ex Eventually, we will also provide a set of reference `.root` and `.json` files (see below). > [!WARNING] -> The RNTuple Validation Suite is currently work-in-progress. +> The RNTuple Validation Suite is currently work in progress. > Its structure and implementation may change during development. > We welcome early feedback on the current state from any interested party. @@ -22,7 +22,7 @@ More tests are planned in the future, please [consult the list of issues](https: ## Reference Implementation This repository also contains a reference implementation with ROOT macros. -They currently target ROOT v6.34 with the first official version of the on-disk binary format. +They currently target ROOT v6.34 with the first official version of the RNTuple on-disk binary format. In this release, the API is not yet finalized and all classes are in the `ROOT::Experimental` namespace. We plan to rely on this version until a first version of the RNTuple Validation Suite is done. Then we will produce a first set of reference files that can be used to test backwards compatibility.