From 1387ea228788e918d9d6c6fb8869203b5bd48634 Mon Sep 17 00:00:00 2001
From: Sergei Zimmerman
Date: Mon, 30 Jun 2025 15:38:21 +0000
Subject: [PATCH] [llvm][YAML] Support aliases (anchors) in YAMLIO

This change makes it possible to use YAML anchors [1], [2] with
YAMLTraits. All of the necessary parser machinery already exists, so the
only change that is necessary is to wire it up to YAMLTraits. This is
done by keeping track of all `Anchor` -> `HNode *` mappings and reusing
those when an `AliasNode` is encountered.

In accordance with the spec [2], anchors do not have to be unique: an
alias refers to the most recent node in the serialization that carries
the given anchor.

Example usage:

```yaml
foo: &a 42
bar: *a
```

The above would be deserialized as:

```yaml
foo: 42
bar: 42
```

Note that aliases are a serialization detail and can be discarded during
composition into a Representation Graph (`HNode` hierarchy).

The anchor table is cleared whenever the HNode buffers are released, so
it never hands out pointers to destroyed nodes.

Limitation: anchors are recorded for scalar, sequence and mapping nodes
only. An alias that refers to an anchored block scalar is reported as
an undefined alias.

[1]: https://yaml.org/spec/1.2.2/#692-node-anchors
[2]: https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases
---
 llvm/include/llvm/Support/YAMLTraits.h |  5 ++
 llvm/lib/Support/YAMLTraits.cpp        | 29 +++++++++-
 llvm/unittests/Support/YAMLIOTest.cpp  | 79 ++++++++++++++++++++++++++
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 1b1d78ded8642..6701bb8245bed 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -11,6 +11,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
@@ -1539,6 +1540,8 @@ class LLVM_ABI Input : public IO {
     std::vector<HNode *> Entries;
   };
 
+  /// Records \p hnode as the target of \p node's anchor, if it has one.
+  void saveAliasHNode(Node *node, HNode *hnode);
   Input::HNode *createHNodes(Node *node);
   void setError(HNode *hnode, const Twine &message);
   void setError(Node *node, const Twine &message);
@@ -1577,6 +1580,8 @@ class LLVM_ABI Input : public IO {
   HNode *CurrentNode = nullptr;
   bool ScalarMatchFound = false;
   bool AllowUnknownKeys = false;
+  /// Maps an anchor name to the most recently created HNode carrying it.
+  DenseMap<StringRef, HNode *> AliasMap;
 };
 
 ///
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 035828b594e84..390e9d86fb526 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -397,6 +397,19 @@ void Input::releaseHNodeBuffers() {
   MapHNodeAllocator.DestroyAll();
+  // Recorded anchors point at HNodes that have just been destroyed.
+  AliasMap.clear();
 }
 
+void Input::saveAliasHNode(Node *N, HNode *HN) {
+  // YAML 1.2.2 - 3.2.2.2. Anchors and Aliases:
+  //
+  // An alias event refers to the most recent event in the serialization
+  // having the specified anchor. Therefore, anchors need not be unique within
+  // a serialization. In addition, an anchor need not have an alias node
+  // referring to it.
+  if (auto Anchor = N->getAnchor(); !Anchor.empty())
+    AliasMap[Anchor] = HN;
+}
+
 Input::HNode *Input::createHNodes(Node *N) {
   SmallString<128> StringStorage;
   switch (N->getType()) {
@@ -407,7 +420,9 @@ Input::HNode *Input::createHNodes(Node *N) {
       // Copy string to permanent storage
       KeyStr = StringStorage.str().copy(StringAllocator);
     }
-    return new (ScalarHNodeAllocator.Allocate()) ScalarHNode(N, KeyStr);
+    auto *SHNode = new (ScalarHNodeAllocator.Allocate()) ScalarHNode(N, KeyStr);
+    saveAliasHNode(SN, SHNode);
+    return SHNode;
   }
   case Node::NK_BlockScalar: {
     BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N);
@@ -423,6 +438,7 @@ Input::HNode *Input::createHNodes(Node *N) {
         break;
       SQHNode->Entries.push_back(Entry);
     }
+    saveAliasHNode(SQ, SQHNode);
     return SQHNode;
   }
   case Node::NK_Mapping: {
@@ -456,10 +472,21 @@ Input::HNode *Input::createHNodes(Node *N) {
       mapHNode->Mapping[KeyStr] =
           std::make_pair(std::move(ValueHNode), KeyNode->getSourceRange());
     }
+    saveAliasHNode(Map, mapHNode);
     return std::move(mapHNode);
   }
   case Node::NK_Null:
     return new (EmptyHNodeAllocator.Allocate()) EmptyHNode(N);
+  case Node::NK_Alias: {
+    AliasNode *AN = cast<AliasNode>(N);
+    auto AliasName = AN->getName();
+    auto AHN = AliasMap.find(AliasName);
+    if (AHN == AliasMap.end()) {
+      setError(AN, Twine("undefined alias '" + AliasName + "'"));
+      return nullptr;
+    }
+    return AHN->second;
+  }
   default:
     setError(N, "unknown node kind");
     return nullptr;
diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp
index 283e5f829ba46..a2f7dd60db386 100644
--- a/llvm/unittests/Support/YAMLIOTest.cpp
+++ b/llvm/unittests/Support/YAMLIOTest.cpp
@@ -2889,6 +2889,85 @@ TEST(YAMLIO, TestEmptySequenceWrite) {
   }
 }
 
+TEST(YAMLIO, TestScalarAlias) {
+  FooBar doc;
+  {
+    Input yin("---\nfoo: &three 3\nbar: *three\n...\n");
+    yin >> doc;
+
+    EXPECT_FALSE(yin.error());
+    EXPECT_EQ(doc.foo, 3);
+    EXPECT_EQ(doc.bar, 3);
+  }
+
+  {
+    auto testDiagnostic = [](const llvm::SMDiagnostic &Error, void *) {
+      EXPECT_EQ(Error.getMessage(), "undefined alias 'unknown'");
+    };
+    Input yin("---\nfoo: &anchor 3\nbar: *unknown\n...\n", nullptr,
+              testDiagnostic);
+    yin >> doc;
+    EXPECT_TRUE(!!yin.error());
+  }
+}
+
+TEST(YAMLIO, TestMapAlias) {
+  {
+    FooBarContainer cont;
+
+    Input yin(R"(---
+fbs:
+  - foo: 13
+    bar: &anchor 42
+  - &seq_elmt
+    foo: &anchor 19
+    bar: *anchor
+  - *seq_elmt
+...
+)");
+    yin >> cont;
+    EXPECT_FALSE(yin.error());
+
+    auto &fbs = cont.fbs;
+    EXPECT_EQ(fbs.size(), 3ul);
+    EXPECT_EQ(fbs[0].foo, 13);
+    EXPECT_EQ(fbs[0].bar, 42);
+    EXPECT_EQ(fbs[1].foo, 19);
+    EXPECT_EQ(fbs[1].bar, 19);
+    EXPECT_EQ(fbs[2].foo, fbs[1].foo);
+    EXPECT_EQ(fbs[2].bar, fbs[1].bar);
+  }
+
+  {
+    FooBarMapMap cont;
+    Input yin(R"(---
+fbm:
+  one: &forty_two
+    foo: 42
+    bar: 42
+  forty_two: *forty_two
+  precedence: &forty_two
+    foo: 13
+    bar: 42
+  not_forty_two: *forty_two
+...
+)");
+    auto &fbm = cont.fbm;
+    yin >> cont;
+    EXPECT_FALSE(yin.error());
+
+    EXPECT_EQ(fbm.size(), 4ul);
+    EXPECT_EQ(fbm["one"].foo, 42);
+    EXPECT_EQ(fbm["one"].bar, 42);
+    EXPECT_EQ(fbm["forty_two"].foo, 42);
+    EXPECT_EQ(fbm["forty_two"].bar, 42);
+    EXPECT_EQ(fbm["precedence"].foo, 13);
+    EXPECT_EQ(fbm["precedence"].bar, 42);
+    EXPECT_EQ(fbm["not_forty_two"].foo, 13);
+    EXPECT_EQ(fbm["not_forty_two"].bar, 42);
+  }
+}
+
 static void TestEscaped(llvm::StringRef Input, llvm::StringRef Expected) {
   std::string out;
   llvm::raw_string_ostream ostr(out);