Skip to content

Commit cfc13b1

Browse files
refactor: Use suffixes for forward decl resolution (#349)
In a cross-repo setting, the prefix for a forward declaration cannot be determined eagerly due to lack of package ID information, so we only serialize the suffix, and perform lookups based on the suffix.
1 parent 5b8e89e commit cfc13b1

File tree

11 files changed

+239
-56
lines changed

11 files changed

+239
-56
lines changed

docs/Design.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ There are two options for doing this:
730730
and use the suffix as the lookup key.
731731

732732
Option 1 would increase the overhead of (de)serializing indexes,
733-
and requires us to fiddle with the Protobuf format
733+
and requires us to fiddle with the Index Protobuf schema
734734
(to add an extra field which tracks the prefix-free symbol name).
735735

736736
So we go with option 2 instead. Specifically, the descriptor
@@ -739,4 +739,14 @@ add it to the end of the version, either is OK).
739739
We would forbid the use of `$` in package names and versions,
740740
allowing us to quickly split a symbol name.
741741

742-
The extra `$` could be stripped when serializing the final index.
742+
<!-- See NOTE(ref: symbol-string-hack-for-forward-decls) -->
743+
744+
The extra `$` could be stripped when serializing the final index.
745+
746+
Additionally, since Occurrences track the symbol name,
747+
we separate out occurrences for forward declarations
748+
(see [fwd_decls.proto](/proto/fwd_decls.proto))
749+
into a separate file with a different format,
750+
to avoid having to rewrite the symbol string for a large
751+
number of occurrences.
752+
Instead, those occurrences are written out at the end.

indexer/Driver.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,7 +1087,7 @@ class Driver {
10871087
this->options.compdbPath.asStringRef());
10881088
}
10891089

1090-
auto symbolToInfoMap = builder.populateSymbolToInfoMap();
1090+
auto forwardDeclResolver = builder.populateForwardDeclResolver();
10911091

10921092
for (auto &paths : this->shardPaths) {
10931093
scip::ForwardDeclIndex indexShard;
@@ -1097,7 +1097,7 @@ class Driver {
10971097
TRACE_EVENT(tracing::indexMerging, "addForwardDeclarations", "size",
10981098
indexShard.forward_decls_size());
10991099
for (auto &forwardDeclSym : *indexShard.mutable_forward_decls()) {
1100-
builder.addForwardDeclaration(*symbolToInfoMap,
1100+
builder.addForwardDeclaration(*forwardDeclResolver,
11011101
std::move(forwardDeclSym));
11021102
}
11031103
}

indexer/Indexer.cc

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -353,20 +353,23 @@ void ForwardDeclMap::emit(bool deterministic, scip::ForwardDeclIndex &index) {
353353
this->map.size());
354354
scip_clang::extractTransform(
355355
std::move(this->map), deterministic,
356-
absl::FunctionRef<void(SymbolNameRef &&, Value &&)>(
357-
[&](auto &&symbol, auto &&value) {
358-
scip::ForwardDecl fwdDecl{};
359-
fwdDecl.set_symbol(symbol.value.data(), symbol.value.size());
360-
value.docComment.addTo(*fwdDecl.mutable_documentation());
361-
for (auto [path, range] : value.ranges) {
362-
scip::ForwardDecl::Reference ref{};
363-
range.addTo(ref);
364-
auto p = path.asStringView();
365-
ref.set_relative_path(p.data(), p.size());
366-
*fwdDecl.add_references() = std::move(ref);
367-
}
368-
*index.add_forward_decls() = std::move(fwdDecl);
369-
}));
356+
absl::FunctionRef<void(SymbolNameRef &&, Value &&)>([&](auto &&symbol,
357+
auto &&value) {
358+
scip::ForwardDecl fwdDecl{};
359+
auto optSuffix = symbol.getPackageAgnosticSuffix();
360+
ENFORCE(optSuffix.has_value(), "missing $ in symbol name {}",
361+
symbol.value);
362+
fwdDecl.set_suffix(optSuffix->value.data(), optSuffix->value.size());
363+
value.docComment.addTo(*fwdDecl.mutable_documentation());
364+
for (auto [path, range] : value.ranges) {
365+
scip::ForwardDecl::Reference ref{};
366+
range.addTo(ref);
367+
auto p = path.asStringView();
368+
ref.set_relative_path(p.data(), p.size());
369+
*fwdDecl.add_references() = std::move(ref);
370+
}
371+
*index.add_forward_decls() = std::move(fwdDecl);
372+
}));
370373
}
371374

372375
TuIndexer::TuIndexer(const clang::SourceManager &sourceManager,

indexer/ScipExtras.cc

Lines changed: 106 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,53 @@ void SymbolInformationBuilder::finish(bool deterministic,
100100
}));
101101
}
102102

103+
void ForwardDeclResolver::insert(SymbolSuffix suffix,
104+
SymbolInformationBuilder *builder) {
105+
this->docInternalMap.emplace(suffix, SymbolInfoOrBuilderPtr{builder});
106+
}
107+
108+
void ForwardDeclResolver::insert(SymbolSuffix suffix,
109+
SymbolInformation *symbolInfo) {
110+
this->docInternalMap.emplace(suffix, SymbolInfoOrBuilderPtr{symbolInfo});
111+
}
112+
113+
void ForwardDeclResolver::insertExternal(SymbolNameRef symbol) {
114+
if (auto optSuffix = symbol.getPackageAgnosticSuffix()) {
115+
this->externalsMap[*optSuffix].insert(symbol);
116+
}
117+
}
118+
119+
std::optional<ForwardDeclResolver::SymbolInfoOrBuilderPtr>
120+
ForwardDeclResolver::lookupInDocuments(SymbolSuffix suffix) const {
121+
auto it = this->docInternalMap.find(suffix);
122+
if (it == this->docInternalMap.end()) {
123+
return {};
124+
}
125+
return it->second;
126+
}
127+
128+
const absl::flat_hash_set<SymbolNameRef> *
129+
ForwardDeclResolver::lookupExternals(SymbolSuffix suffix) const {
130+
auto it = this->externalsMap.find(suffix);
131+
if (it == this->externalsMap.end()) {
132+
return {};
133+
}
134+
return &it->second;
135+
}
136+
137+
void ForwardDeclResolver::deleteExternals(SymbolSuffix suffix) {
138+
this->externalsMap.erase(suffix);
139+
}
140+
103141
SymbolNameRef SymbolNameInterner::intern(std::string &&s) {
104142
return SymbolNameRef{std::string_view(this->impl.save(s))};
105143
}
106144

145+
SymbolNameRef SymbolNameInterner::intern(SymbolNameRef s) {
146+
return SymbolNameRef{
147+
std::string_view(this->impl.save(llvm::StringRef(s.value)))};
148+
}
149+
107150
DocumentBuilder::DocumentBuilder(scip::Document &&first,
108151
SymbolNameInterner interner)
109152
: soFar(), interner(interner),
@@ -144,11 +187,12 @@ void DocumentBuilder::merge(scip::Document &&doc) {
144187
}
145188
}
146189

147-
void DocumentBuilder::populateSymbolToInfoMap(
148-
SymbolToInfoMap &symbolToInfoMap) {
190+
void DocumentBuilder::populateForwardDeclResolver(
191+
ForwardDeclResolver &forwardDeclResolver) {
149192
for (auto &[symbolName, symbolInfoBuilder] : this->symbolInfos) {
150-
symbolToInfoMap.emplace(symbolName,
151-
SymbolToInfoMap::mapped_type(&symbolInfoBuilder));
193+
if (auto optSuffix = symbolName.getPackageAgnosticSuffix()) {
194+
forwardDeclResolver.insert(*optSuffix, &symbolInfoBuilder);
195+
}
152196
}
153197
}
154198

@@ -251,60 +295,92 @@ void IndexBuilder::addExternalSymbol(scip::SymbolInformation &&extSym) {
251295
builder->mergeRelationships(std::move(*extSym.mutable_relationships()));
252296
}
253297

254-
std::unique_ptr<SymbolToInfoMap> IndexBuilder::populateSymbolToInfoMap() {
298+
std::unique_ptr<ForwardDeclResolver>
299+
IndexBuilder::populateForwardDeclResolver() {
255300
TRACE_EVENT(scip_clang::tracing::indexMerging,
256-
"IndexBuilder::populateSymbolToInfoMap", "documents.size",
301+
"IndexBuilder::populateForwardDeclResolver", "documents.size",
257302
this->documents.size(), "multiplyIndexed.size",
258303
this->multiplyIndexed.size());
259-
SymbolToInfoMap symbolToInfoMap;
304+
ForwardDeclResolver forwardDeclResolver;
260305
for (auto &document : this->documents) {
261306
for (auto &symbolInfo : *document.mutable_symbols()) {
262-
symbolToInfoMap.emplace(SymbolNameRef{symbolInfo.symbol()},
263-
SymbolToInfoMap::mapped_type(&symbolInfo));
307+
if (auto optSuffix =
308+
SymbolNameRef{symbolInfo.symbol()}.getPackageAgnosticSuffix()) {
309+
forwardDeclResolver.insert(*optSuffix, &symbolInfo);
310+
}
264311
}
265312
}
266313
for (auto &[_, docBuilder] : this->multiplyIndexed) {
267-
docBuilder->populateSymbolToInfoMap(symbolToInfoMap);
314+
docBuilder->populateForwardDeclResolver(forwardDeclResolver);
268315
}
269-
return std::make_unique<SymbolToInfoMap>(std::move(symbolToInfoMap));
316+
return std::make_unique<ForwardDeclResolver>(std::move(forwardDeclResolver));
270317
}
271318

272-
void IndexBuilder::addForwardDeclaration(const SymbolToInfoMap &symbolToInfoMap,
273-
scip::ForwardDecl &&forwardDeclSym) {
274-
auto name =
275-
this->interner.intern(std::move(*forwardDeclSym.mutable_symbol()));
276-
auto it = symbolToInfoMap.find(name);
277-
if (it == symbolToInfoMap.end()) {
278-
if (!this->externalSymbols.contains(name)) {
279-
scip::SymbolInformation extSym{};
280-
*extSym.add_documentation() = std::move(forwardDeclSym.documentation());
281-
this->addExternalSymbolUnchecked(name, std::move(extSym));
282-
} else {
319+
void IndexBuilder::addForwardDeclaration(
320+
ForwardDeclResolver &forwardDeclResolver,
321+
scip::ForwardDecl &&forwardDeclSym) {
322+
auto suffix = SymbolSuffix{
323+
this->interner.intern(std::move(*forwardDeclSym.mutable_suffix())).value};
324+
auto optSymbolInfoOrBuilderPtr =
325+
forwardDeclResolver.lookupInDocuments(suffix);
326+
if (!optSymbolInfoOrBuilderPtr.has_value()) {
327+
if (auto *externals = forwardDeclResolver.lookupExternals(suffix)) {
283328
// The main index confirms that this was an external symbol, which means
284329
// that we must have seen the definition in an out-of-project file.
285330
// In this case, just throw away the information we have,
286331
// and rely on what we found externally as the source of truth.
332+
ENFORCE(!externals->empty(),
333+
"externals list for a suffix should be non-empty");
334+
// TODO: Log a debug warning if externals->size() > 1
335+
for (auto symbolName : *externals) {
336+
this->addForwardDeclOccurrences(symbolName,
337+
scip::ForwardDecl{forwardDeclSym});
338+
}
339+
} else {
340+
scip::SymbolInformation extSym{};
341+
// Make up a fake prefix, as we have no package information 🤷🏽
342+
auto name = this->interner.intern(
343+
std::move(suffix.addFakePrefix().asStringRefMut()));
344+
*extSym.add_documentation() = std::move(forwardDeclSym.documentation());
345+
this->addExternalSymbolUnchecked(name, std::move(extSym));
346+
this->addForwardDeclOccurrences(name, std::move(forwardDeclSym));
287347
}
288-
this->addForwardDeclOccurrences(name, std::move(forwardDeclSym));
289348
return;
290349
}
291-
auto extIt = this->externalSymbols.find(name);
292-
if (extIt != this->externalSymbols.end()) {
350+
auto symbolInfoOrBuilderPtr = optSymbolInfoOrBuilderPtr.value();
351+
if (auto *externals = forwardDeclResolver.lookupExternals(suffix)) {
293352
// We found the symbol in a document, so the external symbols list is
294353
// too pessimistic. This can happen when a TU processes a decl only via
295354
// a forward decl (and hence conservatively assumes it must be external),
296355
// but another in-project TU contains the definition.
297356
//
298357
// So remove the entry from the external symbols list.
299-
extIt->second->discard();
300-
this->externalSymbols.erase(extIt);
358+
for (auto name : *externals) {
359+
auto it = this->externalSymbols.find(name);
360+
if (it != this->externalSymbols.end()) {
361+
it->second->discard();
362+
this->externalSymbols.erase(it);
363+
}
364+
}
365+
forwardDeclResolver.deleteExternals(suffix);
366+
}
367+
SymbolNameRef name;
368+
if (auto *symbolInfo =
369+
symbolInfoOrBuilderPtr.dyn_cast<scip::SymbolInformation *>()) {
370+
name = SymbolNameRef{std::string_view(symbolInfo->symbol())};
371+
} else {
372+
auto *symbolInfoBuilder =
373+
symbolInfoOrBuilderPtr.get<SymbolInformationBuilder *>();
374+
name = symbolInfoBuilder->name;
301375
}
376+
name = this->interner.intern(name);
302377
if (!forwardDeclSym.documentation().empty()) {
303378
// FIXME(def: better-doc-merging): The missing documentation placeholder
304379
// value is due to a bug in the backend which seemingly came up randomly
305380
// and also got fixed somehow. We should remove the workaround if this
306381
// is no longer an issue.
307-
if (auto *symbolInfo = it->second.dyn_cast<scip::SymbolInformation *>()) {
382+
if (auto *symbolInfo =
383+
symbolInfoOrBuilderPtr.dyn_cast<scip::SymbolInformation *>()) {
308384
bool isMissingDocumentation =
309385
symbolInfo->documentation_size() == 0
310386
|| (symbolInfo->documentation_size() == 1
@@ -316,7 +392,8 @@ void IndexBuilder::addForwardDeclaration(const SymbolToInfoMap &symbolToInfoMap,
316392
std::move(*forwardDeclSym.mutable_documentation());
317393
}
318394
} else {
319-
auto &symbolInfoBuilder = *it->second.get<SymbolInformationBuilder *>();
395+
auto &symbolInfoBuilder =
396+
*symbolInfoOrBuilderPtr.get<SymbolInformationBuilder *>();
320397
// FIXME(def: better-doc-merging): We shouldn't drop
321398
// documentation attached to a forward declaration.
322399
if (!symbolInfoBuilder.hasDocumentation()) {

indexer/ScipExtras.h

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,14 @@ class SymbolInformationBuilder final {
9292
scip_clang::Bomb _bomb;
9393

9494
public:
95+
SymbolNameRef name;
96+
9597
template <typename C1, typename C2>
9698
SymbolInformationBuilder(SymbolNameRef name, C1 &&docs, C2 &&rels)
9799
: documentation(), relationships(),
98100
_bomb(BOMB_INIT(
99-
fmt::format("SymbolInformationBuilder for '{}'", name.value))) {
101+
fmt::format("SymbolInformationBuilder for '{}'", name.value))),
102+
name(name) {
100103
(void)name;
101104
this->setDocumentation(std::move(docs));
102105
this->mergeRelationships(std::move(rels));
@@ -120,9 +123,28 @@ class SymbolInformationBuilder final {
120123
void finish(bool deterministic, scip::SymbolInformation &out);
121124
};
122125

123-
using SymbolToInfoMap = absl::flat_hash_map<
124-
SymbolNameRef,
125-
llvm::PointerUnion<SymbolInformation *, SymbolInformationBuilder *>>;
126+
class ForwardDeclResolver {
127+
using SymbolInfoOrBuilderPtr =
128+
llvm::PointerUnion<SymbolInformation *, SymbolInformationBuilder *>;
129+
130+
absl::flat_hash_map<SymbolSuffix, SymbolInfoOrBuilderPtr> docInternalMap;
131+
132+
absl::flat_hash_map<SymbolSuffix, absl::flat_hash_set<SymbolNameRef>>
133+
externalsMap;
134+
135+
public:
136+
ForwardDeclResolver() = default;
137+
138+
void insert(SymbolSuffix, SymbolInformationBuilder *);
139+
void insert(SymbolSuffix, SymbolInformation *);
140+
void insertExternal(SymbolNameRef);
141+
142+
std::optional<SymbolInfoOrBuilderPtr> lookupInDocuments(SymbolSuffix) const;
143+
144+
const absl::flat_hash_set<SymbolNameRef> *lookupExternals(SymbolSuffix) const;
145+
146+
void deleteExternals(SymbolSuffix);
147+
};
126148

127149
class SymbolNameInterner {
128150
llvm::UniqueStringSaver &impl;
@@ -131,6 +153,7 @@ class SymbolNameInterner {
131153
SymbolNameInterner(llvm::UniqueStringSaver &impl) : impl(impl) {}
132154

133155
SymbolNameRef intern(std::string &&);
156+
SymbolNameRef intern(SymbolNameRef);
134157
};
135158

136159
class DocumentBuilder final {
@@ -147,7 +170,7 @@ class DocumentBuilder final {
147170
public:
148171
DocumentBuilder(scip::Document &&document, SymbolNameInterner interner);
149172
void merge(scip::Document &&doc);
150-
void populateSymbolToInfoMap(SymbolToInfoMap &);
173+
void populateForwardDeclResolver(ForwardDeclResolver &);
151174
void finish(bool deterministic, scip::Document &out);
152175
};
153176

@@ -204,8 +227,8 @@ class IndexBuilder final {
204227
void addExternalSymbol(scip::SymbolInformation &&extSym);
205228

206229
// The map contains interior references into IndexBuilder's state.
207-
std::unique_ptr<SymbolToInfoMap> populateSymbolToInfoMap();
208-
void addForwardDeclaration(const SymbolToInfoMap &, scip::ForwardDecl &&);
230+
std::unique_ptr<ForwardDeclResolver> populateForwardDeclResolver();
231+
void addForwardDeclaration(ForwardDeclResolver &, scip::ForwardDecl &&);
209232

210233
void finish(bool deterministic, std::ostream &);
211234

indexer/SymbolFormatter.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ void SymbolBuilder::formatTo(std::string &buf) const {
108108
::addSpaceEscaped(out, text);
109109
out << ' ';
110110
}
111+
// NOTE(def: symbol-string-hack-for-forward-decls): Add a '$' prefix
112+
// after the space after the version, but before any descriptors.
113+
// When splitting the symbol, we check for the '$' preceded by a ' '.
114+
// Strictly speaking, it is possible that the same pattern is present
115+
// due to a funky filename which contains the pattern ' $', it's highly
116+
// improbable to cause any incorrect lookup by not colliding with an
117+
// actual symbol name.
118+
out << '$';
111119
for (auto &descriptor : this->descriptors) {
112120
descriptor.formatTo(out);
113121
}

indexer/SymbolFormatter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,15 @@ struct SymbolBuilder {
7474
/// this avoids the extra work of recomputing parent symbol strings.
7575
static void formatContextual(std::string &buf, SymbolNameRef contextSymbol,
7676
const DescriptorBuilder &descriptor);
77+
78+
static std::optional<scip::SymbolSuffix>
79+
getPackageAgnosticSuffix(scip::SymbolNameRef);
80+
81+
static scip::SymbolName addFakePrefix(scip::SymbolSuffix);
7782
};
7883

84+
class FileMetadataMap;
85+
7986
class SymbolFormatter final {
8087
const clang::SourceManager &sourceManager;
8188
FileMetadataMap &fileMetadataMap;

0 commit comments

Comments
 (0)