Skip to content

Commit 0d9ac41

Browse files
feat: Support forward declarations (#148)
1 parent a56c6dd commit 0d9ac41

22 files changed

+599
-220
lines changed

docs/Design.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,15 @@ by a significant amount for an extended period of time.
4646

4747
### Disk I/O
4848

49-
Workers write out partial SCIP indexes based on paths
49+
Workers write out shards (incomplete SCIP indexes) based on paths
5050
provided by the driver after completing an indexing job.
5151
This write is done after each job is complete,
5252
instead of after completing all jobs,
5353
because it reduces RSS as well as the blast radius
5454
in case the worker gets killed or crashes later.
5555

5656
After all indexing work is completed, the driver
57-
assembles the partial indexes into a full SCIP index.
57+
assembles the shards into a full SCIP index.
5858

5959
### Bazel and distributed builds
6060

@@ -69,11 +69,11 @@ because they would do redundant work.
6969
"Flattening" the job DAG would get rid of
7070
intermediate link jobs.)
7171

72-
Each job could output a partial index,
72+
Each job could output some shards,
7373
along with hashes (for faster de-duplication
7474
during the merge step).
7575
The final merge step would take these
76-
hashes and partial indexes
76+
hashes and shards
7777
and assemble them into an Index.
7878

7979
NOTE: As of Jan 24 2023, this functionality is planned

indexer/Driver.cc

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ class Scheduler final {
403403
return fmt::format("emitting an index for '{}'",
404404
fileInfoIt->path.asStringRef());
405405
}
406-
return "emitting a partial index";
406+
return "emitting a shard";
407407
}
408408
}());
409409
}
@@ -581,7 +581,7 @@ class Driver {
581581
FileIndexingPlanner planner;
582582

583583
std::vector<std::pair<JobId, IndexingStatistics>> allStatistics;
584-
std::vector<AbsolutePath> indexPartPaths;
584+
std::vector<ShardPaths> shardPaths;
585585

586586
/// Total number of commands in the compilation database.
587587
size_t compdbCommandCount = 0;
@@ -593,8 +593,7 @@ class Driver {
593593

594594
Driver(std::string driverId, DriverOptions &&options)
595595
: options(std::move(options)), id(driverId), scheduler(),
596-
planner(this->options.projectRootPath), indexPartPaths(),
597-
compdbParser() {
596+
planner(this->options.projectRootPath), shardPaths(), compdbParser() {
598597
MessageQueues::deleteIfPresent(this->id, this->numWorkers());
599598
this->queues = MessageQueues(this->id, this->numWorkers(),
600599
{IPC_BUFFER_MAX_SIZE, IPC_BUFFER_MAX_SIZE});
@@ -675,20 +674,23 @@ class Driver {
675674

676675
scip::Index fullIndex{};
677676
if (this->options.deterministic) {
678-
// Sorting before merging so that mergeIndexParts can be const
679-
absl::c_sort(this->indexPartPaths,
680-
[](const AbsolutePath &p1, const AbsolutePath &p2) -> bool {
681-
auto cmp = p1 <=> p2;
682-
ENFORCE(cmp != 0, "2+ index parts have same path '{}'",
683-
p1.asStringRef());
684-
return cmp == std::strong_ordering::less;
685-
});
686-
}
687-
this->mergeIndexParts(fullIndex);
677+
// Sorting before merging so that mergeShards can be const
678+
absl::c_sort(
679+
this->shardPaths, [](const auto &paths1, const auto &paths2) -> bool {
680+
auto cmp = paths1.docsAndExternals <=> paths2.docsAndExternals;
681+
ENFORCE(cmp != 0, "2+ index parts have same path '{}'",
682+
paths1.docsAndExternals.asStringRef());
683+
ENFORCE(paths1.forwardDecls != paths2.forwardDecls,
684+
"2+ index parts have same path '{}'",
685+
paths1.forwardDecls.asStringRef());
686+
return cmp == std::strong_ordering::less;
687+
});
688+
}
689+
this->mergeShards(fullIndex);
688690
fullIndex.SerializeToOstream(&outputStream);
689691
}
690692

691-
void mergeIndexParts(scip::Index &fullIndex) const {
693+
void mergeShards(scip::Index &fullIndex) const {
692694
LogTimerRAII timer("index merging");
693695

694696
scip::ToolInfo toolInfo;
@@ -721,23 +723,31 @@ class Driver {
721723
// a dependency on a library with a concurrent hash table.
722724
*fullIndex.mutable_metadata() = std::move(metadata);
723725

724-
scip::IndexBuilder builder{fullIndex};
725-
// TODO: Measure how much time this is taking and parallelize if too slow.
726-
for (auto &indexPartAbsPath : this->indexPartPaths) {
727-
auto &indexPartPath = indexPartAbsPath.asStringRef();
728-
std::ifstream inputStream(indexPartPath,
726+
auto readIndexShard = [](const AbsolutePath &path,
727+
scip::Index &indexShard) -> bool {
728+
auto &shardPath = path.asStringRef();
729+
std::ifstream inputStream(shardPath,
729730
std::ios_base::in | std::ios_base::binary);
730731
if (inputStream.fail()) {
731-
spdlog::warn("failed to open partial index at '{}' ({})", indexPartPath,
732+
spdlog::warn("failed to open shard at '{}' ({})", shardPath,
732733
std::strerror(errno));
733-
continue;
734+
return false;
735+
}
736+
if (!indexShard.ParseFromIstream(&inputStream)) {
737+
spdlog::warn("failed to parse shard at '{}'", shardPath);
738+
return false;
734739
}
735-
scip::Index partialIndex;
736-
if (!partialIndex.ParseFromIstream(&inputStream)) {
737-
spdlog::warn("failed to parse partial index at '{}'", indexPartPath);
740+
return true;
741+
};
742+
743+
scip::IndexBuilder builder{fullIndex};
744+
// TODO: Measure how much time this is taking and parallelize if too slow.
745+
for (auto &paths : this->shardPaths) {
746+
scip::Index indexShard;
747+
if (!readIndexShard(paths.docsAndExternals, indexShard)) {
738748
continue;
739749
}
740-
for (auto &doc : *partialIndex.mutable_documents()) {
750+
for (auto &doc : *indexShard.mutable_documents()) {
741751
bool isMultiplyIndexed = this->planner.isMultiplyIndexed(
742752
RootRelativePathRef{doc.relative_path(), RootKind::Project});
743753
builder.addDocument(std::move(doc), isMultiplyIndexed);
@@ -746,10 +756,24 @@ class Driver {
746756
// deterministic mode, indexes should be the same, and iterated over in
747757
// sorted order. So if external symbol emission in each part is
748758
// deterministic, addExternalSymbol will be called in deterministic order.
749-
for (auto &extSym : *partialIndex.mutable_external_symbols()) {
759+
for (auto &extSym : *indexShard.mutable_external_symbols()) {
750760
builder.addExternalSymbol(std::move(extSym));
751761
}
752762
}
763+
764+
auto symbolToInfoMap = builder.populateSymbolToInfoMap();
765+
766+
for (auto &paths : this->shardPaths) {
767+
scip::Index indexShard;
768+
if (!readIndexShard(paths.forwardDecls, indexShard)) {
769+
continue;
770+
}
771+
for (auto &forwardDeclSym : *indexShard.mutable_external_symbols()) {
772+
builder.addForwardDeclaration(*symbolToInfoMap,
773+
std::move(forwardDeclSym));
774+
}
775+
}
776+
753777
builder.finish(this->options.deterministic);
754778
}
755779

@@ -877,8 +901,7 @@ class Driver {
877901
this->allStatistics.emplace_back(response.jobId,
878902
std::move(result.statistics));
879903
}
880-
this->indexPartPaths.emplace_back(
881-
std::move(response.result.emitIndex.indexPartPath));
904+
this->shardPaths.emplace_back(std::move(result.shardPaths));
882905
break;
883906
}
884907
}

indexer/Indexer.cc

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ TuIndexer::TuIndexer(const clang::SourceManager &sourceManager,
263263
GetStableFileId getStableFileId)
264264
: sourceManager(sourceManager), langOptions(langOptions),
265265
astContext(astContext), symbolFormatter(symbolFormatter), documentMap(),
266-
getStableFileId(getStableFileId) {}
266+
getStableFileId(getStableFileId), externalSymbols(),
267+
forwardDeclarations() {}
267268

268269
void TuIndexer::saveBindingDecl(const clang::BindingDecl &bindingDecl) {
269270
auto optSymbol = this->symbolFormatter.getBindingSymbol(bindingDecl);
@@ -358,8 +359,8 @@ void TuIndexer::saveFunctionDecl(const clang::FunctionDecl &functionDecl) {
358359
this->saveDefinition(symbol, functionDecl.getLocation(),
359360
std::move(symbolInfo));
360361
} else {
361-
// See TODO(ref: handle-forward-decls)
362-
this->saveReference(symbol, functionDecl.getLocation());
362+
this->saveForwardDeclaration(symbol, functionDecl.getLocation(),
363+
this->tryGetDocComment(functionDecl));
363364
}
364365
}
365366

@@ -484,11 +485,8 @@ void TuIndexer::saveTagDecl(const clang::TagDecl &tagDecl) {
484485
auto symbol = optSymbol.value();
485486

486487
if (!tagDecl.isThisDeclarationADefinition()) {
487-
// 1. We should emit an external symbol here in some situations
488-
// See TODO(ref: handle-forward-decls)
489-
// 2. Pass in a forward declaration SymbolRole here
490-
// once https://github.com/sourcegraph/scip/issues/131 is fixed.
491-
this->saveReference(symbol, tagDecl.getLocation());
488+
this->saveForwardDeclaration(symbol, tagDecl.getLocation(),
489+
this->tryGetDocComment(tagDecl));
492490
return;
493491
}
494492

@@ -681,6 +679,34 @@ void TuIndexer::emitDocumentOccurrencesAndSymbols(
681679
}));
682680
}
683681

682+
void TuIndexer::emitExternalSymbols(bool deterministic,
683+
scip::Index &indexShard) {
684+
scip_clang::extractTransform(
685+
std::move(this->externalSymbols), deterministic,
686+
absl::FunctionRef<void(std::string_view &&, scip::SymbolInformation &&)>(
687+
[&](auto &&symbol, auto &&symbolInfo) {
688+
symbolInfo.set_symbol(symbol.data(), symbol.size());
689+
*indexShard.add_external_symbols() = std::move(symbolInfo);
690+
}));
691+
}
692+
693+
void TuIndexer::emitForwardDeclarations(bool deterministic,
694+
scip::Index &forwardDeclIndex) {
695+
scip_clang::extractTransform(
696+
std::move(this->forwardDeclarations), deterministic,
697+
absl::FunctionRef<void(std::string_view &&, DocComment &&)>(
698+
[&](auto &&symbol, auto &&docComment) {
699+
scip::SymbolInformation symbolInfo{};
700+
for (auto &line : docComment.lines) {
701+
*symbolInfo.add_documentation() = std::move(line);
702+
}
703+
symbolInfo.set_symbol(symbol.data(), symbol.size());
704+
// Add a forward declaration SymbolRole here
705+
// once https://github.com/sourcegraph/scip/issues/131 is fixed.
706+
*forwardDeclIndex.add_external_symbols() = std::move(symbolInfo);
707+
}));
708+
}
709+
684710
std::pair<FileLocalSourceRange, clang::FileID>
685711
TuIndexer::getTokenExpansionRange(
686712
clang::SourceLocation startExpansionLoc) const {
@@ -692,6 +718,17 @@ TuIndexer::getTokenExpansionRange(
692718
{startExpansionLoc, endLoc});
693719
}
694720

721+
void TuIndexer::saveForwardDeclaration(std::string_view symbol,
722+
clang::SourceLocation loc,
723+
DocComment &&docComments) {
724+
this->saveReference(symbol, loc);
725+
auto [it, inserted] = this->forwardDeclarations.emplace(symbol, docComments);
726+
if (!inserted && it->second.lines.empty() && !docComments.lines.empty()) {
727+
it->second.lines = std::move(docComments.lines);
728+
}
729+
return;
730+
}
731+
695732
void TuIndexer::saveReference(std::string_view symbol,
696733
clang::SourceLocation loc, int32_t extraRoles) {
697734
auto expansionLoc = this->sourceManager.getExpansionLoc(loc);
@@ -721,6 +758,19 @@ void TuIndexer::saveDefinition(
721758
if (optSymbolInfo.has_value()) {
722759
doc.symbolInfos.emplace(symbol, std::move(optSymbolInfo.value()));
723760
}
761+
} else if (optSymbolInfo.has_value()) {
762+
this->saveExternalSymbol(symbol, std::move(*optSymbolInfo));
763+
}
764+
}
765+
766+
void TuIndexer::saveExternalSymbol(std::string_view symbol,
767+
scip::SymbolInformation &&symbolInfo) {
768+
auto [it, inserted] =
769+
this->externalSymbols.emplace(symbol, std::move(symbolInfo));
770+
if (!inserted && it->second.documentation().empty()
771+
&& !symbolInfo.documentation().empty()) {
772+
*it->second.mutable_documentation() =
773+
std::move(*symbolInfo.mutable_documentation());
724774
}
725775
}
726776

indexer/Indexer.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ class TuIndexer final {
214214
documentMap;
215215
GetStableFileId getStableFileId;
216216

217+
absl::flat_hash_map</*name*/ std::string_view, scip::SymbolInformation>
218+
externalSymbols;
219+
220+
absl::flat_hash_map<std::string_view, DocComment> forwardDeclarations;
221+
217222
public:
218223
TuIndexer(const clang::SourceManager &, const clang::LangOptions &,
219224
const clang::ASTContext &, SymbolFormatter &, GetStableFileId);
@@ -242,10 +247,19 @@ class TuIndexer final {
242247
void emitDocumentOccurrencesAndSymbols(bool deterministic, clang::FileID,
243248
scip::Document &);
244249

250+
void emitExternalSymbols(bool deterministic, scip::Index &);
251+
void emitForwardDeclarations(bool deterministic, scip::Index &);
252+
245253
private:
246254
std::pair<FileLocalSourceRange, clang::FileID>
247255
getTokenExpansionRange(clang::SourceLocation startExpansionLoc) const;
248256

257+
/// Helper method for recording forward declarations.
258+
///
259+
/// Prefer this over \c saveReference or \c saveOccurrence.
260+
void saveForwardDeclaration(std::string_view symbol,
261+
clang::SourceLocation loc, DocComment &&);
262+
249263
void saveReference(std::string_view symbol, clang::SourceLocation loc,
250264
int32_t extraRoles = 0);
251265

@@ -259,6 +273,9 @@ class TuIndexer final {
259273
std::optional<scip::SymbolInformation> &&symbolInfo,
260274
int32_t extraRoles = 0);
261275

276+
/// Only for use inside \c saveDefinition.
277+
void saveExternalSymbol(std::string_view symbol, scip::SymbolInformation &&);
278+
262279
/// Lower-level method for only saving a Occurrence.
263280
///
264281
/// Returns a \c PartialDocument reference so that \c scip::SymbolInformation

indexer/IpcMessages.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::EmitIndexJobDetails, filesToBeIndexed)
152152
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::IpcTestMessage, content)
153153
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::SemanticAnalysisJobDetails, command)
154154

155-
DERIVE_SERIALIZE_2(scip_clang::EmitIndexJobResult, statistics, indexPartPath)
155+
DERIVE_SERIALIZE_2(scip_clang::ShardPaths, docsAndExternals, forwardDecls)
156+
DERIVE_SERIALIZE_2(scip_clang::EmitIndexJobResult, statistics, shardPaths)
156157
DERIVE_SERIALIZE_2(scip_clang::PreprocessedFileInfo, path, hashValue)
157158
DERIVE_SERIALIZE_2(scip_clang::PreprocessedFileInfoMulti, path, hashValues)
158159
DERIVE_SERIALIZE_2(scip_clang::IndexJobRequest, id, job)
@@ -185,4 +186,4 @@ bool fromJSON(const llvm::json::Value &value, IndexJobResponse &r,
185186
&& mapper.map("jobId", r.jobId) && mapper.map("result", r.result);
186187
}
187188

188-
} // namespace scip_clang
189+
} // namespace scip_clang

indexer/IpcMessages.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,15 @@ struct IndexingStatistics {
153153
};
154154
SERIALIZABLE(IndexingStatistics)
155155

156+
struct ShardPaths {
157+
AbsolutePath docsAndExternals;
158+
AbsolutePath forwardDecls;
159+
};
160+
SERIALIZABLE(ShardPaths)
161+
156162
struct EmitIndexJobResult {
157163
IndexingStatistics statistics;
158-
AbsolutePath indexPartPath;
164+
ShardPaths shardPaths;
159165
};
160166
SERIALIZABLE(EmitIndexJobResult)
161167

0 commit comments

Comments
 (0)