Skip to content

Commit bb9f5a5

Browse files
fix: Pass hashes in IPC results. (#142)
1 parent ec840cf commit bb9f5a5

File tree

3 files changed

+136
-72
lines changed

3 files changed

+136
-72
lines changed

indexer/Driver.cc

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -257,18 +257,16 @@ class FileIndexingPlanner {
257257
FileIndexingPlanner(const FileIndexingPlanner &) = delete;
258258

259259
void saveSemaResult(SemanticAnalysisJobResult &&semaResult,
260-
std::vector<AbsolutePath> &filesToBeIndexed) {
260+
std::vector<PreprocessedFileInfo> &filesToBeIndexed) {
261261
absl::flat_hash_set<HashValue> emptyHashSet{};
262262
for (auto &fileInfoMulti : semaResult.illBehavedFiles) {
263263
auto [it, _] = hashesSoFar.insert(
264264
{AbsolutePath(std::move(fileInfoMulti.path)), emptyHashSet});
265265
auto &[path, hashes] = *it;
266-
bool addedFile = false;
267266
for (auto hashValue : fileInfoMulti.hashValues) {
268267
auto [_, inserted] = hashes.insert(hashValue);
269-
if (inserted && !addedFile) {
270-
filesToBeIndexed.push_back(path); // deliberate copy
271-
addedFile = true;
268+
if (inserted) {
269+
filesToBeIndexed.push_back({path, hashValue}); // deliberate copy
272270
}
273271
}
274272
}
@@ -278,7 +276,8 @@ class FileIndexingPlanner {
278276
auto &[path, hashes] = *it;
279277
auto [__, inserted] = hashes.insert(fileInfo.hashValue);
280278
if (inserted) {
281-
filesToBeIndexed.push_back(path); // deliberate copy
279+
filesToBeIndexed.push_back(
280+
{path, fileInfo.hashValue}); // deliberate copy
282281
}
283282
}
284283
}
@@ -359,15 +358,16 @@ class Scheduler final {
359358
return fmt::format("running semantic analysis for '{}'",
360359
it->second.semanticAnalysis.command.Filename);
361360
case IndexJob::Kind::EmitIndex:
362-
auto &paths = it->second.emitIndex.filesToBeIndexed;
363-
auto pathIt = absl::c_find_if(paths, [](const AbsolutePath &p) -> bool {
364-
auto &sv = p.asStringRef();
365-
return sv.ends_with(".c") || sv.ends_with(".cc")
366-
|| sv.ends_with(".cxx") || sv.ends_with(".cpp");
367-
});
368-
if (pathIt != paths.end()) {
361+
auto &fileInfos = it->second.emitIndex.filesToBeIndexed;
362+
auto fileInfoIt = absl::c_find_if(
363+
fileInfos, [](const PreprocessedFileInfo &fileInfo) -> bool {
364+
auto &sv = fileInfo.path.asStringRef();
365+
return sv.ends_with(".c") || sv.ends_with(".cc")
366+
|| sv.ends_with(".cxx") || sv.ends_with(".cpp");
367+
});
368+
if (fileInfoIt != fileInfos.end()) {
369369
return fmt::format("emitting an index for '{}'",
370-
pathIt->asStringRef());
370+
fileInfoIt->path.asStringRef());
371371
}
372372
return "emitting a partial index";
373373
}
@@ -791,7 +791,7 @@ class Driver {
791791
switch (response.result.kind) {
792792
case IndexJob::Kind::SemanticAnalysis: {
793793
auto &semaResult = response.result.semanticAnalysis;
794-
std::vector<AbsolutePath> filesToBeIndexed{};
794+
std::vector<PreprocessedFileInfo> filesToBeIndexed{};
795795
this->planner.saveSemaResult(std::move(semaResult), filesToBeIndexed);
796796
auto &queue = this->queues.driverToWorker[latestIdleWorkerId.id];
797797
queue.send(this->scheduler.createSubtaskAndScheduleOnWorker(

indexer/IpcMessages.h

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,26 @@ SERIALIZABLE(clang::tooling::CompileCommand) // Not implemented upstream 😕
8484

8585
namespace scip_clang {
8686

87+
struct PreprocessedFileInfo {
88+
AbsolutePath path;
89+
HashValue hashValue;
90+
91+
friend std::strong_ordering operator<=>(const PreprocessedFileInfo &lhs,
92+
const PreprocessedFileInfo &rhs);
93+
};
94+
SERIALIZABLE(PreprocessedFileInfo)
95+
96+
struct PreprocessedFileInfoMulti {
97+
AbsolutePath path;
98+
std::vector<HashValue> hashValues;
99+
100+
friend std::strong_ordering operator<=>(const PreprocessedFileInfoMulti &lhs,
101+
const PreprocessedFileInfoMulti &rhs);
102+
};
103+
SERIALIZABLE(PreprocessedFileInfoMulti)
104+
87105
struct EmitIndexJobDetails {
88-
std::vector<AbsolutePath> filesToBeIndexed;
106+
std::vector<PreprocessedFileInfo> filesToBeIndexed;
89107
};
90108
SERIALIZABLE(EmitIndexJobDetails)
91109

@@ -116,24 +134,6 @@ SERIALIZABLE(IndexJobRequest)
116134

117135
SERIALIZABLE(HashValue)
118136

119-
struct PreprocessedFileInfo {
120-
AbsolutePath path;
121-
HashValue hashValue;
122-
123-
friend std::strong_ordering operator<=>(const PreprocessedFileInfo &lhs,
124-
const PreprocessedFileInfo &rhs);
125-
};
126-
SERIALIZABLE(PreprocessedFileInfo)
127-
128-
struct PreprocessedFileInfoMulti {
129-
AbsolutePath path;
130-
std::vector<HashValue> hashValues;
131-
132-
friend std::strong_ordering operator<=>(const PreprocessedFileInfoMulti &lhs,
133-
const PreprocessedFileInfoMulti &rhs);
134-
};
135-
SERIALIZABLE(PreprocessedFileInfoMulti)
136-
137137
struct SemanticAnalysisJobResult {
138138
std::vector<PreprocessedFileInfo> wellBehavedFiles;
139139
std::vector<PreprocessedFileInfoMulti> illBehavedFiles;

indexer/Worker.cc

Lines changed: 102 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,37 @@ struct PreprocessorDebugContext {
224224
std::string tuMainFilePath;
225225
};
226226

227-
using PathToIdMap = absl::flat_hash_map<AbsolutePathRef, clang::FileID>;
227+
/// Similar to \c PreprocessedFileInfo but storing a PathRef instead.
228+
struct PreprocessedFileInfoRef {
229+
HashValue hash;
230+
AbsolutePathRef path;
231+
232+
template <typename H>
233+
friend H AbslHashValue(H h, const PreprocessedFileInfoRef &p) {
234+
return H::combine(std::move(h), p.hash.rawValue, p.path);
235+
}
236+
237+
DERIVE_EQ_ALL(PreprocessedFileInfoRef)
238+
};
239+
240+
/// Type to retrieve information about the \c clang::FileID corresponding
241+
/// to a (HashValue, Path) pair.
242+
///
243+
/// The worker and driver communicate using (HashValue, Path) pairs,
244+
/// since those are stable across different workers running in parallel.
245+
///
246+
/// However, inside a worker, we'd like to use \c clang::FileID keys if
247+
/// possible (e.g. storing Documents before indexing), since they are
248+
/// 32-bit integer values. This map translates driver->worker info
249+
/// in (HashValue, Path) terms to FileIDs.
250+
///
251+
/// In general, it may be the case that multiple FileIDs correspond to
252+
/// the same (HashValue, Path) pair (this happens for well-behaved
253+
/// headers, c.f. \c FileIndexingPlanner); the representative FileID
254+
/// is chosen arbitrarily.
255+
using ClangIdLookupMap = absl::flat_hash_map<
256+
PreprocessedFileInfoRef,
257+
absl::flat_hash_set<llvm_ext::AbslHashAdapter<clang::FileID>>>;
228258

229259
class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
230260
const IndexerPreprocessorOptions &options;
@@ -259,7 +289,8 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
259289
finishedProcessing(), finishedProcessingMulti(),
260290
macroIndexer(sourceManager), debugContext(std::move(debugContext)) {}
261291

262-
void flushState(SemanticAnalysisJobResult &result, PathToIdMap &pathToIdMap,
292+
void flushState(SemanticAnalysisJobResult &result,
293+
ClangIdLookupMap &clangIdLookupMap,
263294
MacroIndexer &macroIndexerOutput) {
264295
// HACK: It seems like EnterInclude and ExitInclude events are not
265296
// perfectly balanced in Clang. Work around that.
@@ -321,7 +352,8 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
321352
auto absPathRef = optPath.value();
322353
result.wellBehavedFiles.emplace_back(
323354
PreprocessedFileInfo{AbsolutePath{absPathRef}, hashValue});
324-
pathToIdMap.insert({absPathRef, fileId});
355+
clangIdLookupMap[PreprocessedFileInfoRef{hashValue, absPathRef}]
356+
.insert({fileId});
325357
}
326358
}
327359
if (this->options.deterministic) {
@@ -343,7 +375,10 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
343375
}
344376
result.illBehavedFiles.emplace_back(PreprocessedFileInfoMulti{
345377
AbsolutePath{absPathRef}, std::move(hashes)});
346-
pathToIdMap.insert({absPathRef, fileId});
378+
for (auto &hash : hashes) {
379+
clangIdLookupMap[PreprocessedFileInfoRef{hash, absPathRef}].insert(
380+
{fileId});
381+
}
347382
}
348383
}
349384
if (this->options.deterministic) {
@@ -568,7 +603,7 @@ class IndexerAstVisitor;
568603
/// Not every file that is part of this project will be part of this map.
569604
/// For example, if a file+hash was already indexed by another worker,
570605
/// then one shouldn't call insert(..) for that file.
571-
using FilesToBeIndexedSet =
606+
using FileIdsToBeIndexedSet =
572607
absl::flat_hash_set<llvm_ext::AbslHashAdapter<clang::FileID>>;
573608

574609
/// Type to track canonical relative paths for FileIDs.
@@ -600,24 +635,32 @@ class CanonicalPathMap final {
600635
CanonicalPathMap(const CanonicalPathMap &) = delete;
601636
CanonicalPathMap &operator=(const CanonicalPathMap &) = delete;
602637

603-
void populate(const PathToIdMap &pathToIdMap) {
604-
this->map.reserve(pathToIdMap.size());
605-
for (auto [absPathRef, fileId] : pathToIdMap) {
606-
this->insert(fileId, absPathRef);
638+
void populate(const ClangIdLookupMap &clangIdLookupMap) {
639+
this->map.reserve(clangIdLookupMap.size());
640+
for (auto [fileInfo, fileIdSet] : clangIdLookupMap) {
641+
ENFORCE(!fileIdSet.empty());
642+
for (auto wrappedFileId : fileIdSet) {
643+
bool inserted = this->insert(wrappedFileId.data, fileInfo.path);
644+
ENFORCE(
645+
inserted,
646+
"there is a 1-1 mapping from FileID -> (path, hash)"
647+
"so it's unexpected that the FileID {} was inserted for {} already",
648+
wrappedFileId.data.getHashValue(), fileInfo.path.asStringView());
649+
}
607650
}
608651
}
609652

610653
/// Returns true iff a new entry was inserted.
611-
void insert(clang::FileID fileId, AbsolutePathRef absPathRef) {
654+
bool insert(clang::FileID fileId, AbsolutePathRef absPathRef) {
612655
ENFORCE(fileId.isValid(),
613656
"invalid FileIDs should be filtered out after preprocessing");
614657
ENFORCE(!absPathRef.asStringView().empty(),
615658
"inserting file with empty absolute path");
616659

617-
auto insertRelPath = [&](RootRelativePathRef projectRootRelPath) -> void {
660+
auto insertRelPath = [&](RootRelativePathRef projectRootRelPath) -> bool {
618661
ENFORCE(!projectRootRelPath.asStringView().empty(),
619662
"file path is unexpectedly equal to project root");
620-
this->map.insert({{fileId}, projectRootRelPath});
663+
return this->map.insert({{fileId}, projectRootRelPath}).second;
621664
};
622665

623666
// In practice, CMake ends up passing paths to project files as well
@@ -640,7 +683,7 @@ class CanonicalPathMap final {
640683
this->projectRootPath.tryMakeRelative(absPathRef)) {
641684
return insertRelPath(optProjectRootRelPath.value());
642685
}
643-
this->map.insert({{fileId}, absPathRef});
686+
return this->map.insert({{fileId}, absPathRef}).second;
644687
}
645688

646689
bool contains(clang::FileID fileId) const {
@@ -684,14 +727,15 @@ class IndexerAstVisitor : public clang::RecursiveASTVisitor<IndexerAstVisitor> {
684727
using Base = RecursiveASTVisitor;
685728

686729
const CanonicalPathMap &pathMap;
687-
FilesToBeIndexedSet toBeIndexed;
730+
FileIdsToBeIndexedSet toBeIndexed;
688731
bool deterministic;
689732

690733
TuIndexer &tuIndexer;
691734

692735
public:
693-
IndexerAstVisitor(const CanonicalPathMap &pathMap, FilesToBeIndexedSet &&map,
694-
bool deterministic, TuIndexer &tuIndexer)
736+
IndexerAstVisitor(const CanonicalPathMap &pathMap,
737+
FileIdsToBeIndexedSet &&map, bool deterministic,
738+
TuIndexer &tuIndexer)
695739
: pathMap(pathMap), toBeIndexed(std::move(map)),
696740
deterministic(deterministic), tuIndexer(tuIndexer) {}
697741

@@ -841,10 +885,10 @@ class IndexerAstConsumer : public clang::SemaConsumer {
841885
// it during the traversal (instead of say flushing state in the dtor
842886
// would arguably be more idiomatic).
843887
SemanticAnalysisJobResult semaResult{};
844-
PathToIdMap pathToIdMap{};
888+
ClangIdLookupMap clangIdLookupMap{};
845889
auto &sourceManager = astContext.getSourceManager();
846890
MacroIndexer macroIndexer{sourceManager};
847-
this->preprocessorWrapper->flushState(semaResult, pathToIdMap,
891+
this->preprocessorWrapper->flushState(semaResult, clangIdLookupMap,
848892
macroIndexer);
849893

850894
EmitIndexJobDetails emitIndexDetails{};
@@ -856,9 +900,10 @@ class IndexerAstConsumer : public clang::SemaConsumer {
856900

857901
CanonicalPathMap canonicalPathMap{this->options.projectRootPath,
858902
this->options.buildRootPath};
859-
FilesToBeIndexedSet toBeIndexed{};
860-
this->computePathsToBeIndexed(astContext, emitIndexDetails, pathToIdMap,
861-
canonicalPathMap, toBeIndexed);
903+
FileIdsToBeIndexedSet toBeIndexed{};
904+
this->computeFileIdsToBeIndexed(astContext, emitIndexDetails,
905+
clangIdLookupMap, canonicalPathMap,
906+
toBeIndexed);
862907

863908
auto getRelativePath =
864909
[&](clang::FileID fileId) -> std::optional<RootRelativePathRef> {
@@ -885,15 +930,15 @@ class IndexerAstConsumer : public clang::SemaConsumer {
885930
}
886931

887932
private:
888-
void computePathsToBeIndexed(const clang::ASTContext &astContext,
889-
const EmitIndexJobDetails &emitIndexDetails,
890-
const PathToIdMap &pathToIdMap,
891-
CanonicalPathMap &canonicalPathMap,
892-
FilesToBeIndexedSet &toBeIndexed) {
933+
void computeFileIdsToBeIndexed(const clang::ASTContext &astContext,
934+
const EmitIndexJobDetails &emitIndexDetails,
935+
const ClangIdLookupMap &clangIdLookupMap,
936+
CanonicalPathMap &canonicalPathMap,
937+
FileIdsToBeIndexedSet &toBeIndexed) {
893938
auto &sourceManager = astContext.getSourceManager();
894939
auto mainFileId = sourceManager.getMainFileID();
895940

896-
canonicalPathMap.populate(pathToIdMap);
941+
canonicalPathMap.populate(clangIdLookupMap);
897942
if (auto *mainFileEntry = sourceManager.getFileEntryForID(mainFileId)) {
898943
if (auto optMainFileAbsPath =
899944
AbsolutePathRef::tryFrom(mainFileEntry->tryGetRealPathName())) {
@@ -906,18 +951,34 @@ class IndexerAstConsumer : public clang::SemaConsumer {
906951
}
907952
}
908953

909-
for (auto &absPath : emitIndexDetails.filesToBeIndexed) {
910-
auto absPathRef = absPath.asRef();
911-
auto it = pathToIdMap.find(absPathRef);
912-
if (it == pathToIdMap.end()) {
954+
for (auto &fileInfo : emitIndexDetails.filesToBeIndexed) {
955+
auto absPathRef = fileInfo.path.asRef();
956+
auto it = clangIdLookupMap.find({fileInfo.hashValue, absPathRef});
957+
if (it == clangIdLookupMap.end()) {
913958
spdlog::debug(
914959
"failed to find clang::FileID for path '{}' received from Driver",
915960
absPathRef.asStringView());
916961
continue;
917962
}
918-
toBeIndexed.insert({it->second});
919-
ENFORCE(canonicalPathMap.contains(it->second),
920-
"missing entry for path: {}", absPath.asStringRef());
963+
auto &fileIdSet = it->second;
964+
ENFORCE(!fileIdSet.empty());
965+
for (auto wrappedFileId : fileIdSet) {
966+
toBeIndexed.insert(wrappedFileId);
967+
// Pick the representative FileID arbitrarily; it doesn't
968+
// matter since the hashes are all the same.
969+
break;
970+
}
971+
std::string message;
972+
auto check = [&](auto wrappedFileId) -> bool {
973+
auto fileId = wrappedFileId.data;
974+
if (canonicalPathMap.contains(fileId)) {
975+
return true;
976+
}
977+
message = fmt::format("missing fileId {} for path: {}",
978+
fileId.getHashValue(), absPathRef.asStringView());
979+
return false;
980+
};
981+
ENFORCE(absl::c_all_of(fileIdSet, check), "{}", message);
921982
}
922983
}
923984
};
@@ -1151,11 +1212,14 @@ Worker::ReceiveStatus Worker::processTranslationUnitAndRespond(
11511212
EmitIndexJobDetails &emitIndexDetails) -> bool {
11521213
callbackInvoked++;
11531214
if (this->options.mode == WorkerMode::Compdb) {
1154-
for (auto &p : semaResult.wellBehavedFiles) {
1155-
emitIndexDetails.filesToBeIndexed.emplace_back(std::move(p.path));
1215+
for (auto &fileInfo : semaResult.wellBehavedFiles) {
1216+
emitIndexDetails.filesToBeIndexed.emplace_back(std::move(fileInfo));
11561217
}
1157-
for (auto &p : semaResult.illBehavedFiles) {
1158-
emitIndexDetails.filesToBeIndexed.emplace_back(std::move(p.path));
1218+
for (auto &fileInfoMulti : semaResult.illBehavedFiles) {
1219+
for (auto &hashValue : fileInfoMulti.hashValues) {
1220+
emitIndexDetails.filesToBeIndexed.emplace_back(
1221+
PreprocessedFileInfo{fileInfoMulti.path, hashValue});
1222+
}
11591223
}
11601224
return true;
11611225
}

0 commit comments

Comments
 (0)