Skip to content

Commit f9a5cb7

Browse files
refactor: Simplify handling for ill-behaved headers (#164)
1 parent dd82991 commit f9a5cb7

File tree

2 files changed

+95
-119
lines changed

2 files changed

+95
-119
lines changed

docs/SourceLocation.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ since the main purpose of this type
115115
is to be an ID for [`SLocEntry`](#slocentry) values,
116116
which contain more information.
117117

118+
This also means that, if a single header is included multiple times,
119+
regardless of whether it expands the same way or not, there will
120+
be two different `FileID` values for it, not one.
121+
118122
- A valid `FileID` always has a corresponding `SLocEntry`.
119123
- Since a `FileID` may not actually represent a source file,
120124
it is possible that `sourceManager.getFileEntryForID`

indexer/Worker.cc

Lines changed: 91 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -225,19 +225,6 @@ struct PreprocessorDebugContext {
225225
std::string tuMainFilePath;
226226
};
227227

228-
/// Similar to \c PreprocessedFileInfo but storing a PathRef instead.
229-
struct PreprocessedFileInfoRef {
230-
HashValue hash;
231-
AbsolutePathRef path;
232-
233-
template <typename H>
234-
friend H AbslHashValue(H h, const PreprocessedFileInfoRef &p) {
235-
return H::combine(std::move(h), p.hash.rawValue, p.path);
236-
}
237-
238-
DERIVE_EQ_ALL(PreprocessedFileInfoRef)
239-
};
240-
241228
/// Type to retrieve information about the \c clang::FileID corresponding
242229
/// to a (HashValue, Path) pair.
243230
///
@@ -253,30 +240,62 @@ struct PreprocessedFileInfoRef {
253240
/// the same (HashValue, Path) pair (this happens for well-behaved
254241
/// headers, c.f. \c FileIndexingPlanner); the representative FileID
255242
/// is chosen arbitrarily.
256-
using ClangIdLookupMap = absl::flat_hash_map<
257-
PreprocessedFileInfoRef,
258-
absl::flat_hash_set<llvm_ext::AbslHashAdapter<clang::FileID>>>;
243+
class ClangIdLookupMap {
244+
struct Value {
245+
absl::flat_hash_map<HashValue, clang::FileID> hashToFileId;
246+
};
247+
248+
absl::flat_hash_map<AbsolutePathRef, std::shared_ptr<Value>> impl;
249+
250+
public:
251+
ClangIdLookupMap() = default;
252+
253+
void insert(AbsolutePathRef absPathRef, HashValue hashValue,
254+
clang::FileID fileId) {
255+
auto it = this->impl.find(absPathRef);
256+
if (it == this->impl.end()) {
257+
this->impl.emplace(absPathRef,
258+
std::make_shared<Value>(
259+
Value{.hashToFileId = {{hashValue, fileId}}}));
260+
return;
261+
}
262+
// A single representative FileID is sufficient.
263+
it->second->hashToFileId[hashValue] = fileId;
264+
}
265+
266+
void forEachPathAndHash(
267+
absl::FunctionRef<void(AbsolutePathRef, const absl::flat_hash_map<
268+
HashValue, clang::FileID> &)>
269+
callback) const {
270+
for (auto &[absPathRef, valuePtr] : this->impl) {
271+
ENFORCE(!valuePtr->hashToFileId.empty(),
272+
"Shouldn't have stored empty maps");
273+
callback(absPathRef, valuePtr->hashToFileId);
274+
}
275+
}
276+
277+
std::optional<clang::FileID> lookup(AbsolutePathRef absPathRef,
278+
HashValue hashValue) const {
279+
auto it = this->impl.find(absPathRef);
280+
if (it == this->impl.end()) {
281+
return {};
282+
}
283+
auto hashIt = it->second->hashToFileId.find(hashValue);
284+
if (hashIt == it->second->hashToFileId.end()) {
285+
return {};
286+
}
287+
return hashIt->second;
288+
}
289+
};
259290

260291
class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
261292
const IndexerPreprocessorOptions &options;
262293

263294
clang::SourceManager &sourceManager;
264295
IndexerPreprocessorStack stack;
265296

266-
struct MultiHashValue {
267-
const HashValue hashValue;
268-
bool isMultiple;
269-
};
270-
// Headers which we've seen only expand in a single way.
271-
// The extra bit inside the MultiHashValue struct indicates
272-
// if should look in the finishedProcessingMulti map instead.
273-
absl::flat_hash_map<llvm_ext::AbslHashAdapter<clang::FileID>, MultiHashValue>
297+
absl::flat_hash_map<llvm_ext::AbslHashAdapter<clang::FileID>, HashValue>
274298
finishedProcessing;
275-
// Headers which expand in at least 2 different ways.
276-
// The values have size() >= 2.
277-
absl::flat_hash_map<llvm_ext::AbslHashAdapter<clang::FileID>,
278-
absl::flat_hash_set<HashValue>>
279-
finishedProcessingMulti;
280299

281300
MacroIndexer macroIndexer;
282301

@@ -287,8 +306,8 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
287306
const IndexerPreprocessorOptions &options,
288307
PreprocessorDebugContext &&debugContext)
289308
: options(options), sourceManager(sourceManager), stack(),
290-
finishedProcessing(), finishedProcessingMulti(),
291-
macroIndexer(sourceManager), debugContext(std::move(debugContext)) {}
309+
finishedProcessing(), macroIndexer(sourceManager),
310+
debugContext(std::move(debugContext)) {}
292311

293312
void flushState(SemanticAnalysisJobResult &result,
294313
ClangIdLookupMap &clangIdLookupMap,
@@ -342,49 +361,34 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
342361
}
343362
return optAbsPath;
344363
};
345-
{
346-
for (auto [wrappedFileId, hashInfo] : this->finishedProcessing) {
347-
if (hashInfo.isMultiple) {
348-
continue;
349-
}
350-
auto hashValue = hashInfo.hashValue;
351-
auto fileId = wrappedFileId.data;
352-
if (auto optPath = getAbsPath(fileId)) {
353-
auto absPathRef = optPath.value();
354-
result.wellBehavedFiles.emplace_back(
355-
PreprocessedFileInfo{AbsolutePath{absPathRef}, hashValue});
356-
clangIdLookupMap[PreprocessedFileInfoRef{hashValue, absPathRef}]
357-
.insert({fileId});
358-
}
359-
}
360-
if (this->options.deterministic) {
361-
absl::c_sort(result.wellBehavedFiles);
364+
for (auto [wrappedFileId, hashValue] : this->finishedProcessing) {
365+
auto fileId = wrappedFileId.data;
366+
if (auto optPath = getAbsPath(fileId)) {
367+
auto absPathRef = optPath.value();
368+
clangIdLookupMap.insert(absPathRef, hashValue, fileId);
362369
}
363370
}
364-
{
365-
auto &fileHashes = this->finishedProcessingMulti;
366-
for (auto it = fileHashes.begin(), end = fileHashes.end(); it != end;
367-
++it) {
368-
auto fileId = it->first.data;
369-
if (auto optPath = getAbsPath(fileId)) {
370-
auto absPathRef = optPath.value();
371-
std::vector<HashValue> hashes{};
372-
hashes.reserve(it->second.size());
373-
absl::c_move(it->second, std::back_inserter(hashes));
374-
if (this->options.deterministic) {
375-
absl::c_sort(hashes);
376-
}
377-
result.illBehavedFiles.emplace_back(PreprocessedFileInfoMulti{
378-
AbsolutePath{absPathRef}, std::move(hashes)});
379-
for (auto &hash : hashes) {
380-
clangIdLookupMap[PreprocessedFileInfoRef{hash, absPathRef}].insert(
381-
{fileId});
371+
clangIdLookupMap.forEachPathAndHash(
372+
[&](AbsolutePathRef absPathRef,
373+
const absl::flat_hash_map<HashValue, clang::FileID> &map) {
374+
if (map.size() == 1) {
375+
for (auto &[hashValue, fileId] : map) {
376+
result.wellBehavedFiles.emplace_back(
377+
PreprocessedFileInfo{AbsolutePath{absPathRef}, hashValue});
378+
}
379+
} else {
380+
std::vector<HashValue> hashes;
381+
hashes.reserve(map.size());
382+
for (auto &[hashValue, fileId] : map) {
383+
hashes.push_back(hashValue);
384+
}
385+
result.illBehavedFiles.emplace_back(PreprocessedFileInfoMulti{
386+
AbsolutePath{absPathRef}, std::move(hashes)});
382387
}
383-
}
384-
}
385-
if (this->options.deterministic) {
386-
absl::c_sort(result.illBehavedFiles);
387-
}
388+
});
389+
if (this->options.deterministic) {
390+
absl::c_sort(result.wellBehavedFiles);
391+
absl::c_sort(result.illBehavedFiles);
388392
}
389393
}
390394

@@ -443,23 +447,10 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
443447
}
444448

445449
auto key = llvm_ext::AbslHashAdapter<clang::FileID>{fileInfo.fileId};
446-
auto it = this->finishedProcessing.find(key);
447450
auto [hashValue, history] = fileInfo.hashValueBuilder.finish();
451+
auto it = this->finishedProcessing.find(key);
448452
if (it == this->finishedProcessing.end()) {
449-
this->finishedProcessing.insert({key, MultiHashValue{hashValue, false}});
450-
} else if (it->second.isMultiple) {
451-
auto itMulti = this->finishedProcessingMulti.find(key);
452-
ENFORCE(itMulti != this->finishedProcessingMulti.end(),
453-
"isMultiple = true but key missing from finishedProcessingMulti");
454-
itMulti->second.insert(hashValue);
455-
} else if (it->second.hashValue != hashValue) {
456-
it->second.isMultiple = true;
457-
auto oldHash = it->second.hashValue;
458-
auto newHash = hashValue;
459-
auto [_, inserted] =
460-
this->finishedProcessingMulti.insert({key, {oldHash, newHash}});
461-
ENFORCE(inserted, "isMultiple = false, but key already present is "
462-
"finishedProcessingMulti");
453+
this->finishedProcessing.insert({key, hashValue});
463454
}
464455
if (history) {
465456
ENFORCE(this->options.recorder,
@@ -645,18 +636,17 @@ class StableFileIdMap final {
645636
StableFileIdMap &operator=(const StableFileIdMap &) = delete;
646637

647638
void populate(const ClangIdLookupMap &clangIdLookupMap) {
648-
this->map.reserve(clangIdLookupMap.size());
649-
for (auto [fileInfo, fileIdSet] : clangIdLookupMap) {
650-
ENFORCE(!fileIdSet.empty());
651-
for (auto wrappedFileId : fileIdSet) {
652-
bool inserted = this->insert(wrappedFileId.data, fileInfo.path);
653-
ENFORCE(
654-
inserted,
655-
"there is a 1-1 mapping from FileID -> (path, hash)"
656-
"so it's unexpected that the FileID {} was inserted for {} already",
657-
wrappedFileId.data.getHashValue(), fileInfo.path.asStringView());
658-
}
659-
}
639+
clangIdLookupMap.forEachPathAndHash( // force formatting break
640+
[&](AbsolutePathRef absPathRef,
641+
const absl::flat_hash_map<HashValue, clang::FileID> &map) {
642+
for (auto &[hash, fileId] : map) {
643+
bool inserted = this->insert(fileId, absPathRef);
644+
ENFORCE(inserted,
645+
"there is a 1-1 mapping from FileID -> (path, hash) so the"
646+
" FileID {} for {} should not have been inserted earlier",
647+
fileId.getHashValue(), absPathRef.asStringView());
648+
}
649+
});
660650
}
661651

662652
/// Returns true iff a new entry was inserted.
@@ -965,32 +955,14 @@ class IndexerAstConsumer : public clang::SemaConsumer {
965955

966956
for (auto &fileInfo : emitIndexDetails.filesToBeIndexed) {
967957
auto absPathRef = fileInfo.path.asRef();
968-
auto it = clangIdLookupMap.find({fileInfo.hashValue, absPathRef});
969-
if (it == clangIdLookupMap.end()) {
958+
auto optFileId = clangIdLookupMap.lookup(absPathRef, fileInfo.hashValue);
959+
if (!optFileId.has_value()) {
970960
spdlog::debug(
971961
"failed to find clang::FileID for path '{}' received from Driver",
972962
absPathRef.asStringView());
973963
continue;
974964
}
975-
auto &fileIdSet = it->second;
976-
ENFORCE(!fileIdSet.empty());
977-
for (auto wrappedFileId : fileIdSet) {
978-
toBeIndexed.insert(wrappedFileId);
979-
// Pick the representative FileID arbitrarily; it doesn't
980-
// matter since the hashes are all the same.
981-
break;
982-
}
983-
std::string message;
984-
auto check = [&](auto wrappedFileId) -> bool {
985-
auto fileId = wrappedFileId.data;
986-
if (stableFileIdMap.contains(fileId)) {
987-
return true;
988-
}
989-
message = fmt::format("missing fileId {} for path: {}",
990-
fileId.getHashValue(), absPathRef.asStringView());
991-
return false;
992-
};
993-
ENFORCE(absl::c_all_of(fileIdSet, check), "{}", message);
965+
toBeIndexed.insert({*optFileId});
994966
}
995967
}
996968
};

0 commit comments

Comments
 (0)