@@ -224,7 +224,37 @@ struct PreprocessorDebugContext {
224
224
std::string tuMainFilePath;
225
225
};
226
226
227
- using PathToIdMap = absl::flat_hash_map<AbsolutePathRef, clang::FileID>;
227
+ // / Similar to \c PreprocessedFileInfo but storing a PathRef instead.
228
+ struct PreprocessedFileInfoRef {
229
+ HashValue hash;
230
+ AbsolutePathRef path;
231
+
232
+ template <typename H>
233
+ friend H AbslHashValue (H h, const PreprocessedFileInfoRef &p) {
234
+ return H::combine (std::move (h), p.hash .rawValue , p.path );
235
+ }
236
+
237
+ DERIVE_EQ_ALL (PreprocessedFileInfoRef)
238
+ };
239
+
240
+ /// Type used to look up the \c clang::FileID values corresponding
241
+ /// to a (HashValue, Path) pair.
242
+ ///
243
+ /// The worker and driver communicate using (HashValue, Path) pairs,
244
+ /// since those are stable across different workers running in parallel.
245
+ ///
246
+ /// However, inside a worker, we'd like to use \c clang::FileID keys if
247
+ /// possible (e.g. storing Documents before indexing), since they are
248
+ /// 32-bit integer values. This map translates driver->worker info
249
+ /// in (HashValue, Path) terms to FileIDs.
250
+ ///
251
+ /// In general, multiple FileIDs may correspond to the same
252
+ /// (HashValue, Path) pair (this happens for well-behaved headers,
253
+ /// cf. \c FileIndexingPlanner), so every such FileID is stored;
254
+ /// consumers pick a representative FileID arbitrarily.
255
+ using ClangIdLookupMap = absl::flat_hash_map<
256
+ PreprocessedFileInfoRef,
257
+ absl::flat_hash_set<llvm_ext::AbslHashAdapter<clang::FileID>>>;
228
258
229
259
class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
230
260
const IndexerPreprocessorOptions &options;
@@ -259,7 +289,8 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
259
289
finishedProcessing (), finishedProcessingMulti(),
260
290
macroIndexer(sourceManager), debugContext(std::move(debugContext)) {}
261
291
262
- void flushState (SemanticAnalysisJobResult &result, PathToIdMap &pathToIdMap,
292
+ void flushState (SemanticAnalysisJobResult &result,
293
+ ClangIdLookupMap &clangIdLookupMap,
263
294
MacroIndexer &macroIndexerOutput) {
264
295
// HACK: It seems like EnterInclude and ExitInclude events are not
265
296
// perfectly balanced in Clang. Work around that.
@@ -321,7 +352,8 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
321
352
auto absPathRef = optPath.value ();
322
353
result.wellBehavedFiles .emplace_back (
323
354
PreprocessedFileInfo{AbsolutePath{absPathRef}, hashValue});
324
- pathToIdMap.insert ({absPathRef, fileId});
355
+ clangIdLookupMap[PreprocessedFileInfoRef{hashValue, absPathRef}]
356
+ .insert ({fileId});
325
357
}
326
358
}
327
359
if (this ->options .deterministic ) {
@@ -343,7 +375,10 @@ class IndexerPreprocessorWrapper final : public clang::PPCallbacks {
343
375
}
344
376
result.illBehavedFiles .emplace_back (PreprocessedFileInfoMulti{
345
377
AbsolutePath{absPathRef}, std::move (hashes)});
346
- pathToIdMap.insert ({absPathRef, fileId});
378
+ for (auto &hash : hashes) {
379
+ clangIdLookupMap[PreprocessedFileInfoRef{hash, absPathRef}].insert (
380
+ {fileId});
381
+ }
347
382
}
348
383
}
349
384
if (this ->options .deterministic ) {
@@ -568,7 +603,7 @@ class IndexerAstVisitor;
568
603
// / Not every file that is part of this project will be part of this map.
569
604
// / For example, if a file+hash was already indexed by another worker,
570
605
// / then one shouldn't call insert(..) for that file.
571
- using FilesToBeIndexedSet =
606
+ using FileIdsToBeIndexedSet =
572
607
absl::flat_hash_set<llvm_ext::AbslHashAdapter<clang::FileID>>;
573
608
574
609
// / Type to track canonical relative paths for FileIDs.
@@ -600,24 +635,32 @@ class CanonicalPathMap final {
600
635
CanonicalPathMap (const CanonicalPathMap &) = delete ;
601
636
CanonicalPathMap &operator =(const CanonicalPathMap &) = delete ;
602
637
603
- void populate (const PathToIdMap &pathToIdMap) {
604
- this ->map .reserve (pathToIdMap.size ());
605
- for (auto [absPathRef, fileId] : pathToIdMap) {
606
- this ->insert (fileId, absPathRef);
638
+ void populate (const ClangIdLookupMap &clangIdLookupMap) {
639
+ this ->map .reserve (clangIdLookupMap.size ());
640
+ for (auto [fileInfo, fileIdSet] : clangIdLookupMap) {
641
+ ENFORCE (!fileIdSet.empty ());
642
+ for (auto wrappedFileId : fileIdSet) {
643
+ bool inserted = this ->insert (wrappedFileId.data , fileInfo.path );
644
+ ENFORCE (
645
+ inserted,
646
+ " there is a 1-1 mapping from FileID -> (path, hash)"
647
+ " so it's unexpected that the FileID {} was inserted for {} already" ,
648
+ wrappedFileId.data .getHashValue (), fileInfo.path .asStringView ());
649
+ }
607
650
}
608
651
}
609
652
610
653
// / Returns true iff a new entry was inserted.
611
- void insert (clang::FileID fileId, AbsolutePathRef absPathRef) {
654
+ bool insert (clang::FileID fileId, AbsolutePathRef absPathRef) {
612
655
ENFORCE (fileId.isValid (),
613
656
" invalid FileIDs should be filtered out after preprocessing" );
614
657
ENFORCE (!absPathRef.asStringView ().empty (),
615
658
" inserting file with empty absolute path" );
616
659
617
- auto insertRelPath = [&](RootRelativePathRef projectRootRelPath) -> void {
660
+ auto insertRelPath = [&](RootRelativePathRef projectRootRelPath) -> bool {
618
661
ENFORCE (!projectRootRelPath.asStringView ().empty (),
619
662
" file path is unexpectedly equal to project root" );
620
- this ->map .insert ({{fileId}, projectRootRelPath});
663
+ return this ->map .insert ({{fileId}, projectRootRelPath}). second ;
621
664
};
622
665
623
666
// In practice, CMake ends up passing paths to project files as well
@@ -640,7 +683,7 @@ class CanonicalPathMap final {
640
683
this ->projectRootPath .tryMakeRelative (absPathRef)) {
641
684
return insertRelPath (optProjectRootRelPath.value ());
642
685
}
643
- this ->map .insert ({{fileId}, absPathRef});
686
+ return this ->map .insert ({{fileId}, absPathRef}). second ;
644
687
}
645
688
646
689
bool contains (clang::FileID fileId) const {
@@ -684,14 +727,15 @@ class IndexerAstVisitor : public clang::RecursiveASTVisitor<IndexerAstVisitor> {
684
727
using Base = RecursiveASTVisitor;
685
728
686
729
const CanonicalPathMap &pathMap;
687
- FilesToBeIndexedSet toBeIndexed;
730
+ FileIdsToBeIndexedSet toBeIndexed;
688
731
bool deterministic;
689
732
690
733
TuIndexer &tuIndexer;
691
734
692
735
public:
693
- IndexerAstVisitor (const CanonicalPathMap &pathMap, FilesToBeIndexedSet &&map,
694
- bool deterministic, TuIndexer &tuIndexer)
736
+ IndexerAstVisitor (const CanonicalPathMap &pathMap,
737
+ FileIdsToBeIndexedSet &&map, bool deterministic,
738
+ TuIndexer &tuIndexer)
695
739
: pathMap(pathMap), toBeIndexed(std::move(map)),
696
740
deterministic (deterministic), tuIndexer(tuIndexer) {}
697
741
@@ -841,10 +885,10 @@ class IndexerAstConsumer : public clang::SemaConsumer {
841
885
// it during the traversal (instead of say flushing state in the dtor
842
886
// would arguably be more idiomatic).
843
887
SemanticAnalysisJobResult semaResult{};
844
- PathToIdMap pathToIdMap {};
888
+ ClangIdLookupMap clangIdLookupMap {};
845
889
auto &sourceManager = astContext.getSourceManager ();
846
890
MacroIndexer macroIndexer{sourceManager};
847
- this ->preprocessorWrapper ->flushState (semaResult, pathToIdMap ,
891
+ this ->preprocessorWrapper ->flushState (semaResult, clangIdLookupMap ,
848
892
macroIndexer);
849
893
850
894
EmitIndexJobDetails emitIndexDetails{};
@@ -856,9 +900,10 @@ class IndexerAstConsumer : public clang::SemaConsumer {
856
900
857
901
CanonicalPathMap canonicalPathMap{this ->options .projectRootPath ,
858
902
this ->options .buildRootPath };
859
- FilesToBeIndexedSet toBeIndexed{};
860
- this ->computePathsToBeIndexed (astContext, emitIndexDetails, pathToIdMap,
861
- canonicalPathMap, toBeIndexed);
903
+ FileIdsToBeIndexedSet toBeIndexed{};
904
+ this ->computeFileIdsToBeIndexed (astContext, emitIndexDetails,
905
+ clangIdLookupMap, canonicalPathMap,
906
+ toBeIndexed);
862
907
863
908
auto getRelativePath =
864
909
[&](clang::FileID fileId) -> std::optional<RootRelativePathRef> {
@@ -885,15 +930,15 @@ class IndexerAstConsumer : public clang::SemaConsumer {
885
930
}
886
931
887
932
private:
888
- void computePathsToBeIndexed (const clang::ASTContext &astContext,
889
- const EmitIndexJobDetails &emitIndexDetails,
890
- const PathToIdMap &pathToIdMap ,
891
- CanonicalPathMap &canonicalPathMap,
892
- FilesToBeIndexedSet &toBeIndexed) {
933
+ void computeFileIdsToBeIndexed (const clang::ASTContext &astContext,
934
+ const EmitIndexJobDetails &emitIndexDetails,
935
+ const ClangIdLookupMap &clangIdLookupMap ,
936
+ CanonicalPathMap &canonicalPathMap,
937
+ FileIdsToBeIndexedSet &toBeIndexed) {
893
938
auto &sourceManager = astContext.getSourceManager ();
894
939
auto mainFileId = sourceManager.getMainFileID ();
895
940
896
- canonicalPathMap.populate (pathToIdMap );
941
+ canonicalPathMap.populate (clangIdLookupMap );
897
942
if (auto *mainFileEntry = sourceManager.getFileEntryForID (mainFileId)) {
898
943
if (auto optMainFileAbsPath =
899
944
AbsolutePathRef::tryFrom (mainFileEntry->tryGetRealPathName ())) {
@@ -906,18 +951,34 @@ class IndexerAstConsumer : public clang::SemaConsumer {
906
951
}
907
952
}
908
953
909
- for (auto &absPath : emitIndexDetails.filesToBeIndexed ) {
910
- auto absPathRef = absPath .asRef ();
911
- auto it = pathToIdMap .find (absPathRef);
912
- if (it == pathToIdMap .end ()) {
954
+ for (auto &fileInfo : emitIndexDetails.filesToBeIndexed ) {
955
+ auto absPathRef = fileInfo. path .asRef ();
956
+ auto it = clangIdLookupMap .find ({fileInfo. hashValue , absPathRef} );
957
+ if (it == clangIdLookupMap .end ()) {
913
958
spdlog::debug (
914
959
" failed to find clang::FileID for path '{}' received from Driver" ,
915
960
absPathRef.asStringView ());
916
961
continue ;
917
962
}
918
- toBeIndexed.insert ({it->second });
919
- ENFORCE (canonicalPathMap.contains (it->second ),
920
- " missing entry for path: {}" , absPath.asStringRef ());
963
+ auto &fileIdSet = it->second ;
964
+ ENFORCE (!fileIdSet.empty ());
965
+ for (auto wrappedFileId : fileIdSet) {
966
+ toBeIndexed.insert (wrappedFileId);
967
+ // Pick the representative FileID arbitrarily; it doesn't
968
+ // matter since the hashes are all the same.
969
+ break ;
970
+ }
971
+ std::string message;
972
+ auto check = [&](auto wrappedFileId) -> bool {
973
+ auto fileId = wrappedFileId.data ;
974
+ if (canonicalPathMap.contains (fileId)) {
975
+ return true ;
976
+ }
977
+ message = fmt::format (" missing fileId {} for path: {}" ,
978
+ fileId.getHashValue (), absPathRef.asStringView ());
979
+ return false ;
980
+ };
981
+ ENFORCE (absl::c_all_of (fileIdSet, check), " {}" , message);
921
982
}
922
983
}
923
984
};
@@ -1151,11 +1212,14 @@ Worker::ReceiveStatus Worker::processTranslationUnitAndRespond(
1151
1212
EmitIndexJobDetails &emitIndexDetails) -> bool {
1152
1213
callbackInvoked++;
1153
1214
if (this ->options .mode == WorkerMode::Compdb) {
1154
- for (auto &p : semaResult.wellBehavedFiles ) {
1155
- emitIndexDetails.filesToBeIndexed .emplace_back (std::move (p. path ));
1215
+ for (auto &fileInfo : semaResult.wellBehavedFiles ) {
1216
+ emitIndexDetails.filesToBeIndexed .emplace_back (std::move (fileInfo ));
1156
1217
}
1157
- for (auto &p : semaResult.illBehavedFiles ) {
1158
- emitIndexDetails.filesToBeIndexed .emplace_back (std::move (p.path ));
1218
+ for (auto &fileInfoMulti : semaResult.illBehavedFiles ) {
1219
+ for (auto &hashValue : fileInfoMulti.hashValues ) {
1220
+ emitIndexDetails.filesToBeIndexed .emplace_back (
1221
+ PreprocessedFileInfo{fileInfoMulti.path , hashValue});
1222
+ }
1159
1223
}
1160
1224
return true ;
1161
1225
}
0 commit comments