Skip to content

Commit 5796298

Browse files
fix: Replace canonical paths with StableFileId (#143)
1 parent bb9f5a5 commit 5796298

File tree

5 files changed

+131
-73
lines changed

5 files changed

+131
-73
lines changed

indexer/Path.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ AbsolutePathRef::makeRelative(AbsolutePathRef longerPath) {
4343
return {};
4444
}
4545

46+
std::optional<std::string_view> AbsolutePathRef::fileName() const {
47+
auto sview = this->asStringView();
48+
auto i = sview.find_last_of(std::filesystem::path::preferred_separator);
49+
if (i == std::string::npos || i == sview.size() - 1) {
50+
return {};
51+
}
52+
return sview.substr(i + 1);
53+
}
54+
4655
AbsolutePathRef AbsolutePath::asRef() const {
4756
auto sv = std::string_view(this->value.data(), this->value.size());
4857
auto optRef = AbsolutePathRef::tryFrom(sv);
@@ -93,6 +102,14 @@ RootRelativePath::RootRelativePath(RootRelativePathRef ref)
93102
ENFORCE(llvm::sys::path::is_relative(this->value));
94103
}
95104

105+
RootRelativePath::RootRelativePath(std::string &&path, RootKind kind)
106+
: value(std::move(path)), _kind(kind) {
107+
if (this->value.empty()) {
108+
return;
109+
}
110+
ENFORCE(llvm::sys::path::is_relative(this->value));
111+
}
112+
96113
void RootRelativePath::replaceExtension(std::string_view newExtension) {
97114
auto it = this->value.rfind('.');
98115
if (it == std::string::npos) {

indexer/Path.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class AbsolutePathRef {
4040

4141
// Basic prefix-based implementation; does not handle lexical normalization.
4242
std::optional<std::string_view> makeRelative(AbsolutePathRef longerPath);
43+
44+
/// Try to get the file name by slicing off the prefix till the last
45+
/// path separator.
46+
std::optional<std::string_view> fileName() const;
4347
};
4448

4549
/// Typically used when referring to paths for files which may or may not
@@ -124,6 +128,8 @@ class RootRelativePath {
124128

125129
explicit RootRelativePath(RootRelativePathRef ref);
126130

131+
RootRelativePath(std::string &&, RootKind);
132+
127133
const std::string &asStringRef() const {
128134
return this->value;
129135
}

indexer/SymbolFormatter.cc

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -136,16 +136,16 @@ std::string_view SymbolFormatter::getMacroSymbol(clang::SourceLocation defLoc) {
136136
auto defPLoc =
137137
this->sourceManager.getPresumedLoc(defLoc, /*UseLineDirectives*/ false);
138138
ENFORCE(defPLoc.isValid());
139-
std::string_view filename;
140-
if (auto optRelPath = this->getCanonicalPath(defPLoc.getFileID())) {
141-
filename = optRelPath->asStringView();
139+
std::string_view filepath;
140+
if (auto optStableFileId = this->getStableFileId(defPLoc.getFileID())) {
141+
filepath = optStableFileId->path.asStringView();
142142
} else {
143-
filename = std::string_view(defPLoc.getFilename());
143+
filepath = std::string_view(defPLoc.getFilename());
144144
}
145145

146146
// Technically, ':' is used by SCIP for <meta>, but using ':'
147147
// here lines up with other situations like compiler errors.
148-
auto name = this->formatTemporary("{}:{}:{}", filename, defPLoc.getLine(),
148+
auto name = this->formatTemporary("{}:{}:{}", filepath, defPLoc.getLine(),
149149
defPLoc.getColumn());
150150
std::string out{};
151151
SymbolBuilder{.packageName = "todo-pkg",
@@ -313,10 +313,10 @@ SymbolFormatter::getTagSymbol(const clang::TagDecl &tagDecl) {
313313
//
314314
// If we don't include the hash, the anonymous structs will end up with
315315
// the same symbol name.
316-
if (auto optRelativePath = this->getCanonicalPath(defFileId)) {
316+
if (auto optStableFileId = this->getStableFileId(defFileId)) {
317317
descriptor.name = this->formatTemporary(
318318
"$anonymous_type_{:x}_{}",
319-
HashValue::forText(optRelativePath->asStringView()), counter);
319+
HashValue::forText(optStableFileId->path.asStringView()), counter);
320320
}
321321
}
322322
if (descriptor.name.empty()) {
@@ -444,23 +444,12 @@ SymbolFormatter::getNamespaceSymbol(const clang::NamespaceDecl &namespaceDecl) {
444444
if (namespaceDecl.isAnonymousNamespace()) {
445445
auto mainFileId = this->sourceManager.getMainFileID();
446446
ENFORCE(mainFileId.isValid());
447-
auto path = this->getCanonicalPath(mainFileId);
448-
if (!path.has_value()) {
449-
// Strictly speaking, this will be suboptimal in the following case:
450-
// - header 1: in source tree (has canonical path), uses
451-
// namespace {..}
452-
// - header 2: in source tree (has canonical path), uses
453-
// namespace {..}
454-
// - generated C++ file: in build tree only (no canonical path), and
455-
// includes header 1 and 2.
456-
// We will not emit a symbol that connects the anonymous namespace
457-
// in header 1 and header 2. However, that is OK, because it is
458-
// unclear how to handle this case anyways, and anonymous namespaces
459-
// are rarely (if ever) used in headers.
460-
return {};
461-
}
447+
auto optStableFileId = this->getStableFileId(mainFileId);
448+
ENFORCE(optStableFileId.has_value(),
449+
"main file always has a valid StableFileId");
450+
auto path = optStableFileId->path;
462451
descriptor.name = this->formatTemporary("$anonymous_namespace_{}",
463-
path->asStringView());
452+
path.asStringView());
464453
} else {
465454
descriptor.name = this->formatTemporary(namespaceDecl);
466455
}

indexer/SymbolFormatter.h

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,6 @@ class raw_ostream;
5353

5454
namespace scip_clang {
5555

56-
using GetCanonicalPath =
57-
absl::FunctionRef<std::optional<RootRelativePathRef>(clang::FileID)>;
58-
5956
/// Type similar to \c scip::Descriptor but carrying \c string_view fields
6057
/// instead to avoid redundant intermediate allocations.
6158
struct DescriptorBuilder {
@@ -93,9 +90,36 @@ struct SymbolBuilder {
9390
const DescriptorBuilder &descriptor);
9491
};
9592

93+
/// An identifier for a file that is stable across indexing runs,
94+
/// represented as a path.
95+
///
96+
/// There are 4 kinds of files:
97+
/// 1. In-project files.
98+
/// 2. Generated files: These are present in the build root,
99+
/// but not in the project root.
100+
/// 3. External files: From libraries (stdlib, SDKs etc.)
101+
/// 4. Magic files: Corresponding to the builtin header,
102+
/// and command-line arguments.
103+
///
104+
/// For 2, 3 and 4, we make up fake paths that are likely to be
105+
/// distinct from actual in-project paths.
106+
///
107+
/// In the future, for cross-repo, a directory layout<->project mapping
108+
/// may be supplied or inferred, which would enable us to use non-synthetic
109+
/// paths for external files.
110+
struct StableFileId {
111+
RootRelativePathRef path;
112+
bool isInProject;
113+
/// Track this for debugging.
114+
bool isSynthetic;
115+
};
116+
117+
using GetStableFileId =
118+
absl::FunctionRef<std::optional<StableFileId>(clang::FileID)>;
119+
96120
class SymbolFormatter final {
97121
const clang::SourceManager &sourceManager;
98-
GetCanonicalPath getCanonicalPath;
122+
GetStableFileId getStableFileId;
99123

100124
// Q: Should we allocate into an arena instead of having standalone
101125
// std::string values here?
@@ -112,8 +136,8 @@ class SymbolFormatter final {
112136

113137
public:
114138
SymbolFormatter(const clang::SourceManager &sourceManager,
115-
GetCanonicalPath getCanonicalPath)
116-
: sourceManager(sourceManager), getCanonicalPath(getCanonicalPath),
139+
GetStableFileId getStableFileId)
140+
: sourceManager(sourceManager), getStableFileId(getStableFileId),
117141
locationBasedCache(), declBasedCache(), scratchBuffer() {}
118142
SymbolFormatter(const SymbolFormatter &) = delete;
119143
SymbolFormatter &operator=(const SymbolFormatter &) = delete;

indexer/Worker.cc

Lines changed: 66 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -616,24 +616,32 @@ using FileIdsToBeIndexedSet =
616616
/// In the future, for cross-repo, a directory layout<->project mapping
617617
/// may be supplied or inferred, which would provide canonical relative
618618
/// paths for more files.
619-
class CanonicalPathMap final {
619+
class StableFileIdMap final {
620+
std::vector<RootRelativePath> storage;
621+
622+
struct ExternalFileEntry {
623+
AbsolutePathRef absPath;
624+
// Points to storage
625+
RootRelativePathRef fakeRelativePath;
626+
};
627+
620628
absl::flat_hash_map<llvm_ext::AbslHashAdapter<clang::FileID>,
621-
std::variant<RootRelativePathRef, AbsolutePathRef>>
629+
std::variant<RootRelativePathRef, ExternalFileEntry>>
622630
map;
623631

624632
const RootPath &projectRootPath;
625633

626634
const RootPath &buildRootPath;
627635

628636
public:
629-
CanonicalPathMap() = delete;
630-
CanonicalPathMap(const RootPath &projectRootPath,
631-
const RootPath &buildRootPath)
637+
StableFileIdMap() = delete;
638+
StableFileIdMap(const RootPath &projectRootPath,
639+
const RootPath &buildRootPath)
632640
: map(), projectRootPath(projectRootPath), buildRootPath(buildRootPath) {}
633-
CanonicalPathMap(CanonicalPathMap &&other) = default;
634-
CanonicalPathMap &operator=(CanonicalPathMap &&) = delete;
635-
CanonicalPathMap(const CanonicalPathMap &) = delete;
636-
CanonicalPathMap &operator=(const CanonicalPathMap &) = delete;
641+
StableFileIdMap(StableFileIdMap &&other) = default;
642+
StableFileIdMap &operator=(StableFileIdMap &&) = delete;
643+
StableFileIdMap(const StableFileIdMap &) = delete;
644+
StableFileIdMap &operator=(const StableFileIdMap &) = delete;
637645

638646
void populate(const ClangIdLookupMap &clangIdLookupMap) {
639647
this->map.reserve(clangIdLookupMap.size());
@@ -683,28 +691,40 @@ class CanonicalPathMap final {
683691
this->projectRootPath.tryMakeRelative(absPathRef)) {
684692
return insertRelPath(optProjectRootRelPath.value());
685693
}
686-
return this->map.insert({{fileId}, absPathRef}).second;
694+
auto optFileName = absPathRef.fileName();
695+
ENFORCE(optFileName.has_value(),
696+
"Clang returned file path {} without a file name",
697+
absPathRef.asStringView());
698+
this->storage.emplace_back(
699+
fmt::format("<external>/{}/{}",
700+
HashValue::forText(absPathRef.asStringView()),
701+
*optFileName),
702+
RootKind::Build); // fake value to satisfy the RootRelativePathRef API
703+
return this->map
704+
.insert({{fileId},
705+
ExternalFileEntry{absPathRef, this->storage.back().asRef()}})
706+
.second;
687707
}
688708

689709
bool contains(clang::FileID fileId) const {
690710
return this->map.contains({fileId});
691711
}
692712

693-
/// See the doc comment on \c CanonicalPathMap
694-
std::optional<RootRelativePathRef>
695-
tryGetCanonicalPath(clang::FileID fileId) const {
713+
/// See the doc comment on \c StableFileIdMap
714+
std::optional<StableFileId> getStableFileId(clang::FileID fileId) const {
696715
auto it = this->map.find({fileId});
697-
// TODO: Some headers like <built-in> and those in the Xcode SDK
698-
// are not getting populated here. That's fine, because they are
699-
// out-of-project anyways, but it seems weird that the populate(...)
700-
// call isn't adding them.
701716
if (it == this->map.end()) {
702717
return {};
703718
}
704719
if (std::holds_alternative<RootRelativePathRef>(it->second)) {
705-
return std::get<RootRelativePathRef>(it->second);
720+
return StableFileId{.path = std::get<RootRelativePathRef>(it->second),
721+
.isInProject = true,
722+
.isSynthetic = false};
706723
}
707-
return {};
724+
return StableFileId{
725+
.path = std::get<ExternalFileEntry>(it->second).fakeRelativePath,
726+
.isInProject = false,
727+
.isSynthetic = true};
708728
}
709729
};
710730

@@ -726,17 +746,17 @@ struct PartialScipIndex {
726746
class IndexerAstVisitor : public clang::RecursiveASTVisitor<IndexerAstVisitor> {
727747
using Base = RecursiveASTVisitor;
728748

729-
const CanonicalPathMap &pathMap;
749+
const StableFileIdMap &stableFileIdMap;
730750
FileIdsToBeIndexedSet toBeIndexed;
731751
bool deterministic;
732752

733753
TuIndexer &tuIndexer;
734754

735755
public:
736-
IndexerAstVisitor(const CanonicalPathMap &pathMap,
737-
FileIdsToBeIndexedSet &&map, bool deterministic,
756+
IndexerAstVisitor(const StableFileIdMap &pathMap,
757+
FileIdsToBeIndexedSet &&toBeIndexed, bool deterministic,
738758
TuIndexer &tuIndexer)
739-
: pathMap(pathMap), toBeIndexed(std::move(map)),
759+
: stableFileIdMap(pathMap), toBeIndexed(std::move(toBeIndexed)),
740760
deterministic(deterministic), tuIndexer(tuIndexer) {}
741761

742762
// See clang/include/clang/Basic/DeclNodes.td for list of declarations.
@@ -807,13 +827,15 @@ class IndexerAstVisitor : public clang::RecursiveASTVisitor<IndexerAstVisitor> {
807827

808828
void writeIndex(SymbolFormatter &&symbolFormatter, MacroIndexer &&macroIndex,
809829
scip::Index &scipIndex) {
810-
std::vector<std::pair<RootRelativePathRef, clang::FileID>> relativePaths;
811-
for (auto wrappedFileId : toBeIndexed) {
812-
// We may have been asked to index files outside the project,
813-
// which lack canonical paths.
814-
if (auto optRelPathRef =
815-
pathMap.tryGetCanonicalPath(wrappedFileId.data)) {
816-
relativePaths.push_back({optRelPathRef.value(), wrappedFileId.data});
830+
std::vector<std::pair<RootRelativePathRef, clang::FileID>>
831+
indexedProjectFiles;
832+
for (auto wrappedFileId : this->toBeIndexed) {
833+
if (auto optStableFileId =
834+
this->stableFileIdMap.getStableFileId(wrappedFileId.data)) {
835+
if (optStableFileId->isInProject) {
836+
indexedProjectFiles.emplace_back(optStableFileId->path,
837+
wrappedFileId.data);
838+
}
817839
}
818840
}
819841
if (this->deterministic) {
@@ -824,10 +846,10 @@ class IndexerAstVisitor : public clang::RecursiveASTVisitor<IndexerAstVisitor> {
824846
p1.first.asStringView());
825847
return cmp == std::strong_ordering::less;
826848
};
827-
absl::c_sort(relativePaths, comparePaths);
849+
absl::c_sort(indexedProjectFiles, comparePaths);
828850
}
829851

830-
for (auto [relPathRef, fileId] : relativePaths) {
852+
for (auto [relPathRef, fileId] : indexedProjectFiles) {
831853
scip::Document document;
832854
auto relPath = relPathRef.asStringView();
833855
document.set_relative_path(relPath.data(), relPath.size());
@@ -898,22 +920,22 @@ class IndexerAstConsumer : public clang::SemaConsumer {
898920
return;
899921
}
900922

901-
CanonicalPathMap canonicalPathMap{this->options.projectRootPath,
902-
this->options.buildRootPath};
923+
StableFileIdMap stableFileIdMap{this->options.projectRootPath,
924+
this->options.buildRootPath};
903925
FileIdsToBeIndexedSet toBeIndexed{};
904926
this->computeFileIdsToBeIndexed(astContext, emitIndexDetails,
905-
clangIdLookupMap, canonicalPathMap,
927+
clangIdLookupMap, stableFileIdMap,
906928
toBeIndexed);
907929

908-
auto getRelativePath =
909-
[&](clang::FileID fileId) -> std::optional<RootRelativePathRef> {
910-
return canonicalPathMap.tryGetCanonicalPath(fileId);
930+
auto getStableFileId =
931+
[&](clang::FileID fileId) -> std::optional<StableFileId> {
932+
return stableFileIdMap.getStableFileId(fileId);
911933
};
912-
SymbolFormatter symbolFormatter{sourceManager, getRelativePath};
934+
SymbolFormatter symbolFormatter{sourceManager, getStableFileId};
913935
TuIndexer tuIndexer{sourceManager, this->sema->getLangOpts(),
914936
this->sema->getASTContext(), symbolFormatter};
915937

916-
IndexerAstVisitor visitor{canonicalPathMap, std::move(toBeIndexed),
938+
IndexerAstVisitor visitor{stableFileIdMap, std::move(toBeIndexed),
917939
this->options.deterministic, tuIndexer};
918940
visitor.TraverseAST(astContext);
919941

@@ -933,16 +955,16 @@ class IndexerAstConsumer : public clang::SemaConsumer {
933955
void computeFileIdsToBeIndexed(const clang::ASTContext &astContext,
934956
const EmitIndexJobDetails &emitIndexDetails,
935957
const ClangIdLookupMap &clangIdLookupMap,
936-
CanonicalPathMap &canonicalPathMap,
958+
StableFileIdMap &stableFileIdMap,
937959
FileIdsToBeIndexedSet &toBeIndexed) {
938960
auto &sourceManager = astContext.getSourceManager();
939961
auto mainFileId = sourceManager.getMainFileID();
940962

941-
canonicalPathMap.populate(clangIdLookupMap);
963+
stableFileIdMap.populate(clangIdLookupMap);
942964
if (auto *mainFileEntry = sourceManager.getFileEntryForID(mainFileId)) {
943965
if (auto optMainFileAbsPath =
944966
AbsolutePathRef::tryFrom(mainFileEntry->tryGetRealPathName())) {
945-
canonicalPathMap.insert(mainFileId, optMainFileAbsPath.value());
967+
stableFileIdMap.insert(mainFileId, optMainFileAbsPath.value());
946968
toBeIndexed.insert({mainFileId});
947969
} else {
948970
spdlog::debug(
@@ -971,7 +993,7 @@ class IndexerAstConsumer : public clang::SemaConsumer {
971993
std::string message;
972994
auto check = [&](auto wrappedFileId) -> bool {
973995
auto fileId = wrappedFileId.data;
974-
if (canonicalPathMap.contains(fileId)) {
996+
if (stableFileIdMap.contains(fileId)) {
975997
return true;
976998
}
977999
message = fmt::format("missing fileId {} for path: {}",

0 commit comments

Comments
 (0)