Skip to content

Commit e3e7393

Browse files
authored
[Clang] [Diagnostics] Simplify filenames that contain '..' (#143520)
This can significantly shorten file paths to standard library headers, e.g. on my system, `<ranges>` is currently printed as ```console /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/ranges ``` but with this change, we instead print ```console /usr/include/c++/15/ranges ``` This is of course just a heuristic; there are paths that would get longer as a result of this, so we use whichever path ends up being shorter. @AaronBallman pointed out that this might be problematic for network file systems since path resolution might take a while, so this is enabled only for paths that are part of a local filesystem—though not on Windows since there we noticed that the check itself is slow. The file names are cached in `SourceManager`.
1 parent c885005 commit e3e7393

File tree

7 files changed

+115
-69
lines changed

7 files changed

+115
-69
lines changed

clang-tools-extra/test/clang-tidy/infrastructure/file-filter-symlinks.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='header\.h' %s -- -I %t 2>&1 | FileCheck --check-prefix=CHECK_HEADER %s
1313
// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='header\.h' -quiet %s -- -I %t 2>&1 | FileCheck --check-prefix=CHECK_HEADER %s
1414

15-
// Check that `-header-filter` operates on the same file paths as paths in
16-
// diagnostics printed by ClangTidy.
15+
// `-header-filter` operates on the actual file path that the user provided in
16+
// the #include directive; however, Clang's path name simplification causes the
17+
// path to be printed in canonicalised form here.
1718
#include "dir1/dir2/../header_alias.h"
18-
// CHECK_HEADER_ALIAS: dir1/dir2/../header_alias.h:1:11: warning: single-argument constructors
19+
// CHECK_HEADER_ALIAS: dir1/header.h:1:11: warning: single-argument constructors
1920
// CHECK_HEADER-NOT: warning:

clang/include/clang/Basic/SourceManager.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
824824

825825
mutable std::unique_ptr<SrcMgr::SLocEntry> FakeSLocEntryForRecovery;
826826

827+
/// Cache for filenames used in diagnostics. See 'getNameForDiagnostic()'.
828+
mutable llvm::StringMap<StringRef> DiagNames;
829+
830+
/// Allocator for absolute/short names.
831+
mutable llvm::BumpPtrAllocator DiagNameAlloc;
832+
827833
/// Lazily computed map of macro argument chunks to their expanded
828834
/// source location.
829835
using MacroArgsMap = std::map<unsigned, SourceLocation>;
@@ -1848,6 +1854,16 @@ class SourceManager : public RefCountedBase<SourceManager> {
18481854
/// \return Location of the top-level macro caller.
18491855
SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const;
18501856

1857+
/// Retrieve the name of a file suitable for diagnostics.
1858+
// FIXME: Passing in the DiagnosticOptions here is a workaround for the
1859+
// fact that installapi does some weird things with DiagnosticsEngines,
1860+
// which causes the 'Diag' member of SourceManager (or at least the
1861+
// DiagnosticsOptions member thereof) to be a dangling reference
1862+
// sometimes. We should probably fix that or decouple the two classes
1863+
// to avoid this issue entirely.
1864+
StringRef getNameForDiagnostic(StringRef Filename,
1865+
const DiagnosticOptions &Opts) const;
1866+
18511867
private:
18521868
friend class ASTReader;
18531869
friend class ASTWriter;

clang/lib/Basic/SourceManager.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2390,3 +2390,75 @@ SourceManagerForFile::SourceManagerForFile(StringRef FileName,
23902390
assert(ID.isValid());
23912391
SourceMgr->setMainFileID(ID);
23922392
}
2393+
2394+
StringRef
2395+
SourceManager::getNameForDiagnostic(StringRef Filename,
2396+
const DiagnosticOptions &Opts) const {
2397+
OptionalFileEntryRef File = getFileManager().getOptionalFileRef(Filename);
2398+
if (!File)
2399+
return Filename;
2400+
2401+
bool SimplifyPath = [&] {
2402+
if (Opts.AbsolutePath)
2403+
return true;
2404+
2405+
// Try to simplify paths that contain '..' in any case since paths to
2406+
// standard library headers especially tend to get quite long otherwise.
2407+
// Only do that for local filesystems though to avoid slowing down
2408+
// compilation too much.
2409+
if (!File->getName().contains(".."))
2410+
return false;
2411+
2412+
// If we're not on Windows, check if we're on a network file system and
2413+
// avoid simplifying the path in that case since that can be slow. On
2414+
// Windows, the check for a local filesystem is already slow, so skip it.
2415+
#ifndef _WIN32
2416+
if (!llvm::sys::fs::is_local(File->getName()))
2417+
return false;
2418+
#endif
2419+
2420+
return true;
2421+
}();
2422+
2423+
if (!SimplifyPath)
2424+
return Filename;
2425+
2426+
// This may involve computing canonical names, so cache the result.
2427+
StringRef &CacheEntry = DiagNames[Filename];
2428+
if (!CacheEntry.empty())
2429+
return CacheEntry;
2430+
2431+
// We want to print a simplified absolute path, i. e. without "dots".
2432+
//
2433+
// The hardest part here are the paths like "<part1>/<link>/../<part2>".
2434+
// On Unix-like systems, we cannot just collapse "<link>/..", because
2435+
// paths are resolved sequentially, and, thereby, the path
2436+
// "<part1>/<part2>" may point to a different location. That is why
2437+
// we use FileManager::getCanonicalName(), which expands all indirections
2438+
// with llvm::sys::fs::real_path() and caches the result.
2439+
//
2440+
// On the other hand, it would be better to preserve as much of the
2441+
// original path as possible, because that helps a user to recognize it.
2442+
// real_path() expands all links, which sometimes too much. Luckily,
2443+
// on Windows we can just use llvm::sys::path::remove_dots(), because,
2444+
// on that system, both aforementioned paths point to the same place.
2445+
SmallString<256> TempBuf;
2446+
#ifdef _WIN32
2447+
TempBuf = File->getName();
2448+
llvm::sys::fs::make_absolute(TempBuf);
2449+
llvm::sys::path::native(TempBuf);
2450+
llvm::sys::path::remove_dots(TempBuf, /* remove_dot_dot */ true);
2451+
#else
2452+
TempBuf = getFileManager().getCanonicalName(*File);
2453+
#endif
2454+
2455+
// In some cases, the resolved path may actually end up being longer (e.g.
2456+
// if it was originally a relative path), so just retain whichever one
2457+
// ends up being shorter.
2458+
if (!Opts.AbsolutePath && TempBuf.size() > Filename.size())
2459+
CacheEntry = Filename;
2460+
else
2461+
CacheEntry = TempBuf.str().copy(DiagNameAlloc);
2462+
2463+
return CacheEntry;
2464+
}

clang/lib/Frontend/SARIFDiagnostic.cpp

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -163,36 +163,7 @@ SARIFDiagnostic::addDiagnosticLevelToRule(SarifRule Rule,
163163

164164
llvm::StringRef SARIFDiagnostic::emitFilename(StringRef Filename,
165165
const SourceManager &SM) {
166-
if (DiagOpts.AbsolutePath) {
167-
auto File = SM.getFileManager().getOptionalFileRef(Filename);
168-
if (File) {
169-
// We want to print a simplified absolute path, i. e. without "dots".
170-
//
171-
// The hardest part here are the paths like "<part1>/<link>/../<part2>".
172-
// On Unix-like systems, we cannot just collapse "<link>/..", because
173-
// paths are resolved sequentially, and, thereby, the path
174-
// "<part1>/<part2>" may point to a different location. That is why
175-
// we use FileManager::getCanonicalName(), which expands all indirections
176-
// with llvm::sys::fs::real_path() and caches the result.
177-
//
178-
// On the other hand, it would be better to preserve as much of the
179-
// original path as possible, because that helps a user to recognize it.
180-
// real_path() expands all links, which is sometimes too much. Luckily,
181-
// on Windows we can just use llvm::sys::path::remove_dots(), because,
182-
// on that system, both aforementioned paths point to the same place.
183-
#ifdef _WIN32
184-
SmallString<256> TmpFilename = File->getName();
185-
llvm::sys::fs::make_absolute(TmpFilename);
186-
llvm::sys::path::native(TmpFilename);
187-
llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
188-
Filename = StringRef(TmpFilename.data(), TmpFilename.size());
189-
#else
190-
Filename = SM.getFileManager().getCanonicalName(*File);
191-
#endif
192-
}
193-
}
194-
195-
return Filename;
166+
return SM.getNameForDiagnostic(Filename, DiagOpts);
196167
}
197168

198169
/// Print out the file/line/column information and include trace.

clang/lib/Frontend/TextDiagnostic.cpp

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -738,39 +738,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
738738
}
739739

740740
void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
741-
#ifdef _WIN32
742-
SmallString<4096> TmpFilename;
743-
#endif
744-
if (DiagOpts.AbsolutePath) {
745-
auto File = SM.getFileManager().getOptionalFileRef(Filename);
746-
if (File) {
747-
// We want to print a simplified absolute path, i. e. without "dots".
748-
//
749-
// The hardest part here are the paths like "<part1>/<link>/../<part2>".
750-
// On Unix-like systems, we cannot just collapse "<link>/..", because
751-
// paths are resolved sequentially, and, thereby, the path
752-
// "<part1>/<part2>" may point to a different location. That is why
753-
// we use FileManager::getCanonicalName(), which expands all indirections
754-
// with llvm::sys::fs::real_path() and caches the result.
755-
//
756-
// On the other hand, it would be better to preserve as much of the
757-
// original path as possible, because that helps a user to recognize it.
758-
// real_path() expands all links, which sometimes too much. Luckily,
759-
// on Windows we can just use llvm::sys::path::remove_dots(), because,
760-
// on that system, both aforementioned paths point to the same place.
761-
#ifdef _WIN32
762-
TmpFilename = File->getName();
763-
llvm::sys::fs::make_absolute(TmpFilename);
764-
llvm::sys::path::native(TmpFilename);
765-
llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
766-
Filename = StringRef(TmpFilename.data(), TmpFilename.size());
767-
#else
768-
Filename = SM.getFileManager().getCanonicalName(*File);
769-
#endif
770-
}
771-
}
772-
773-
OS << Filename;
741+
OS << SM.getNameForDiagnostic(Filename, DiagOpts);
774742
}
775743

776744
/// Print out the file/line/column information and include trace.

clang/test/Frontend/absolute-paths.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88

99
#include "absolute-paths.h"
1010

11-
// Check whether the diagnostic from the header above includes the dummy
12-
// directory in the path.
13-
// NORMAL: SystemHeaderPrefix
11+
// Check that the bogus prefix we added is stripped out even if absolute paths
12+
// are disabled.
13+
// NORMAL-NOT: SystemHeaderPrefix
1414
// ABSOLUTE-NOT: SystemHeaderPrefix
1515
// CHECK: warning: non-void function does not return a value
1616

clang/test/Frontend/simplify-paths.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// REQUIRES: shell
2+
3+
// RUN: rm -rf %t
4+
// RUN: mkdir -p %t/a/b/
5+
// RUN: echo 'void x;' > %t/test.h
6+
// RUN: echo 'const void x;' > %t/header_with_a_really_long_name.h
7+
// RUN: ln -s %t/header_with_a_really_long_name.h %t/a/shorter_name.h
8+
//
9+
// RUN: %clang_cc1 -fsyntax-only -I %t %s 2> %t/output.txt || true
10+
// RUN: cat %t/output.txt | FileCheck %s
11+
12+
// Check that we strip '..' by canonicalising the path...
13+
#include "a/b/../../test.h"
14+
// CHECK: simplify-paths.c.tmp/test.h:1:6: error: variable has incomplete type 'void'
15+
16+
// ... but only if the resulting path is actually shorter.
17+
#include "a/b/../shorter_name.h"
18+
// CHECK: simplify-paths.c.tmp/a/b/../shorter_name.h:1:12: error: variable has incomplete type 'const void'

0 commit comments

Comments
 (0)