Skip to content

Commit d24e531

Browse files
committed
[DTLTO] Normalize Windows 8.3 short paths to long form
8.3-style short paths (e.g., C:\PROGRA~1) can cause inconsistencies or failures on remote machines during distributed linking. Such paths may not resolve correctly or may differ between systems. We anticipate that many/all distribution systems will have difficulties with such paths. This patch ensures that all Windows paths used for DTLTO in ELF LLD are converted to their long form using GetLongPathNameW. This avoids ambiguity and improves portability across most distribution systems.
1 parent 1680a58 commit d24e531

File tree

7 files changed

+368
-32
lines changed

7 files changed

+368
-32
lines changed

cross-project-tests/dtlto/path.test

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# REQUIRES: x86-registered-target,ld.lld,llvm-ar
2+
3+
# Check that any shortened "8.3" paths containing '~' characters are expanded,
4+
# and their path seperators changed to Winodws style ones, prior to being passed
5+
# to SN-DBS during DTLTO.
6+
7+
RUN: rm -rf %t && split-file %s %t && cd %t
8+
9+
# Create an archive to confirm unpacked member paths are expanded.
10+
RUN: prospero-clang --target=x86_64-linux-gnu -c start.c f.c -flto=thin -O2
11+
RUN: prospero-llvm-ar rcs libf.a f.o --thin
12+
13+
RUN: %python in-tilde-dir.py \
14+
RUN: ld.lld start.o libf.a -o start.elf \
15+
RUN: --thinlto-distributor=%python \
16+
RUN: --thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
17+
RUN: --thinlto-remote-compiler=%clang \
18+
RUN: --save-temps
19+
20+
# Ensure that cross-module importing occurred. Cross-module importing is somewhat
21+
# fragile - e.g. it requires amenable code and compilation with -O2 or above.
22+
# It's important that importing happens for this test case, as the paths to
23+
# imported-from modules are recorded in the ThinLTO metadata.
24+
RUN: prospero-llvm-objdump -d start.elf | tee d.txt | FileCheck %s --check-prefix=IMPORT
25+
IMPORT: <_start>:
26+
IMPORT: jmp 0x{{[0-9A-F]+}} <_start>
27+
IMPORT: <f>:
28+
IMPORT-NEXT: jmp 0x{{[0-9A-F]+}} <f>
29+
30+
RUN: FileCheck --input-file start.*.dist-file.json %s --check-prefix=TILDE
31+
TILDE-NOT: ~
32+
33+
#--- f.c
34+
int _start();
35+
int f() { return _start(); }
36+
37+
#--- start.c
38+
int f();
39+
int _start() { return f(); }
40+
41+
#--- in-tilde-dir.py
42+
import os, shutil, sys, uuid, subprocess
43+
from pathlib import Path
44+
45+
temp = Path(os.environ['TEMP'])
46+
assert temp.is_dir()
47+
48+
# Copy the CWD to a unique directory inside TEMP and determine its 8.3 form.
49+
# TEMP is likely to be on a drive that supports long+short paths.
50+
d = (temp / str(uuid.uuid4()) / 'veryverylong').resolve()
51+
d83 = d.parent / (d.name[:6] + '~1')
52+
d.parent.mkdir(parents=True)
53+
try:
54+
shutil.copytree(Path.cwd(), d)
55+
56+
# Replace the arguments of the command that name files with equivalents in
57+
# our temp directory, prefixed with the 8.3 form.
58+
cmd = [a if not Path(a).is_file() else str(d83/a) \
59+
for a in sys.argv[1:]]
60+
print(cmd)
61+
62+
sys.exit(subprocess.run(cmd).returncode)
63+
64+
finally:
65+
shutil.rmtree(d.parent)

lld/Common/Filesystem.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
#include <unistd.h>
2323
#endif
2424
#include <thread>
25+
#if defined(_WIN32)
26+
#include "llvm/Support/ConvertUTF.h"
27+
#include "llvm/Support/Windows/WindowsSupport.h"
28+
#include "llvm/Support/WindowsError.h"
29+
#include <io.h>
30+
#endif
2531

2632
using namespace llvm;
2733
using namespace lld;
@@ -155,3 +161,15 @@ std::unique_ptr<raw_fd_ostream> lld::openLTOOutputFile(StringRef file) {
155161
return fs;
156162
return openFile(file);
157163
}
164+
165+
std::error_code lld::dtltoNormalizePath(std::string &PathOut) {
166+
#if defined(_WIN32)
167+
llvm::SmallString<128> Expanded;
168+
if (auto EC = llvm::sys::windows::makeLong(PathOut, Expanded))
169+
return EC;
170+
171+
PathOut.assign(Expanded.begin(), Expanded.end());
172+
#endif
173+
(void)PathOut;
174+
return std::error_code{};
175+
}

lld/ELF/InputFiles.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "SyntheticSections.h"
1818
#include "Target.h"
1919
#include "lld/Common/DWARF.h"
20+
#include "lld/Common/Filesystem.h"
2021
#include "llvm/ADT/CachedHashString.h"
2122
#include "llvm/ADT/STLExtras.h"
2223
#include "llvm/LTO/LTO.h"
@@ -1764,6 +1765,9 @@ static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath,
17641765
path::append(resolvedPath, memberPath);
17651766
memberPath = resolvedPath.str();
17661767
}
1768+
1769+
lld::dtltoNormalizePath(memberPath);
1770+
17671771
return true;
17681772
}
17691773

@@ -1777,6 +1781,11 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
17771781
if (ctx.arg.thinLTOIndexOnly)
17781782
path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier());
17791783

1784+
// SCE_PRIVATE: begin DTLTO
1785+
// TODO: Investigate if we can remove this code.
1786+
if (!ctx.arg.dtltoDistributor.empty())
1787+
lld::dtltoNormalizePath(path);
1788+
17801789
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
17811790
// name. If two archives define two members with the same name, this
17821791
// causes a collision which result in only one of the objects being taken

lld/include/lld/Common/Filesystem.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ void unlinkAsync(StringRef path);
1919
std::error_code tryCreateFile(StringRef path);
2020
std::unique_ptr<llvm::raw_fd_ostream> openFile(StringRef file);
2121
std::unique_ptr<llvm::raw_fd_ostream> openLTOOutputFile(StringRef file);
22+
std::error_code dtltoNormalizePath(std::string &PathOut);
2223
} // namespace lld
2324

2425
#endif

llvm/include/llvm/Support/Windows/WindowsSupport.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,14 @@ LLVM_ABI std::error_code widenPath(const Twine &Path8,
245245
SmallVectorImpl<wchar_t> &Path16,
246246
size_t MaxPathLen = MAX_PATH);
247247

248+
/// Convert UTF-8 path into a long form UTF-8 path.
249+
LLVM_ABI std::error_code makeLong(const Twine &Path8,
250+
llvm::SmallVectorImpl<char> &Result);
251+
252+
/// Retrieves the volume mount point from UTF-16 path as UTF-16 result.
253+
std::error_code getVolumeRootFromPath(SmallVectorImpl<wchar_t> &Path,
254+
SmallVectorImpl<wchar_t> &Result);
255+
248256
} // end namespace windows
249257
} // end namespace sys
250258
} // end namespace llvm.

llvm/lib/Support/Windows/Path.inc

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,19 @@ static bool is_separator(const wchar_t value) {
5858
}
5959
}
6060

61+
static void stripExtendedPrefix(wchar_t *&Data, DWORD &CountChars) {
62+
if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) {
63+
// Convert \\?\UNC\foo\bar to \\foo\bar
64+
CountChars -= 6;
65+
Data += 6;
66+
Data[0] = '\\';
67+
} else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) {
68+
// Convert \\?\C:\foo to C:\foo
69+
CountChars -= 4;
70+
Data += 4;
71+
}
72+
}
73+
6174
namespace llvm {
6275
namespace sys {
6376
namespace windows {
@@ -125,6 +138,70 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
125138
return UTF8ToUTF16(FullPath, Path16);
126139
}
127140

141+
std::error_code makeLong(const Twine &Path8,
142+
llvm::SmallVectorImpl<char> &Result8) {
143+
constexpr StringLiteral LongPathPrefix = R"(\\?\)";
144+
145+
SmallString<128> PathStorage;
146+
StringRef PathStr = Path8.toStringRef(PathStorage);
147+
bool HadPrefix = PathStr.starts_with(LongPathPrefix);
148+
149+
// Convert UTF-8 to UTF-16
150+
SmallVector<wchar_t, 128> Path16;
151+
if (std::error_code EC = widenPath(PathStr, Path16))
152+
return EC;
153+
154+
// Start with a buffer equal to input.
155+
llvm::SmallVector<wchar_t, 128> Long16;
156+
DWORD Len = static_cast<DWORD>(Path16.size());
157+
158+
do {
159+
Long16.resize_for_overwrite(Len); // grow
160+
161+
Len = ::GetLongPathNameW(Path16.data(), Long16.data(), Len);
162+
163+
// A zero return value indicates a failure other than insufficient space.
164+
if (Len == 0)
165+
return mapWindowsError(::GetLastError());
166+
167+
// If there's insufficient space, the len returned is larger than the len
168+
// given - in this case len includes the null-terminator.
169+
} while (Len >= Long16.size());
170+
171+
// On success, GetLongPathNameW returns the number of characters not
172+
// including the null-terminator.
173+
Long16.truncate(Len);
174+
// Strip \\?\ or \\?\UNC\ prefix if it wasn't part of the original path
175+
wchar_t *Data = Long16.data();
176+
if (!HadPrefix)
177+
stripExtendedPrefix(Data, Len);
178+
179+
return sys::windows::UTF16ToUTF8(Data, Len, Result8);
180+
}
181+
182+
std::error_code getVolumeRootFromPath(SmallVectorImpl<wchar_t> &Path,
183+
SmallVectorImpl<wchar_t> &VolumeRoot) {
184+
size_t Len = 128;
185+
while (true) {
186+
VolumeRoot.resize(Len);
187+
BOOL Success =
188+
::GetVolumePathNameW(Path.data(), VolumeRoot.data(), VolumeRoot.size());
189+
if (Success)
190+
break;
191+
192+
DWORD Err = ::GetLastError();
193+
if (Err != ERROR_INSUFFICIENT_BUFFER)
194+
return mapWindowsError(Err);
195+
196+
Len *= 2;
197+
}
198+
199+
// Ensure null-termination in edge case where buffer was exactly full
200+
VolumeRoot.push_back(L'\0');
201+
VolumeRoot.truncate(wcslen(VolumeRoot.data()));
202+
return std::error_code();
203+
}
204+
128205
} // end namespace windows
129206

130207
namespace fs {
@@ -309,29 +386,10 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
309386
static std::error_code is_local_internal(SmallVectorImpl<wchar_t> &Path,
310387
bool &Result) {
311388
SmallVector<wchar_t, 128> VolumePath;
312-
size_t Len = 128;
313-
while (true) {
314-
VolumePath.resize(Len);
315-
BOOL Success =
316-
::GetVolumePathNameW(Path.data(), VolumePath.data(), VolumePath.size());
317-
318-
if (Success)
319-
break;
320-
321-
DWORD Err = ::GetLastError();
322-
if (Err != ERROR_INSUFFICIENT_BUFFER)
323-
return mapWindowsError(Err);
389+
if (std::error_code EC = windows::getVolumeRootFromPath(Path, VolumePath))
390+
return EC;
324391

325-
Len *= 2;
326-
}
327-
// If the output buffer has exactly enough space for the path name, but not
328-
// the null terminator, it will leave the output unterminated. Push a null
329-
// terminator onto the end to ensure that this never happens.
330-
VolumePath.push_back(L'\0');
331-
VolumePath.truncate(wcslen(VolumePath.data()));
332-
const wchar_t *P = VolumePath.data();
333-
334-
UINT Type = ::GetDriveTypeW(P);
392+
UINT Type = ::GetDriveTypeW(VolumePath.data());
335393
switch (Type) {
336394
case DRIVE_FIXED:
337395
Result = true;
@@ -392,16 +450,7 @@ static std::error_code realPathFromHandle(HANDLE H,
392450
// paths don't get canonicalized by file APIs.
393451
wchar_t *Data = Buffer.data();
394452
DWORD CountChars = Buffer.size();
395-
if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) {
396-
// Convert \\?\UNC\foo\bar to \\foo\bar
397-
CountChars -= 6;
398-
Data += 6;
399-
Data[0] = '\\';
400-
} else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) {
401-
// Convert \\?\c:\foo to c:\foo
402-
CountChars -= 4;
403-
Data += 4;
404-
}
453+
stripExtendedPrefix(Data, CountChars);
405454

406455
// Convert the result from UTF-16 to UTF-8.
407456
if (std::error_code EC = UTF16ToUTF8(Data, CountChars, RealPath))

0 commit comments

Comments
 (0)