Skip to content

Commit 4d81206

Browse files
committed
Swift: teach extractor to emit build artifacts for later consumption
1 parent 1997d6b commit 4d81206

File tree

7 files changed

+402
-15
lines changed

7 files changed

+402
-15
lines changed

swift/extractor/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ load("//swift:rules.bzl", "swift_cc_binary")
33
swift_cc_binary(
44
name = "extractor",
55
srcs = [
6+
"SwiftOutputRewrite.cpp",
7+
"SwiftOutputRewrite.h",
68
"SwiftExtractor.cpp",
79
"SwiftExtractor.h",
810
"SwiftExtractorConfiguration.h",

swift/extractor/SwiftExtractor.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,17 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
8080
<< "': " << ec.message() << "\n";
8181
return;
8282
}
83-
trapStream << "// extractor-args: ";
83+
trapStream << "/* extractor-args:\n";
8484
for (auto opt : config.frontendOptions) {
85-
trapStream << std::quoted(opt) << " ";
85+
trapStream << " " << std::quoted(opt) << " \\\n";
8686
}
87-
trapStream << "\n\n";
87+
trapStream << "\n*/\n";
88+
89+
trapStream << "/* swift-frontend-args:\n";
90+
for (auto opt : config.patchedFrontendOptions) {
91+
trapStream << " " << std::quoted(opt) << " \\\n";
92+
}
93+
trapStream << "\n*/\n";
8894

8995
TrapOutput trap{trapStream};
9096
TrapArena arena{};

swift/extractor/SwiftExtractorConfiguration.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,22 @@ struct SwiftExtractorConfiguration {
1616
// Subdirectory of the scratchDir.
1717
std::string tempTrapDir;
1818

19+
// VFS (virtual file system) support.
20+
// A temporary directory that contains VFS files used during extraction.
21+
// Subdirectory of the scratchDir.
22+
std::string VFSDir;
23+
// A temporary directory that contains temp VFS files before they are moved into VFSDir.
24+
// Subdirectory of the scratchDir.
25+
std::string tempVFSDir;
26+
27+
// A temporary directory that contains build artifacts generated by the extractor during the
28+
// overall extraction process.
29+
// Subdirectory of the scratchDir.
30+
std::string tempArtifactDir;
31+
1932
// The original arguments passed to the extractor. Used for debugging.
2033
std::vector<std::string> frontendOptions;
34+
// The patched arguments passed to swift::performFrontend. Used for debugging.
35+
std::vector<std::string> patchedFrontendOptions;
2136
};
2237
} // namespace codeql
Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
#include "SwiftOutputRewrite.h"
2+
#include "swift/extractor/SwiftExtractorConfiguration.h"
3+
4+
#include <llvm/ADT/SmallString.h>
5+
#include <llvm/Support/FileSystem.h>
6+
#include <llvm/Support/Path.h>
7+
#include <swift/Basic/OutputFileMap.h>
8+
#include <swift/Basic/Platform.h>
9+
#include <unistd.h>
10+
#include <unordered_set>
11+
#include <optional>
12+
#include <iostream>
13+
14+
// Creates a copy of the output file map and updated remapping table in place
15+
// It does not change the original map file as it is dependent upon by the original compiler
16+
// Returns path to the newly created output file map on success, or None in a case of failure
17+
static std::optional<std::string> rewriteOutputFileMap(
18+
const codeql::SwiftExtractorConfiguration& config,
19+
const std::string& outputFileMapPath,
20+
const std::vector<std::string>& inputs,
21+
std::unordered_map<std::string, std::string>& remapping) {
22+
auto newPath = config.tempArtifactDir + '/' + outputFileMapPath;
23+
24+
// TODO: do not assume absolute path for the second parameter
25+
auto outputMapOrError = swift::OutputFileMap::loadFromPath(outputFileMapPath, "");
26+
if (!outputMapOrError) {
27+
return std::nullopt;
28+
}
29+
auto oldOutputMap = outputMapOrError.get();
30+
swift::OutputFileMap newOutputMap;
31+
std::vector<llvm::StringRef> keys;
32+
for (auto& key : inputs) {
33+
auto oldMap = oldOutputMap.getOutputMapForInput(key);
34+
if (!oldMap) {
35+
continue;
36+
}
37+
keys.push_back(key);
38+
auto& newMap = newOutputMap.getOrCreateOutputMapForInput(key);
39+
newMap.copyFrom(*oldMap);
40+
for (auto& entry : newMap) {
41+
auto oldPath = entry.getSecond();
42+
auto newPath = config.tempArtifactDir + '/' + oldPath;
43+
entry.getSecond() = newPath;
44+
remapping[oldPath] = newPath;
45+
}
46+
}
47+
std::error_code ec;
48+
llvm::SmallString<PATH_MAX> filepath(newPath);
49+
llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
50+
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
51+
std::cerr << "Cannot create relocated output map dir: '" << parent.str()
52+
<< "': " << ec.message() << "\n";
53+
return std::nullopt;
54+
}
55+
56+
llvm::raw_fd_ostream fd(newPath, ec, llvm::sys::fs::OF_None);
57+
newOutputMap.write(fd, keys);
58+
return newPath;
59+
}
60+
61+
// This is Xcode-specific workaround to produce alias names for an existing .swiftmodule file.
62+
// In the case of Xcode, it calls the Swift compiler and asks it to produce a Swift module.
63+
// Once it's done, Xcode moves the .swiftmodule file in another location, and the location is
64+
// rather arbitrary. Here are examples of such locations:
65+
// Original file produced by the frontend:
66+
// DerivedData/<Project>/Build/Intermediates.noindex/<Project>.build/<BuiltType>-<Target>/<Project>.build/Objects-normal/<Arch>/<ModuleName>.swiftmodule
67+
// where:
68+
// Project: name of a project, target, or scheme
69+
// BuildType: Debug, Release, etc.
70+
// Target: macOS, iphoneos, appletvsimulator, etc.
71+
// Arch: arm64, x86_64, etc.
72+
//
73+
// So far we observed that Xcode can move the module into different locations, and it's not
74+
// entirely clear how to deduce the destination from the context available for the extractor.
75+
// 1. First case:
76+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
77+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
78+
// 2. Second case:
79+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
80+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
81+
// 2. Third case:
82+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Arch>.swiftmodule
83+
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Triple>.swiftmodule
84+
// The <Triple> here is a normalized target triple (e.g. arm64-apple-iphoneos15.4 ->
85+
// arm64-apple-iphoneos).
86+
//
87+
// This method construct those aliases for a module only if it comes from Xcode, which is detected
88+
// by the presence of `Intermediates.noindex` directory in the module path.
89+
//
90+
// In the case of Swift Package Manager (`swift build`) this is not needed.
91+
static std::vector<std::string> computeModuleAliases(llvm::StringRef modulePath,
92+
const std::string& targetTriple) {
93+
if (modulePath.empty()) {
94+
return {};
95+
}
96+
if (!modulePath.endswith(".swiftmodule")) {
97+
return {};
98+
}
99+
100+
llvm::SmallVector<llvm::StringRef> chunks;
101+
modulePath.split(chunks, '/');
102+
size_t intermediatesDirIndex = 0;
103+
for (size_t i = 0; i < chunks.size(); i++) {
104+
if (chunks[i] == "Intermediates.noindex") {
105+
intermediatesDirIndex = i;
106+
break;
107+
}
108+
}
109+
// Not built by Xcode, skipping
110+
if (intermediatesDirIndex == 0) {
111+
return {};
112+
}
113+
// e.g. Debug-iphoneos, Release-iphonesimulator, etc.
114+
auto destinationDir = chunks[intermediatesDirIndex + 2].str();
115+
auto arch = chunks[intermediatesDirIndex + 5].str();
116+
auto moduleNameWithExt = chunks.back();
117+
auto moduleName = moduleNameWithExt.substr(0, moduleNameWithExt.find_last_of('.'));
118+
std::string relocatedModulePath = chunks[0].str();
119+
for (size_t i = 1; i < intermediatesDirIndex; i++) {
120+
relocatedModulePath += '/' + chunks[i].str();
121+
}
122+
relocatedModulePath += "/Products/";
123+
relocatedModulePath += destinationDir + '/';
124+
125+
std::vector<std::string> moduleLocations;
126+
127+
std::string firstCase = relocatedModulePath;
128+
firstCase += moduleNameWithExt.str() + '/';
129+
moduleLocations.push_back(firstCase);
130+
131+
std::string secondCase = relocatedModulePath;
132+
secondCase += moduleName.str() + '/';
133+
secondCase += moduleNameWithExt.str() + '/';
134+
moduleLocations.push_back(secondCase);
135+
136+
std::string thirdCase = relocatedModulePath;
137+
thirdCase += moduleName.str() + '/';
138+
thirdCase += moduleName.str() + ".framework/Modules/";
139+
thirdCase += moduleNameWithExt.str() + '/';
140+
moduleLocations.push_back(thirdCase);
141+
142+
std::vector<std::string> aliases;
143+
for (auto& location : moduleLocations) {
144+
aliases.push_back(location + arch + ".swiftmodule");
145+
if (!targetTriple.empty()) {
146+
llvm::Triple triple(targetTriple);
147+
auto moduleTriple = swift::getTargetSpecificModuleTriple(triple);
148+
aliases.push_back(location + moduleTriple.normalize() + ".swiftmodule");
149+
}
150+
}
151+
152+
return aliases;
153+
}
154+
155+
namespace codeql {
156+
157+
// Redirects the compiler outputs named in `CLIArgs` into `config.tempArtifactDir`.
//
// The value of every recognised output-path option is rewritten in place, the output file maps
// referenced by `-output-file-map` / `-supplementary-output-file-map` are replaced by relocated
// copies, and Xcode-specific aliases are added for every remapped path that yields them.
//
// Parameters:
//   config  - extractor configuration; `tempArtifactDir` is the root of the relocated outputs
//   CLIArgs - frontend arguments, patched in place
//
// Returns a map from each original path (and each computed alias) to its redirected path.
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
    SwiftExtractorConfiguration& config,
    std::vector<std::string>& CLIArgs) {
  std::unordered_map<std::string, std::string> remapping;

  // TODO: handle filelists?
  const std::unordered_set<std::string> pathRewriteOptions({
      "-emit-dependencies-path",
      "-emit-module-path",
      "-emit-module-doc-path",
      "-emit-module-source-info-path",
      "-emit-objc-header-path",
      "-emit-reference-dependencies-path",
      "-index-store-path",
      "-module-cache-path",
      "-o",
      "-pch-output-dir",
      "-serialize-diagnostics-path",
  });

  const std::unordered_set<std::string> outputFileMaps(
      {"-supplementary-output-file-map", "-output-file-map"});

  std::vector<size_t> outputFileMapIndexes;
  std::vector<std::string> maybeInput;
  std::string targetTriple;

  const size_t argCount = CLIArgs.size();
  for (size_t i = 0; i < argCount; i++) {
    const bool isPathOption = pathRewriteOptions.count(CLIArgs[i]) != 0;
    const bool isOutputFileMap = outputFileMaps.count(CLIArgs[i]) != 0;
    const bool isTarget = CLIArgs[i] == "-target";
    if ((isPathOption || isOutputFileMap || isTarget) && i + 1 >= argCount) {
      // The option is the last argument and has no value: there is nothing to rewrite, and
      // reading CLIArgs[i + 1] would be out of bounds.
      break;
    }

    if (isPathOption) {
      auto oldPath = CLIArgs[i + 1];
      auto newPath = config.tempArtifactDir + '/' + oldPath;
      CLIArgs[++i] = newPath;
      remapping[oldPath] = newPath;
    } else if (isOutputFileMap) {
      // collect output map indexes for further rewriting and skip the following argument
      // We don't patch the map in place as we need to collect all the input files first
      outputFileMapIndexes.push_back(++i);
    } else if (isTarget) {
      targetTriple = CLIArgs[++i];
    } else if (CLIArgs[i][0] != '-') {
      // TODO: add support for input file lists?
      // We need to collect input file names to later use them to extract information from the
      // output file maps.
      maybeInput.push_back(CLIArgs[i]);
    }
  }

  for (auto index : outputFileMapIndexes) {
    auto oldPath = CLIArgs[index];
    auto maybeNewPath = rewriteOutputFileMap(config, oldPath, maybeInput, remapping);
    if (maybeNewPath) {
      auto newPath = maybeNewPath.value();
      CLIArgs[index] = newPath;
      remapping[oldPath] = newPath;
    }
  }

  // This doesn't really belong here, but we've got Xcode...
  // The aliases are gathered first and inserted afterwards: inserting into `remapping` while
  // iterating over it may trigger a rehash, which invalidates the iterators of the loop.
  std::vector<std::pair<std::string, std::string>> aliasEntries;
  for (const auto& [oldPath, newPath] : remapping) {
    llvm::StringRef path(oldPath);
    auto aliases = computeModuleAliases(path, targetTriple);
    for (auto& alias : aliases) {
      aliasEntries.emplace_back(std::move(alias), newPath);
    }
  }
  for (auto& [alias, newPath] : aliasEntries) {
    remapping[alias] = newPath;
  }

  return remapping;
}
228+
229+
void ensureNewPathsExist(const std::unordered_map<std::string, std::string>& remapping) {
230+
for (auto& [_, newPath] : remapping) {
231+
llvm::SmallString<PATH_MAX> filepath(newPath);
232+
llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
233+
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
234+
std::cerr << "Cannot create redirected directory: " << ec.message() << "\n";
235+
}
236+
}
237+
}
238+
239+
void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
240+
const std::unordered_map<std::string, std::string>& remapping) {
241+
// Only create remapping for the .swiftmodule files
242+
std::unordered_map<std::string, std::string> modules;
243+
for (auto& [oldPath, newPath] : remapping) {
244+
if (llvm::StringRef(oldPath).endswith(".swiftmodule")) {
245+
modules[oldPath] = newPath;
246+
}
247+
}
248+
249+
if (modules.empty()) {
250+
return;
251+
}
252+
253+
if (std::error_code ec = llvm::sys::fs::create_directories(config.tempVFSDir)) {
254+
std::cerr << "Cannot create temp VFS directory: " << ec.message() << "\n";
255+
return;
256+
}
257+
258+
if (std::error_code ec = llvm::sys::fs::create_directories(config.VFSDir)) {
259+
std::cerr << "Cannot create VFS directory: " << ec.message() << "\n";
260+
return;
261+
}
262+
263+
// Constructing the VFS yaml file in a temp folder so that the other process doesn't read it
264+
// while it is not complete
265+
// TODO: Pick a more robust way to not collide with files from other processes
266+
auto tempVfsPath = config.tempVFSDir + '/' + std::to_string(getpid()) + "-vfs.yaml";
267+
std::error_code ec;
268+
llvm::raw_fd_ostream fd(tempVfsPath, ec, llvm::sys::fs::OF_None);
269+
if (ec) {
270+
std::cerr << "Cannot create temp VFS file: '" << tempVfsPath << "': " << ec.message() << "\n";
271+
return;
272+
}
273+
// TODO: there must be a better API than this
274+
// LLVM expects the version to be 0
275+
fd << "{ version: 0,\n";
276+
// This tells the FS not to fallback to the physical file system in case the remapped file is not
277+
// present
278+
fd << " fallthrough: false,\n";
279+
fd << " roots: [\n";
280+
for (auto& [oldPath, newPath] : modules) {
281+
fd << " {\n";
282+
fd << " type: 'file',\n";
283+
fd << " name: '" << oldPath << "\',\n";
284+
fd << " external-contents: '" << newPath << "\'\n";
285+
fd << " },\n";
286+
}
287+
fd << " ]\n";
288+
fd << "}\n";
289+
290+
fd.flush();
291+
auto vfsPath = config.VFSDir + '/' + std::to_string(getpid()) + "-vfs.yaml";
292+
if (std::error_code ec = llvm::sys::fs::rename(tempVfsPath, vfsPath)) {
293+
std::cerr << "Cannot move temp VFS file '" << tempVfsPath << "' -> '" << vfsPath
294+
<< "': " << ec.message() << "\n";
295+
return;
296+
}
297+
}
298+
299+
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config) {
300+
auto vfsDir = config.VFSDir + '/';
301+
if (!llvm::sys::fs::exists(vfsDir)) {
302+
return {};
303+
}
304+
std::vector<std::string> overlays;
305+
std::error_code ec;
306+
llvm::sys::fs::directory_iterator it(vfsDir, ec);
307+
while (!ec && it != llvm::sys::fs::directory_iterator()) {
308+
llvm::StringRef path(it->path());
309+
if (path.endswith("vfs.yaml")) {
310+
overlays.push_back(path.str());
311+
}
312+
it.increment(ec);
313+
}
314+
315+
return overlays;
316+
}
317+
318+
} // namespace codeql

0 commit comments

Comments
 (0)