Skip to content

Commit a186c53

Browse files
fix: Strip common arch-specific flags before processing (#488)
Generalizes the argument dropping mechanism used specifically for nvcc to also work for Clang and GCC, and skips some known arch-specific flags for Clang and GCC, to reduce risk of unknown flags hit in practice on other architectures. Fixes GRAPH-1187. See comments for more context. I've not added a full-blown mechanism to skip arbitrary flags (e.g. via an extra flag), but we could do that later if necessary. Added some basic unit tests at the level of the command-line tweaking. It's a bit more difficult to test this logic at a higher-level, because the layer above it does I/O for compiler detection.
1 parent e00ddf5 commit a186c53

File tree

4 files changed

+200
-82
lines changed

4 files changed

+200
-82
lines changed

indexer/CommandLineCleaner.cc

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#include "llvm/ADT/StringRef.h"
2+
3+
#include "indexer/CommandLineCleaner.h"
4+
#include "indexer/Enforce.h"
5+
6+
namespace {
7+
8+
enum class Action {
9+
Keep,
10+
ZapOne,
11+
ZapTwo,
12+
};
13+
14+
template <typename T>
15+
void zap(std::vector<T> &vs, absl::FunctionRef<Action(const T &)> check) {
16+
bool dropFromBefore = false;
17+
auto zappedBegin =
18+
std::stable_partition(vs.begin(), vs.end(), [&](const T &v) -> bool {
19+
if (dropFromBefore) {
20+
dropFromBefore = false;
21+
return false;
22+
}
23+
switch (check(v)) {
24+
case Action::Keep:
25+
return true;
26+
case Action::ZapOne:
27+
return false;
28+
case Action::ZapTwo:
29+
dropFromBefore = true;
30+
return false;
31+
}
32+
});
33+
vs.resize(std::distance(vs.begin(), zappedBegin));
34+
}
35+
36+
// Strip out architecture specific flags, because scip-clang may
37+
// be used to index code which relies on architectures known only
38+
// to GCC, or only to some proprietary compilers.
39+
constexpr const char *clangGccSkipOptionsWithArgs[] = {
40+
"-march",
41+
"-mcpu",
42+
"-mtune",
43+
};
44+
45+
// Patterns of arg-less options to strip out.
46+
//
47+
// For example, Clang supports -mfix-cortex-a53-835769 (so does GCC)
48+
// but GCC supports -mfix-cortex-a53-843419 which is not supported by Clang.
49+
//
50+
// In practice, options starting with '-m' seem to all correspond to
51+
// ABI-related options (which ~never affect codenav). However, we cannot
52+
// simply use '-m.*' as the pattern here, because some options with '-m'
53+
// take an argument and some do not, and there isn't an easy programmatic
54+
// way to determine which ones do/do not.
55+
constexpr const char *clangGccSkipOptionsNoArgsPattern = "-m(no-)?fix-.*";
56+
57+
} // namespace
58+
59+
namespace scip_clang::compdb {
60+
61+
void CommandLineCleaner::clean(std::vector<std::string> &commandLine) const {
62+
zap<std::string>(commandLine, [this](const std::string &arg) -> Action {
63+
if (!arg.starts_with('-')) {
64+
return Action::Keep;
65+
}
66+
std::string_view flag = arg;
67+
auto eqIndex = arg.find('=');
68+
if (eqIndex != std::string::npos) {
69+
flag = std::string_view(arg.data(), eqIndex);
70+
} else if (this->noArgumentMatcher
71+
&& this->noArgumentMatcher->match(llvm::StringRef(arg))) {
72+
return Action::ZapOne;
73+
}
74+
auto it = this->toZap.find(flag);
75+
if (it == this->toZap.end()) {
76+
return Action::Keep;
77+
}
78+
switch (it->second) {
79+
case CliOptionKind::NoArgument:
80+
return Action::ZapOne;
81+
case CliOptionKind::OneArgument:
82+
if (flag.size() == arg.size()) {
83+
return Action::ZapTwo;
84+
}
85+
return Action::ZapOne;
86+
}
87+
ENFORCE(false, "should've exited earlier");
88+
});
89+
}
90+
91+
std::unique_ptr<CommandLineCleaner> CommandLineCleaner::forClangOrGcc() {
92+
CommandLineCleaner::MapType toZap;
93+
for (auto s : clangGccSkipOptionsWithArgs) {
94+
toZap.emplace(std::string_view(s), CliOptionKind::NoArgument);
95+
}
96+
CommandLineCleaner cleaner{
97+
.toZap = std::move(toZap),
98+
.noArgumentMatcher = {llvm::Regex(clangGccSkipOptionsNoArgsPattern)}};
99+
return std::make_unique<CommandLineCleaner>(std::move(cleaner));
100+
}
101+
102+
} // namespace scip_clang::compdb

indexer/CommandLineCleaner.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#ifndef SCIP_CLANG_COMMAND_LINE_CLEANER_H
2+
#define SCIP_CLANG_COMMAND_LINE_CLEANER_H
3+
4+
#include <memory>
5+
#include <string>
6+
#include <vector>
7+
8+
#include "absl/container/flat_hash_map.h"
9+
#include "llvm/Support/Regex.h"
10+
11+
namespace scip_clang::compdb {
12+
13+
enum class CliOptionKind {
14+
NoArgument,
15+
OneArgument,
16+
};
17+
18+
struct CommandLineCleaner {
19+
using MapType = absl::flat_hash_map<std::string_view, CliOptionKind>;
20+
// Fixed list of options for which the command-line arguments should be
21+
// zapped. If CliOptionKind is NoArgument, then only one string will be
22+
// zapped. If CliOptionKind is OneArgument, then two successive strings will
23+
// be zapped.
24+
MapType toZap;
25+
// Optional matcher for zapping arguments more flexibly.
26+
// This is to allow for handling unknown flags which match a particular
27+
// pattern. For known flags, put them in toZap.
28+
std::optional<llvm::Regex> noArgumentMatcher;
29+
30+
void clean(std::vector<std::string> &commandLine) const;
31+
32+
static std::unique_ptr<CommandLineCleaner> forClangOrGcc();
33+
};
34+
35+
} // namespace scip_clang::compdb
36+
37+
#endif

indexer/CompilationDatabase.cc

Lines changed: 33 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "rapidjson/reader.h"
2222
#include "spdlog/fmt/fmt.h"
2323

24+
#include "indexer/CommandLineCleaner.h"
2425
#include "indexer/CompilationDatabase.h"
2526
#include "indexer/FileSystem.h"
2627
#include "indexer/LlvmCommandLineParsing.h"
@@ -73,25 +74,30 @@ namespace {
7374

7475
using AbsolutePath = scip_clang::AbsolutePath;
7576
using ToolchainInfo = scip_clang::compdb::ToolchainInfo;
77+
using CommandLineCleaner = scip_clang::compdb::CommandLineCleaner;
7678
using CompilerKind = scip_clang::compdb::CompilerKind;
79+
using CliOptionKind = scip_clang::compdb::CliOptionKind;
7780

7881
struct ClangToolchainInfo : public ToolchainInfo {
7982
std::string resourceDir;
8083
std::vector<std::string> findResourceDirInvocation;
8184
std::string compilerDriverPath;
8285
std::vector<std::string> findDriverInvocation;
86+
std::unique_ptr<CommandLineCleaner> cleaner;
8387

8488
// All strings and vectors above should be non-empty for
8589
// a valid toolchain.
8690

8791
ClangToolchainInfo(std::string resourceDir,
8892
std::vector<std::string> findResourceDirInvocation,
8993
std::string compilerDriverPath,
90-
std::vector<std::string> findDriverInvocation)
94+
std::vector<std::string> findDriverInvocation,
95+
std::unique_ptr<CommandLineCleaner> cleaner)
9196
: ToolchainInfo(), resourceDir(resourceDir),
9297
findResourceDirInvocation(findResourceDirInvocation),
9398
compilerDriverPath(compilerDriverPath),
94-
findDriverInvocation(findDriverInvocation){};
99+
findDriverInvocation(findDriverInvocation),
100+
cleaner(std::move(cleaner)){};
95101

96102
virtual CompilerKind kind() const override {
97103
return CompilerKind::Clang;
@@ -116,6 +122,7 @@ struct ClangToolchainInfo : public ToolchainInfo {
116122
virtual void
117123
adjustCommandLine(std::vector<std::string> &commandLine) const override {
118124
commandLine[0] = this->compilerDriverPath;
125+
this->cleaner->clean(commandLine);
119126
commandLine.push_back("-resource-dir");
120127
commandLine.push_back(this->resourceDir);
121128
}
@@ -165,18 +172,21 @@ struct ClangToolchainInfo : public ToolchainInfo {
165172

166173
return std::make_unique<ClangToolchainInfo>(
167174
resourceDir, findResourceDirInvocation, compilerDriverPath,
168-
findDriverInvocation);
175+
findDriverInvocation, CommandLineCleaner::forClangOrGcc());
169176
}
170177
};
171178

172179
struct GccToolchainInfo : public ToolchainInfo {
173180
std::string installDir;
174181
std::vector<std::string> findInstallDirInvocation;
182+
std::unique_ptr<CommandLineCleaner> cleaner;
175183

176184
GccToolchainInfo(std::string installDir,
177-
std::vector<std::string> findInstallDirInvocation)
185+
std::vector<std::string> findInstallDirInvocation,
186+
std::unique_ptr<CommandLineCleaner> cleaner)
178187
: ToolchainInfo(), installDir(installDir),
179-
findInstallDirInvocation(findInstallDirInvocation) {}
188+
findInstallDirInvocation(findInstallDirInvocation),
189+
cleaner(std::move(cleaner)) {}
180190

181191
virtual CompilerKind kind() const override {
182192
return CompilerKind::Gcc;
@@ -194,6 +204,7 @@ struct GccToolchainInfo : public ToolchainInfo {
194204

195205
virtual void
196206
adjustCommandLine(std::vector<std::string> &commandLine) const override {
207+
this->cleaner->clean(commandLine);
197208
commandLine.push_back("-resource-dir");
198209
commandLine.push_back(this->installDir);
199210
// gcc-7 adds headers like limits.h and syslimits.h in include-fixed
@@ -227,21 +238,17 @@ struct GccToolchainInfo : public ToolchainInfo {
227238
return nullptr;
228239
}
229240
spdlog::debug("found gcc install directory at {}", installDir);
230-
return std::make_unique<GccToolchainInfo>(installDir,
231-
findSearchDirsInvocation);
241+
return std::make_unique<GccToolchainInfo>(
242+
installDir, findSearchDirsInvocation,
243+
CommandLineCleaner::forClangOrGcc());
232244
}
233245
};
234246

235-
enum class NvccOptionType {
236-
NoArgument,
237-
OneArgument,
238-
};
239-
240247
// Based on nvcc --help from nvcc version V12.2.140
241248
// Build cuda_12.2.r12.2/compiler.33191640_0
242249

243250
// clang-format off
244-
constexpr const char* skipOptionsNoArgs[] = {
251+
constexpr const char* nvccSkipOptionsNoArgs[] = {
245252
"--cuda", "-cuda",
246253
"--cubin", "-cubin",
247254
"--fatbin", "-fatbin",
@@ -303,7 +310,7 @@ constexpr const char* skipOptionsNoArgs[] = {
303310
"--host-relocatable-link", "-r"
304311
};
305312

306-
constexpr const char* skipOptionsWithArgs[] = {
313+
constexpr const char* nvccSkipOptionsWithArgs[] = {
307314
"--cudart", "-cudart",
308315
"--cudadevrt", "-cudadevrt",
309316
"--libdevice-directory", "-ldir",
@@ -361,18 +368,19 @@ struct NvccToolchainInfo : public ToolchainInfo {
361368
/// doesn't even construct the appropriate CUDAKernelCallExpr values.
362369
std::unique_ptr<ClangToolchainInfo> clangInfo;
363370

364-
absl::flat_hash_map<std::string_view, NvccOptionType> toBeSkipped;
371+
CommandLineCleaner cleaner;
365372

366373
NvccToolchainInfo(AbsolutePath cudaDir)
367374
: ToolchainInfo(), cudaDir(cudaDir), clangInfo(nullptr) {
368-
for (auto s : skipOptionsNoArgs) {
369-
this->toBeSkipped.emplace(std::string_view(s),
370-
NvccOptionType::NoArgument);
375+
CommandLineCleaner::MapType toZap;
376+
for (auto s : nvccSkipOptionsNoArgs) {
377+
toZap.emplace(std::string_view(s), CliOptionKind::NoArgument);
371378
}
372-
for (auto s : skipOptionsWithArgs) {
373-
this->toBeSkipped.emplace(std::string_view(s),
374-
NvccOptionType::OneArgument);
379+
for (auto s : nvccSkipOptionsWithArgs) {
380+
toZap.emplace(std::string_view(s), CliOptionKind::OneArgument);
375381
}
382+
this->cleaner =
383+
CommandLineCleaner{.toZap = toZap, .noArgumentMatcher = std::nullopt};
376384

377385
// TODO: In principle, we could pick up Clang from -ccbin but that
378386
// requires more plumbing; it would require using the -ccbin arg
@@ -412,72 +420,15 @@ struct NvccToolchainInfo : public ToolchainInfo {
412420
return true;
413421
}
414422

415-
enum class ArgumentProcessing {
416-
Keep,
417-
DropCurrent,
418-
DropCurrentAndNextIffBothPresent,
419-
};
420-
421-
ArgumentProcessing handleArgument(const std::string &arg) const {
422-
if (!arg.starts_with('-')) {
423-
return ArgumentProcessing::Keep;
424-
}
425-
std::string_view substr = arg;
426-
auto eqIndex = arg.find('=');
427-
if (eqIndex != std::string::npos) {
428-
substr = std::string_view(arg.data(), eqIndex);
429-
}
430-
auto it = this->toBeSkipped.find(substr);
431-
if (it == this->toBeSkipped.end()) {
432-
return ArgumentProcessing::Keep;
433-
}
434-
switch (it->second) {
435-
case NvccOptionType::NoArgument:
436-
return ArgumentProcessing::DropCurrent;
437-
case NvccOptionType::OneArgument:
438-
if (substr.size() == arg.size()) {
439-
return ArgumentProcessing::DropCurrentAndNextIffBothPresent;
440-
}
441-
return ArgumentProcessing::DropCurrent;
442-
}
443-
ENFORCE(false, "should've exited earlier");
444-
}
445-
446-
void removeUnknownArguments(std::vector<std::string> &commandLine) const {
447-
absl::flat_hash_set<size_t> drop{};
448-
for (size_t i = 0; i < commandLine.size(); ++i) {
449-
switch (this->handleArgument(commandLine[i])) {
450-
case ArgumentProcessing::Keep:
451-
continue;
452-
case ArgumentProcessing::DropCurrent:
453-
drop.insert(i);
454-
continue;
455-
case ArgumentProcessing::DropCurrentAndNextIffBothPresent:
456-
if (i + 1 < commandLine.size()) {
457-
drop.insert(i);
458-
drop.insert(i + 1);
459-
}
460-
}
461-
}
462-
std::vector<std::string> tmp;
463-
tmp.reserve(commandLine.size() - drop.size());
464-
for (size_t i = 0; i < commandLine.size(); ++i) {
465-
if (!drop.contains(i)) {
466-
tmp.push_back(std::move(commandLine[i]));
467-
}
468-
}
469-
std::swap(tmp, commandLine);
470-
}
471-
472423
virtual void
473424
adjustCommandLine(std::vector<std::string> &commandLine) const override {
474-
this->removeUnknownArguments(commandLine);
475-
commandLine.push_back(
476-
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
477-
std::filesystem::path::preferred_separator));
425+
this->cleaner.clean(commandLine);
478426
if (this->clangInfo) {
479427
this->clangInfo->adjustCommandLine(commandLine);
480428
}
429+
commandLine.push_back(
430+
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
431+
std::filesystem::path::preferred_separator));
481432
}
482433

483434
static std::unique_ptr<NvccToolchainInfo>

0 commit comments

Comments
 (0)