Skip to content

Commit ded1426

Browse files
authored
[clang][modules-driver] Add scanner to detect C++20 module presence (#145220)
This PR is part of a series to natively support C++20 module usage from the Clang driver (without requiring an external build system). This introduces a new scanner that detects C++20 module usage in source files without using the preprocessor or lexer. For now, it is enabled only with the `-fmodules-driver` flag and serves solely diagnostic purposes. In the future, the scanner will be enabled for any (modules-driver compatible) compilation with two or more inputs, and will help the driver determine whether to implicitly enable the modules driver. Since the scanner adds very little overhead, we are also exploring enabling it for compilations with only a single input. This approach could allow us to detect `import std` usage in a single-file compilation, which would then activate the modules driver. For performance measurements on this, see https://github.com/naveen-seth/llvm-dev-cxx-modules-check-benchmark. RFC: https://discourse.llvm.org/t/rfc-modules-support-simple-c-20-modules-use-from-the-clang-driver-without-a-build-system
1 parent de7ff1f commit ded1426

File tree

7 files changed

+453
-1
lines changed

7 files changed

+453
-1
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,16 @@ def err_drv_reduced_module_output_overrided : Warning<
577577
"please consider use '-fmodule-output=' to specify the output file for reduced BMI explicitly">,
578578
InGroup<DiagGroup<"reduced-bmi-output-overrided">>;
579579

580+
// Remark reported by Driver::ScanInputsForCXXModuleUsage when its scan of a
// C++ input detects module usage; %0 is the spelling of that input file.
def remark_found_cxx20_module_usage : Remark<
581+
"found C++20 module usage in file '%0'">,
582+
InGroup<ModulesDriver>;
583+
// Remark reported by Driver::BuildDriverManagedModuleBuildActions when the
// driver takes the driver managed module build path.
def remark_performing_driver_managed_module_build : Remark<
584+
"performing driver managed module build">,
585+
InGroup<ModulesDriver>;
586+
// Warning reported by Driver::handleArguments when '-fmodules-driver' is
// passed but C++20 modules mode is not active; the driver then erases the
// '-fmodules-driver' argument.
def warn_modules_driver_unsupported_standard : Warning<
587+
"'-fmodules-driver' is not supported before C++20">,
588+
InGroup<ModulesDriver>;
589+
580590
def warn_drv_delayed_template_parsing_after_cxx20 : Warning<
581591
"-fdelayed-template-parsing is deprecated after C++20">,
582592
InGroup<DiagGroup<"delayed-template-parsing-in-cxx20">>;

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ def ModuleConflict : DiagGroup<"module-conflict">;
625625
def ModuleFileExtension : DiagGroup<"module-file-extension">;
626626
def ModuleIncludeDirectiveTranslation : DiagGroup<"module-include-translation">;
627627
def ModuleMap : DiagGroup<"module-map">;
628+
def ModulesDriver : DiagGroup<"modules-driver">;
628629
def RoundTripCC1Args : DiagGroup<"round-trip-cc1-args">;
629630
def NewlineEOF : DiagGroup<"newline-eof">;
630631
def Nullability : DiagGroup<"nullability">;

clang/include/clang/Driver/Driver.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,9 @@ class Driver {
504504

505505
/// BuildActions - Construct the list of actions to perform for the
506506
/// given arguments, which are only done for a single architecture.
507+
/// If the compilation is an explicit module build, delegates to
508+
/// BuildDriverManagedModuleBuildActions. Otherwise, BuildDefaultActions is
509+
/// used.
507510
///
508511
/// \param C - The compilation that is being built.
509512
/// \param Args - The input arguments.
@@ -789,6 +792,35 @@ class Driver {
789792
/// compilation based on which -f(no-)?lto(=.*)? option occurs last.
790793
void setLTOMode(const llvm::opt::ArgList &Args);
791794

795+
/// BuildDefaultActions - Constructs the list of actions to perform
796+
/// for the provided arguments, which are only done for a single architecture.
797+
///
798+
/// \param C - The compilation that is being built.
799+
/// \param Args - The input arguments.
800+
/// \param Actions - The list to store the resulting actions onto.
801+
void BuildDefaultActions(Compilation &C, llvm::opt::DerivedArgList &Args,
802+
const InputList &Inputs, ActionList &Actions) const;
803+
804+
/// BuildDriverManagedModuleBuildActions - Performs a dependency
805+
/// scan and constructs the list of actions to perform for dependency order
806+
/// and the provided arguments. This is only done for a single architecture.
807+
///
808+
/// \param C - The compilation that is being built.
809+
/// \param Args - The input arguments.
810+
/// \param Actions - The list to store the resulting actions onto.
811+
void BuildDriverManagedModuleBuildActions(Compilation &C,
812+
llvm::opt::DerivedArgList &Args,
813+
const InputList &Inputs,
814+
ActionList &Actions) const;
815+
816+
/// Scans the leading lines of the C++ source inputs to detect C++20 module
817+
/// usage.
818+
///
819+
/// \returns True if module usage is detected, false otherwise, or an error on
820+
/// read failure.
821+
llvm::ErrorOr<bool>
822+
ScanInputsForCXXModuleUsage(const InputList &Inputs) const;
823+
792824
/// Retrieves a ToolChain for a particular \p Target triple.
793825
///
794826
/// Will cache ToolChains for the life of the driver object, and create them

clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3259,6 +3259,13 @@ def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">,
32593259
HelpText<"Generate the reduced BMI">,
32603260
MarshallingInfoFlag<FrontendOpts<"GenReducedBMI">>;
32613261

3262+
// Positive half of the -f[no-]modules-driver pair. Driver::BuildActions
// queries the pair with a default of 'false', so the feature is opt-in.
def fmodules_driver : Flag<["-"], "fmodules-driver">,
3263+
Group<f_Group>, Visibility<[ClangOption]>,
3264+
HelpText<"Enable support for driver managed module builds (experimental)">;
3265+
// Negative half of the -f[no-]modules-driver pair; overrides an earlier
// '-fmodules-driver' when it appears last (queried via Args.hasFlag in
// Driver::BuildActions).
def fno_modules_driver : Flag<["-"], "fno-modules-driver">,
3266+
Group<f_Group>, Visibility<[ClangOption]>,
3267+
HelpText<"Disable support for driver managed module builds (experimental)">;
3268+
32623269
def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">,
32633270
Group<f_Group>, Visibility<[ClangOption, CC1Option]>, Alias<modules_reduced_bmi>;
32643271

clang/lib/Driver/Driver.cpp

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "ToolChains/WebAssembly.h"
5454
#include "ToolChains/XCore.h"
5555
#include "ToolChains/ZOS.h"
56+
#include "clang/Basic/CharInfo.h"
5657
#include "clang/Basic/DiagnosticDriver.h"
5758
#include "clang/Basic/TargetID.h"
5859
#include "clang/Basic/Version.h"
@@ -4291,6 +4292,13 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
42914292
YcArg = nullptr;
42924293
}
42934294

4295+
if (Args.hasArgNoClaim(options::OPT_fmodules_driver))
4296+
// TODO: Check against all incompatible -fmodules-driver arguments
4297+
if (!ModulesModeCXX20) {
4298+
Diag(diag::warn_modules_driver_unsupported_standard);
4299+
Args.eraseArg(options::OPT_fmodules_driver);
4300+
}
4301+
42944302
Arg *FinalPhaseArg;
42954303
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
42964304

@@ -4417,6 +4425,174 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
44174425
}
44184426
}
44194427

4428+
static void skipWhitespace(const char *&Ptr) {
4429+
while (isWhitespace(*Ptr))
4430+
++Ptr;
4431+
}
4432+
4433+
// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n).
4434+
static unsigned isEOL(const char *Ptr) {
4435+
if (*Ptr == '\0')
4436+
return 0;
4437+
if (*(Ptr + 1) != '\0' && isVerticalWhitespace(Ptr[0]) &&
4438+
isVerticalWhitespace(Ptr[1]) && Ptr[0] != Ptr[1])
4439+
return 2;
4440+
return !!isVerticalWhitespace(Ptr[0]);
4441+
}
4442+
4443+
static void skipLine(const char *&Ptr) {
4444+
for (;;) {
4445+
char LastNonWhitespace = ' ';
4446+
while (!isVerticalWhitespace(*Ptr) && *Ptr != '\0') {
4447+
if (!isHorizontalWhitespace(*Ptr))
4448+
LastNonWhitespace = *Ptr;
4449+
++Ptr;
4450+
}
4451+
4452+
const unsigned Len = isEOL(Ptr);
4453+
if (!Len)
4454+
return;
4455+
4456+
Ptr += Len;
4457+
if (LastNonWhitespace != '\\')
4458+
break;
4459+
}
4460+
}
4461+
4462+
// Returns the length of a line splice sequence (including trailing
4463+
// whitespace), or 0 if no line splice is found.
4464+
static unsigned isLineSplice(const char *Start) {
4465+
if (*Start != '\\')
4466+
return 0;
4467+
4468+
const char *Ptr = Start + 1;
4469+
while (isHorizontalWhitespace(*Ptr))
4470+
++Ptr;
4471+
4472+
if (unsigned Len = isEOL(Ptr))
4473+
return Ptr - Start + Len;
4474+
return 0;
4475+
}
4476+
4477+
static bool trySkipLineSplice(const char *&Ptr) {
4478+
if (unsigned Len = isLineSplice(Ptr); Len) {
4479+
Ptr += Len;
4480+
return true;
4481+
}
4482+
return false;
4483+
}
4484+
4485+
/// If \p Ptr points at a '#', advances \p Ptr past the rest of that logical
/// line (as determined by skipLine).
///
/// \returns True if a '#' line was skipped, false otherwise (in which case
/// \p Ptr is left unchanged).
static bool trySkipDirective(const char *&Ptr) {
  const bool StartsDirective = (*Ptr == '#');
  if (StartsDirective) {
    ++Ptr; // '#'
    skipLine(Ptr);
  }
  return StartsDirective;
}
4493+
4494+
/// If \p Ptr points at "//", advances \p Ptr past the rest of that logical
/// line (as determined by skipLine).
///
/// \returns True if a line comment was skipped, false otherwise (in which
/// case \p Ptr is left unchanged).
static bool trySkipLineComment(const char *&Ptr) {
  const bool StartsLineComment = (Ptr[0] == '/' && Ptr[1] == '/');
  if (StartsLineComment) {
    Ptr += 2; // '//'
    skipLine(Ptr);
  }
  return StartsLineComment;
}
4502+
4503+
/// If \p Ptr points at "/*", advances \p Ptr past the matching "*/". An
/// unterminated comment consumes the rest of the buffer, leaving \p Ptr at
/// the terminating NUL.
///
/// \returns True if \p Ptr pointed at a block comment opener, false
/// otherwise (in which case \p Ptr is left unchanged).
static bool trySkipBlockComment(const char *&Ptr) {
  const bool StartsBlockComment = (Ptr[0] == '/' && Ptr[1] == '*');
  if (!StartsBlockComment)
    return false;

  // Start searching after the opener so that "/*/" is not mistaken for a
  // complete comment.
  for (Ptr += 2; *Ptr != '\0'; ++Ptr) {
    if (Ptr[0] != '*' || Ptr[1] != '/')
      continue;
    Ptr += 2; // '*/'
    return true;
  }

  // Unterminated comment: everything up to the end of the buffer was skipped.
  return true;
}
4517+
4518+
static bool trySkipComment(const char *&Ptr) {
4519+
return trySkipLineComment(Ptr) || trySkipBlockComment(Ptr);
4520+
}
4521+
4522+
// Skipps over comments and (non-module) directives
4523+
static void skipToRelevantCXXModuleText(const char *&Ptr) {
4524+
while (*Ptr != '\0') {
4525+
skipWhitespace(Ptr);
4526+
if (trySkipComment(Ptr) || trySkipDirective(Ptr) || trySkipLineSplice(Ptr))
4527+
continue;
4528+
break; // Found relevant text!
4529+
}
4530+
}
4531+
4532+
/// Checks whether the first relevant text of \p Buffer (after whitespace,
/// comments, '#' lines and line splices) is one of the keywords `module`,
/// `import` or `export`, followed by something that properly ends the
/// keyword.
///
/// The scan relies on NUL characters to detect the end of the buffer.
///
/// \returns True if the buffer appears to start with a C++20 module-related
/// declaration, false otherwise.
static bool scanBufferForCXXModuleUsage(const llvm::MemoryBuffer &Buffer) {
  const char *Ptr = Buffer.getBufferStart();
  skipToRelevantCXXModuleText(Ptr);

  // Check if the buffer has enough remaining bytes left for any of the
  // module-related declaration fragments we are checking for, without making
  // the potentially memory-mapped buffer load unnecessary pages.
  constexpr int MinKeywordLength = 6;
  const char *Begin = Ptr;
  for (int i = 0; i < MinKeywordLength; ++i) {
    if (*Ptr == '\0')
      return false;
    ++Ptr;
  }
  const StringRef Text(Begin, MinKeywordLength);

  const bool IsGlobalModule = Text.starts_with("module");
  const bool IsImport = Text.starts_with("import");
  if (!IsGlobalModule && !IsImport && !Text.starts_with("export"))
    return false;

  // Ensure the keyword has a proper ending and isn't part of an identifier
  // or namespace. For this we might have to skip line continuations.
  while (*Ptr != '\0') {
    if (isWhitespace(*Ptr) || (IsGlobalModule && *Ptr == ';'))
      return true;

    // A header-name may follow `import` directly, without any whitespace:
    // `import<vector>;` or `import"header.h";`.
    if (IsImport && (*Ptr == '<' || *Ptr == '"'))
      return true;

    // A comment is equivalent to whitespace, so a comment that directly
    // follows the keyword ends it (e.g. `import/**/foo;`).
    if (Ptr[0] == '/' && (Ptr[1] == '*' || Ptr[1] == '/'))
      return true;

    // A line splice joins the next physical line onto the keyword, so the
    // character after the splice decides whether the keyword has ended.
    if (trySkipLineSplice(Ptr))
      continue;

    return false;
  }

  return false;
}
4566+
4567+
/// Returns true if at least one entry of \p Inputs has the input type
/// types::TY_CXXModule.
static bool hasCXXModuleInputType(const Driver::InputList &Inputs) {
  return llvm::any_of(Inputs, [](const auto &Input) -> bool {
    return Input.first == types::TY_CXXModule;
  });
}
4574+
4575+
/// Scans every C++ input in \p Inputs, in order, for C++20 module usage and
/// stops at the first input in which usage is found, reporting
/// remark_found_cxx20_module_usage for that file.
///
/// \returns True if module usage was found, false if no C++ input uses
/// modules, or the error produced while reading an input through the VFS.
llvm::ErrorOr<bool>
Driver::ScanInputsForCXXModuleUsage(const InputList &Inputs) const {
  for (const auto &Input : Inputs) {
    // Only C++ inputs are of interest.
    if (!types::isCXX(Input.first))
      continue;

    StringRef Path = Input.second->getSpelling();
    auto BufferOrErr = VFS->getBufferForFile(Path);
    if (!BufferOrErr)
      return BufferOrErr.getError();

    if (!scanBufferForCXXModuleUsage(**BufferOrErr))
      continue;

    Diags.Report(diag::remark_found_cxx20_module_usage) << Path;
    return true;
  }

  return false;
}
4595+
44204596
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44214597
const InputList &Inputs, ActionList &Actions) const {
44224598
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -4428,6 +4604,33 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44284604

44294605
handleArguments(C, Args, Inputs, Actions);
44304606

4607+
if (Args.hasFlag(options::OPT_fmodules_driver,
4608+
options::OPT_fno_modules_driver, false)) {
4609+
// TODO: Move the logic for implicitly enabling explicit-module-builds out
4610+
// of -fmodules-driver once it is no longer experimental.
4611+
// Currently, this serves diagnostic purposes only.
4612+
bool UsesCXXModules = hasCXXModuleInputType(Inputs);
4613+
if (!UsesCXXModules) {
4614+
const auto ErrOrScanResult = ScanInputsForCXXModuleUsage(Inputs);
4615+
if (!ErrOrScanResult) {
4616+
Diags.Report(diag::err_cannot_open_file)
4617+
<< ErrOrScanResult.getError().message();
4618+
return;
4619+
}
4620+
UsesCXXModules = *ErrOrScanResult;
4621+
}
4622+
if (UsesCXXModules)
4623+
BuildDriverManagedModuleBuildActions(C, Args, Inputs, Actions);
4624+
return;
4625+
}
4626+
4627+
BuildDefaultActions(C, Args, Inputs, Actions);
4628+
}
4629+
4630+
void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
4631+
const InputList &Inputs,
4632+
ActionList &Actions) const {
4633+
44314634
bool UseNewOffloadingDriver =
44324635
C.isOffloadingHostKind(Action::OFK_OpenMP) ||
44334636
C.isOffloadingHostKind(Action::OFK_SYCL) ||
@@ -4711,6 +4914,13 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
47114914
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
47124915
}
47134916

4917+
/// Placeholder for the driver managed module build. Currently this only
/// reports remark_performing_driver_managed_module_build; none of the
/// parameters are used and no actions are added to \p Actions.
void Driver::BuildDriverManagedModuleBuildActions(
    Compilation &C, llvm::opt::DerivedArgList &Args, const InputList &Inputs,
    ActionList &Actions) const {
  Diags.Report(diag::remark_performing_driver_managed_module_build);
}
4923+
47144924
/// Returns the canonical name for the offloading architecture when using a HIP
47154925
/// or CUDA architecture.
47164926
static StringRef getCanonicalArchString(Compilation &C,

0 commit comments

Comments
 (0)