diff --git a/llvm/docs/DTLTO.rst b/llvm/docs/DTLTO.rst new file mode 100644 index 0000000000000..d5f90b90a5737 --- /dev/null +++ b/llvm/docs/DTLTO.rst @@ -0,0 +1,224 @@ +=================== +DTLTO +=================== +.. contents:: + :local: + :depth: 2 + +.. toctree:: + :maxdepth: 1 + +Distributed ThinLTO (DTLTO) +=========================== + +Distributed ThinLTO (DTLTO) facilitates the distribution of backend ThinLTO +compilations via external distribution systems such as Incredibuild. + +The existing method of distributing ThinLTO compilations via separate thin-link, +backend compilation, and link steps often requires significant changes to the +user's build process to adopt, as it requires using a build system which can +handle the dynamic dependencies specified by the index files, such as Bazel. + +DTLTO eliminates this need by managing distribution internally within the LLD +linker during the traditional link step. This allows DTLTO to be used with any +build process that supports in-process ThinLTO. + +Limitations +----------- + +The current implementation of DTLTO has the following limitations: + +- The ThinLTO cache is not supported. +- Only ELF and COFF platforms are supported. +- Archives with bitcode members are not supported. +- Only a very limited set of LTO configurations are currently supported, e.g., + support for basic block sections is not currently available. + +Overview of Operation +--------------------- + +For each ThinLTO backend compilation job, LLD: + +1. Generates the required summary index shard. +2. Records a list of input and output files. +3. Constructs a Clang command line to perform the ThinLTO backend compilation. + +This information is supplied, via a JSON file, to a distributor program that +executes the backend compilations using a distribution system. Upon completion, +LLD integrates the compiled native object files into the link process. + +The design keeps the details of distribution systems out of the LLVM source +code. 
+ +Distributors +------------ + +Distributors are programs responsible for: + +1. Consuming the JSON backend compilation job description file. +2. Translating job descriptions into requests for the distribution system. +3. Blocking execution until all backend compilations are complete. + +Distributors must return a non-zero exit code on failure. They can be +implemented as binaries or in scripting languages, such as Python. An example +script demonstrating basic local execution is available with the LLVM source +code. + +How Distributors Are Invoked +---------------------------- + +Clang and LLD provide options to specify a distributor program for managing +backend compilations. Distributor options and backend compilation options can +also be specified. Such options are transparently forwarded. + +The backend compilations are currently performed by invoking Clang. For further +details, refer to: + +- Clang documentation: https://clang.llvm.org/docs/ThinLTO.html +- LLD documentation: https://lld.llvm.org/DTLTO.html + +When invoked with a distributor, LLD generates a JSON file describing the +backend compilation jobs and executes the distributor passing it this file. The +JSON file provides the following information to the distributor: + +- The **command line** to execute the backend compilations. + - DTLTO constructs a Clang command line by translating some of the LTO + configuration state into Clang options and forwarding options specified + by the user. + +- **Link output path**. + - A string identifying the output to which this LTO invocation will + contribute. Distributors can use this to label build jobs for informational + purposes. + +- The list of **imports** required for each job. + - The per-job list of bitcode files from which importing will occur. This is + the same information that is emitted into import files for ThinLTO. + +- The **input files** required for each job.
+ - The per-job set of files required for backend compilation, such as bitcode + files, summary index files, and profile data. + +- The **output files** generated by each job. + - The per-job files generated by the backend compilations, such as compiled + object files and toolchain metrics. + +Temporary Files +--------------- + +During its operation, DTLTO generates temporary files. Temporary files are +created in the same directory as the linker's output file and their filenames +include the stem of the bitcode module, or the output file that the LTO +invocation is contributing to, to aid the user in identifying them: + +- **JSON Job Description File**: + - Format: `dtlto..dist-file.json` + - Example: `dtlto.77380.dist-file.json` (for output file `dtlto.elf`). + +- **Object Files From Backend Compilations**: + - Format: `...native.o` + - Example: `my.1.77380.native.o` (for bitcode module `my.o`). + +- **Summary Index Shard Files**: + - Format: `...native.o.thinlto.bc` + - Example: `my.1.77380.native.o.thinlto.bc` (for bitcode module `my.o`). + +Temporary files are removed, by default, after the backend compilations complete. + +JSON Schema +----------- + +Below is an example of a JSON job file for backend compilation of the module +`dtlto.o`: + +.. code-block:: json + + { + "common": { + "linker_output": "dtlto.elf", + "linker_version": "LLD 20.0.0", + "args": [ + "/usr/local/clang", + "-O3", "-fprofile-sample-use=my.profdata", + "-o", ["primary_output", 0], + "-c", "-x", "ir", ["primary_input", 0], + ["summary_index", "-fthinlto-index=", 0], + "--target=x86_64-sie-ps5" + ] + }, + "jobs": [ + { + "primary_input": ["dtlto.o"], + "summary_index": ["dtlto.1.51232.native.o.thinlto.bc"], + "primary_output": ["dtlto.1.51232.native.o"], + "imports": [], + "additional_inputs": ["my.profdata"] + } + ] + } + +Key Features of the Schema +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- **Input/Output Paths**: Paths are stored in per-file-type array fields. 
This + allows files to be adjusted, if required, to meet the constraints of the + underlying distribution system. For example, a system may only be able to read + and write remote files to `C:\\sandbox`. The remote paths used can be adjusted + by the distributor for such constraints. Once outputs are back on the local + system, the distributor can rename them as required. + + +- **Command-Line Template**: Command-line options are stored in a common + template to avoid duplication for each job. The template consists of an array + of strings and arrays. The arrays are placeholders which reference per-job + paths. This allows the remote compiler and its arguments to be changed without + updating the distributors. + +Command-Line Expansion Example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To create the backend compilation commands, the command-line template is +expanded for each job. Placeholders are expanded in the following way: The first +array element specifies the name of the array field to look in. The remaining +elements are converted to strings and concatenated. Integers are converted by +indexing into the specified array. + +The example above generates the following backend compilation command for +`dtlto.o`: + +.. code-block:: console + + /usr/local/clang -O3 -fprofile-sample-use=my.profdata \ + -o dtlto.1.51232.native.o -c -x ir dtlto.o \ + -fthinlto-index=dtlto.1.51232.native.o.thinlto.bc --target=x86_64-sie-ps5 + +This expansion scheme allows the remote compiler to be changed without updating +the distributors. For example, if the "args" field in the above example was +replaced with: + +.. code-block:: json + + "args": [ + "custom-compiler", + "-opt-level=2", + "-profile-instrument-use-path=my.profdata", + "-output", ["primary_output", 0], + "-input", ["primary_input", 0], + "-thinlto-index", ["summary_index", 0], + "-triple", "x86_64-sie-ps5" + ] + +Then distributors can expand the command line without needing to be updated: + +..
code-block:: console + + custom-compiler -opt-level=2 -profile-instrument-use-path=my.profdata \ + -output dtlto.1.51232.native.o -input dtlto.o \ + -thinlto-index dtlto.1.51232.native.o.thinlto.bc -triple x86_64-sie-ps5 + +Constraints +----------- + +- Matching versions of Clang and LLD should be used. +- The distributor used must support the JSON schema generated by the version of + LLD in use. \ No newline at end of file diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 6eee564713d6d..3e16fe42b7d11 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -32,6 +32,7 @@ intermediate LLVM representation. DebuggingJITedCode DirectXUsage Docker + DTLTO FatLTO ExtendingLLVM GitHub @@ -164,6 +165,11 @@ Optimizations This document describes the interface between LLVM intermodular optimizer and the linker and its design +:doc:`DTLTO` + This document describes the DTLTO implementation, which allows for + distributing ThinLTO backend compilations without requiring support from + the build system. + :doc:`GoldPlugin` How to build your programs with link-time optimization on Linux. diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 242a05f7d32c0..4d0eda30b8636 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -199,6 +199,8 @@ class InputFile { using IndexWriteCallback = std::function; +using ImportsFilesContainer = llvm::SmallVector; + /// This class defines the interface to the ThinLTO backend. 
class ThinBackendProc { protected: @@ -223,13 +225,15 @@ class ThinBackendProc { BackendThreadPool(ThinLTOParallelism) {} virtual ~ThinBackendProc() = default; + virtual void setup(unsigned MaxTasks, unsigned ReservedTasks) {} virtual Error start( unsigned Task, BitcodeModule BM, const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, - MapVector &ModuleMap) = 0; - Error wait() { + MapVector &ModuleMap, + DenseMap &ModuleTriples) = 0; + virtual Error wait() { BackendThreadPool.wait(); if (Err) return std::move(*Err); @@ -240,8 +244,15 @@ class ThinBackendProc { // Write sharded indices and (optionally) imports to disk Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, - llvm::StringRef ModulePath, - const std::string &NewModulePath) const; + StringRef ModulePath, const std::string &NewModulePath) const; + + // Write sharded indices to SummaryPath, (optionally) imports to disk, and + // (optionally) record imports in ImportsFiles. + Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, + StringRef ModulePath, StringRef SummaryPath, + const std::string &NewModulePath, + std::optional> + ImportsFiles) const; }; /// This callable defines the behavior of a ThinLTO backend after the thin-link @@ -253,7 +264,7 @@ class ThinBackendProc { using ThinBackendFunction = std::function( const Config &C, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache)>; + AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache)>; /// This type defines the behavior following the thin-link phase during ThinLTO. 
/// It encapsulates a backend function and a strategy for thread pool @@ -268,10 +279,10 @@ struct ThinBackend { std::unique_ptr operator()( const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache) { + AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) { assert(isValid() && "Invalid backend function"); return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries, - std::move(AddStream), std::move(Cache)); + std::move(AddStream), std::move(AddBuffer), std::move(Cache)); } ThreadPoolStrategy getParallelism() const { return Parallelism; } bool isValid() const { return static_cast(Func); } @@ -294,6 +305,22 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, bool ShouldEmitIndexFiles = false, bool ShouldEmitImportsFiles = false); +/// This ThinBackend generates the index shards and then runs the individual +/// backend jobs via an external process. It takes the same parameters as the +/// InProcessThinBackend, however, these parameters only control the behavior +/// when generating the index files for the modules. Additionally: +/// LinkerOutputFile is a string that should identify this LTO invocation in +/// the context of a wider build. It's used for naming to aid the user in +/// identifying activity related to a specific LTO invocation. +/// Distributor specifies the path to a process to invoke to manage the backend +/// jobs execution. +/// SaveTemps is a debugging tool that prevents temporary files created by this +/// backend from being cleaned up. +ThinBackend createOutOfProcessThinBackend( + ThreadPoolStrategy Parallelism, IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + StringRef LinkerOutputFile, StringRef Distributor, bool SaveTemps); + /// This ThinBackend writes individual module indexes to files, instead of /// running the individual backend jobs. 
This backend is for distributed builds /// where separate processes will invoke the real backends. @@ -369,15 +396,22 @@ class LTO { /// full description of tasks see LTOBackend.h. unsigned getMaxTasks() const; - /// Runs the LTO pipeline. This function calls the supplied AddStream - /// function to add native object files to the link. + /// Runs the LTO pipeline. This function calls the supplied AddStream or + /// AddBuffer function to add native object files to the link depending on + /// whether the files are streamed into memory or written to disk by the + /// backend. /// /// The Cache parameter is optional. If supplied, it will be used to cache /// native object files and add them to the link. /// - /// The client will receive at most one callback (via either AddStream or + /// The AddBuffer parameter is only required for DTLTO, currently. It is + /// optional to minimise the impact on current LTO users (DTLTO is not used + /// currently). + /// + /// The client will receive at most one callback (via AddStream, AddBuffer or /// Cache) for each task identifier. - Error run(AddStreamFn AddStream, FileCache Cache = {}); + Error run(AddStreamFn AddStream, FileCache Cache = {}, + AddBufferFn AddBuffer = nullptr); /// Static method that returns a list of libcall symbols that can be generated /// by LTO but might not be visible from bitcode symbol table. @@ -426,6 +460,7 @@ class LTO { // The bitcode modules to compile, if specified by the LTO Config. std::optional ModulesToCompile; DenseMap PrevailingModuleForGUID; + DenseMap ModuleTriples; } ThinLTO; // The global resolution for a particular (mangled) symbol name. 
This is in @@ -517,10 +552,12 @@ class LTO { bool LivenessFromIndex); Error addThinLTO(BitcodeModule BM, ArrayRef Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + const SymbolResolution *&ResI, const SymbolResolution *ResE, + StringRef Triple); Error runRegularLTO(AddStreamFn AddStream); - Error runThinLTO(AddStreamFn AddStream, FileCache Cache, + Error runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer, + FileCache Cache, const DenseSet &GUIDPreservedSymbols); Error checkPartiallySplit(); diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h index cf45145619d95..8c3ea4f205d4c 100644 --- a/llvm/include/llvm/Support/Caching.h +++ b/llvm/include/llvm/Support/Caching.h @@ -84,7 +84,8 @@ struct FileCache { std::string CacheDirectoryPath; }; -/// This type defines the callback to add a pre-existing file (e.g. in a cache). +/// This type defines the callback to add a pre-existing file (e.g. in a cache +/// or created by a backend compilation run as a separate process). /// /// Buffer callbacks must be thread safe. using AddBufferFn = std::function F); + /// Based on the information recorded in the summaries during global /// summary-based analysis: /// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 0f53c60851217..a76521b4c8873 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -41,8 +41,11 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ThreadPool.h" @@ -91,6 +94,19 @@ extern cl::opt SupportsHotColdNew; /// Enable MemProf context disambiguation for thin link. 
extern cl::opt EnableMemProfContextDisambiguation; + +cl::list AdditionalThinLTODistributorArgs( + "thinlto-distributor-arg", + cl::desc("Additional arguments to pass to the ThinLTO distributor")); + +cl::opt ThinLTORemoteCompiler( + "thinlto-remote-compiler", + cl::desc("Compiler to invoke for the ThinLTO backend compilations")); + +cl::list + ThinLTORemoteCompilerArgs("thinlto-remote-compiler-arg", + cl::desc("Additional arguments to pass to the " + "ThinLTO remote compiler")); } // namespace llvm // Computes a unique hash for the Module considering the current list of @@ -783,7 +799,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI, LTOInfo->HasSummary); if (IsThinLTO) - return addThinLTO(BM, ModSyms, ResI, ResE); + return addThinLTO(BM, ModSyms, ResI, ResE, Input.getTargetTriple()); RegularLTO.EmptyCombinedModule = false; Expected ModOrErr = @@ -1030,7 +1046,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, // Add a ThinLTO module to the link. Error LTO::addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, - const SymbolResolution *ResE) { + const SymbolResolution *ResE, StringRef Triple) { const SymbolResolution *ResITmp = ResI; for (const InputFile::Symbol &Sym : Syms) { assert(ResITmp != ResE); @@ -1090,6 +1106,8 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef Syms, "Expected at most one ThinLTO module per bitcode file", inconvertibleErrorCode()); + ThinLTO.ModuleTriples.insert({BM.getModuleIdentifier(), Triple.str()}); + if (!Conf.ThinLTOModulesToCompile.empty()) { if (!ThinLTO.ModulesToCompile) ThinLTO.ModulesToCompile = ModuleMapType(); @@ -1158,7 +1176,7 @@ Error LTO::checkPartiallySplit() { return Error::success(); } -Error LTO::run(AddStreamFn AddStream, FileCache Cache) { +Error LTO::run(AddStreamFn AddStream, FileCache Cache, AddBufferFn AddBuffer) { // Compute "dead" symbols, we don't want to import/export these! 
DenseSet GUIDPreservedSymbols; DenseMap GUIDPrevailingResolutions; @@ -1208,7 +1226,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { if (!Result) // This will reset the GlobalResolutions optional once done with it to // reduce peak memory before importing. - Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); + Result = runThinLTO(AddStream, AddBuffer, Cache, GUIDPreservedSymbols); if (StatsFile) PrintStatisticsJSON(StatsFile->os()); @@ -1390,6 +1408,16 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { Error ThinBackendProc::emitFiles( const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, const std::string &NewModulePath) const { + return emitFiles(ImportList, ModulePath, NewModulePath + ".thinlto.bc", + NewModulePath, + /*ImportsFiles=*/std::nullopt); +} + +Error ThinBackendProc::emitFiles( + const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, + StringRef SummaryPath, const std::string &NewModulePath, + std::optional> ImportsFiles) + const { ModuleToSummariesForIndexTy ModuleToSummariesForIndex; GVSummaryPtrSet DeclarationSummaries; @@ -1398,10 +1426,9 @@ Error ThinBackendProc::emitFiles( ImportList, ModuleToSummariesForIndex, DeclarationSummaries); - raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::OF_None); + raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None); if (EC) - return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC); + return createFileError("cannot open " + Twine(SummaryPath), EC); writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex, &DeclarationSummaries); @@ -1412,29 +1439,31 @@ Error ThinBackendProc::emitFiles( if (ImportFilesError) return ImportFilesError; } + + // Optionally, store the imports files. 
+ if (ImportsFiles) + processImportsFiles( + ModulePath, ModuleToSummariesForIndex, + [&](StringRef M) { ImportsFiles->get().push_back(M.str()); }); + return Error::success(); } namespace { -class InProcessThinBackend : public ThinBackendProc { +class CGThinBackend : public ThinBackendProc { protected: - AddStreamFn AddStream; - FileCache Cache; DenseSet CfiFunctionDefs; DenseSet CfiFunctionDecls; - bool ShouldEmitIndexFiles; public: - InProcessThinBackend( + CGThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, - ThreadPoolStrategy ThinLTOParallelism, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, - bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) + lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, + bool ShouldEmitImportsFiles, ThreadPoolStrategy ThinLTOParallelism) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), - AddStream(std::move(AddStream)), Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( @@ -1443,6 +1472,24 @@ class InProcessThinBackend : public ThinBackendProc { CfiFunctionDecls.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); } +}; + +class InProcessThinBackend : public CGThinBackend { +protected: + AddStreamFn AddStream; + FileCache Cache; + +public: + InProcessThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) + : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite, + ShouldEmitIndexFiles, ShouldEmitImportsFiles, + ThinLTOParallelism), + AddStream(std::move(AddStream)), Cache(std::move(Cache)) {} virtual 
Error runThinLTOBackendThread( AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, @@ -1496,7 +1543,8 @@ class InProcessThinBackend : public ThinBackendProc { const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, - MapVector &ModuleMap) override { + MapVector &ModuleMap, + DenseMap & /*ModuleTriples*/) override { StringRef ModulePath = BM.getModuleIdentifier(); assert(ModuleToDefinedGVSummaries.count(ModulePath)); const GVSummaryMapTy &DefinedGlobals = @@ -1709,7 +1757,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism, auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache) { + AddStreamFn AddStream, AddBufferFn /*AddBuffer*/, FileCache Cache) { return std::make_unique( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream, Cache, OnWrite, ShouldEmitIndexFiles, @@ -1776,7 +1824,8 @@ class WriteIndexesThinBackend : public ThinBackendProc { const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, - MapVector &ModuleMap) override { + MapVector &ModuleMap, + DenseMap & /*ModuleTriples*/) override { StringRef ModulePath = BM.getModuleIdentifier(); // The contents of this file may be used as input to a native link, and must @@ -1830,7 +1879,7 @@ ThinBackend lto::createWriteIndexesThinBackend( auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache) { + AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) { return std::make_unique( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, @@ -1839,7 +1888,8 @@ ThinBackend lto::createWriteIndexesThinBackend( return ThinBackend(Func, 
Parallelism); } -Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, +Error LTO::runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer, + FileCache Cache, const DenseSet &GUIDPreservedSymbols) { LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); ThinLTO.CombinedIndex.releaseTemporaryMemory(); @@ -2013,9 +2063,12 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, return BackendProcess->start( RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], - ResolvedODR[Mod.first], ThinLTO.ModuleMap); + ResolvedODR[Mod.first], ThinLTO.ModuleMap, ThinLTO.ModuleTriples); }; + BackendProcess->setup(ModuleMap.size(), + RegularLTO.ParallelCodeGenParallelismLevel); + if (BackendProcess->getThreadCount() == 1 || BackendProcess->isSensitiveToInputOrder()) { // Process the modules in the order they were provided on the @@ -2045,7 +2098,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, if (!CodeGenDataThinLTOTwoRounds) { std::unique_ptr BackendProc = ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); + AddStream, AddBuffer, Cache); return RunBackends(BackendProc.get()); } @@ -2142,3 +2195,319 @@ std::vector lto::generateModulesOrdering(ArrayRef R) { }); return ModulesOrdering; } + +namespace { +// For this out-of-process backend no codegen is done when invoked for each +// task. Instead we generate the required information (e.g. the summary index +// shard, import list, etc.) to allow for the codegen to be performed +// externally. This backend's `wait` function then invokes an external +// distributor process to do backend compilations.
+class OutOfProcessThinBackend : public CGThinBackend { + using SString = SmallString<128>; + + AddBufferFn AddBuffer; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + SString LinkerOutputFile; + SString DistributorPath; + bool SaveTemps; + + SmallVector CodegenOptions; + DenseSet AdditionalInputs; + + // Information specific to individual backend compilation job. + struct Job { + unsigned Task; + StringRef ModuleID; + StringRef Triple; + StringRef NativeObjectPath; + StringRef SummaryIndexPath; + ImportsFilesContainer ImportFiles; + }; + // The set of backend compilations jobs. + SmallVector Jobs; + + // A unique string to identify the current link. + SmallString<8> UID; + + // The first ReservedTasks entries in the task range are used for Full LTO. + unsigned ReservedTasks; + +public: + OutOfProcessThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, AddBufferFn AddBuffer, + lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, + bool ShouldEmitImportsFiles, StringRef LinkerOutputFile, + StringRef Distributor, bool SaveTemps) + : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite, + ShouldEmitIndexFiles, ShouldEmitImportsFiles, + ThinLTOParallelism), + AddBuffer(std::move(AddBuffer)), LinkerOutputFile(LinkerOutputFile), + DistributorPath(Distributor), SaveTemps(SaveTemps) {} + + virtual void setup(unsigned MaxTasks, unsigned ReservedTasks) override { + UID = itostr(sys::Process::getProcessId()); + Jobs.resize((size_t)MaxTasks); + this->ReservedTasks = ReservedTasks; + } + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + MapVector &ModuleMap, + DenseMap &ModuleTriples) override { + + StringRef ModulePath = BM.getModuleIdentifier(); + + SString ObjFilePath = 
sys::path::parent_path(LinkerOutputFile); + sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." + + itostr(Task) + "." + UID + ".native.o"); + + Job &J = Jobs[Task - ReservedTasks]; + J = {Task, + ModulePath, + ModuleTriples[ModulePath], + Saver.save(ObjFilePath.str()), + Saver.save(ObjFilePath.str() + ".thinlto.bc"), + {}}; + + assert(ModuleToDefinedGVSummaries.count(ModulePath)); + BackendThreadPool.async( + [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) { + if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath, + J.ModuleID.str(), J.ImportFiles)) { + std::unique_lock L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + } + }, + std::ref(J), std::ref(ImportList)); + + return Error::success(); + } + + // Derive a set of Clang options that will be shared/common for all DTLTO + // backend compilations. We are intentionally minimal here as these options + // must remain synchronized with the behavior of Clang. DTLTO does not support + // all the features available with in-process LTO. More features are expected + // to be added over time. Users can specify Clang options directly if a + // feature is not supported. Note that explicitly specified options that imply + // additional input or output file dependencies must be communicated to the + // distribution system, potentially by setting extra options on the + // distributor program. + // TODO: If this strategy of deriving options proves insufficient, alternative + // approaches should be considered, such as: + // - A serialization/deserialization format for LTO configuration. + // - Modifying LLD to be the tool that performs the backend compilations. 
+ void buildCommonRemoteCompilerOptions() { + const lto::Config &C = Conf; + auto &Ops = CodegenOptions; + llvm::Triple TT{Jobs.front().Triple}; + + Ops.push_back(Saver.save("-O" + Twine(C.OptLevel))); + + if (C.Options.EmitAddrsig) + Ops.push_back("-faddrsig"); + if (C.Options.FunctionSections) + Ops.push_back("-ffunction-sections"); + if (C.Options.DataSections) + Ops.push_back("-fdata-sections"); + + if (C.RelocModel == Reloc::PIC_) + // Clang doesn't have -fpic for all triples. + if (!TT.isOSBinFormatCOFF()) + Ops.push_back("-fpic"); + + // Turn on/off warnings about profile cfg mismatch (default on) + // --lto-pgo-warn-mismatch. + if (!C.PGOWarnMismatch) { + Ops.push_back("-mllvm"); + Ops.push_back("-no-pgo-warn-mismatch"); + } + + // Enable sample-based profile guided optimizations. + // Sample profile file path --lto-sample-profile=. + if (!C.SampleProfile.empty()) { + Ops.push_back( + Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile))); + AdditionalInputs.insert(C.SampleProfile); + } + + // We don't know which of options will be used by Clang. + Ops.push_back("-Wno-unused-command-line-argument"); + + // Forward any supplied options. + if (!ThinLTORemoteCompilerArgs.empty()) + for (auto &a : ThinLTORemoteCompilerArgs) + Ops.push_back(a); + } + + // Generates a JSON file describing the backend compilations, for the + // distributor. + bool emitDistributorJson(StringRef DistributorJson) { + using json::Array; + std::error_code EC; + raw_fd_ostream OS(DistributorJson, EC); + if (EC) + return false; + + json::OStream JOS(OS); + JOS.object([&]() { + // Information common to all jobs note that we use a custom syntax for + // referencing by index into the job input and output file arrays. + JOS.attributeObject("common", [&]() { + JOS.attribute("linker_output", LinkerOutputFile); + + // Common command line template. + JOS.attributeArray("args", [&]() { + JOS.value(ThinLTORemoteCompiler); + + // Reference to Job::NativeObjectPath. 
+ JOS.value("-o"); + JOS.value(Array{"primary_output", 0}); + + JOS.value("-c"); + + JOS.value("-x"); + JOS.value("ir"); + + // Reference to Job::ModuleID. + JOS.value(Array{"primary_input", 0}); + + // Reference to Job::SummaryIndexPath. + JOS.value(Array{"summary_index", "-fthinlto-index=", 0}); + JOS.value(Saver.save("--target=" + Twine(Jobs.front().Triple))); + + for (const auto &A : CodegenOptions) + JOS.value(A); + }); + }); + JOS.attributeArray("jobs", [&]() { + for (const auto &J : Jobs) { + assert(J.Task != 0); + JOS.object([&]() { + JOS.attribute("primary_input", Array{J.ModuleID}); + JOS.attribute("summary_index", Array{J.SummaryIndexPath}); + JOS.attribute("primary_output", Array{J.NativeObjectPath}); + + // Add the bitcode files from which imports will be made. These do + // not appear on the command line but are recorded in the summary + // index shard. + JOS.attribute("imports", Array(J.ImportFiles)); + + // Add any input files that are common to each invocation. These + // filenames are duplicated in the command line template and in + // each of the per job "inputs" array. However, this small amount + // of duplication makes the schema simpler. + JOS.attribute("additional_inputs", Array(AdditionalInputs)); + }); + } + }); + }); + + return true; + } + + void removeFile(StringRef FileName) { + std::error_code EC = sys::fs::remove(FileName, true); + if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory)) + errs() << "warning: could not remove the file '" << FileName + << "': " << EC.message() << "\n"; + } + + Error wait() override { + // Wait for the information on the required backend compilations to be + // gathered. 
+ BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + + auto CleanPerJobFiles = llvm::make_scope_exit([&] { + if (!SaveTemps) + for (auto &Job : Jobs) { + removeFile(Job.NativeObjectPath); + if (!ShouldEmitIndexFiles) + removeFile(Job.SummaryIndexPath); + } + }); + + const StringRef BCError = "DTLTO backend compilation: "; + + // TODO: If we move to using an optimisation tool that does not require an + // explicit triple to be passed then the triple handling can be removed + // entirely. + if (!llvm::all_of(Jobs, [&](const auto &Job) { + return Job.Triple == Jobs.front().Triple; + })) + return make_error(BCError + "all triples must be consistent", + inconvertibleErrorCode()); + + buildCommonRemoteCompilerOptions(); + + SString JsonFile = sys::path::parent_path(LinkerOutputFile); + sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID + + ".dist-file.json"); + if (!emitDistributorJson(JsonFile)) + return make_error( + BCError + "failed to generate distributor JSON script: " + JsonFile, + inconvertibleErrorCode()); + auto CleanJson = llvm::make_scope_exit([&] { + if (!SaveTemps) + removeFile(JsonFile); + }); + + SmallVector Args = {DistributorPath}; + llvm::append_range(Args, AdditionalThinLTODistributorArgs); + Args.push_back(JsonFile); + std::string ErrMsg; + if (sys::ExecuteAndWait(Args[0], Args, + /*Env=*/std::nullopt, /*Redirects=*/{}, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) { + return make_error( + BCError + "distributor execution failed" + + (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")), + inconvertibleErrorCode()); + } + + for (auto &Job : Jobs) { + // Load the native object from a file into a memory buffer + // and store its contents in the output buffer. 
+ ErrorOr> objFileMbOrErr = + MemoryBuffer::getFile(Job.NativeObjectPath, false, false); + if (std::error_code ec = objFileMbOrErr.getError()) + return make_error( + BCError + "cannot open native object file: " + + Job.NativeObjectPath + ": " + ec.message(), + inconvertibleErrorCode()); + AddBuffer(Job.Task, Job.ModuleID, std::move(objFileMbOrErr.get())); + } + + return Error::success(); + } +}; +} // end anonymous namespace + +ThinBackend lto::createOutOfProcessThinBackend( + ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + StringRef LinkerOutputFile, StringRef Distributor, bool SaveTemps) { + auto Func = + [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache /*Cache*/) { + return std::make_unique( + Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, + AddStream, AddBuffer, OnWrite, ShouldEmitIndexFiles, + ShouldEmitImportsFiles, LinkerOutputFile, Distributor, SaveTemps); + }; + return ThinBackend(Func, Parallelism); +} diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index c3d0a1a3a046e..cdcf918d3fae8 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1568,13 +1568,23 @@ Error llvm::EmitImportsFiles( if (EC) return createFileError("cannot open " + OutputFilename, errorCodeToError(EC)); + processImportsFiles(ModulePath, ModuleToSummariesForIndex, + [&](StringRef M) { ImportsOS << M << "\n"; }); + return Error::success(); +} + +/// Invoke callback \p F on the file paths from which \p ModulePath +/// will import. 
+void llvm::processImportsFiles( + StringRef ModulePath, + const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, + function_ref F) { for (const auto &ILI : ModuleToSummariesForIndex) // The ModuleToSummariesForIndex map includes an entry for the current // Module (needed for writing out the index files). We don't want to // include it in the imports file, however, so filter it out. if (ILI.first != ModulePath) - ImportsOS << ILI.first << "\n"; - return Error::success(); + F(ILI.first); } bool llvm::convertToDeclaration(GlobalValue &GV) { diff --git a/llvm/test/ThinLTO/X86/dtlto/dtlto.test b/llvm/test/ThinLTO/X86/dtlto/dtlto.test new file mode 100644 index 0000000000000..2239cdcc6bdf1 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/dtlto.test @@ -0,0 +1,84 @@ +# Test DTLTO output with llvm-lto2. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +# Generate mock native object files. +RUN: opt t1.ll -o t1.o +RUN: opt t2.ll -o t2.o + +# Create an empty subdirectory to avoid having to account for the input files. +RUN: mkdir %t/out && cd %t/out + +# Define a substitution to share the common DTLTO arguments. +DEFINE: %{command} = llvm-lto2 run ../t1.bc ../t2.bc -o t.o \ +DEFINE: -dtlto \ +DEFINE: -dtlto-distributor=%python \ +DEFINE: -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \ +DEFINE: -thinlto-distributor-arg=../t1.o \ +DEFINE: -thinlto-distributor-arg=../t2.o \ +DEFINE: -r=../t1.bc,t1,px \ +DEFINE: -r=../t2.bc,t2,px + +# Perform DTLTO. mock.py does not do any compilation, instead it simply writes +# the contents of the object files supplied on the command line into the +# output object files in job order. +RUN: %{command} + +# Check that the expected output files have been created. +RUN: ls | count 2 +RUN: ls | FileCheck %s --check-prefix=THINLTO + +# llvm-lto2 ThinLTO output files. 
+THINLTO-DAG: {{^}}t.o.1{{$}} +THINLTO-DAG: {{^}}t.o.2{{$}} + +RUN: cd .. && rm -rf %t/out && mkdir %t/out && cd %t/out + +# Perform DTLTO with --save-temps. +RUN: %{command} --save-temps + +# Check that the expected output files have been created. +RUN: ls | count 12 +RUN: ls | FileCheck %s --check-prefixes=THINLTO,SAVETEMPS + +# Common -save-temps files from llvm-lto2. +SAVETEMPS-DAG: {{^}}t.o.resolution.txt{{$}} +SAVETEMPS-DAG: {{^}}t.o.index.bc{{$}} +SAVETEMPS-DAG: {{^}}t.o.index.dot{{$}} + +# -save-temps incremental files. +SAVETEMPS-DAG: {{^}}t.o.0.0.preopt.bc{{$}} +SAVETEMPS-DAG: {{^}}t.o.0.2.internalize.bc{{$}} + +# A jobs description JSON. +SAVETEMPS-DAG: {{^}}t.[[#]].dist-file.json{{$}} + +# Summary shards emitted for DTLTO. +SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o.thinlto.bc{{$}} +SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o.thinlto.bc{{$}} + +# DTLTO native output files (the results of the external backend compilations). +SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o{{$}} +SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o{{$}} + +#--- t1.ll + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @t1() { + ret void +} + +#--- t2.ll + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t2() { + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/imports.test b/llvm/test/ThinLTO/X86/dtlto/imports.test new file mode 100644 index 0000000000000..b1f47ffb95ff9 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/imports.test @@ -0,0 +1,73 @@ +# Check that DTLTO creates imports lists correctly. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Compile bitcode. +RUN: opt -thinlto-bc 0.ll -o 0.bc -O2 +RUN: opt -thinlto-bc 1.ll -o 1.bc -O2 + +# Define a substitution to share the common DTLTO arguments. Note that the use +# of validate.py will cause a failure as it does not create output files. 
+DEFINE: %{command} = llvm-lto2 run 0.bc 1.bc -o t.o \ +DEFINE: -dtlto \ +DEFINE: -dtlto-distributor=%python \ +DEFINE: -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \ +DEFINE: -thinlto-distributor-arg=0.bc \ +DEFINE: -thinlto-distributor-arg=1.bc \ +DEFINE: -thinlto-emit-indexes \ +DEFINE: -r=0.bc,g,px \ +DEFINE: -r=1.bc,f,px \ +DEFINE: -r=1.bc,g + +# We expect an import from 0.o into 1.o but no imports into 0.o. Check that the +# expected input files have been added to the JSON. +RUN: not %{command} >out.log 2>&1 +RUN: FileCheck --input-file=out.log %s --check-prefixes=INPUTS,ERR + +INPUTS: "primary_input": [ +INPUTS-NEXT: "0.bc" +INPUTS-NEXT: ] +INPUTS: "imports": [] +INPUTS: "primary_input": [ +INPUTS-NEXT: "1.bc" +INPUTS-NEXT: ] +INPUTS: "imports": [ +INPUTS-NEXT: "0.bc" +INPUTS-NEXT: ] + +# This check ensures that we have failed for the expected reason. +ERR: failed: DTLTO backend compilation: cannot open native object file: + +# Check that imports files are not created even if -save-temps is active. +RUN: not %{command} -save-temps 2>&1 \ +RUN: | FileCheck %s --check-prefixes=ERR +RUN: ls | FileCheck %s --check-prefix=NOIMPORTFILES +NOIMPORTFILES-NOT: imports + +# Check that imports files are created with -thinlto-emit-imports. +RUN: not %{command} -thinlto-emit-imports 2>&1 \ +RUN: | FileCheck %s --check-prefixes=ERR +RUN: ls | FileCheck %s --check-prefix=IMPORTFILES +IMPORTFILES: 0.bc.imports +IMPORTFILES: 1.bc.imports + +#--- 0.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @g() { +entry: + ret void +} + +#--- 1.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(...) + +define void @f() { +entry: + call void (...) 
@g() + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/json.test b/llvm/test/ThinLTO/X86/dtlto/json.test new file mode 100644 index 0000000000000..ee15dcfff1282 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/json.test @@ -0,0 +1,102 @@ +# Check that the JSON output from DTLTO is as expected. Note that validate.py +# checks the JSON structure so we just check the field contents in this test. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +# Perform DTLTO. +RUN: not llvm-lto2 run t1.bc t2.bc -o my.output \ +RUN: -r=t1.bc,t1,px -r=t2.bc,t2,px \ +RUN: -dtlto \ +RUN: -dtlto-distributor=%python \ +RUN: -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \ +RUN: -thinlto-remote-compiler=my_clang.exe \ +RUN: -thinlto-remote-compiler-arg=--rota1=10 \ +RUN: -thinlto-remote-compiler-arg=--rota2=20 \ +RUN: -thinlto-distributor-arg=--da1=10 \ +RUN: -thinlto-distributor-arg=--da2=10 \ +RUN: 2>&1 | FileCheck %s + +CHECK: distributor_args=['--da1=10', '--da2=10'] + +# Check the common object. +CHECK: "linker_output": "my.output" +CHECK: "args": +CHECK: "my_clang.exe" +CHECK: "-o" +CHECK-NEXT: [ +CHECK-NEXT: "primary_output" +CHECK-NEXT: 0 +CHECK-NEXT: ] +CHECK: "-c" +CHECK: "-x" +CHECK: "ir" +CHECK-NEXT: [ +CHECK-NEXT: "primary_input" +CHECK-NEXT: 0 +CHECK-NEXT: ] +CHECK: "summary_index" +CHECK-NEXT: "-fthinlto-index=" +CHECK-NEXT: 0 +CHECK-NEXT: ] +CHECK: "--target=x86_64-unknown-linux-gnu" +CHECK: "-O2", +CHECK: "-fpic" +CHECK: "-Wno-unused-command-line-argument" +CHECK: "--rota1=10" +CHECK: "--rota2=20" + +# Check the first job entry. 
+CHECK: "jobs": +CHECK: "primary_input": [ +CHECK-NEXT: "t1.bc" +CHECK-NEXT: ] +CHECK: "summary_index": [ +CHECK-NEXT: "t1.1.[[#]].native.o.thinlto.bc" +CHECK-NEXT: ] +CHECK: "primary_output": [ +CHECK-NEXT: "t1.1.[[#]].native.o" +CHECK-NEXT: ] +CHECK: "imports": [], +CHECK: "additional_inputs": [] +CHECK-NEXT: } + +# Check the second job entry. +CHECK-NEXT: { +CHECK-NEXT: "primary_input": [ +CHECK-NEXT: "t2.bc" +CHECK-NEXT: ] +CHECK-NEXT: "summary_index": [ +CHECK-NEXT: "t2.2.[[#]].native.o.thinlto.bc" +CHECK-NEXT: ] +CHECK-NEXT: "primary_output": [ +CHECK-NEXT: "t2.2.[[#]].native.o" +CHECK-NEXT: ] +CHECK-NEXT: "imports": [] +CHECK-NEXT: "additional_inputs": [] +CHECK-NEXT: } +CHECK-NEXT: ] + +# This check ensures that we have failed for the expected reason. +CHECK: failed: DTLTO backend compilation: cannot open native object file: + +#--- t1.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { +entry: + ret void +} + +#--- t2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t2() { +entry: + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/summary.test b/llvm/test/ThinLTO/X86/dtlto/summary.test new file mode 100644 index 0000000000000..a2fb08a736968 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/summary.test @@ -0,0 +1,54 @@ +# Check that DTLTO creates identical summary index shard files as are created +# for an equivalent ThinLTO link. + + RUN: rm -rf %t && split-file %s %t && cd %t + +# Generate ThinLTO bitcode files. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +# Generate mock native object files. +RUN: opt t1.ll -o t1.o +RUN: opt t2.ll -o t2.o + +# Define a substitution to share the common arguments. 
+DEFINE: %{command} = llvm-lto2 run t1.bc t2.bc -o t.o \ +DEFINE: -r=t1.bc,t1,px \ +DEFINE: -r=t2.bc,t2,px \ +DEFINE: -r=t2.bc,t1 \ +DEFINE: -thinlto-emit-indexes + +# Perform DTLTO. +RUN: %{command} -dtlto \ +RUN: -dtlto-distributor=%python \ +RUN: -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \ +RUN: -thinlto-distributor-arg=t1.o \ +RUN: -thinlto-distributor-arg=t2.o + +# Perform ThinLTO. +RUN: %{command} + +# Check for equivalence. We use a wildcard to account for the PID. +RUN: cmp t1.1.*.native.o.thinlto.bc t1.bc.thinlto.bc +RUN: cmp t2.2.*.native.o.thinlto.bc t2.bc.thinlto.bc + +#--- t1.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { +entry: + ret void +} + +#--- t2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @t1(...) + +define void @t2() { +entry: + call void (...) @t1() + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/triple.test b/llvm/test/ThinLTO/X86/dtlto/triple.test new file mode 100644 index 0000000000000..de6cedd382dd7 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/triple.test @@ -0,0 +1,43 @@ +# Test the DTLTO limitation that all triples must match. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +# Generate native object files. +RUN: opt t1.ll -o t1.o +RUN: opt t2.ll -o t2.o + +# Perform DTLTO. mock.py does not do any compilation, instead it emits the +# object files supplied using -thinlto-distributor-arg in job order. 
+RUN: not llvm-lto2 run t1.bc t2.bc -o t.o -save-temps \ +RUN: -dtlto \ +RUN: -dtlto-distributor=%python \ +RUN: -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \ +RUN: -thinlto-distributor-arg=t1.o \ +RUN: -thinlto-distributor-arg=t2.o \ +RUN: -r=t1.bc,t1,px \ +RUN: -r=t2.bc,t2,px 2>&1 | FileCheck %s + +# This check ensures that we have failed for the expected reason. +CHECK: failed: DTLTO backend compilation: all triples must be consistent + +;--- t1.ll + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @t1() { + ret void +} + +;--- t2.ll + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown-gnu" + +define void @t2() { + ret void +} diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index aad7a088551b2..6722064d2a7b6 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -91,6 +91,7 @@ def get_asan_rtlib(): config.substitutions.append(("%shlibext", config.llvm_shlib_ext)) config.substitutions.append(("%pluginext", config.llvm_plugin_ext)) config.substitutions.append(("%exeext", config.llvm_exe_ext)) +config.substitutions.append(("%llvm_src_root", config.llvm_src_root)) lli_args = [] diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index d4f022ef021a4..f00af52b6ca5f 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -97,6 +97,12 @@ static cl::opt "specified with -thinlto-emit-indexes or " "-thinlto-distributed-indexes")); +static cl::opt DTLTO("dtlto", cl::desc("Perform DTLTO")); + +static cl::opt + DTLTODistributor("dtlto-distributor", + cl::desc("Specify the distributor for DTLTO")); + // Default to using all available threads in the system, but using only one // thread per core (no SMT). 
 // Use -thinlto-threads=all to use hardware_concurrency() instead, which means
@@ -344,6 +350,10 @@ static int run(int argc, char **argv) {
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
+  if (ThinLTODistributedIndexes && DTLTO)
+    llvm::errs() << "-thinlto-distributed-indexes cannot be specified together "
+                    "with -dtlto\n";
+
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)
     Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads),
@@ -353,7 +363,13 @@ static int run(int argc, char **argv) {
                                             ThinLTOEmitImports,
                                             /*LinkedObjectsFile=*/nullptr,
                                             /*OnWrite=*/{});
-  else
+  else if (DTLTO) {
+
+    Backend = createOutOfProcessThinBackend(
+        llvm::heavyweight_hardware_concurrency(Threads),
+        /*OnWrite=*/{}, ThinLTOEmitIndexes, ThinLTOEmitImports, OutputFilename,
+        DTLTODistributor, SaveTemps);
+  } else
     Backend = createInProcessThinBackend(
         llvm::heavyweight_hardware_concurrency(Threads),
         /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports);
@@ -456,7 +472,7 @@ static int run(int argc, char **argv) {
     Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
                   "failed to create cache");
 
-  check(Lto.run(AddStream, Cache), "LTO::run failed");
+  check(Lto.run(AddStream, Cache, AddBuffer), "LTO::run failed");
 
   return static_cast(HasErrors);
 }
diff --git a/llvm/utils/dtlto/local.py b/llvm/utils/dtlto/local.py
new file mode 100644
index 0000000000000..adf463c02f479
--- /dev/null
+++ b/llvm/utils/dtlto/local.py
@@ -0,0 +1,41 @@
+"""
+DTLTO local serial distributor.
+
+This script parses the Distributed ThinLTO (DTLTO) JSON file and serially
+executes the specified code generation tool on the local host to perform each
+backend compilation job. This simple functional distributor is intended to be
+used for integration tests.
+
+Usage:
+    python local.py
+
+Arguments:
+    - : JSON file describing the DTLTO jobs.
+""" + +import subprocess +import sys +import json +from pathlib import Path + +if __name__ == "__main__": + # Load the DTLTO information from the input JSON file. + with Path(sys.argv[-1]).open() as f: + data = json.load(f) + + # Iterate over the jobs and execute the codegen tool. + for job in data["jobs"]: + jobargs = [] + for arg in data["common"]["args"]: + if isinstance(arg, list): + # arg is a "template", into which an external filename is to be + # inserted. The first element of arg names an array of strings + # in the job. The remaining elements of arg are either indices + # into the array or literal strings. + files, rest = job[arg[0]], arg[1:] + jobargs.append( + "".join(files[x] if isinstance(x, int) else x for x in rest) + ) + else: + jobargs.append(arg) + subprocess.check_call(jobargs) diff --git a/llvm/utils/dtlto/mock.py b/llvm/utils/dtlto/mock.py new file mode 100644 index 0000000000000..5c5772cf5afe5 --- /dev/null +++ b/llvm/utils/dtlto/mock.py @@ -0,0 +1,42 @@ +""" +DTLTO Mock Distributor. + +This script acts as a mock distributor for Distributed ThinLTO (DTLTO). It is +used for testing DTLTO when a Clang binary is not be available to invoke to +perform the backend compilation jobs. + +Usage: + python mock.py ... + +Arguments: + - , , ... : Input files to be copied. + - : JSON file describing the DTLTO jobs. + +The script performs the following: + 1. Reads the JSON file containing job descriptions. + 2. For each job copies the corresponding input file to the output location + specified for that job. + 3. Validates the JSON format using the `validate` module. +""" + +import sys +import json +import shutil +from pathlib import Path +import validate + +if __name__ == "__main__": + json_arg = sys.argv[-1] + distributor_args = sys.argv[1:-1] + + # Load the DTLTO information from the input JSON file. 
+ with Path(json_arg).open() as f: + data = json.load(f) + + # Iterate over the jobs and create the output + # files by copying over the supplied input files. + for job_index, job in enumerate(data["jobs"]): + shutil.copy(distributor_args[job_index], job["primary_output"][0]) + + # Check the format of the JSON. + validate.validate(data) diff --git a/llvm/utils/dtlto/validate.py b/llvm/utils/dtlto/validate.py new file mode 100644 index 0000000000000..3fa13bebdf17f --- /dev/null +++ b/llvm/utils/dtlto/validate.py @@ -0,0 +1,99 @@ +""" +DTLTO JSON Validator. + +This script is used for DTLTO testing to check that the distributor has +been invoked correctly. + +Usage: + python validate.py + +Arguments: + - : JSON file describing the DTLTO jobs. + +The script does the following: + 1. Prints the supplied CLI arguments. + 2. Loads the JSON file. + 3. Validates the structure and required fields. + 4. Pretty prints the JSON. +""" + +import sys +import json +from pathlib import Path + + +def take(jvalue, jpath): + parts = jpath.split(".") + for part in parts[:-1]: + jvalue = jvalue[part] + return jvalue.pop(parts[-1], KeyError) + + +def validate(jdoc): + # Check the format of the JSON + assert type(take(jdoc, "common.linker_output")) is str + + args = take(jdoc, "common.args") + assert type(args) is list + assert len(args) > 0 + + def validate_reference(a): + for j in jdoc["jobs"]: + for x in a[1:]: + if type(x) is int: + if a[0] not in j or x >= len(j[a[0]]): + return False + return True + + for a in args: + assert type(a) is str or ( + type(a) is list + and len(a) >= 2 + and type(a[0]) is str + and all(type(x) in (str, int) for x in a[1:]) + and any(type(x) is int for x in a[1:]) + and validate_reference(a) + ) + + assert len(take(jdoc, "common")) == 0 + + jobs = take(jdoc, "jobs") + assert type(jobs) is list + for j in jobs: + assert type(j) is dict + + # Mandatory job attributes. 
+ for attr in ("primary_input", "primary_output", "summary_index"): + array = take(j, attr) + assert type(array) is list + assert len(array) == 1 + assert type(array[0]) is str + + # Optional job attributes. + for attr in ("additional_inputs", "additional_outputs", "imports"): + array = take(j, attr) + if array is KeyError: + continue + assert type(array) is list + assert all(type(a) is str for a in array) + + assert len(j) == 0 + + assert len(jdoc) == 0 + + +if __name__ == "__main__": + json_arg = Path(sys.argv[-1]) + distributor_args = sys.argv[1:-1] + + print(f"{distributor_args=}") + + # Load the DTLTO information from the input JSON file. + with json_arg.open() as f: + jdoc = json.load(f) + + # Write the input JSON to stdout. + print(json.dumps(jdoc, indent=4)) + + # Check the format of the JSON. + validate(jdoc)