diff --git a/llvm/docs/DTLTO.rst b/llvm/docs/DTLTO.rst new file mode 100644 index 0000000000000..9e5ee0d961a92 --- /dev/null +++ b/llvm/docs/DTLTO.rst @@ -0,0 +1,178 @@ +=================== +DTLTO +=================== +.. contents:: + :local: + :depth: 2 + +.. toctree:: + :maxdepth: 1 + +Distributed ThinLTO (DTLTO) +=========================== + +Distributed ThinLTO (DTLTO) enables the distribution of backend ThinLTO +compilations via external distribution systems, such as Incredibuild, during the +link step. + +DTLTO extends the existing ThinLTO distribution support which uses separate +*thin-link*, *backend compilation*, and *link* steps. This method is documented +here: + + https://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html + +Using the *separate thin-link* approach requires a build system capable of +handling the dynamic dependencies specified in the individual summary index +files, such as Bazel. DTLTO removes this requirement, allowing it to be used +with any build process that supports in-process ThinLTO. + +The following commands show the steps used for the *separate thin-link* +approach for a basic example: + +.. code-block:: console + + 1. clang -flto=thin -O2 t1.c t2.c -c + 2. clang -flto=thin -O2 t1.o t2.o -fuse-ld=lld -Wl,--thinlto-index-only + 3. clang -O2 -o t1.native.o t1.o -c -fthinlto-index=t1.o.thinlto.bc + 4. clang -O2 -o t2.native.o t2.o -c -fthinlto-index=t2.o.thinlto.bc + 5. clang t1.native.o t2.native.o -o a.out -fuse-ld=lld + +With DTLTO, steps 2-5 are performed internally as part of the link step. The +equivalent DTLTO commands for the above are: + +.. code-block:: console + + clang -flto=thin -O2 t1.c t2.c -c + clang -flto=thin -O2 t1.o t2.o -fuse-ld=lld -fthinlto-distributor=<distributor_process> + +For DTLTO, LLD prepares the following for each ThinLTO backend compilation job: + +- An individual index file and a list of input and output files (corresponds to + step 2 above).
+- A Clang command line to perform the ThinLTO backend compilations. + +This information is supplied, via a JSON file, to ``distributor_process``, which +executes the backend compilations using a distribution system (corresponds to +steps 3 and 4 above). Upon completion, LLD integrates the compiled native object +files into the link process and completes the link (corresponds to step 5 +above). + +This design keeps the details of distribution systems out of the LLVM source +code. + +An example distributor that performs all work on the local system is included in +the LLVM source tree. To run an example with that distributor, a command line +such as the following can be used: + +.. code-block:: console + + clang -flto=thin -fuse-ld=lld -O2 t1.o t2.o -fthinlto-distributor=$(which python3) \ + -Xthinlto-distributor=$LLVMSRC/llvm/utils/dtlto/local.py + +Distributors +------------ + +Distributors are programs responsible for: + +1. Consuming the JSON backend compilations job description file. +2. Translating job descriptions into requests for the distribution system. +3. Blocking execution until all backend compilations are complete. + +Distributors must return a non-zero exit code on failure. They can be +implemented as platform native executables or in a scripting language, such as +Python. + +Clang and LLD provide options to specify a distributor program for managing +backend compilations. Distributor options and backend compilation options can +also be specified. Such options are transparently forwarded. + +The backend compilations are currently performed by invoking Clang. For further +details, refer to: + +* Clang documentation: https://clang.llvm.org/docs/ThinLTO.html +* LLD documentation: https://lld.llvm.org/DTLTO.html + +When invoked with a distributor, LLD generates a JSON file describing the +backend compilation jobs and executes the distributor, passing it this file. 
+ +JSON Schema +----------- + +The JSON format is explained by reference to the following example, which +describes the backend compilation of the modules ``t1.o`` and ``t2.o``: + +.. code-block:: json + + { + "common": { + "linker_output": "dtlto.elf", + "args": ["/usr/bin/clang", "-O2", "-c", "-fprofile-sample-use=my.prof"], + "inputs": ["my.prof"] + }, + "jobs": [ + { + "args": ["t1.o", "-fthinlto-index=t1.o.thinlto.bc", "-o", "t1.native.o", "-fproc-stat-report=t1.stats.txt"], + "inputs": ["t1.o", "t1.o.thinlto.bc"], + "outputs": ["t1.native.o", "t1.stats.txt"] + }, + { + "args": ["t2.o", "-fthinlto-index=t2.o.thinlto.bc", "-o", "t2.native.o", "-fproc-stat-report=t2.stats.txt"], + "inputs": ["t2.o", "t2.o.thinlto.bc"], + "outputs": ["t2.native.o", "t2.stats.txt"] + } + ] + } + +Each entry in the ``jobs`` array represents a single backend compilation job. +Each job object records its own command-line arguments and input/output files. +Shared arguments and inputs are defined once in the ``common`` object. + +Reserved Entries: + +- The first entry in the ``common.args`` array specifies the compiler + executable to invoke. +- The first entry in each job's ``inputs`` array is the bitcode file for the + module being compiled. +- The second entry in each job's ``inputs`` array is the corresponding + individual summary index file. +- The first entry in each job's ``outputs`` array is the primary output object + file. + +Command-line arguments and input/output files are stored separately to allow +the remote compiler to be changed without updating the distributors, as the +distributors do not need to understand the details of the compiler command +line. + +To generate the backend compilation commands, the common and job-specific +arguments are concatenated. + +When consuming the example JSON above, a distributor is expected to issue the +following backend compilation commands with maximum parallelism: + +.. 
code-block:: console + + /usr/bin/clang -O2 -c -fprofile-sample-use=my.prof t1.o -fthinlto-index=t1.o.thinlto.bc -o t1.native.o \ + -fproc-stat-report=t1.stats.txt + + /usr/bin/clang -O2 -c -fprofile-sample-use=my.prof t2.o -fthinlto-index=t2.o.thinlto.bc -o t2.native.o \ + -fproc-stat-report=t2.stats.txt + +TODOs +----- + +The following features are planned for DTLTO but not yet implemented: + +- Support for the ThinLTO in-process cache. +- Support for platforms other than ELF and COFF. +- Support for archives with bitcode members. +- Support for more LTO configurations; only a very limited set of LTO + configurations is supported currently, e.g., support for basic block sections + is not currently available. + +Constraints +----------- + +- Matching versions of Clang and LLD should be used. +- The distributor used must support the JSON schema generated by the version of + LLD in use. + diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 6eee564713d6d..3e16fe42b7d11 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -32,6 +32,7 @@ intermediate LLVM representation. DebuggingJITedCode DirectXUsage Docker + DTLTO FatLTO ExtendingLLVM GitHub @@ -164,6 +165,11 @@ Optimizations This document describes the interface between LLVM intermodular optimizer and the linker and its design +:doc:`DTLTO` + This document describes the DTLTO implementation, which allows for + distributing ThinLTO backend compilations without requiring support from + the build system. + :doc:`GoldPlugin` How to build your programs with link-time optimization on Linux. diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 242a05f7d32c0..736daca4be82c 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -199,6 +199,8 @@ class InputFile { using IndexWriteCallback = std::function; +using ImportsFilesContainer = llvm::SmallVector; + /// This class defines the interface to the ThinLTO backend. 
class ThinBackendProc { protected: @@ -223,13 +225,15 @@ class ThinBackendProc { BackendThreadPool(ThinLTOParallelism) {} virtual ~ThinBackendProc() = default; + virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset, + StringRef Triple) {} virtual Error start( unsigned Task, BitcodeModule BM, const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, MapVector &ModuleMap) = 0; - Error wait() { + virtual Error wait() { BackendThreadPool.wait(); if (Err) return std::move(*Err); @@ -240,8 +244,15 @@ class ThinBackendProc { // Write sharded indices and (optionally) imports to disk Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, - llvm::StringRef ModulePath, - const std::string &NewModulePath) const; + StringRef ModulePath, const std::string &NewModulePath) const; + + // Write sharded indices to SummaryPath, (optionally) imports to disk, and + // (optionally) record imports in ImportsFiles. + Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, + StringRef ModulePath, StringRef SummaryPath, + const std::string &NewModulePath, + std::optional> + ImportsFiles) const; }; /// This callable defines the behavior of a ThinLTO backend after the thin-link @@ -294,6 +305,30 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, bool ShouldEmitIndexFiles = false, bool ShouldEmitImportsFiles = false); +/// This ThinBackend generates the index shards and then runs the individual +/// backend jobs via an external process. It takes the same parameters as the +/// InProcessThinBackend; however, these parameters only control the behavior +/// when generating the index files for the modules. Additionally: +/// LinkerOutputFile is a string that should identify this LTO invocation in +/// the context of a wider build. It's used for naming to aid the user in +/// identifying activity related to a specific LTO invocation. 
+/// Distributor specifies the path to a process to invoke to manage the backend +/// job execution. +/// DistributorArgs specifies a list of arguments to be applied to the +/// distributor. +/// RemoteCompiler specifies the path to a Clang executable to be invoked for +/// the backend jobs. +/// RemoteCompilerArgs specifies a list of arguments to be applied to the +/// backend compilations. +/// SaveTemps is a debugging tool that prevents temporary files created by this +/// backend from being cleaned up. +ThinBackend createOutOfProcessThinBackend( + ThreadPoolStrategy Parallelism, IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + StringRef LinkerOutputFile, StringRef Distributor, + ArrayRef DistributorArgs, StringRef RemoteCompiler, + ArrayRef RemoteCompilerArgs, bool SaveTemps); + /// This ThinBackend writes individual module indexes to files, instead of /// running the individual backend jobs. This backend is for distributed builds /// where separate processes will invoke the real backends. diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 3623f9194d4d1..5e4116834b7f2 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -421,6 +421,12 @@ Error EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex); +/// Call \p F passing each of the files module \p ModulePath will import from. +void processImportsFiles( + StringRef ModulePath, + const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, + function_ref F); + /// Based on the information recorded in the summaries during global /// summary-based analysis: /// 1. 
Resolve prevailing symbol linkages and constrain visibility (CanAutoHide diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 0f53c60851217..fdba8bc04ba53 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -41,8 +41,11 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ThreadPool.h" @@ -1390,6 +1393,16 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { Error ThinBackendProc::emitFiles( const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, const std::string &NewModulePath) const { + return emitFiles(ImportList, ModulePath, NewModulePath + ".thinlto.bc", + NewModulePath, + /*ImportsFiles=*/std::nullopt); +} + +Error ThinBackendProc::emitFiles( + const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, + StringRef SummaryPath, const std::string &NewModulePath, + std::optional> ImportsFiles) + const { ModuleToSummariesForIndexTy ModuleToSummariesForIndex; GVSummaryPtrSet DeclarationSummaries; @@ -1398,10 +1411,9 @@ Error ThinBackendProc::emitFiles( ImportList, ModuleToSummariesForIndex, DeclarationSummaries); - raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::OF_None); + raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None); if (EC) - return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC); + return createFileError("cannot open " + Twine(SummaryPath), EC); writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex, &DeclarationSummaries); @@ -1412,29 +1424,36 @@ Error ThinBackendProc::emitFiles( if (ImportFilesError) return ImportFilesError; } + + // Optionally, store the imports files. 
+ if (ImportsFiles) + processImportsFiles( + ModulePath, ModuleToSummariesForIndex, + [&](StringRef M) { ImportsFiles->get().push_back(M.str()); }); + return Error::success(); } namespace { -class InProcessThinBackend : public ThinBackendProc { +// Base class for ThinLTO backends that perform code generation and insert the +// generated files back into the link. +class CGThinBackend : public ThinBackendProc { protected: AddStreamFn AddStream; - FileCache Cache; DenseSet CfiFunctionDefs; DenseSet CfiFunctionDecls; - bool ShouldEmitIndexFiles; public: - InProcessThinBackend( + CGThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, - ThreadPoolStrategy ThinLTOParallelism, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, - bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) + AddStreamFn AddStream, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + ThreadPoolStrategy ThinLTOParallelism) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), - AddStream(std::move(AddStream)), Cache(std::move(Cache)), + AddStream(std::move(AddStream)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( @@ -1443,6 +1462,23 @@ class InProcessThinBackend : public ThinBackendProc { CfiFunctionDecls.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); } +}; + +class InProcessThinBackend : public CGThinBackend { +protected: + FileCache Cache; + +public: + InProcessThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) + : CGThinBackend(Conf, CombinedIndex, 
ModuleToDefinedGVSummaries, + AddStream, OnWrite, ShouldEmitIndexFiles, + ShouldEmitImportsFiles, ThinLTOParallelism), + Cache(std::move(Cache)) {} virtual Error runThinLTOBackendThread( AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, @@ -2016,6 +2052,10 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ResolvedODR[Mod.first], ThinLTO.ModuleMap); }; + BackendProcess->setup(ModuleMap.size(), + RegularLTO.ParallelCodeGenParallelismLevel, + RegularLTO.CombinedModule->getTargetTriple()); + if (BackendProcess->getThreadCount() == 1 || BackendProcess->isSensitiveToInputOrder()) { // Process the modules in the order they were provided on the @@ -2142,3 +2182,322 @@ std::vector lto::generateModulesOrdering(ArrayRef R) { }); return ModulesOrdering; } + +namespace { +// For this out-of-process backend no codegen is done when invoked for each +// task. Instead we generate the required information (e.g. the summary index +// shard, import list, etc..) to allow for the codegen to be performed +// externally (similar to WriteIndexesThinBackend). This backend's `wait` +// function then invokes an external distributor process to do backend +// compilations. +class OutOfProcessThinBackend : public CGThinBackend { + using SString = SmallString<128>; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + SString LinkerOutputFile; + + SString DistributorPath; + ArrayRef DistributorArgs; + + SString RemoteCompiler; + ArrayRef RemoteCompilerArgs; + + bool SaveTemps; + + SmallVector CodegenOptions; + DenseSet CommonInputs; + + // Information specific to individual backend compilation job. + struct Job { + unsigned Task; + StringRef ModuleID; + StringRef NativeObjectPath; + StringRef SummaryIndexPath; + ImportsFilesContainer ImportFiles; + }; + // The set of backend compilations jobs. + SmallVector Jobs; + + // A unique string to identify the current link. + SmallString<8> UID; + + // The offset to the first ThinLTO task. 
+ unsigned ThinLTOTaskOffset; + + // The target triple to supply for backend compilations. + StringRef Triple; + +public: + OutOfProcessThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + StringRef LinkerOutputFile, StringRef Distributor, + ArrayRef DistributorArgs, StringRef RemoteCompiler, + ArrayRef RemoteCompilerArgs, bool SaveTemps) + : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, + AddStream, OnWrite, ShouldEmitIndexFiles, + ShouldEmitImportsFiles, ThinLTOParallelism), + LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor), + DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler), + RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {} + + virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset, + StringRef Triple) override { + UID = itostr(sys::Process::getProcessId()); + Jobs.resize((size_t)ThinLTONumTasks); + this->ThinLTOTaskOffset = ThinLTOTaskOffset; + this->Triple = Triple; + } + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + MapVector &ModuleMap) override { + + StringRef ModulePath = BM.getModuleIdentifier(); + + SString ObjFilePath = sys::path::parent_path(LinkerOutputFile); + sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." + + itostr(Task) + "." 
+ UID + ".native.o"); + + Job &J = Jobs[Task - ThinLTOTaskOffset]; + J = {Task, + ModulePath, + Saver.save(ObjFilePath.str()), + Saver.save(ObjFilePath.str() + ".thinlto.bc"), + {}}; + + assert(ModuleToDefinedGVSummaries.count(ModulePath)); + BackendThreadPool.async( + [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) { + if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath, + J.ModuleID.str(), J.ImportFiles)) { + std::unique_lock L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + } + }, + std::ref(J), std::ref(ImportList)); + + return Error::success(); + } + + // Derive a set of Clang options that will be shared/common for all DTLTO + // backend compilations. We are intentionally minimal here as these options + // must remain synchronized with the behavior of Clang. DTLTO does not support + // all the features available with in-process LTO. More features are expected + // to be added over time. Users can specify Clang options directly if a + // feature is not supported. Note that explicitly specified options that imply + // additional input or output file dependencies must be communicated to the + // distribution system, potentially by setting extra options on the + // distributor program. + void buildCommonRemoteCompilerOptions() { + const lto::Config &C = Conf; + auto &Ops = CodegenOptions; + llvm::Triple TT{Triple}; + + Ops.push_back(Saver.save("-O" + Twine(C.OptLevel))); + + if (C.Options.EmitAddrsig) + Ops.push_back("-faddrsig"); + if (C.Options.FunctionSections) + Ops.push_back("-ffunction-sections"); + if (C.Options.DataSections) + Ops.push_back("-fdata-sections"); + + if (C.RelocModel == Reloc::PIC_) + // Clang doesn't have -fpic for all triples. + if (!TT.isOSBinFormatCOFF()) + Ops.push_back("-fpic"); + + // Turn on/off warnings about profile cfg mismatch (default on) + // --lto-pgo-warn-mismatch. 
+ if (!C.PGOWarnMismatch) { + Ops.push_back("-mllvm"); + Ops.push_back("-no-pgo-warn-mismatch"); + } + + // Enable sample-based profile guided optimizations. + // Sample profile file path --lto-sample-profile=. + if (!C.SampleProfile.empty()) { + Ops.push_back( + Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile))); + CommonInputs.insert(C.SampleProfile); + } + + // We don't know which of the options will be used by Clang. + Ops.push_back("-Wno-unused-command-line-argument"); + + // Forward any supplied options. + if (!RemoteCompilerArgs.empty()) + for (auto &a : RemoteCompilerArgs) + Ops.push_back(a); + } + + // Generates a JSON file describing the backend compilations, for the + // distributor. + bool emitDistributorJson(StringRef DistributorJson) { + using json::Array; + std::error_code EC; + raw_fd_ostream OS(DistributorJson, EC); + if (EC) + return false; + + json::OStream JOS(OS); + JOS.object([&]() { + // Information common to all jobs. + JOS.attributeObject("common", [&]() { + JOS.attribute("linker_output", LinkerOutputFile); + + JOS.attributeArray("args", [&]() { + JOS.value(RemoteCompiler); + + JOS.value("-c"); + + JOS.value(Saver.save("--target=" + Twine(Triple))); + + for (const auto &A : CodegenOptions) + JOS.value(A); + }); + + JOS.attribute("inputs", Array(CommonInputs)); + }); + + // Per-compilation-job information. + JOS.attributeArray("jobs", [&]() { + for (const auto &J : Jobs) { + assert(J.Task != 0); + + SmallVector Inputs; + SmallVector Outputs; + + JOS.object([&]() { + JOS.attributeArray("args", [&]() { + JOS.value(J.ModuleID); + Inputs.push_back(J.ModuleID); + + JOS.value( + Saver.save("-fthinlto-index=" + Twine(J.SummaryIndexPath))); + Inputs.push_back(J.SummaryIndexPath); + + JOS.value("-o"); + JOS.value(J.NativeObjectPath); + Outputs.push_back(J.NativeObjectPath); + }); + + // Add the bitcode files from which imports will be made.
These do + // not explicitly appear on the backend compilation command lines + // but are recorded in the summary index shards. + llvm::append_range(Inputs, J.ImportFiles); + JOS.attribute("inputs", Array(Inputs)); + + JOS.attribute("outputs", Array(Outputs)); + }); + } + }); + }); + + return true; + } + + void removeFile(StringRef FileName) { + std::error_code EC = sys::fs::remove(FileName, true); + if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory)) + errs() << "warning: could not remove the file '" << FileName + << "': " << EC.message() << "\n"; + } + + Error wait() override { + // Wait for the information on the required backend compilations to be + // gathered. + BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + + auto CleanPerJobFiles = llvm::make_scope_exit([&] { + if (!SaveTemps) + for (auto &Job : Jobs) { + removeFile(Job.NativeObjectPath); + if (!ShouldEmitIndexFiles) + removeFile(Job.SummaryIndexPath); + } + }); + + const StringRef BCError = "DTLTO backend compilation: "; + + buildCommonRemoteCompilerOptions(); + + SString JsonFile = sys::path::parent_path(LinkerOutputFile); + sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID + + ".dist-file.json"); + if (!emitDistributorJson(JsonFile)) + return make_error( + BCError + "failed to generate distributor JSON script: " + JsonFile, + inconvertibleErrorCode()); + auto CleanJson = llvm::make_scope_exit([&] { + if (!SaveTemps) + removeFile(JsonFile); + }); + + SmallVector Args = {DistributorPath}; + llvm::append_range(Args, DistributorArgs); + Args.push_back(JsonFile); + std::string ErrMsg; + if (sys::ExecuteAndWait(Args[0], Args, + /*Env=*/std::nullopt, /*Redirects=*/{}, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) { + return make_error( + BCError + "distributor execution failed" + + (!ErrMsg.empty() ? 
": " + ErrMsg + Twine(".") : Twine(".")), + inconvertibleErrorCode()); + } + + for (auto &Job : Jobs) { + // Load the native object from a file into a memory buffer + // and store its contents in the output buffer. + auto ObjFileMbOrErr = + MemoryBuffer::getFile(Job.NativeObjectPath, false, false); + if (std::error_code ec = ObjFileMbOrErr.getError()) + return make_error( + BCError + "cannot open native object file: " + + Job.NativeObjectPath + ": " + ec.message(), + inconvertibleErrorCode()); + auto StreamOrErr = AddStream(Job.Task, Job.ModuleID); + if (Error Err = StreamOrErr.takeError()) + report_fatal_error(std::move(Err)); + *StreamOrErr->get()->OS + << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer(); + } + + return Error::success(); + } +}; +} // end anonymous namespace + +ThinBackend lto::createOutOfProcessThinBackend( + ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, + StringRef LinkerOutputFile, StringRef Distributor, + ArrayRef DistributorArgs, StringRef RemoteCompiler, + ArrayRef RemoteCompilerArgs, bool SaveTemps) { + auto Func = + [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, FileCache /*Cache*/) { + return std::make_unique( + Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, + AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles, + LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler, + RemoteCompilerArgs, SaveTemps); + }; + return ThinBackend(Func, Parallelism); +} diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index c3d0a1a3a046e..cdcf918d3fae8 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1568,13 +1568,23 @@ Error llvm::EmitImportsFiles( if (EC) return createFileError("cannot open " + OutputFilename, errorCodeToError(EC)); + 
processImportsFiles(ModulePath, ModuleToSummariesForIndex, + [&](StringRef M) { ImportsOS << M << "\n"; }); + return Error::success(); +} + +/// Invoke callback \p F on the file paths from which \p ModulePath +/// will import. +void llvm::processImportsFiles( + StringRef ModulePath, + const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, + function_ref F) { for (const auto &ILI : ModuleToSummariesForIndex) // The ModuleToSummariesForIndex map includes an entry for the current // Module (needed for writing out the index files). We don't want to // include it in the imports file, however, so filter it out. if (ILI.first != ModulePath) - ImportsOS << ILI.first << "\n"; - return Error::success(); + F(ILI.first); } bool llvm::convertToDeclaration(GlobalValue &GV) { diff --git a/llvm/test/ThinLTO/X86/dtlto/dtlto.ll b/llvm/test/ThinLTO/X86/dtlto/dtlto.ll new file mode 100644 index 0000000000000..8c18bffcb9f5d --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/dtlto.ll @@ -0,0 +1,80 @@ +; Test DTLTO output with llvm-lto2. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +; Generate fake object files for mock.py to return. +RUN: touch t1.o t2.o + +; Create an empty subdirectory to avoid having to account for the input files. +RUN: mkdir %t/out && cd %t/out + +; Define a substitution to share the common DTLTO arguments. +DEFINE: %{command} = llvm-lto2 run ../t1.bc ../t2.bc -o t.o \ +DEFINE: -dtlto-distributor=%python \ +DEFINE: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py,../t1.o,../t2.o \ +DEFINE: -r=../t1.bc,t1,px \ +DEFINE: -r=../t2.bc,t2,px + +; Perform DTLTO. mock.py does not do any compilation, instead it simply writes +; the contents of the object files supplied on the command line into the +; output object files in job order. +RUN: %{command} + +; Check that the expected output files have been created. 
+RUN: ls | count 2 +RUN: ls | FileCheck %s --check-prefix=THINLTO + +; llvm-lto2 ThinLTO output files. +THINLTO-DAG: {{^}}t.o.1{{$}} +THINLTO-DAG: {{^}}t.o.2{{$}} + +RUN: cd .. && rm -rf %t/out && mkdir %t/out && cd %t/out + +; Perform DTLTO with --save-temps. +RUN: %{command} --save-temps + +; Check that the expected output files have been created. +RUN: ls | count 12 +RUN: ls | FileCheck %s --check-prefixes=THINLTO,SAVETEMPS + +; Common -save-temps files from llvm-lto2. +SAVETEMPS-DAG: {{^}}t.o.resolution.txt{{$}} +SAVETEMPS-DAG: {{^}}t.o.index.bc{{$}} +SAVETEMPS-DAG: {{^}}t.o.index.dot{{$}} + +; -save-temps incremental files. +SAVETEMPS-DAG: {{^}}t.o.0.0.preopt.bc{{$}} +SAVETEMPS-DAG: {{^}}t.o.0.2.internalize.bc{{$}} + +; A jobs description JSON. +SAVETEMPS-DAG: {{^}}t.[[#]].dist-file.json{{$}} + +; Summary shards emitted for DTLTO. +SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o.thinlto.bc{{$}} +SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o.thinlto.bc{{$}} + +; DTLTO native output files (the results of the external backend compilations). +SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o{{$}} +SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o{{$}} + +;--- t1.ll + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @t1() { + ret void +} + +;--- t2.ll + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t2() { + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/imports.ll b/llvm/test/ThinLTO/X86/dtlto/imports.ll new file mode 100644 index 0000000000000..6cf904d9cf1e5 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/imports.ll @@ -0,0 +1,71 @@ +; Check that DTLTO creates imports lists correctly. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate ThinLTO bitcode files. +RUN: opt -thinlto-bc 0.ll -o 0.bc -O2 +RUN: opt -thinlto-bc 1.ll -o 1.bc -O2 + +; Define a substitution to share the common DTLTO arguments. 
Note that the use +; of validate.py will cause a failure as it does not create output files. +DEFINE: %{command} = llvm-lto2 run 0.bc 1.bc -o t.o \ +DEFINE: -dtlto-distributor=%python \ +DEFINE: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \ +DEFINE: -r=0.bc,g,px \ +DEFINE: -r=1.bc,f,px \ +DEFINE: -r=1.bc,g + +; We expect an import from 0.bc into 1.bc but no imports into 0.bc. Check that +; the expected input files have been added to the JSON to account for this. +RUN: not %{command} 2>&1 | FileCheck %s --check-prefixes=INPUTS,ERR + +; 1.bc should not appear in the list of inputs for 0.bc. +INPUTS: "jobs": +INPUTS: "inputs": [ +INPUTS-NEXT: "0.bc", +INPUTS-NEXT: "0.1.[[#]].native.o.thinlto.bc" +INPUTS-NEXT: ] + +; 0.bc should appear in the list of inputs for 1.bc. +INPUTS: "inputs": [ +INPUTS-NEXT: "1.bc", +INPUTS-NEXT: "1.2.[[#]].native.o.thinlto.bc", +INPUTS-NEXT: "0.bc" +INPUTS-NEXT: ] + +; This check ensures that we have failed for the expected reason. +ERR: failed: DTLTO backend compilation: cannot open native object file: + +; Check that imports files are not created even if -save-temps is active. +RUN: not %{command} -save-temps 2>&1 \ +RUN: | FileCheck %s --check-prefixes=ERR +RUN: ls | FileCheck %s --check-prefix=NOIMPORTFILES +NOIMPORTFILES-NOT: imports + +; Check that imports files are created with -thinlto-emit-imports. +RUN: not %{command} -thinlto-emit-imports 2>&1 \ +RUN: | FileCheck %s --check-prefixes=ERR +RUN: ls | FileCheck %s --check-prefix=IMPORTFILES +IMPORTFILES: 0.bc.imports +IMPORTFILES: 1.bc.imports + +;--- 0.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @g() { +entry: + ret void +} + +;--- 1.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(...) + +define void @f() { +entry: + call void (...) 
@g() + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/json.ll b/llvm/test/ThinLTO/X86/dtlto/json.ll new file mode 100644 index 0000000000000..1a38438e7b791 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/json.ll @@ -0,0 +1,84 @@ +; Check that the JSON output from DTLTO is as expected. Note that validate.py +; checks the JSON structure so we just check the field contents in this test. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +; Perform DTLTO. +RUN: not llvm-lto2 run t1.bc t2.bc -o my.output \ +RUN: -r=t1.bc,t1,px -r=t2.bc,t2,px \ +RUN: -dtlto-distributor=%python \ +RUN: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py,--da1=10,--da2=10 \ +RUN: -dtlto-compiler=my_clang.exe \ +RUN: -dtlto-compiler-arg=--rota1=10,--rota2=20 \ +RUN: 2>&1 | FileCheck %s + +CHECK: distributor_args=['--da1=10', '--da2=10'] + +; Check the common object. +CHECK: "linker_output": "my.output" +CHECK: "args": +CHECK-NEXT: "my_clang.exe" +CHECK-NEXT: "-c" +CHECK-NEXT: "--target=x86_64-unknown-linux-gnu" +CHECK-NEXT: "-O2" +CHECK-NEXT: "-fpic" +CHECK-NEXT: "-Wno-unused-command-line-argument" +CHECK-NEXT: "--rota1=10" +CHECK-NEXT: "--rota2=20" +CHECK-NEXT: ] +CHECK: "inputs": [] + +; Check the first job entry. +CHECK: "args": +CHECK-NEXT: "t1.bc" +CHECK-NEXT: "-fthinlto-index=t1.1.[[#]].native.o.thinlto.bc" +CHECK-NEXT: "-o" +CHECK-NEXT: "t1.1.[[#]].native.o" +CHECK-NEXT: ] +CHECK: "inputs": [ +CHECK-NEXT: "t1.bc" +CHECK-NEXT: "t1.1.[[#]].native.o.thinlto.bc" +CHECK-NEXT: ] +CHECK: "outputs": [ +CHECK-NEXT: "t1.1.[[#]].native.o" +CHECK-NEXT: ] + +; Check the second job entry. 
+CHECK: "args": [ +CHECK-NEXT: "t2.bc" +CHECK-NEXT: "-fthinlto-index=t2.2.[[#]].native.o.thinlto.bc" +CHECK-NEXT: "-o" +CHECK-NEXT: "t2.2.[[#]].native.o" +CHECK-NEXT: ] +CHECK-NEXT: "inputs": [ +CHECK-NEXT: "t2.bc" +CHECK-NEXT: "t2.2.[[#]].native.o.thinlto.bc" +CHECK-NEXT: ] +CHECK-NEXT: "outputs": [ +CHECK-NEXT: "t2.2.[[#]].native.o" +CHECK-NEXT: ] + +; This check ensures that we have failed for the expected reason. +CHECK: failed: DTLTO backend compilation: cannot open native object file: + +;--- t1.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { +entry: + ret void +} + +;--- t2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t2() { +entry: + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/summary.ll b/llvm/test/ThinLTO/X86/dtlto/summary.ll new file mode 100644 index 0000000000000..2365fa4f4ea42 --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/summary.ll @@ -0,0 +1,51 @@ +; Check that DTLTO creates identical summary index shard files as are created +; for an equivalent ThinLTO link. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate ThinLTO bitcode files. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +; Generate fake object files for mock.py to return. +RUN: touch t1.o t2.o + +; Define a substitution to share the common arguments. +DEFINE: %{command} = llvm-lto2 run t1.bc t2.bc -o t.o \ +DEFINE: -r=t1.bc,t1,px \ +DEFINE: -r=t2.bc,t2,px \ +DEFINE: -r=t2.bc,t1 \ +DEFINE: -thinlto-emit-indexes + +; Perform DTLTO. +RUN: %{command} \ +RUN: -dtlto-distributor=%python \ +RUN: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py,t1.o,t2.o + +; Perform ThinLTO. +RUN: %{command} + +; Check for equivalence. We use a wildcard to account for the PID. 
+RUN: cmp t1.1.*.native.o.thinlto.bc t1.bc.thinlto.bc +RUN: cmp t2.2.*.native.o.thinlto.bc t2.bc.thinlto.bc + +;--- t1.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { +entry: + ret void +} + +;--- t2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @t1(...) + +define void @t2() { +entry: + call void (...) @t1() + ret void +} diff --git a/llvm/test/ThinLTO/X86/dtlto/triple.ll b/llvm/test/ThinLTO/X86/dtlto/triple.ll new file mode 100644 index 0000000000000..87f37cb85729d --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/triple.ll @@ -0,0 +1,44 @@ +; Test that DTLTO uses the target triple from the first file in the link. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +; Define a substitution to share the common DTLTO arguments. Note that the use +; of validate.py will cause a failure as it does not create output files. +DEFINE: %{command} = llvm-lto2 run -o t.o -save-temps \ +DEFINE: -dtlto-distributor=%python \ +DEFINE: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \ +DEFINE: -r=t1.bc,t1,px \ +DEFINE: -r=t2.bc,t2,px + +; Test case where t1.bc is first. +RUN: not %{command} t1.bc t2.bc 2>&1 | FileCheck %s \ +RUN: --check-prefixes=TRIPLE1,ERR --implicit-check-not=--target +TRIPLE1: --target=x86_64-unknown-linux-gnu + +; Test case where t2.bc is first. +RUN: not %{command} t2.bc t1.bc 2>&1 | FileCheck %s \ +RUN: --check-prefixes=TRIPLE2,ERR --implicit-check-not=--target +TRIPLE2: --target=x86_64-unknown-unknown-gnu + +; This check ensures that we have failed for the expected reason. 
+ERR: failed: DTLTO backend compilation: cannot open native object file: + +;--- t1.ll + +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { + ret void +} + +;--- t2.ll + +target triple = "x86_64-unknown-unknown-gnu" + +define void @t2() { + ret void +} diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index aad7a088551b2..6722064d2a7b6 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -91,6 +91,7 @@ def get_asan_rtlib(): config.substitutions.append(("%shlibext", config.llvm_shlib_ext)) config.substitutions.append(("%pluginext", config.llvm_plugin_ext)) config.substitutions.append(("%exeext", config.llvm_exe_ext)) +config.substitutions.append(("%llvm_src_root", config.llvm_src_root)) lli_args = [] diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index d4f022ef021a4..733c7074b8e1b 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -97,6 +97,26 @@ static cl::opt "specified with -thinlto-emit-indexes or " "-thinlto-distributed-indexes")); +static cl::opt DTLTODistributor( + "dtlto-distributor", + cl::desc("Distributor to use for ThinLTO backend compilations. Specifying " + "this enables DTLTO.")); + +static cl::list DTLTODistributorArgs( + "dtlto-distributor-arg", cl::CommaSeparated, + cl::desc("Arguments to pass to the DTLTO distributor process."), + cl::value_desc("arg")); + +static cl::opt DTLTOCompiler( + "dtlto-compiler", + cl::desc("Compiler to use for DTLTO ThinLTO backend compilations.")); + +static cl::list DTLTOCompilerArgs( + "dtlto-compiler-arg", cl::CommaSeparated, + cl::desc("Arguments to pass to the remote compiler for backend " + "compilations."), + cl::value_desc("arg")); + // Default to using all available threads in the system, but using only one // thread per core (no SMT). 
// Use -thinlto-threads=all to use hardware_concurrency() instead, which means @@ -344,6 +364,14 @@ static int run(int argc, char **argv) { Conf.PTO.LoopVectorization = Conf.OptLevel > 1; Conf.PTO.SLPVectorization = Conf.OptLevel > 1; + if (ThinLTODistributedIndexes && !DTLTODistributor.empty()) + llvm::errs() << "-thinlto-distributed-indexes cannot be specfied together " + "with -dtlto-distributor\n"; + auto DTLTODistributorArgsSV = llvm::to_vector<0>(llvm::map_range( + DTLTODistributorArgs, [](const std::string &S) { return StringRef(S); })); + auto DTLTOCompilerArgsSV = llvm::to_vector<0>(llvm::map_range( + DTLTOCompilerArgs, [](const std::string &S) { return StringRef(S); })); + ThinBackend Backend; if (ThinLTODistributedIndexes) Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads), @@ -353,7 +381,13 @@ static int run(int argc, char **argv) { ThinLTOEmitImports, /*LinkedObjectsFile=*/nullptr, /*OnWrite=*/{}); - else + else if (!DTLTODistributor.empty()) { + Backend = createOutOfProcessThinBackend( + llvm::heavyweight_hardware_concurrency(Threads), + /*OnWrite=*/{}, ThinLTOEmitIndexes, ThinLTOEmitImports, OutputFilename, + DTLTODistributor, DTLTODistributorArgsSV, DTLTOCompiler, + DTLTOCompilerArgsSV, SaveTemps); + } else Backend = createInProcessThinBackend( llvm::heavyweight_hardware_concurrency(Threads), /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports); diff --git a/llvm/utils/dtlto/local.py b/llvm/utils/dtlto/local.py new file mode 100644 index 0000000000000..304b1bcb46026 --- /dev/null +++ b/llvm/utils/dtlto/local.py @@ -0,0 +1,28 @@ +""" +DTLTO local serial distributor. + +This script parses the Distributed ThinLTO (DTLTO) JSON file and serially +executes the specified code generation tool on the local host to perform each +backend compilation job. This simple functional distributor is intended to be +used for integration tests. + +Usage: + python local.py + +Arguments: + - : JSON file describing the DTLTO jobs. 
+""" + +import subprocess +import sys +import json +from pathlib import Path + +if __name__ == "__main__": + # Load the DTLTO information from the input JSON file. + with Path(sys.argv[-1]).open() as f: + data = json.load(f) + + # Iterate over the jobs and execute the codegen tool. + for job in data["jobs"]: + subprocess.check_call(data["common"]["args"] + job["args"]) diff --git a/llvm/utils/dtlto/mock.py b/llvm/utils/dtlto/mock.py new file mode 100644 index 0000000000000..5177a010c7be2 --- /dev/null +++ b/llvm/utils/dtlto/mock.py @@ -0,0 +1,42 @@ +""" +DTLTO Mock Distributor. + +This script acts as a mock distributor for Distributed ThinLTO (DTLTO). It is +used for testing DTLTO when a Clang binary is not be available to invoke to +perform the backend compilation jobs. + +Usage: + python mock.py ... + +Arguments: + - , , ... : Input files to be copied. + - : JSON file describing the DTLTO jobs. + +The script performs the following: + 1. Reads the JSON file containing job descriptions. + 2. For each job copies the corresponding input file to the output location + specified for that job. + 3. Validates the JSON format using the `validate` module. +""" + +import sys +import json +import shutil +from pathlib import Path +import validate + +if __name__ == "__main__": + json_arg = sys.argv[-1] + input_files = sys.argv[1:-1] + + # Load the DTLTO information from the input JSON file. + with Path(json_arg).open() as f: + data = json.load(f) + + # Iterate over the jobs and create the output + # files by copying over the supplied input files. + for job_index, job in enumerate(data["jobs"]): + shutil.copy(input_files[job_index], job["outputs"][0]) + + # Check the format of the JSON. + validate.validate(data) diff --git a/llvm/utils/dtlto/validate.py b/llvm/utils/dtlto/validate.py new file mode 100644 index 0000000000000..2cbd52dda32ae --- /dev/null +++ b/llvm/utils/dtlto/validate.py @@ -0,0 +1,78 @@ +""" +DTLTO JSON Validator. 
+ +This script is used for DTLTO testing to check that the distributor has +been invoked correctly. + +Usage: + python validate.py + +Arguments: + - : JSON file describing the DTLTO jobs. + +The script does the following: + 1. Prints the supplied distributor arguments. + 2. Loads the JSON file. + 3. Pretty prints the JSON. + 4. Validates the structure and required fields. +""" + +import sys +import json +from pathlib import Path + + +def take(jvalue, jpath): + parts = jpath.split(".") + for part in parts[:-1]: + jvalue = jvalue[part] + return jvalue.pop(parts[-1], KeyError) + + +def validate(jdoc): + # Check the format of the JSON + assert type(take(jdoc, "common.linker_output")) is str + + args = take(jdoc, "common.args") + assert type(args) is list + assert len(args) > 0 + assert all(type(i) is str for i in args) + + inputs = take(jdoc, "common.inputs") + assert type(inputs) is list + assert all(type(i) is str for i in inputs) + + assert len(take(jdoc, "common")) == 0 + + jobs = take(jdoc, "jobs") + assert type(jobs) is list + for j in jobs: + assert type(j) is dict + + for attr, min_size in (("args", 0), ("inputs", 2), ("outputs", 1)): + array = take(j, attr) + assert len(array) >= min_size + assert type(array) is list + assert all(type(a) is str for a in array) + + assert len(j) == 0 + + assert len(jdoc) == 0 + + +if __name__ == "__main__": + json_arg = Path(sys.argv[-1]) + distributor_args = sys.argv[1:-1] + + # Print the supplied distributor arguments. + print(f"{distributor_args=}") + + # Load the DTLTO information from the input JSON file. + with json_arg.open() as f: + jdoc = json.load(f) + + # Write the input JSON to stdout. + print(json.dumps(jdoc, indent=4)) + + # Check the format of the JSON. + validate(jdoc)