diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4076,7 +4076,7 @@ auto TC = ToolChains.begin(); for (Action *&A : DeviceActions) { - A = ConstructPhaseAction(C, Args, Phase, A); + A = ConstructPhaseAction(C, Args, Phase, A, Action::OFK_OpenMP); if (isa(A)) { HostAction->setCannotBeCollapsedWithNextDependentAction(); @@ -4196,6 +4196,12 @@ Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); } + if (isUsingLTO(/* IsOffload */ true) && + TargetDeviceOffloadKind == Action::OFK_OpenMP) { + types::ID Output = + Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; + return C.MakeAction(Input, Output); + } if (Args.hasArg(options::OPT_emit_llvm) || (TargetDeviceOffloadKind == Action::OFK_HIP && Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4615,7 +4615,7 @@ if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); - if (IsUsingLTO) { + if (IsUsingLTO && !Args.hasArg(options::OPT_fopenmp_new_driver)) { // Only AMDGPU supports device-side LTO. if (IsDeviceOffloadAction && !Triple.isAMDGPU()) { D.Diag(diag::err_drv_unsupported_opt_for_target) @@ -8147,6 +8147,39 @@ const char *LinkingOutput) const { ArgStringList CmdArgs; + if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) { + // Pass in target features for each toolchain. 
+ auto OpenMPTCRange = C.getOffloadToolChains(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) { + const ToolChain *TC = TI->second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + ArgStringList FeatureArgs; + TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP); + auto FeatureIt = llvm::find(FeatureArgs, "-target-feature"); + CmdArgs.push_back(Args.MakeArgString( + "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); + } + + // Pass in the optimization level to use for LTO. + if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt; + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + OOpt = "3"; + else if (A->getOption().matches(options::OPT_O)) { + OOpt = A->getValue(); + if (OOpt == "g") + OOpt = "1"; + else if (OOpt == "s" || OOpt == "z") + OOpt = "2"; + } else if (A->getOption().matches(options::OPT_O0)) + OOpt = "0"; + if (!OOpt.empty()) + CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt)); + } + } + // Construct the link job so we can wrap around it. 
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); const auto &LinkCommand = C.getJobs().getJobs().back(); diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -1,4 +1,15 @@ -set(LLVM_LINK_COMPONENTS BitWriter Core BinaryFormat IRReader Object Support) +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + BitWriter + Core + BinaryFormat + MC + Passes + IRReader + Object + Support + CodeGen + LTO) if(NOT CLANG_BUILT_STANDALONE) set(tablegen_deps intrinsics_gen) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -17,9 +17,12 @@ #include "clang/Basic/Version.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" @@ -36,6 +39,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -58,6 +62,15 @@ cl::desc("Path of linker binary"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt + TargetFeatures("target-feature", cl::desc("Target features for triple"), + cl::cat(ClangLinkerWrapperCategory)); + +static cl::opt OptLevel("opt-level", + cl::desc("Optimization level for LTO"), + cl::init("O0"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. 
static cl::list HostLinkerArgs(cl::Sink, cl::desc("...")); @@ -68,6 +81,9 @@ /// Temporary files created by the linker wrapper. static SmallVector TempFiles; +/// Codegen flags for LTO backend. +static codegen::RegisterCodeGenFlags CodeGenFlags; + /// Magic section string that marks the existence of offloading data. The /// section string will be formatted as `.llvm.offloading.<triple>.<arch>`. #define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading." @@ -191,6 +207,28 @@ if (ToBeStripped.empty()) return None; + // If the object file to strip doesn't exist we need to write it so we can + // pass it to llvm-strip. + SmallString<128> StripFile = Obj.getFileName(); + if (!sys::fs::exists(StripFile)) { + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + sys::path::stem(StripFile), "o", TempFile)) + return createFileError(TempFile, EC); + TempFiles.push_back(static_cast(TempFile)); + + auto Contents = Obj.getMemoryBufferRef().getBuffer(); + Expected> OutputOrErr = + FileOutputBuffer::create(TempFile, Contents.size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + std::copy(Contents.begin(), Contents.end(), Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + StripFile = TempFile; + } + // We will use llvm-strip to remove the now unneeded section containing the // offloading code. ErrorOr StripPath = sys::findProgramByName( @@ -210,7 +248,7 @@ SmallVector StripArgs; StripArgs.push_back(*StripPath); StripArgs.push_back("--no-strip-all"); - StripArgs.push_back(Obj.getFileName()); + StripArgs.push_back(StripFile); for (auto &Section : ToBeStripped) { StripArgs.push_back("--remove-section"); StripArgs.push_back(Section); @@ -411,6 +449,44 @@ // TODO: Move these to a separate file. namespace nvptx { +Expected assemble(StringRef InputFile, Triple TheTriple, + StringRef Arch) { + // NVPTX uses the ptxas binary to assemble device object files; look in the directory of LinkerExecutable first, then fall back to the default lookup. 
+ ErrorOr PtxasPath = + sys::findProgramByName("ptxas", sys::path::parent_path(LinkerExecutable)); + if (!PtxasPath) + PtxasPath = sys::findProgramByName("ptxas"); + if (!PtxasPath) + return createStringError(PtxasPath.getError(), + "Unable to find 'ptxas' in path"); + + // Create a new temporary file to write the assembled device object to. + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + TheTriple.getArchName() + "-" + Arch, "cubin", TempFile)) + return createFileError(TempFile, EC); + TempFiles.push_back(static_cast(TempFile)); + + // TODO: Pass in arguments like `-g` and `-v` from the driver. + SmallVector CmdArgs; + std::string Opt = "-" + OptLevel; + CmdArgs.push_back(*PtxasPath); + CmdArgs.push_back(TheTriple.isArch64Bit() ? "-m64" : "-m32"); + CmdArgs.push_back("-o"); + CmdArgs.push_back(TempFile); + CmdArgs.push_back(Opt); + CmdArgs.push_back("--gpu-name"); + CmdArgs.push_back(Arch); + CmdArgs.push_back("-c"); + + CmdArgs.push_back(InputFile); + + if (sys::ExecuteAndWait(*PtxasPath, CmdArgs)) + return createStringError(inconvertibleErrorCode(), "'ptxas' failed"); + + return static_cast(TempFile); +} + Expected link(ArrayRef InputFiles, ArrayRef LinkerArgs, Triple TheTriple, StringRef Arch) { @@ -474,6 +550,221 @@ } } +void diagnosticHandler(const DiagnosticInfo &DI) { + std::string ErrStorage; + raw_string_ostream OS(ErrStorage); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + + switch (DI.getSeverity()) { + case DS_Error: + WithColor::error(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Warning: + WithColor::warning(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Note: + WithColor::note(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Remark: + WithColor::remark(errs(), LinkerExecutable) << ErrStorage; + break; + } +} + +// Get the target features passed in from the driver as <triple>=<features>. 
+std::vector getTargetFeatures(const Triple &TheTriple) { + std::vector Features; + auto TargetAndFeatures = StringRef(TargetFeatures).split('='); + if (TargetAndFeatures.first != TheTriple.getTriple()) + return Features; + + for (auto Feature : llvm::split(TargetAndFeatures.second, ',')) + Features.push_back(Feature.str()); + return Features; +} + +CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) { + switch (OptLevel) { + case 0: + return CodeGenOpt::None; + case 1: + return CodeGenOpt::Less; + case 2: + return CodeGenOpt::Default; + case 3: + return CodeGenOpt::Aggressive; + } + llvm_unreachable("Invalid optimization level"); +} + +std::unique_ptr createLTO(const Triple &TheTriple, StringRef Arch, + bool WholeProgram) { + lto::Config Conf; + lto::ThinBackend Backend; + // TODO: Handle index-only thin-LTO + Backend = lto::createInProcessThinBackend( + llvm::heavyweight_hardware_concurrency(1)); + + Conf.CPU = Arch.str(); + Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple); + + Conf.MAttrs = getTargetFeatures(TheTriple); + Conf.CGOptLevel = getCGOptLevel(OptLevel[1] - '0'); + Conf.OptLevel = OptLevel[1] - '0'; + Conf.DefaultTriple = TheTriple.getTriple(); + Conf.DiagHandler = diagnosticHandler; + + Conf.PTO.LoopVectorization = Conf.OptLevel > 1; + Conf.PTO.SLPVectorization = Conf.OptLevel > 1; + + // TODO: Handle outputting bitcode using a module hook. NVPTX emits assembly because linkBitcodeFiles passes the result to nvptx::assemble; other targets emit object files. + if (TheTriple.isNVPTX()) + Conf.CGFileType = CGFT_AssemblyFile; + else + Conf.CGFileType = CGFT_ObjectFile; + + // TODO: Handle remark files + Conf.HasWholeProgramVisibility = WholeProgram; + + return std::make_unique(std::move(Conf), Backend); +} + +// Returns true if \p S is valid as a C language identifier and will be given +// `__start_` and `__stop_` symbols. 
+bool isValidCIdentifier(StringRef S) { + return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && + std::all_of(S.begin() + 1, S.end(), + [](char C) { return C == '_' || isAlnum(C); }); +} + +Expected> linkBitcodeFiles(ArrayRef InputFiles, + const Triple &TheTriple, + StringRef Arch) { + SmallVector, 4> SavedBuffers; + SmallVector, 4> BitcodeFiles; + StringMap UsedInRegularObj; + + // Search for bitcode files in the input and create an LTO input file for + // each one. If an input is not a bitcode file, scan its symbol table for + // the symbols we need to preserve. + for (StringRef File : InputFiles) { + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(File, EC); + + file_magic Type = identify_magic((*BufferOrErr)->getBuffer()); + if (Type != file_magic::bitcode) { + Expected> ObjFile = + ObjectFile::createObjectFile(**BufferOrErr, Type); + if (!ObjFile) + return ObjFile.takeError(); + + for (auto &Sym : (*ObjFile)->symbols()) { + Expected Name = Sym.getName(); + if (!Name) + return Name.takeError(); + + UsedInRegularObj[*Name] = true; + } + } else { + Expected> InputFileOrErr = + llvm::lto::InputFile::create(**BufferOrErr); + if (!InputFileOrErr) + return InputFileOrErr.takeError(); + + BitcodeFiles.push_back(std::move(*InputFileOrErr)); + SavedBuffers.push_back(std::move(*BufferOrErr)); + } + } + + if (BitcodeFiles.empty()) + return None; + + // We have visibility of the whole program if every input is bitcode: all + // inputs are statically linked, so there should be no external references. + bool WholeProgram = BitcodeFiles.size() == InputFiles.size(); + StringMap PrevailingSymbols; + + // TODO: Run more tests to verify that this is correct. + // Create the LTO instance with the necessary config and add the bitcode files + // to it after resolving symbols. We make a few assumptions about symbol + // resolution. + // 1. The target is going to be a stand-alone executable file. + // 2. 
We do not support relocatable object files. + // 3. All inputs are relocatable object files extracted from host binaries, so + // there is no resolution to a dynamic library. + auto LTOBackend = createLTO(TheTriple, Arch, WholeProgram); + for (auto &BitcodeFile : BitcodeFiles) { + const auto Symbols = BitcodeFile->symbols(); + SmallVector Resolutions(Symbols.size()); + size_t Idx = 0; + for (auto &Sym : Symbols) { + lto::SymbolResolution &Res = Resolutions[Idx++]; + + // We will use this as the prevailing symbol definition in LTO unless + // it is undefined in the module or another definition has already been chosen. + Res.Prevailing = !Sym.isUndefined() && !PrevailingSymbols[Sym.getName()]; + + // We need LTO to preserve symbols that are referenced in other object files + // or are needed by the rest of the toolchain (sections named like C identifiers, prevailing `__omp` symbols). + Res.VisibleToRegularObj = + UsedInRegularObj[Sym.getName()] || + isValidCIdentifier(Sym.getSectionName()) || + (Res.Prevailing && Sym.getName().startswith("__omp")); + + // We do not currently support shared libraries, so no symbols will be + // referenced externally by shared libraries. + Res.ExportDynamic = false; + + // The result will currently always be an executable, so the only time the + // definition will not reside in this link unit is if it's undefined. + Res.FinalDefinitionInLinkageUnit = !Sym.isUndefined(); + + // We do not support linker redefined symbols (e.g. --wrap) for device + // image linking, so the symbols will not be changed after LTO. + Res.LinkerRedefined = false; + + // Record whether a prevailing definition of this symbol has been chosen. + PrevailingSymbols[Sym.getName()] |= Res.Prevailing; + } + + // Add the bitcode file with its resolved symbols to the LTO job. + if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions)) + return Err; + } + + // Run the LTO job to compile the bitcode. 
+ size_t MaxTasks = LTOBackend->getMaxTasks(); + std::vector> Files(MaxTasks); + auto AddStream = [&](size_t Task) -> std::unique_ptr { + int FD = -1; + auto &TempFile = Files[Task]; + StringRef Extension = (TheTriple.isNVPTX()) ? "s" : "o"; + if (std::error_code EC = sys::fs::createTemporaryFile( + "lto-" + TheTriple.getTriple(), Extension, FD, TempFile)) + return nullptr; + TempFiles.push_back(static_cast(TempFile)); + return std::make_unique( + std::make_unique(FD, true)); + }; + if (Error Err = LTOBackend->run(AddStream)) + return Err; + + for (auto &File : Files) { + if (!TheTriple.isNVPTX()) + continue; + + auto FileOrErr = nvptx::assemble(File, TheTriple, Arch); + if (!FileOrErr) + return FileOrErr.takeError(); + File = *FileOrErr; + } + + return static_cast(Files.front()); +} + /// Runs the appropriate linking action on all the device files specified in \p /// DeviceFiles. The linked device images are returned in \p LinkedImages. Error linkDeviceFiles(ArrayRef DeviceFiles, @@ -491,6 +782,12 @@ StringRef Arch(TargetFeatures.second); // TODO: Run LTO or bitcode linking before the final link job. + auto ObjectOrErr = + linkBitcodeFiles(LinkerInput.getValue(), TheTriple, Arch); + if (!ObjectOrErr) + return ObjectOrErr.takeError(); + if ((*ObjectOrErr).hasValue()) + LinkerInput.getValue() = {**ObjectOrErr}; auto ImageOrErr = linkDevice(LinkerInput.getValue(), LinkerArgs, TheTriple, Arch); @@ -517,7 +814,7 @@ // Create a new file to write the wrapped bitcode file to. SmallString<128> BitcodeFile; if (std::error_code EC = - sys::fs::createTemporaryFile("offload", "bc", BitcodeFile)) + sys::fs::createTemporaryFile("wrapper", "bc", BitcodeFile)) return createFileError(BitcodeFile, EC); TempFiles.push_back(static_cast(BitcodeFile)); @@ -546,7 +843,7 @@ // Create a new file to write the wrapped bitcode file to. 
SmallString<128> ObjectFile; if (std::error_code EC = - sys::fs::createTemporaryFile("offload", "o", ObjectFile)) + sys::fs::createTemporaryFile("image", "o", ObjectFile)) return createFileError(BitcodeFile, EC); TempFiles.push_back(static_cast(ObjectFile)); @@ -584,6 +881,8 @@ Optional searchLibraryBaseName(StringRef Name, ArrayRef SearchPaths) { for (StringRef Dir : SearchPaths) { + if (Optional File = findFile(Dir, "lib" + Name + ".so")) + return None; if (Optional File = findFile(Dir, "lib" + Name + ".a")) return File; } @@ -606,6 +905,11 @@ int main(int argc, const char **argv) { InitLLVM X(argc, argv); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); LinkerExecutable = argv[0]; sys::PrintStackTraceOnErrorSignal(argv[0]);