diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -161,6 +161,8 @@ "invalid Xarch argument: '%0', options requiring arguments are unsupported">; def err_drv_Xopenmp_target_missing_triple : Error< "cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=">; +def err_drv_openmp_jit_without_lto : Error< + "cannot enable OpenMP offloading JIT, specify bitcode compilation with '-foffload-lto'">; def err_drv_invalid_Xopenmp_target_with_args : Error< "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">; def err_drv_argument_only_allowed_with : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2460,6 +2460,10 @@ Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group, + HelpText<"Enable JIT compilation for OpenMP Offloading">, Flags<[ NoArgumentUnused]>; +def fno_openmp_target_jit : Flag<["-"], "fno-openmp-target-jit">, Group, + Flags<[NoArgumentUnused, HelpHidden]>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue, PosFlag, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8133,6 +8133,12 @@ const char *LinkingOutput) const { ArgStringList CmdArgs; + if (!C.getDriver().isUsingLTO(/* IsOffload */ true) && + Args.hasFlag(options::OPT_fopenmp_target_jit, 
options::OPT_fno_openmp_target_jit, /*Default*/ false)) { + C.getDriver().Diag(clang::diag::err_drv_openmp_jit_without_lto); + } + if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) { // Pass in target features for each toolchain. auto OpenMPTCRange = C.getOffloadToolChains(); @@ -8192,6 +8198,11 @@ if (!OOpt.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt)); } + + if (Args.hasFlag(options::OPT_fopenmp_target_jit, + options::OPT_fno_openmp_target_jit, + /*Default=*/false)) + CmdArgs.push_back(Args.MakeArgString("-target-embed-bc")); } // Construct the link job so we can wrap around it. diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -76,6 +76,11 @@ cl::desc("Path for the target bitcode library"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt EmbedBC( + "target-embed-bc", cl::ZeroOrMore, + cl::desc("Embed linked bitcode instead of an executable device image."), + cl::init(false), cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. static cl::list HostLinkerArgs(cl::Sink, cl::desc("...")); @@ -422,8 +427,8 @@ std::unique_ptr Buffer = MemoryBuffer::getMemBuffer(Library.getMemoryBufferRef(), false); - if (Error Err = writeArchive(TempFile, Members, true, Library.kind(), - true, Library.isThin(), std::move(Buffer))) + if (Error Err = writeArchive(TempFile, Members, true, Library.kind(), true, + Library.isThin(), std::move(Buffer))) return std::move(Err); return static_cast(TempFile); @@ -748,6 +753,12 @@ return std::move(Err); } + auto HandleError = [&](std::error_code EC) { + logAllUnhandledErrors(errorCodeToError(EC), + WithColor::error(errs(), LinkerExecutable)); + exit(1); + }; + // Run the LTO job to compile the bitcode. 
size_t MaxTasks = LTOBackend->getMaxTasks(); std::vector> Files(MaxTasks); @@ -757,16 +768,36 @@ StringRef Extension = (TheTriple.isNVPTX()) ? "s" : "o"; if (std::error_code EC = sys::fs::createTemporaryFile( "lto-" + TheTriple.getTriple(), Extension, FD, TempFile)) - return nullptr; + HandleError(EC); TempFiles.push_back(static_cast(TempFile)); return std::make_unique( std::make_unique(FD, true)); }; + auto LinkOnly = [&](size_t Task, const Module &M) { + int FD = -1; + Files.resize(1); + auto &TempFile = Files.front(); + if (std::error_code EC = sys::fs::createTemporaryFile( + "jit-" + TheTriple.getTriple(), "bc", FD, TempFile)) + HandleError(EC); + std::error_code EC; + raw_fd_ostream LinkedBitcode(TempFile, EC, sys::fs::OF_None); + if (EC) + HandleError(EC); + WriteBitcodeToFile(M, LinkedBitcode); + TempFiles.push_back(static_cast(TempFile)); + return false; + }; + + // If we are doing JIT, stop after the module has been completely linked. + if (EmbedBC) + LTOBackend->getLTOConfig().PostInternalizeModuleHook = LinkOnly; + if (Error Err = LTOBackend->run(AddStream)) return std::move(Err); for (auto &File : Files) { - if (!TheTriple.isNVPTX()) + if (!TheTriple.isNVPTX() || EmbedBC) continue; auto FileOrErr = nvptx::assemble(File, TheTriple, Arch); @@ -794,11 +825,16 @@ Triple TheTriple(TargetFeatures.first); StringRef Arch(TargetFeatures.second); - // TODO: Run LTO or bitcode linking before the final link job. auto ObjectOrErr = linkBitcodeFiles(LinkerInput.getValue(), TheTriple, Arch); if (!ObjectOrErr) return ObjectOrErr.takeError(); + + if (EmbedBC) { + LinkedImages.push_back(**ObjectOrErr); + continue; + } + if ((*ObjectOrErr).hasValue()) LinkerInput.getValue() = {**ObjectOrErr}; diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -259,6 +259,9 @@ /// full description of tasks see LTOBackend.h. 
unsigned getMaxTasks() const; + /// Returns the current configuration for the LTO object. + Config &getLTOConfig(); + /// Runs the LTO pipeline. This function calls the supplied AddStream /// function to add native object files to the link. /// diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -958,6 +958,8 @@ return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount; } +Config &LTO::getLTOConfig() { return Conf; } + // If only some of the modules were split, we cannot correctly handle // code that contains type tests or type checked loads. Error LTO::checkPartiallySplit() {