diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -264,6 +264,10 @@ if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8147,6 +8147,34 @@ "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); } + // Pass in the bitcode library to be linked during LTO. + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) { + const ToolChain *TC = TI->second; + const Driver &D = TC->getDriver(); + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + + std::string BitcodeSuffix; + if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, true)) + BitcodeSuffix += "new-"; + if (TC->getTriple().isNVPTX()) + BitcodeSuffix += "nvptx-"; + else if (TC->getTriple().isAMDGPU()) + BitcodeSuffix += "amdgpu-"; + BitcodeSuffix += Arch; + + ArgStringList BitcodeLibrary; + addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix, + TC->getTriple()); + + if (!BitcodeLibrary.empty()) + CmdArgs.push_back( + Args.MakeArgString("-target-library=" + TC->getTripleString() + + "-" + Arch + "=" + BitcodeLibrary.back())); + } + // Pass in the optimization level to use for LTO. if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -744,6 +744,10 @@ return; } + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -71,6 +71,11 @@ cl::init("O0"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt + BitcodeLibrary("target-library", + cl::desc("Path for the target bitcode library"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. static cl::list HostLinkerArgs(cl::Sink, cl::desc("...")); @@ -976,6 +981,14 @@ } } + // Add the device bitcode library to the device files if it was passed in. + if (!BitcodeLibrary.empty()) { + auto DeviceAndPath = StringRef(BitcodeLibrary).split('='); + auto TripleAndArch = DeviceAndPath.first.rsplit('-'); + DeviceFiles.emplace_back(TripleAndArch.first, TripleAndArch.second, + DeviceAndPath.second); + } + // Link the device images extracted from the linker input. SmallVector LinkedImages; if (Error Err = linkDeviceFiles(DeviceFiles, LinkerArgs, LinkedImages))