diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4357,9 +4357,9 @@ IsHeaderModulePrecompile ? HeaderModuleInput : Inputs[0]; InputInfoList ModuleHeaderInputs; + InputInfoList OpenMPHostInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; - const InputInfo *OpenMPHostInput = nullptr; for (const InputInfo &I : Inputs) { if (&I == &Input) { // This is the primary input. @@ -4376,8 +4376,8 @@ CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { OpenMPDeviceInput = &I; - } else if (IsOpenMPHost && !OpenMPHostInput) { - OpenMPHostInput = &I; + } else if (IsOpenMPHost) { + OpenMPHostInputs.push_back(I); } else { llvm_unreachable("unexpectedly given multiple inputs"); } @@ -6870,6 +6870,32 @@ } } + // Host-side OpenMP offloading receives the device object files and embeds + // them in a named section with the associated target triple and architecture. 
+ if (IsOpenMPHost && !OpenMPHostInputs.empty()) { + SmallString<128> InputFiles("-fembed-offload-binary="); + SmallString<128> InputSections("-fembed-offload-section="); + + auto InputFile = OpenMPHostInputs.begin(); + auto OpenMPTCs = C.getOffloadToolChains(); + for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE; + ++TI, ++InputFile) { + const ToolChain *TC = TI->second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + InputSections += TC->getTripleString() + "."; + InputSections += TCArgs.getLastArgValue(options::OPT_march_EQ); + InputSections += ","; + + InputFiles += C.getArgs().MakeArgString(TC->getInputFilename(*InputFile)); + InputFiles += ","; + } + InputSections.pop_back(); + InputFiles.pop_back(); + + CmdArgs.push_back(Args.MakeArgString(InputFiles.str())); + CmdArgs.push_back(Args.MakeArgString(InputSections.str())); + } + if (Triple.isAMDGPU()) { handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);