Index: include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- include/clang/Basic/DiagnosticDriverKinds.td +++ include/clang/Basic/DiagnosticDriverKinds.td @@ -123,6 +123,8 @@ def err_drv_optimization_remark_pattern : Error< "%0 in '%1'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; +def err_drv_invalid_omp_target : Error< + "OpenMP target is invalid: '%0'">; def warn_O4_is_O3 : Warning<"-O4 is equivalent to -O3">, InGroup; def warn_drv_lto_libpath : Warning<"libLTO.dylib relative to clang installed dir not found; using 'ld' default search path instead">, Index: include/clang/Driver/Action.h =================================================================== --- include/clang/Driver/Action.h +++ include/clang/Driver/Action.h @@ -55,9 +55,11 @@ DsymutilJobClass, VerifyDebugInfoJobClass, VerifyPCHJobClass, + OffloadBundlingJobClass, + OffloadUnbundlingJobClass, - JobClassFirst=PreprocessJobClass, - JobClassLast=VerifyPCHJobClass + JobClassFirst = PreprocessJobClass, + JobClassLast = OffloadUnbundlingJobClass }; static const char *getClassName(ActionClass AC); @@ -172,6 +174,7 @@ class JobAction : public Action { virtual void anchor(); protected: + JobAction(ActionClass Kind, std::unique_ptr Input); JobAction(ActionClass Kind, std::unique_ptr Input, types::ID Type); JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type); @@ -182,6 +185,30 @@ } }; +class OffloadBundlingJobAction : public JobAction { + void anchor() override; + +public: + // Offloading bundling doesn't change the type of output. + OffloadBundlingJobAction(std::unique_ptr Input); + + static bool classof(const Action *A) { + return A->getKind() == OffloadBundlingJobClass; + } +}; + +class OffloadUnbundlingJobAction : public JobAction { + void anchor() override; + +public: + // Offloading unbundling doesn't change the type of output. 
+ OffloadUnbundlingJobAction(std::unique_ptr Input); + + static bool classof(const Action *A) { + return A->getKind() == OffloadUnbundlingJobClass; + } +}; + class PreprocessJobAction : public JobAction { void anchor() override; public: Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -673,6 +673,15 @@ def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">, HelpText<"Enable function overloads based on CUDA target attributes.">; +//===----------------------------------------------------------------------===// +// OpenMP Options +//===----------------------------------------------------------------------===// + +def fopenmp_is_device : Flag<["-"], "fopenmp-is-device">, + HelpText<"Generate code only for an OpenMP target device.">; +def omp_host_ir_file_path : Separate<["-"], "omp-host-ir-file-path">, + HelpText<"Path to the IR file produced by the frontend for the host.">; + } // let Flags = [CC1Option] Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -14,6 +14,8 @@ #include "clang/Basic/LLVM.h" #include "clang/Driver/Phases.h" #include "clang/Driver/Types.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -46,11 +48,10 @@ class Action; class Command; class Compilation; - class InputInfo; + class InputAction; class JobList; class JobAction; class SanitizerArgs; - class ToolChain; /// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options. enum LTOKind { @@ -201,7 +202,49 @@ /// stored in it, and will clean them up when torn down. mutable llvm::StringMap ToolChains; + /// \brief Cache of all the ToolChains in use by the driver. 
+ /// + /// This maps from the string representation of a triple that refers to an + /// offloading target to a ToolChain created targeting that triple. The driver + /// owns all the ToolChain objects stored in it, and will clean them up when + /// torn down. We use a different cache for offloading as it is possible to + /// have offloading toolchains with the same triple the host has, and the + /// implementation has to differentiate the two in order to adjust the + /// commands for offloading. + mutable llvm::StringMap OffloadToolChains; + + /// \brief Array of the toolchains of offloading targets in the order they + /// were requested by the user. + SmallVector OrderedOffloadingToolchains; + + /// \brief Type for the cache of the results for the offloading host emitted + /// so far. The host results can be required by the device tools. + typedef llvm::DenseMap OffloadingHostResultsTy; + private: + /// CreateUnbundledOffloadingResult - Create a command to unbundle the input + /// and use the resulting input info. If there are inputs already cached in + /// OffloadingHostResults for that action use them instead. If offloading + /// is not supported, just return the provided input info. + InputInfo CreateUnbundledOffloadingResult( + Compilation &C, const OffloadUnbundlingJobAction *CurAction, + const ToolChain *TC, InputInfo Result, + OffloadingHostResultsTy &OffloadingHostResults) const; + + /// CreateBundledOffloadingResult - Create a bundle of all provided results + /// and return the InputInfo of the bundled file. + InputInfo CreateBundledOffloadingResult( + Compilation &C, const OffloadBundlingJobAction *CurAction, + const ToolChain *TC, InputInfoList Results) const; + + /// PostProcessOffloadingInputsAndResults - Update the input and output + /// information to suit the needs of the offloading implementation. This is + /// used, e.g., to pass extra results from host to device side and vice-versa. 
+ void PostProcessOffloadingInputsAndResults( + Compilation &C, const JobAction *JA, const ToolChain *TC, + InputInfoList &Inputs, InputInfo &Result, + OffloadingHostResultsTy &OffloadingHostResults) const; + /// TranslateInputArgs - Create a new derived argument list from the input /// arguments, after applying the standard argument translations. llvm::opt::DerivedArgList * @@ -388,7 +431,8 @@ bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - InputInfo &Result) const; + InputInfo &Result, + OffloadingHostResultsTy &OffloadingHostResults) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; @@ -435,9 +479,11 @@ /// \brief Retrieves a ToolChain for a particular \p Target triple. /// /// Will cache ToolChains for the life of the driver object, and create them - /// on-demand. - const ToolChain &getToolChain(const llvm::opt::ArgList &Args, - const llvm::Triple &Target) const; + /// on-demand. \a OffloadingKind specifies if the toolchain being created + /// refers to any kind of offloading (e.g. OpenMP). 
+ const ToolChain &getToolChain( + const llvm::opt::ArgList &Args, const llvm::Triple &Target, + ToolChain::OffloadingKind OffloadingKind = ToolChain::OK_None) const; /// @} Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1614,6 +1614,8 @@ def object : Flag<["-"], "object">; def o : JoinedOrSeparate<["-"], "o">, Flags<[DriverOption, RenderAsInput, CC1Option, CC1AsOption]>, HelpText<"Write output to ">, MetaVarName<"">; +def omptargets_EQ : CommaJoined<["-"], "omptargets=">, Flags<[DriverOption, CC1Option]>, + HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">; def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">; def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>; def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group, Flags<[CC1Option]>; Index: include/clang/Driver/ToolChain.h =================================================================== --- include/clang/Driver/ToolChain.h +++ include/clang/Driver/ToolChain.h @@ -64,6 +64,12 @@ RM_DisabledImplicitly }; + enum OffloadingKind { + OK_None, + OK_OpenMP_Host, + OK_OpenMP_Device, + }; + private: const Driver &D; const llvm::Triple Triple; @@ -71,6 +77,7 @@ // We need to initialize CachedRTTIArg before CachedRTTIMode const llvm::opt::Arg *const CachedRTTIArg; const RTTIMode CachedRTTIMode; + OffloadingKind CachedOffloadingKind; /// The list of toolchain specific path prefixes to search for /// files. 
@@ -83,10 +90,12 @@ mutable std::unique_ptr Clang; mutable std::unique_ptr Assemble; mutable std::unique_ptr Link; + mutable std::unique_ptr OffloadBundler; Tool *getClang() const; Tool *getAssemble() const; Tool *getLink() const; Tool *getClangAs() const; + Tool *getOffloadBundler() const; mutable std::unique_ptr SanitizerArguments; @@ -127,6 +136,9 @@ vfs::FileSystem &getVFS() const; const llvm::Triple &getTriple() const { return Triple; } + OffloadingKind getOffloadingKind() const { return CachedOffloadingKind; } + void setOffloadingKind(OffloadingKind OT); + llvm::Triple::ArchType getArch() const { return Triple.getArch(); } StringRef getArchName() const { return Triple.getArchName(); } StringRef getPlatform() const { return Triple.getVendorName(); } @@ -183,6 +195,18 @@ return nullptr; } + /// TranslateOffloadArgs - Create a new derived argument list for any argument + /// translations this ToolChain may wish to perform if supporting offloading, + /// or 0 if no tool chain specific translations are needed. If this tool chain + /// does not refer to an offloading tool chain 0 is returned too. + /// + /// \param BoundArch - The bound architecture name, or 0. + virtual llvm::opt::DerivedArgList * + TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args, + const char *BoundArch) const { + return nullptr; + } + /// Choose a tool to use to handle the action \p JA. /// /// This can be overridden when a particular ToolChain needs to use Index: include/clang/Driver/Types.h =================================================================== --- include/clang/Driver/Types.h +++ include/clang/Driver/Types.h @@ -69,6 +69,11 @@ /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers). bool isObjC(ID Id); + /// isSrcFile - Is this a source file, i.e. something that still has to be + /// preprocessed. The logic behind this is the same that decides the first + /// compilation phase is a preprocessing one. 
+ bool isSrcFile(ID Id); + /// lookupTypeForExtension - Lookup the type to use for the file /// extension \p Ext. ID lookupTypeForExtension(const char *Ext); Index: lib/Driver/Action.cpp =================================================================== --- lib/Driver/Action.cpp +++ lib/Driver/Action.cpp @@ -26,6 +26,10 @@ case BindArchClass: return "bind-arch"; case CudaDeviceClass: return "cuda-device"; case CudaHostClass: return "cuda-host"; + case OffloadBundlingJobClass: + return "clang-offload-bundler"; + case OffloadUnbundlingJobClass: + return "clang-offload-unbundler"; case PreprocessJobClass: return "preprocessor"; case PrecompileJobClass: return "precompiler"; case AnalyzeJobClass: return "analyzer"; @@ -75,6 +79,9 @@ void JobAction::anchor() {} +JobAction::JobAction(ActionClass Kind, std::unique_ptr Input) + : Action(Kind, std::move(Input)) {} + JobAction::JobAction(ActionClass Kind, std::unique_ptr Input, types::ID Type) : Action(Kind, std::move(Input), Type) {} @@ -83,6 +90,18 @@ : Action(Kind, Inputs, Type) { } +void OffloadBundlingJobAction::anchor() {} + +OffloadBundlingJobAction::OffloadBundlingJobAction( + std::unique_ptr Input) + : JobAction(OffloadBundlingJobClass, std::move(Input)) {} + +void OffloadUnbundlingJobAction::anchor() {} + +OffloadUnbundlingJobAction::OffloadUnbundlingJobAction( + std::unique_ptr Input) + : JobAction(OffloadUnbundlingJobClass, std::move(Input)) {} + void PreprocessJobAction::anchor() {} PreprocessJobAction::PreprocessJobAction(std::unique_ptr Input, Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -60,9 +60,16 @@ DerivedArgList *&Entry = TCArgs[std::make_pair(TC, BoundArch)]; if (!Entry) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch); - if (!Entry) - Entry = TranslatedArgs; + DerivedArgList *DefaultArgs = TC->TranslateArgs(*TranslatedArgs, BoundArch); + Entry = (DefaultArgs) ? 
DefaultArgs : TranslatedArgs; + + // Check if there is any offloading specific translation to do. + DerivedArgList *OffloadArgs = TC->TranslateOffloadArgs(*Entry, BoundArch); + if (OffloadArgs) { + // There are offloading translated args, so we have to use them instead. + delete DefaultArgs; + Entry = OffloadArgs; + } } return *Entry; Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -85,6 +85,7 @@ delete Opts; llvm::DeleteContainerSeconds(ToolChains); + llvm::DeleteContainerSeconds(OffloadToolChains); } void Driver::ParseDriverMode(ArrayRef Args) { @@ -140,7 +141,10 @@ } // Warn about -mcpu= without an argument. - if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) { + if ((A->getOption().matches(options::OPT_mcpu_EQ) && + A->containsValue("")) || + (A->getOption().matches(options::OPT_omptargets_EQ) && + !A->getNumValues())) { Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args); } } @@ -196,6 +200,251 @@ return FinalPhase; } +/// \brief Return true if the provided arguments require OpenMP offloading. +static bool RequiresOpenMPOffloading(ArgList &Args) { + if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false)) { + StringRef OpenMPRuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); + if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ)) + OpenMPRuntimeName = A->getValue(); + + if (OpenMPRuntimeName == "libomp" || OpenMPRuntimeName == "libiomp5") { + auto *A = Args.getLastArg(options::OPT_omptargets_EQ); + return A != nullptr && A->getNumValues(); + } + } + return false; +} +/// \brief Return true if the provided tool chain requires OpenMP offloading. 
+static bool RequiresOpenMPOffloading(const ToolChain *TC) { + return TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host || + TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device; +} + +/// \brief Dump the job bindings for a given action. +static void DumpJobBindings(ArrayRef TCs, StringRef ToolName, + ArrayRef Inputs, + ArrayRef Outputs) { + + llvm::errs() << "# \""; + for (unsigned i = 0, e = TCs.size(); i != e; ++i) { + llvm::errs() << TCs[i]->getTripleString(); + if (i + 1 != e) + llvm::errs() << ", "; + } + + llvm::errs() << "\" - \"" << ToolName << "\", inputs: ["; + for (unsigned i = 0, e = Inputs.size(); i != e; ++i) { + llvm::errs() << Inputs[i].getAsString(); + if (i + 1 != e) + llvm::errs() << ", "; + } + llvm::errs() << "], "; + llvm::errs() << ((Outputs.size() > 1) ? "outputs: [" : "output: "); + for (unsigned i = 0, e = Outputs.size(); i != e; ++i) { + llvm::errs() << Outputs[i].getAsString(); + if (i + 1 != e) + llvm::errs() << ", "; + } + llvm::errs() << ((Outputs.size() > 1) ? "]\n" : "\n"); + return; +} + +/// \brief Create output for a given action, if any. 
+static InputInfo CreateActionResult(Compilation &C, const Action *A, + const char *BaseInput, + const char *BoundArch, bool AtTopLevel, + bool MultipleArchs) { + InputInfo Result; + const JobAction *JA = cast(A); + if (JA->getType() == types::TY_Nothing) + Result = InputInfo(A->getType(), BaseInput); + else + Result = + InputInfo(C.getDriver().GetNamedOutputPath(C, *JA, BaseInput, BoundArch, + AtTopLevel, MultipleArchs), + A->getType(), BaseInput); + return Result; +} + +static const char *CreateOffloadingPseudoArchName(Compilation &C, + const ToolChain *TC) { + SmallString<128> Name; + switch (TC->getOffloadingKind()) { + default: + llvm_unreachable("Offload information was not specified."); + break; + case ToolChain::OK_OpenMP_Host: + Name = "offload-host-"; + break; + case ToolChain::OK_OpenMP_Device: + Name = "offload-device-"; + break; + } + + Name += TC->getTripleString(); + return C.getArgs().MakeArgString(Name.str()); +} + +InputInfo Driver::CreateUnbundledOffloadingResult( + Compilation &C, const OffloadUnbundlingJobAction *CurAction, + const ToolChain *TC, InputInfo Result, + OffloadingHostResultsTy &OffloadingHostResults) const { + assert(!OrderedOffloadingToolchains.empty() && + !types::isSrcFile(Result.getType()) && + "Not expecting to create a bundling action!"); + + // If this is an offloading device toolchain, we need to use the results + // cached when the host input was processed, except if the input is a source + // file. + if (TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device) { + // If this is not a source file, it had to be part of a bundle. So we need + // to checkout the results created by the host when this input was processed + // for the host toolchain. 
+ auto ILIt = OffloadingHostResults.find(CurAction); + assert(ILIt != OffloadingHostResults.end() && + "Offloading inputs do not exist??"); + InputInfoList &IL = ILIt->getSecond(); + assert(IL.size() == OrderedOffloadingToolchains.size() + 1 && + "Not all offloading inputs exist??"); + + // Get the order of the toolchain and retrieve the input; + unsigned Order = 1; + for (auto *OffloadTC : OrderedOffloadingToolchains) { + if (OffloadTC == TC) + break; + ++Order; + } + return IL[Order]; + } + + // Otherwise, this input is expected to be bundled. Therefore we need to issue + // an unbundling command. + + // The bundled file is the input. + InputInfo BundledFile = Result; + + // Create the input info for the unbundled files. + InputInfoList &UnbundledFiles = OffloadingHostResults[CurAction]; + { + InputInfo HostResult = CreateActionResult( + C, CurAction, Result.getBaseInput(), + CreateOffloadingPseudoArchName(C, TC), /*AtTopLevel=*/ + false, /*MultipleArchs=*/false); + UnbundledFiles.push_back(HostResult); + for (auto *OffloadTC : OrderedOffloadingToolchains) { + InputInfo TargetResult = CreateActionResult( + C, CurAction, Result.getBaseInput(), + CreateOffloadingPseudoArchName(C, OffloadTC), /*AtTopLevel=*/ + false, /*MultipleArchs=*/false); + UnbundledFiles.push_back(TargetResult); + } + } + + auto OffloadBundlerTool = TC->SelectTool(*CurAction); + + // Emit the command or dump the bindings. + if (CCCPrintBindings && !CCGenDiagnostics) { + SmallVector AllToolChains; + AllToolChains.push_back(TC); + AllToolChains.append(OrderedOffloadingToolchains.begin(), + OrderedOffloadingToolchains.end()); + DumpJobBindings(AllToolChains, OffloadBundlerTool->getName(), BundledFile, + UnbundledFiles); + } else { + OffloadBundlerTool->ConstructJob(C, *CurAction, BundledFile, UnbundledFiles, + C.getArgs(), nullptr); + } + + // The host result is the first of the unbundled files. 
+ return UnbundledFiles.front(); +} + +InputInfo Driver::CreateBundledOffloadingResult( + Compilation &C, const OffloadBundlingJobAction *CurAction, + const ToolChain *TC, InputInfoList Results) const { + assert(!OrderedOffloadingToolchains.empty() && + "Not expecting to create a bundling action!"); + + // Get the result file based on BaseInput file name and the previous host + // action. + InputInfo BundledFile = CreateActionResult( + C, *CurAction->begin(), Results[0].getBaseInput(), /*BoundArch=*/nullptr, + /*AtTopLevel=*/true, /*MultipleArchs=*/false); + + // The unbundled files are the previous action result for each target. + InputInfoList &UnbundledFiles = Results; + + // Create the bundling command. + auto OffloadBundlerTool = TC->SelectTool(*CurAction); + + // Emit the command or dump the bindings. + if (CCCPrintBindings && !CCGenDiagnostics) { + SmallVector AllToolChains; + AllToolChains.push_back(TC); + AllToolChains.append(OrderedOffloadingToolchains.begin(), + OrderedOffloadingToolchains.end()); + DumpJobBindings(AllToolChains, OffloadBundlerTool->getName(), + UnbundledFiles, BundledFile); + } else { + OffloadBundlerTool->ConstructJob(C, *CurAction, BundledFile, UnbundledFiles, + C.getArgs(), nullptr); + } + + return BundledFile; +} + +void Driver::PostProcessOffloadingInputsAndResults( + Compilation &C, const JobAction *JA, const ToolChain *TC, + InputInfoList &Inputs, InputInfo &Result, + OffloadingHostResultsTy &OffloadingHostResults) const { + + // If this driver run requires OpenMP offloading we need to make sure + // everything gets combined at link time. Also, all the compile phase results + // obtained for the host are used as inputs in the device side. + if (RequiresOpenMPOffloading(TC)) { + + if (isa(JA) && + TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host) { + // Get link results for all targets. 
+ InputInfoList TgtLinkResults(OrderedOffloadingToolchains.size()); + for (unsigned i = 0; i < OrderedOffloadingToolchains.size(); ++i) { + const ToolChain *TgtTC = OrderedOffloadingToolchains[i]; + BuildJobsForAction(C, JA, TgtTC, + CreateOffloadingPseudoArchName(C, TgtTC), + /*AtTopLevel=*/false, + /*MultipleArchs=*/true, /*LinkingOutput=*/nullptr, + TgtLinkResults[i], OffloadingHostResults); + } + Inputs.append(TgtLinkResults.begin(), TgtLinkResults.end()); + return; + } + + if (isa(JA) && + TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device) { + // Find the host compile result. + auto ILIt = OffloadingHostResults.find(JA); + assert(ILIt != OffloadingHostResults.end() && + "The OpenMP host side action is expected to be processed before!"); + InputInfoList &IL = ILIt->getSecond(); + assert(IL.size() == 1 && "Host compile results should only be one!"); + Inputs.push_back(IL.front()); + return; + } + + // If this is a host action, make sure it is recorded in the offloading + // results cache. + if (TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host) + OffloadingHostResults[JA].push_back(Result); + + return; + } + + // + // Add post-processing code for other offloading implementations here. + // +} + static Arg *MakeInputArg(DerivedArgList &Args, OptTable *Opts, StringRef Value) { Arg *A = new Arg(Opts->getOption(options::OPT_INPUT), Value, @@ -484,9 +733,44 @@ // Perform the default argument translations. DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs); + // Check if we need offloading support by the toolchains. + ToolChain::OffloadingKind HostOffloadingKind = ToolChain::OK_None; + ToolChain::OffloadingKind DeviceOffloadingKind = ToolChain::OK_None; + // Check if we need OpenMP offloading + if (RequiresOpenMPOffloading(*UArgs)) { + HostOffloadingKind = ToolChain::OK_OpenMP_Host; + DeviceOffloadingKind = ToolChain::OK_OpenMP_Device; + } + // Owned by the host. 
const ToolChain &TC = - getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs)); + getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs), + HostOffloadingKind); + + // Get the toolchains for the offloading targets if any. We need to read the + // offloading toolchains only if we have a compatible runtime library, and + // that would be either libomp or libiomp5. + OrderedOffloadingToolchains.clear(); + + if (DeviceOffloadingKind == ToolChain::OK_OpenMP_Device) { + Arg *Tgts = UArgs->getLastArg(options::OPT_omptargets_EQ); + assert(Tgts && Tgts->getNumValues() && + "OpenMP offloading has to have targets specified."); + + for (unsigned v = 0; v < Tgts->getNumValues(); ++v) { + const char *Val = Tgts->getValue(v); + llvm::Triple TT(Val); + + // If the specified target is invalid, emit error + if (TT.getArch() == llvm::Triple::UnknownArch) + Diag(clang::diag::err_drv_invalid_omp_target) << Val; + else { + const ToolChain &OffloadTC = + getToolChain(*UArgs, TT, DeviceOffloadingKind); + OrderedOffloadingToolchains.push_back(&OffloadTC); + } + } + } // The compilation takes ownership of Args. Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs); @@ -1478,6 +1762,15 @@ // Build the pipeline for this file. std::unique_ptr Current(new InputAction(*InputArg, InputType)); + + // If we need to support offloading, run an unbundling job before each input + // to make sure that bundled files get unbundled. If the input is a source + // file, unbundling is not required. + if (!OrderedOffloadingToolchains.empty() && + InputArg->getOption().getKind() == llvm::opt::Option::InputClass && + !types::isSrcFile(InputType)) + Current.reset(new OffloadUnbundlingJobAction(std::move(Current))); + for (SmallVectorImpl::iterator i = PL.begin(), e = PL.end(); i != e; ++i) { phases::ID Phase = *i; @@ -1514,8 +1807,15 @@ } // If we ended with something, add to the output list. 
- if (Current) + if (Current) { + // If we need to support offloading, run a bundling job for each output + // that is not a linker action. Linker actions is when device images are + // usually embedded into the host to form a fat binary. + if (!OrderedOffloadingToolchains.empty()) + Current.reset(new OffloadBundlingJobAction(std::move(Current))); + Actions.push_back(Current.release()); + } } // Add a link action if necessary. @@ -1646,6 +1946,10 @@ if (A->getOption().matches(options::OPT_arch)) ArchNames.insert(A->getValue()); + // Cleanup the offloading host cache so that cached results of previous runs + // are not used. This is required for when clang is used as library. + OffloadingHostResultsTy OffloadingHostResults; + for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output . Unfortunately, this @@ -1666,7 +1970,8 @@ /*BoundArch*/ nullptr, /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, - /*LinkingOutput*/ LinkingOutput, II); + /*LinkingOutput*/ LinkingOutput, II, + OffloadingHostResults); } // If the user passed -Qunused-arguments or there were errors, don't warn @@ -1737,28 +2042,35 @@ // A BackendJob is always preceded by a CompileJob, and without // -save-temps they will always get combined together, so instead of // checking the backend tool, check if the tool for the CompileJob - // has an integrated assembler. - const ActionList *BackendInputs = &(*Inputs)[0]->getInputs(); - // Compile job may be wrapped in CudaHostAction, extract it if - // that's the case and update CollapsedCHA if we combine phases. - CudaHostAction *CHA = dyn_cast(*BackendInputs->begin()); - JobAction *CompileJA = - cast(CHA ? *CHA->begin() : *BackendInputs->begin()); - assert(CompileJA && "Backend job is not preceeded by compile job."); - const Tool *Compiler = TC->SelectTool(*CompileJA); - if (!Compiler) + // has an integrated assembler. 
However, if OpenMP offloading is required + // the backend and compile jobs have to be kept separate and an integrated + // assembler of the backend job will be queried instead. + JobAction *CurJA = cast(*Inputs->begin()); + const ActionList *BackendInputs = &CurJA->getInputs(); + CudaHostAction *CHA = nullptr; + if (!RequiresOpenMPOffloading(TC)) { + // Compile job may be wrapped in CudaHostAction, extract it if + // that's the case and update CollapsedCHA if we combine phases. + CHA = dyn_cast(*CurJA->begin()); + CurJA = + cast(CHA ? *CHA->begin() : *BackendInputs->begin()); + assert(CurJA && "Backend job is not preceeded by compile job."); + } + const Tool *CurTool = TC->SelectTool(*CurJA); + if (!CurTool) return nullptr; - if (Compiler->hasIntegratedAssembler()) { - Inputs = &CompileJA->getInputs(); - ToolForJob = Compiler; + if (CurTool->hasIntegratedAssembler()) { + Inputs = &CurJA->getInputs(); + ToolForJob = CurTool; CollapsedCHA = CHA; } } // A backend job should always be combined with the preceding compile job // unless OPT_save_temps is enabled and the compiler is capable of emitting - // LLVM IR as an intermediate output. - if (isa(JA)) { + // LLVM IR as an intermediate output. The OpenMP offloading implementation + // also requires the Compile and Backend jobs to be separate. + if (isa(JA) && !RequiresOpenMPOffloading(TC)) { // Check if the compiler supports emitting LLVM IR. assert(Inputs->size() == 1); // Compile job may be wrapped in CudaHostAction, extract it if @@ -1798,7 +2110,8 @@ const ToolChain *TC, const char *BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - InputInfo &Result) const { + InputInfo &Result, + OffloadingHostResultsTy &OffloadingHostResults) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList CudaDeviceInputInfos; @@ -1807,7 +2120,7 @@ // Append outputs of device jobs to the input list. 
for (const Action *DA : CHA->getDeviceActions()) { BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); + /*MultipleArchs*/ false, LinkingOutput, II, OffloadingHostResults); CudaDeviceInputInfos.push_back(II); } // Override current action with a real host compile action and continue @@ -1815,6 +2128,38 @@ A = *CHA->begin(); } + if (const OffloadUnbundlingJobAction *OUA = + dyn_cast(A)) { + // The input of the unbundling job has to be a single input non-source file, + // so we do not consider it having multiple architectures. We just use the + // naming that a regular host input file would have. + BuildJobsForAction(C, *OUA->begin(), TC, BoundArch, AtTopLevel, + /*MultipleArchs=*/false, LinkingOutput, Result, + OffloadingHostResults); + Result = CreateUnbundledOffloadingResult(C, OUA, TC, Result, + OffloadingHostResults); + return; + } + + if (const OffloadBundlingJobAction *OBA = + dyn_cast(A)) { + // Compute the input action for all devices and emit a bundling command. + InputInfoList Results(OrderedOffloadingToolchains.size() + 1); + for (unsigned i = 0; i < Results.size(); ++i) { + const ToolChain *CurTC = i ? OrderedOffloadingToolchains[i - 1] : TC; + // The input job of the bundling action is meant for multiple targets and + // is not a top level job - the bundling job is the top level for the + // current output. + BuildJobsForAction(C, *OBA->begin(), CurTC, + CreateOffloadingPseudoArchName(C, CurTC), + /*AtTopLevel=*/false, + /*MultipleArchs=*/true, LinkingOutput, Results[i], + OffloadingHostResults); + } + Result = CreateBundledOffloadingResult(C, OBA, TC, Results); + return; + } + if (const InputAction *IA = dyn_cast(A)) { // FIXME: It would be nice to not claim this here; maybe the old scheme of // just using Args was better? 
@@ -1841,7 +2186,8 @@ TC = &C.getDefaultToolChain(); BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, - MultipleArchs, LinkingOutput, Result); + MultipleArchs, LinkingOutput, Result, + OffloadingHostResults); return; } @@ -1851,7 +2197,7 @@ assert(CDA->getGpuArchName() && "No GPU name in device action."); BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); + /*MultipleArchs*/ true, LinkingOutput, Result, OffloadingHostResults); return; } @@ -1870,7 +2216,8 @@ InputInfo II; for (const Action *DA : CollapsedCHA->getDeviceActions()) { BuildJobsForAction(C, DA, TC, "", AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); + /*MultipleArchs*/ false, LinkingOutput, II, + OffloadingHostResults); CudaDeviceInputInfos.push_back(II); } } @@ -1887,7 +2234,7 @@ InputInfo II; BuildJobsForAction(C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, - LinkingOutput, II); + LinkingOutput, II, OffloadingHostResults); InputInfos.push_back(II); } @@ -1904,26 +2251,19 @@ InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end()); // Determine the place to write output to, if any. 
- if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A->getType(), BaseInput); - else - Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs), - A->getType(), BaseInput); + Result = + CreateActionResult(C, A, BaseInput, BoundArch, AtTopLevel, MultipleArchs); - if (CCCPrintBindings && !CCGenDiagnostics) { - llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' - << " - \"" << T->getName() << "\", inputs: ["; - for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) { - llvm::errs() << InputInfos[i].getAsString(); - if (i + 1 != e) - llvm::errs() << ", "; - } - llvm::errs() << "], output: " << Result.getAsString() << "\n"; - } else { + // Post-process inputs and results to suit the needs of the offloading + // implementations. + PostProcessOffloadingInputsAndResults(C, JA, TC, InputInfos, Result, + OffloadingHostResults); + + if (CCCPrintBindings && !CCGenDiagnostics) + DumpJobBindings(&T->getToolChain(), T->getName(), InputInfos, Result); + else T->ConstructJob(C, *JA, Result, InputInfos, C.getArgsForToolChain(TC, BoundArch), LinkingOutput); - } } const char *Driver::getDefaultImageName() const { @@ -2223,10 +2563,14 @@ return Path.str(); } -const ToolChain &Driver::getToolChain(const ArgList &Args, - const llvm::Triple &Target) const { - - ToolChain *&TC = ToolChains[Target.str()]; +const ToolChain & +Driver::getToolChain(const ArgList &Args, const llvm::Triple &Target, + ToolChain::OffloadingKind OffloadingKind) const { + // If this is an offload toolchain we need to try to get it from the right + // cache. + bool IsOffloadingDevice = (OffloadingKind == ToolChain::OK_OpenMP_Device); + ToolChain *&TC = *((IsOffloadingDevice) ? &OffloadToolChains[Target.str()] + : &ToolChains[Target.str()]); if (!TC) { switch (Target.getOS()) { case llvm::Triple::CloudABI: @@ -2332,6 +2676,8 @@ } } } + // Set the offloading kind for this toolchain. 
+ TC->setOffloadingKind(OffloadingKind); return *TC; } Index: lib/Driver/ToolChain.cpp =================================================================== --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -67,7 +67,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, const ArgList &Args) : D(D), Triple(T), Args(Args), CachedRTTIArg(GetRTTIArgument(Args)), - CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)) { + CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)), + CachedOffloadingKind(OK_None) { if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) if (!isThreadModelSupported(A->getValue())) D.Diag(diag::err_drv_invalid_thread_model_for_target) @@ -184,6 +185,12 @@ return std::make_pair(Target, ModeFlag); } +void ToolChain::setOffloadingKind(OffloadingKind OK) { + assert((CachedOffloadingKind == OK_None || CachedOffloadingKind == OK) && + "Offloading kind not expected to change once it is set."); + CachedOffloadingKind = OK; +} + StringRef ToolChain::getDefaultUniversalArchName() const { // In universal driver terms, the arch name accepted by -arch isn't exactly // the same as the ones that appear in the triple.
Roughly speaking, this is @@ -237,6 +244,12 @@ return Link.get(); } +Tool *ToolChain::getOffloadBundler() const { + if (!OffloadBundler) + OffloadBundler.reset(new tools::OffloadBundler(*this)); + return OffloadBundler.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -262,6 +275,10 @@ case Action::VerifyPCHJobClass: case Action::BackendJobClass: return getClang(); + + case Action::OffloadBundlingJobClass: + case Action::OffloadUnbundlingJobClass: + return getOffloadBundler(); } llvm_unreachable("Invalid tool kind."); Index: lib/Driver/ToolChains.h =================================================================== --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -205,6 +205,9 @@ bool isPIEDefault() const override; bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + llvm::opt::DerivedArgList * + TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args, + const char *BoundArch) const override; protected: Tool *getTool(Action::ActionClass AC) const override; Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -2420,6 +2420,45 @@ return true; } +llvm::opt::DerivedArgList * +Generic_GCC::TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args, + const char *BoundArch) const { + // Make sure we always generate a shared library for an OpenMP offloading + // target regardless of the options the user passed to the host. + + if (getOffloadingKind() != OK_OpenMP_Device) + return nullptr; + + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); + + // Request a shared library built from position-independent code.
+ DAL->AddFlagArg(0, Opts.getOption(options::OPT_shared)); + DAL->AddFlagArg(0, Opts.getOption(options::OPT_fPIC)); + + // Filter out the arguments we do not want to forward to the offloading + // toolchain as they can interfere with the creation of a shared library. + for (auto *A : Args) { + switch ((options::ID)A->getOption().getID()) { + default: + DAL->append(A); + break; + case options::OPT_shared: + case options::OPT_static: + case options::OPT_fPIC: + case options::OPT_fno_PIC: + case options::OPT_fpic: + case options::OPT_fno_pic: + case options::OPT_fPIE: + case options::OPT_fno_PIE: + case options::OPT_fpie: + case options::OPT_fno_pie: + break; + } + } + + return DAL; +} void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args) const { Index: lib/Driver/Tools.h =================================================================== --- lib/Driver/Tools.h +++ lib/Driver/Tools.h @@ -132,6 +132,19 @@ const char *LinkingOutput) const override; }; +/// \brief Offload bundler tool (runs the clang-offload-bundler program).
+class LLVM_LIBRARY_VISIBILITY OffloadBundler : public Tool { +public: + OffloadBundler(const ToolChain &TC) + : Tool("Offload bundler", "clang-offload-bundler", TC) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + /// \brief Base class for all GNU tools that provide the same behavior when /// it comes to response files support class LLVM_LIBRARY_VISIBILITY GnuTool : public Tool { Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -209,12 +209,24 @@ static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); + unsigned NumberOfInputs = Inputs.size(); + + // If the current toolchain is an OpenMP host toolchain, we need to ignore + // the last inputs - one for each offloading device - as they are going to be + // embedded in the fat binary by a custom linker script. + if (TC.getOffloadingKind() == ToolChain::OK_OpenMP_Host) { + Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ); + assert(Tgts && Tgts->getNumValues() && + "OpenMP offloading has to have targets specified."); + NumberOfInputs -= Tgts->getNumValues(); + } // Add extra linker input arguments which are not treated as inputs // (constructed via -Xarch_). Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input); - for (const auto &II : Inputs) { + for (unsigned i = 0; i < NumberOfInputs; ++i) { + const auto &II = Inputs[i]; if (!TC.HasNativeLLVMSupport()) { // Don't try to pass LLVM inputs unless we have native support.
if (II.getType() == types::TY_LLVM_IR || @@ -252,6 +264,98 @@ addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); } +/// \brief Add OpenMP linker script arguments at the end of the argument list +/// so that the fat binary is built by embedding each of the device images into +/// the host. The device images are the last inputs, one for each device and +/// come in the same order the triples are passed through the omptargets option. +/// The linker script also defines a few symbols required by the code generation +/// so that the images can be easily retrieved at runtime by the offloading +/// library. This should be used in tool chains that support linker scripts. +static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, ArgStringList &CmdArgs) { + + // If this is not an OpenMP host toolchain, we don't need to do anything. + if (TC.getOffloadingKind() != ToolChain::OK_OpenMP_Host) + return; + + // Gather the pairs (target triple)-(file name). The file names are at the + // end of the input list, so we scan it in reverse.
+ SmallVector, 4> Targets; + + Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ); + assert(Tgts && Tgts->getNumValues() && + "OpenMP offloading has to have targets specified."); + + auto TriplesIt = Tgts->getValues().end(); + auto FileNamesIt = Inputs.end(); + for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { + --TriplesIt; + --FileNamesIt; + Targets.push_back( + std::make_pair(llvm::Triple(*TriplesIt), FileNamesIt->getFilename())); + } + + // Create a temporary linker script. + StringRef Name = llvm::sys::path::filename(Output.getFilename()); + std::pair Split = Name.rsplit('.'); + std::string TmpName = C.getDriver().GetTemporaryPath(Split.first, "lk"); + const char *LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + + // Open the script file in order to write its contents. + std::error_code EC; + llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); + + if (EC) { + C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); + return; + } + + // Add commands to embed target binaries. We ensure that each section and + // image is 16-byte aligned. This is not mandatory, but increases the + // likelihood of data to be aligned with a cache block in several main host + // machines. + Lksf << "TARGET(binary)\n"; + for (unsigned i = 0; i < Targets.size(); ++i) + Lksf << "INPUT(" << Targets[i].second << ")\n"; + + Lksf << "SECTIONS\n"; + Lksf << "{\n"; + Lksf << " .omp_offloading :\n"; + Lksf << " ALIGN(0x10)\n"; + Lksf << " {\n"; + + for (unsigned i = 0; i < Targets.size(); ++i) { + std::string TgtName(Targets[i].first.getTriple()); + // std::replace(TgtName.begin(), TgtName.end(), '-', '_'); + Lksf << " . = ALIGN(0x10);\n"; + Lksf << " PROVIDE_HIDDEN(.omp_offloading.img_start." << TgtName + << " = .);\n"; + Lksf << " " << Targets[i].second << "\n"; + Lksf << " PROVIDE_HIDDEN(.omp_offloading.img_end."
<< TgtName + << " = .);\n"; + } + + Lksf << " }\n"; + // Add commands to define the begin and end of the host entries. + Lksf << " .omp_offloading.entries :\n"; + Lksf << " ALIGN(0x10)\n"; + Lksf << " SUBALIGN(0x01)\n"; + Lksf << " {\n"; + Lksf << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; + Lksf << " *(.omp_offloading.entries)\n"; + Lksf << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; + Lksf << " }\n"; + Lksf << "}\n"; + Lksf << "INSERT BEFORE .data\n"; + + Lksf.close(); + + CmdArgs.push_back("-T"); + CmdArgs.push_back(LKS); +} + /// \brief Determine whether Objective-C automated reference counting is /// enabled. static bool isObjCAutoRefCount(const ArgList &Args) { @@ -3291,10 +3395,16 @@ assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; // CUDA compilation may have multiple inputs (source file + results of - // device-side compilations). All other jobs are expected to have exactly one - // input. + // device-side compilations). OpenMP offloading device compile jobs also take + // the host IR as an extra input. All other jobs are expected to have exactly + // one input. bool IsCuda = types::isCuda(Input.getType()); - assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs."); + bool IsOpenMPDeviceCompileJob = + isa(JA) && + getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Device; + assert((IsCuda || (IsOpenMPDeviceCompileJob && Inputs.size() == 2) || + Inputs.size() == 1) && + "Unable to handle multiple inputs."); // Invoke ourselves in -cc1 mode. // @@ -5336,6 +5446,37 @@ CmdArgs.push_back(I->getFilename()); } + // OpenMP offloading device jobs take the argument -omp-host-ir-file-path + // to specify the result of the compile phase on the host, so the meaningful + // device declarations can be identified. Also, -fopenmp-is-device is passed + // along to tell the frontend that it is generating code for a device, so that + // only the relevant declarations are emitted.
+ if (IsOpenMPDeviceCompileJob) { + CmdArgs.push_back("-fopenmp-is-device"); + CmdArgs.push_back("-omp-host-ir-file-path"); + CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename())); + } + + // For all the host OpenMP offloading compile jobs we need to pass the targets + // information using -omptargets= option. + if (isa(JA) && + getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Host) { + SmallString<128> TargetInfo("-omptargets="); + + Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ); + assert(Tgts && Tgts->getNumValues() && + "OpenMP offloading has to have targets specified."); + for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { + if (i) + TargetInfo += ','; + // We need to get the string from the triple because it may be not exactly + // the same as the one we get directly from the arguments. + llvm::Triple T(Tgts->getValue(i)); + TargetInfo += T.getTriple(); + } + CmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); + } + // Finally add the compile command to the compilation. if (Args.hasArg(options::OPT__SLASH_fallback) && Output.getType() == types::TY_Object && @@ -5881,6 +6022,72 @@ SplitDebugName(Args, Input)); } +void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const { + + // The (un)bundling command looks like this: + // clang-offload-bundler -type=bc + // -targets=offload-host-triple,offload-device-triple1,offload-device-triple2 + // -inputs=input_file + // -outputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2 + // (-unbundle) + + auto BundledFile = Output; + auto UnbundledFiles = Inputs; + + bool IsUnbundle = isa(JA); + + ArgStringList CmdArgs; + + // Get the type. + CmdArgs.push_back(TCArgs.MakeArgString( + Twine("-type=") + types::getTypeTempSuffix(BundledFile.getType()))); + + // Get the triples. The device order is the same as in the omptargets option.
+ { + SmallString<128> Triples; + Triples += "-targets=offload-host-"; + Triples += getToolChain().getTripleString(); + + Arg *TargetsArg = TCArgs.getLastArg(options::OPT_omptargets_EQ); + for (auto *A : TargetsArg->getValues()) { + // We have to use the string that exactly matches the triple here. + llvm::Triple T(A); + Triples += ",offload-device-"; + Triples += T.getTriple(); + } + CmdArgs.push_back(TCArgs.MakeArgString(Triples)); + } + + // Add the bundled file argument. + CmdArgs.push_back( + TCArgs.MakeArgString(Twine(IsUnbundle ? "-inputs=" : "-outputs=") + + BundledFile.getFilename())); + + // Add the unbundled files argument, as a comma-separated list. + { + SmallString<128> UB(IsUnbundle ? "-outputs=" : "-inputs="); + for (unsigned i = 0; i < UnbundledFiles.size(); ++i) { + if (i) + UB += ','; + UB += UnbundledFiles[i].getFilename(); + } + CmdArgs.push_back(TCArgs.MakeArgString(UB)); + } + + if (IsUnbundle) + CmdArgs.push_back("-unbundle"); + + // All the inputs are already encoded in the command arguments. + C.addCommand(llvm::make_unique( + JA, *this, + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, None)); +} + void GnuTool::anchor() {} void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, @@ -8659,6 +8866,8 @@ // Already diagnosed. break; } + if (getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Host) + CmdArgs.push_back("-lomptarget"); } AddRunTimeLibs(ToolChain, D, CmdArgs, Args); @@ -8691,6 +8900,9 @@ } else if (Args.hasArg(options::OPT_rtlib_EQ)) AddRunTimeLibs(ToolChain, D, CmdArgs, Args); + // Add OpenMP offloading linker script args if required.
+ AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs); + C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } Index: lib/Driver/Types.cpp =================================================================== --- lib/Driver/Types.cpp +++ lib/Driver/Types.cpp @@ -140,6 +140,10 @@ } } +bool types::isSrcFile(ID Id) { + return Id != TY_Object && getPreprocessedType(Id) != TY_INVALID; +} + types::ID types::lookupTypeForExtension(const char *Ext) { return llvm::StringSwitch(Ext) .Case("c", TY_C) Index: test/OpenMP/target_driver.c =================================================================== --- /dev/null +++ test/OpenMP/target_driver.c @@ -0,0 +1,195 @@ +/// +/// Perform several driver tests for OpenMP offloading +/// + +/// ########################################################################### + +/// Check whether an invalid OpenMP target is specified: +// RUN: %clang -### -fopenmp=libomp -omptargets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' + +/// ########################################################################### + +/// Check warning for empty -omptargets +// RUN: %clang -### -fopenmp=libomp -omptargets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-omptargets=' + +/// ########################################################################### + +/// Check the phases graph when using a single target, different from the host. +/// The actions should be exactly the same as if no offloading was being used.
+// RUN: %clang -ccc-print-phases -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES %s + +// CHK-PHASES-DAG: {{.*}}: linker, {[[A0:[0-9]+]]}, image +// CHK-PHASES-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object +// CHK-PHASES-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler +// CHK-PHASES-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir +// CHK-PHASES-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output +// CHK-PHASES-DAG: [[I]]: input, {{.*}}, c + +/// ########################################################################### + +/// Check the phases when using multiple targets. Again, the actions are the +/// same as if no offloading was being used. Here we also add a library to make +/// sure it is not treated as input. +// RUN: %clang -ccc-print-phases -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s + +// CHK-PHASES-LIB-DAG: {{.*}}: linker, {[[L0:[0-9]+]], [[A0:[0-9]+]]}, image +// CHK-PHASES-LIB-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object +// CHK-PHASES-LIB-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler +// CHK-PHASES-LIB-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir +// CHK-PHASES-LIB-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output +// CHK-PHASES-LIB-DAG: [[I]]: input, {{.*}}, c +// CHK-PHASES-LIB-DAG: [[L0]]: input, "m", object + +/// ########################################################################### + +/// Check the phases when using multiple targets and passing an object file as +/// input. An unbundling action has to be created. 
+// RUN: echo 'bla' > %t.o +// RUN: %clang -ccc-print-phases -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-OBJ %s + +// CHK-PHASES-OBJ-DAG: {{.*}}: linker, {[[L0:[0-9]+]], [[A0:[0-9]+]], [[B0:[0-9]+]]}, image +// CHK-PHASES-OBJ-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object +// CHK-PHASES-OBJ-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler +// CHK-PHASES-OBJ-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir +// CHK-PHASES-OBJ-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output +// CHK-PHASES-OBJ-DAG: [[I]]: input, {{.*}}, c +// CHK-PHASES-OBJ-DAG: [[L0]]: input, "m", object +// CHK-PHASES-OBJ-DAG: [[B0]]: clang-offload-unbundler, {[[B1:[0-9]+]]}, object +// CHK-PHASES-OBJ-DAG: [[B1]]: input, "{{.*}}.o", object + +/// ########################################################################### + +/// Check the phases when using multiple targets and separate compilation. +// RUN: echo 'bla' > %t.s +// RUN: %clang -ccc-print-phases -c -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %t.s -x cpp-output %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-SEP %s + +// CHK-PHASES-SEP-DAG: [[A:[0-9]+]]: input, "{{.*}}.c", cpp-output +// CHK-PHASES-SEP-DAG: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A]]}, cpp-output +// CHK-PHASES-SEP-DAG: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir +// CHK-PHASES-SEP-DAG: [[A3:[0-9]+]]: backend, {[[A2]]}, assembler +// CHK-PHASES-SEP-DAG: [[A4:[0-9]+]]: assembler, {[[A3]]}, object +// CHK-PHASES-SEP-DAG: {{.*}}: clang-offload-bundler, {[[A4]]}, object + +// CHK-PHASES-SEP-DAG: [[B:[0-9]+]]: input, "{{.*}}.s", assembler +// CHK-PHASES-SEP-DAG: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B]]}, assembler +// CHK-PHASES-SEP-DAG: [[B2:[0-9]+]]: assembler, {[[B1]]}, object +// CHK-PHASES-SEP-DAG: {{.*}}: clang-offload-bundler, {[[B2]]}, object + +/// 
########################################################################### + +/// Check the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the use of integrated preprocessor as +/// host IR is shared by all the compile phases. There are also two offloading +/// specific commands: +/// -fopenmp-is-device: will tell the frontend that it will generate code for a +/// target. +/// -omp-host-ir-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declarations +/// really need to be emitted. +/// +// RUN: %clang -### -fopenmp=libomp -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS %s +// RUN: %clang -### -fopenmp=libomp -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST %s +// + +// Final linking - host (ppc64le) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-o" "a.out" {{.*}}"[[HSTOBJ:.+]].o" "-lomp" "-lomptarget" {{.*}}"-T" "[[LKSCRIPT:.+]].lk" +// CHK-COMMANDS-ST-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-o" "a.out" {{.*}}"[[HSTOBJ:.+]].o" "-lomp" "-lomptarget" {{.*}}"-T" "[[LKSCRIPT:.+]].lk" + +// Target 2 commands (x86_64) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[T2LIB:.+]]" {{.*}}"[[T2OBJ:.+]].o" {{.*}}"-lomp" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ]].o" "-x" "ir" "[[T2BC:.+]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC]].bc" "-x" "c" "[[SRC:.+]].c" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC:.+]].bc" + +// CHK-COMMANDS-ST-DAG: ld" {{.*}}"-m"
"elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[T2LIB:.+]]" {{.*}}"[[T2OBJ:.+]].o" {{.*}}"-lomp" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ]].o" "[[T2ASM:.+]].s" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM]].s" "-x" "ir" "[[T2BC:.+]].bc" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC]].bc" "-x" "cpp-output" "[[T2PP:.+]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC:.+]].bc" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP]].i" "-x" "c" "[[SRC:.+]].c" + +// Target 1 commands (ppc64le) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-shared" {{.*}}"-o" "[[T1LIB:.+]]" {{.*}}"[[T1OBJ:.+]].o" {{.*}}"-lomp" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ]].o" "-x" "ir" "[[T1BC:.+]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC]].bc" "-x" "c" "[[SRC]].c" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC]].bc" + +// CHK-COMMANDS-ST-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-shared" {{.*}}"-o" "[[T1LIB:.+]]" {{.*}}"[[T1OBJ:.+]].o" {{.*}}"-lomp" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ]].o" "[[T1ASM:.+]].s" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM]].s" "-x" "ir" "[[T1BC:.+]].bc" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC]].bc" "-x" "cpp-output" "[[T1PP:.+]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC]].bc" +// 
CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP]].i" "-x" "c" "[[SRC]].c" + +// Host object generation +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[HSTOBJ]].o" "-x" "ir" "[[HSTBC]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTBC]].bc" "-x" "c" "[[SRC]].c" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HSTOBJ]].o" "[[HSTASM:.+]].s" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTASM]].s" "-x" "ir" "[[HSTBC:.+]].bc" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTBC]].bc" "-x" "cpp-output" "[[HSTPP:.+]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTPP]].i" "-x" "c" "[[SRC]].c" + +/// ########################################################################### + +/// Check separate compilation +/// +// RUN: echo 'bla' > %t.s +// RUN: %clang -### -fopenmp=libomp -c -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.s -x cpp-output %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-SEP %s +// RUN: %clang -### -fopenmp=libomp -c -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.s -x cpp-output %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-SEP-ST %s +// + +// Unbundle the input files. 
+// CHK-COMMANDS-SEP-DAG: clang-offload-bundler{{.*}}" "-type=s" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[AAASM:.+]].s" "-outputs=[[AAHASM:.+]].s,[[AAT1ASM:.+]].s,[[AAT2ASM:.+]].s" "-unbundle" +// CHK-COMMANDS-SEP-DAG: clang-offload-bundler{{.*}}" "-type=i" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[BBPP:.+]].c" "-outputs=[[BBHPP:.+]].i,[[BBT1PP:.+]].i,[[BBT2PP:.+]].i" "-unbundle" + +// CHK-COMMANDS-SEP-ST-DAG: clang-offload-bundler{{.*}}" "-type=s" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[AAASM:.+]].s" "-outputs=[[AAHASM:.+]].s,[[AAT1ASM:.+]].s,[[AAT2ASM:.+]].s" "-unbundle" +// CHK-COMMANDS-SEP-ST-DAG: clang-offload-bundler{{.*}}" "-type=i" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[BBPP:.+]].c" "-outputs=[[BBHPP:.+]].i,[[BBT1PP:.+]].i,[[BBT2PP:.+]].i" "-unbundle" + +// Create 1st bundle. 
+// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[AAHOBJ:.+]].o" "[[AAHASM]].s" +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT1OBJ:.+]].o" "[[AAT1ASM]].s" +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT2OBJ:.+]].o" "[[AAT2ASM]].s" +// CHK-COMMANDS-SEP-DAG: clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[AAOBJ:.+]].o" "-inputs=[[AAHOBJ]].o,[[AAT1OBJ]].o,[[AAT2OBJ]].o" + +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[AAHOBJ:.+]].o" "[[AAHASM]].s" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT1OBJ:.+]].o" "[[AAT1ASM]].s" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT2OBJ:.+]].o" "[[AAT2ASM]].s" +// CHK-COMMANDS-SEP-ST-DAG: clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[AAOBJ:.+]].o" "-inputs=[[AAHOBJ]].o,[[AAT1OBJ]].o,[[AAT2OBJ]].o" + +// Create 2nd bundle. 
+// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[BBHBC:.+]].bc" "-x" "cpp-output" "[[BBHPP]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBHOBJ:.+]].o" "-x" "ir" "[[BBHBC]].bc" + +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[BBHBC:.+]].bc" "-x" "cpp-output" "[[BBHPP]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBHASM:.+]].s" "-x" "ir" "[[BBHBC]].bc" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[BBHOBJ:.+]].o" "[[BBHASM]].s" + +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1BC:.+]].bc" "-x" "cpp-output" "[[BBT1PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc" +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1OBJ:.+]].o" "-x" "ir" "[[BBT1BC]].bc" + +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1BC:.+]].bc" "-x" "cpp-output" "[[BBT1PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1ASM:.+]].s" "-x" "ir" "[[BBT1BC]].bc" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[BBT1OBJ:.+]].o" "[[BBT1ASM]].s" + +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" 
{{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2BC:.+]].bc" "-x" "cpp-output" "[[BBT2PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc" +// CHK-COMMANDS-SEP-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2OBJ:.+]].o" "-x" "ir" "[[BBT2BC]].bc" + +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2BC:.+]].bc" "-x" "cpp-output" "[[BBT2PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2ASM:.+]].s" "-x" "ir" "[[BBT2BC]].bc" +// CHK-COMMANDS-SEP-ST-DAG: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[BBT2OBJ:.+]].o" "[[BBT2ASM]].s" + +// CHK-COMMANDS-SEP-DAG: clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[BBOBJ:.+]].o" "-inputs=[[BBHOBJ]].o,[[BBT1OBJ]].o,[[BBT2OBJ]].o" +// CHK-COMMANDS-SEP-ST-DAG: clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[BBOBJ:.+]].o" "-inputs=[[BBHOBJ]].o,[[BBT1OBJ]].o,[[BBT2OBJ]].o" + + +