Index: include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- include/clang/Basic/DiagnosticDriverKinds.td +++ include/clang/Basic/DiagnosticDriverKinds.td @@ -117,6 +117,13 @@ def err_drv_optimization_remark_pattern : Error< "%0 in '%1'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; +def err_drv_invalid_omp_target : Error< + "OpenMP target is invalid: '%0'">; +def err_drv_omp_target_toolchain_not_available : Error< + "Toolchain for target '%0' is not supporting OpenMP offloading.">; +def warn_drv_target_file_found : Warning< + "OpenMP target file '%0' is being implicitly used in the '%1' toolchain.">, + InGroup; def warn_O4_is_O3 : Warning<"-O4 is equivalent to -O3">, InGroup; def warn_drv_optimization_value : Warning<"optimization level '%0' is not supported; using '%1%2' instead">, Index: include/clang/Basic/DiagnosticGroups.td =================================================================== --- include/clang/Basic/DiagnosticGroups.td +++ include/clang/Basic/DiagnosticGroups.td @@ -741,6 +741,7 @@ def SourceUsesOpenMP : DiagGroup<"source-uses-openmp">; def OpenMPClauses : DiagGroup<"openmp-clauses">; def OpenMPLoopForm : DiagGroup<"openmp-loop-form">; +def OpenMPOffloading : DiagGroup<"openmp-offloading">; // Backend warnings. 
def BackendInlineAsm : DiagGroup<"inline-asm">; Index: include/clang/Driver/Action.h =================================================================== --- include/clang/Driver/Action.h +++ include/clang/Driver/Action.h @@ -41,6 +41,7 @@ enum ActionClass { InputClass = 0, BindArchClass, + BindTargetClass, PreprocessJobClass, PrecompileJobClass, AnalyzeJobClass, @@ -70,25 +71,33 @@ unsigned OwnsInputs : 1; + /// Is this action referring to the main host or an OpenMP offloading device + const char *OffloadingDevice; + protected: Action(ActionClass _Kind, types::ID _Type) - : Kind(_Kind), Type(_Type), OwnsInputs(true) {} + : Kind(_Kind), Type(_Type), OwnsInputs(true), OffloadingDevice(0) {} Action(ActionClass _Kind, std::unique_ptr Input, types::ID _Type) - : Kind(_Kind), Type(_Type), Inputs(1, Input.release()), OwnsInputs(true) { - } + : Kind(_Kind), Type(_Type), Inputs(1, Input.release()), OwnsInputs(true), + OffloadingDevice(0) {} Action(ActionClass _Kind, std::unique_ptr Input) : Kind(_Kind), Type(Input->getType()), Inputs(1, Input.release()), - OwnsInputs(true) {} + OwnsInputs(true), OffloadingDevice(0) {} Action(ActionClass _Kind, const ActionList &_Inputs, types::ID _Type) - : Kind(_Kind), Type(_Type), Inputs(_Inputs), OwnsInputs(true) {} + : Kind(_Kind), Type(_Type), Inputs(_Inputs), OwnsInputs(true), + OffloadingDevice(0) {} + public: virtual ~Action(); const char *getClassName() const { return Action::getClassName(getKind()); } - bool getOwnsInputs() { return OwnsInputs; } + bool getOwnsInputs() const { return OwnsInputs; } void setOwnsInputs(bool Value) { OwnsInputs = Value; } + const char *getOffloadingDevice() const { return OffloadingDevice; } + void setOffloadingDevice(const char *Value) { OffloadingDevice = Value; } + ActionClass getKind() const { return Kind; } types::ID getType() const { return Type; } @@ -133,6 +142,22 @@ } }; +class BindTargetAction : public Action { + virtual void anchor(); + /// The offloading target to bind, or 0 if the default 
(host) architecture + /// should be bound. + const char *TargetName; + +public: + BindTargetAction(std::unique_ptr Input, const char *_TargetName); + + const char *getTargetName() const { return TargetName; } + + static bool classof(const Action *A) { + return A->getKind() == BindTargetClass; + } +}; + class JobAction : public Action { virtual void anchor(); protected: @@ -190,6 +215,10 @@ void anchor() override; public: CompileJobAction(std::unique_ptr Input, types::ID OutputType); + // If offloading is being used, the compiler phase will take the host + // IR file as input in addition to the preprocessed source file. In this + // case, a list of inputs is required instead of a single action. + CompileJobAction(ActionList &Inputs, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == CompileJobClass; Index: include/clang/Driver/Compilation.h =================================================================== --- include/clang/Driver/Compilation.h +++ include/clang/Driver/Compilation.h @@ -52,8 +52,11 @@ JobList Jobs; /// Cache of translated arguments for a particular tool chain and bound - /// architecture. - llvm::DenseMap, + /// architecture. We use the second element of the pair to record whether + /// the toolchain belongs to an offloading target, as the same toolchain + /// may be used for host and targets, each using different arguments. + typedef std::pair ToolChainWithTargetInfo; + llvm::DenseMap, llvm::opt::DerivedArgList *> TCArgs; /// Temporary files which should be removed on exit. @@ -111,8 +114,12 @@ /// tool chain \p TC (or the default tool chain, if TC is not specified). /// /// \param BoundArch - The bound architecture name, or 0. 
+ /// \param isOpenMPTarget - True if this tool chain refers to an OpenMP target + /// \param isSuccess - set to true if the arguments were successfully obtained const llvm::opt::DerivedArgList &getArgsForToolChain(const ToolChain *TC, - const char *BoundArch); + const char *BoundArch, + bool isOpenMPTarget, + bool &isSuccess); /// addTempFile - Add a file to remove on exit, and returns its /// argument. Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -15,6 +15,7 @@ #include "clang/Driver/Phases.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -182,6 +183,23 @@ /// created targeting that triple. The driver owns all the ToolChain objects /// stored in it, and will clean them up when torn down. mutable llvm::StringMap ToolChains; + mutable llvm::StringMap ToolChainsOffloading; + + /// \brief ToolChain used by the host + const ToolChain *HostToolChain; + + /// \brief Array of the triples of offloading targets in the order they were + /// requested + /// by the user. + SmallVector OffloadingTriples; + + /// \brief Cache that records the outputs obtained for a given action in a + /// given toolchain to avoid computing that multiple times + typedef llvm::DenseMap + OutputsForActionPerToolChainTy; + typedef llvm::DenseMap + OutputsForActionCacheTy; + mutable OutputsForActionCacheTy OutputsForActionCache; private: /// TranslateInputArgs - Create a new derived argument list from the input @@ -355,10 +373,12 @@ /// ConstructAction - Construct the appropriate action to do for /// \p Phase on the \p Input, taking in to account arguments - /// like -fsyntax-only or --analyze. + /// like -fsyntax-only or --analyze as well as any dependency + /// related to offloading (host-target dependencies). 
std::unique_ptr ConstructPhaseAction(const ToolChain &TC, const llvm::opt::ArgList &Args, - phases::ID Phase, std::unique_ptr Input) const; + phases::ID Phase, std::unique_ptr Input, + std::unique_ptr OffloadingDepInput) const; /// BuildJobsForAction - Construct the jobs to perform for the /// action \p A. @@ -385,12 +405,12 @@ /// \param BoundArch - The bound architecture. /// \param AtTopLevel - Whether this is a "top-level" action. /// \param MultipleArchs - Whether multiple -arch options were supplied. - const char *GetNamedOutputPath(Compilation &C, - const JobAction &JA, - const char *BaseInput, - const char *BoundArch, - bool AtTopLevel, - bool MultipleArchs) const; + /// \param HasOffloadingTargetSuffixApended - Whether the base input was + /// already appended with an offloading target suffix. + const char *GetNamedOutputPath(Compilation &C, const JobAction &JA, + const char *BaseInput, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + bool HasOffloadingTargetSuffixApended) const; /// GetTemporaryPath - Return the pathname of a temporary file to use /// as part of compilation; the file will have the given prefix and suffix. @@ -408,9 +428,11 @@ /// \brief Retrieves a ToolChain for a particular target triple. /// /// Will cache ToolChains for the life of the driver object, and create them - /// on-demand. + /// on-demand. If \p TripleString is provided, the triple is obtained + /// from it. 
const ToolChain &getToolChain(const llvm::opt::ArgList &Args, - StringRef DarwinArchName = "") const; + StringRef DarwinArchName = "", + const char *TripleString = nullptr) const; /// @} Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -311,6 +311,8 @@ def Wno_write_strings : Flag<["-"], "Wno-write-strings">, Group, Flags<[CC1Option]>; def W_Joined : Joined<["-"], "W">, Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, HelpText<"Enable the specified warning">; +def Womp_implicit_target_files : Flag<["-"], "Womp-implicit-target-files">, Group, + Flags<[CC1Option, NoArgumentUnused]>; def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass to the static analyzer">, MetaVarName<"">; def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>; @@ -1490,6 +1492,7 @@ def object : Flag<["-"], "object">; def o : JoinedOrSeparate<["-"], "o">, Flags<[DriverOption, RenderAsInput, CC1Option, CC1AsOption]>, HelpText<"Write output to ">, MetaVarName<"">; +def omptargets_EQ : CommaJoined<["-"], "omptargets=">, Flags<[DriverOption, CC1Option]>; def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">; def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>; def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group, Flags<[CC1Option]>; Index: include/clang/Driver/ToolChain.h =================================================================== --- include/clang/Driver/ToolChain.h +++ include/clang/Driver/ToolChain.h @@ -86,11 +86,16 @@ mutable std::unique_ptr SanitizerArguments; + /// This is set to true when the toolchain is created if it refers to an + /// offloading target toolchain + unsigned IsOffloadingTargetToolchain : 1; + protected: MultilibSet Multilibs; ToolChain(const Driver &D, const llvm::Triple &T, - const llvm::opt::ArgList &Args); + const llvm::opt::ArgList &Args, + bool 
IsOffloadingTargetToolchain = false); virtual Tool *buildAssembler() const; virtual Tool *buildLinker() const; @@ -120,6 +125,9 @@ const Driver &getDriver() const; const llvm::Triple &getTriple() const { return Triple; } + bool isOffloadingTargetToolchain() const { + return IsOffloadingTargetToolchain; + } llvm::Triple::ArchType getArch() const { return Triple.getArch(); } StringRef getArchName() const { return Triple.getArchName(); } @@ -157,11 +165,13 @@ /// specific translations are needed. /// /// \param BoundArch - The bound architecture name, or 0. + /// \param isOffloadingTarget - True if this toolchain is an offloading + /// target. + /// \param isSuccess - set to True if the arguments were successfully + /// translated. virtual llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const { - return nullptr; - } + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + bool isOffloadingTarget, bool &isSuccess) const; /// Choose a tool to use to handle the action \p JA. 
Tool *SelectTool(const JobAction &JA) const; Index: include/clang/Driver/Types.def =================================================================== --- include/clang/Driver/Types.def +++ include/clang/Driver/Types.def @@ -88,6 +88,7 @@ TYPE("remap", Remap, INVALID, "remap", "") TYPE("precompiled-header", PCH, INVALID, "gch", "A") TYPE("object", Object, INVALID, "o", "") +TYPE("shared-object", SObject, INVALID, "so", "") TYPE("treelang", Treelang, INVALID, nullptr, "u") TYPE("image", Image, INVALID, "out", "") TYPE("dSYM", dSYM, INVALID, "dSYM", "A") Index: lib/Driver/Action.cpp =================================================================== --- lib/Driver/Action.cpp +++ lib/Driver/Action.cpp @@ -24,6 +24,8 @@ switch (AC) { case InputClass: return "input"; case BindArchClass: return "bind-arch"; + case BindTargetClass: + return "bind-target"; case PreprocessJobClass: return "preprocessor"; case PrecompileJobClass: return "precompiler"; case AnalyzeJobClass: return "analyzer"; @@ -53,6 +55,14 @@ const char *_ArchName) : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} +void BindTargetAction::anchor() {} + +BindTargetAction::BindTargetAction(std::unique_ptr Input, + const char *_TargetName) + : Action(BindTargetClass, std::move(Input)), TargetName(_TargetName) { + setOffloadingDevice(_TargetName); +} + void JobAction::anchor() {} JobAction::JobAction(ActionClass Kind, std::unique_ptr Input, @@ -93,6 +103,9 @@ types::ID OutputType) : JobAction(CompileJobClass, std::move(Input), OutputType) {} +CompileJobAction::CompileJobAction(ActionList &Inputs, types::ID OutputType) + : JobAction(CompileJobClass, Inputs, OutputType) {} + void BackendJobAction::anchor() {} BackendJobAction::BackendJobAction(std::unique_ptr Input, Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -33,9 +33,10 @@ delete Args; // Free any derived arg lists. 
- for (llvm::DenseMap, - DerivedArgList*>::iterator it = TCArgs.begin(), - ie = TCArgs.end(); it != ie; ++it) + for (llvm::DenseMap, + DerivedArgList *>::iterator it = TCArgs.begin(), + ie = TCArgs.end(); + it != ie; ++it) if (it->second != TranslatedArgs) delete it->second; @@ -53,17 +54,36 @@ } const DerivedArgList &Compilation::getArgsForToolChain(const ToolChain *TC, - const char *BoundArch) { + const char *BoundArch, + bool isOffloadingTarget, + bool &isSuccess) { if (!TC) TC = &DefaultToolChain; - DerivedArgList *&Entry = TCArgs[std::make_pair(TC, BoundArch)]; - if (!Entry) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch); - if (!Entry) - Entry = TranslatedArgs; + // Try to locate the args in the toolchain map + llvm::DenseMap, + llvm::opt::DerivedArgList *>::iterator it = + TCArgs.find( + std::make_pair(std::make_pair(TC, isOffloadingTarget), BoundArch)); + + // We have already tried to translate these args + if (it != TCArgs.end()) { + // if null, we already failed to translate + isSuccess = (it->second); + return *(it->second); } + // This is the first attempt to translate these arguments + DerivedArgList *&Entry = + TCArgs[std::make_pair(std::make_pair(TC, isOffloadingTarget), BoundArch)]; + + Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, isOffloadingTarget, + isSuccess); + // If we didn't get derived arguments but the translation is signaled as + // successful, we should use the untranslated arguments + if (!Entry && isSuccess) + Entry = TranslatedArgs; + return *Entry; } Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -55,7 +55,8 @@ CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr), CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true), - CCCUsePCH(true), 
SuppressMissingInputWarning(false), + HostToolChain(nullptr) { Name = llvm::sys::path::filename(ClangExecutable); Dir = llvm::sys::path::parent_path(ClangExecutable); @@ -77,6 +78,7 @@ delete Opts; llvm::DeleteContainerSeconds(ToolChains); + llvm::DeleteContainerSeconds(ToolChainsOffloading); } void Driver::ParseDriverMode(ArrayRef Args) { @@ -132,9 +134,11 @@ continue; } - // Warn about -mcpu= without an argument. - if (A->getOption().matches(options::OPT_mcpu_EQ) && - A->containsValue("")) { + // Warn about -mcpu= and -omptargets= without an argument. + if ((A->getOption().matches(options::OPT_mcpu_EQ) && + A->containsValue("")) || + (A->getOption().matches(options::OPT_omptargets_EQ) && + !A->getNumValues())) { Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(*Args); } @@ -374,10 +378,37 @@ DerivedArgList *TranslatedArgs = TranslateInputArgs(*Args); // Owned by the host. - const ToolChain &TC = getToolChain(*Args); + HostToolChain = &getToolChain(*Args); + + // Get the triples for the offloading targets if any + OffloadingTriples.clear(); + + if (const Arg *A = Args->getLastArg(options::OPT_fopenmp_EQ)) { + if (StringRef(A->getValue()) == "libiomp5") { + // check if there is any openmp target we care generating code to + Arg *Tgts = Args->getLastArg(options::OPT_omptargets_EQ); + + // If omptargets was specified use only the required targets + if (Tgts && Tgts->getNumValues()) { + for (unsigned v = 0; v < Tgts->getNumValues(); ++v) { + std::string error; + const char *val = Tgts->getValue(v); + + llvm::Triple TT(val); + + // If the specified target is invalid, emit error + if (TT.getArch() == llvm::Triple::UnknownArch) + Diag(clang::diag::err_drv_invalid_omp_target) << val; + else { + OffloadingTriples.push_back(val); + } + } + } + } + } // The compilation takes ownership of Args. 
- Compilation *C = new Compilation(*this, TC, Args, TranslatedArgs); + Compilation *C = new Compilation(*this, *HostToolChain, Args, TranslatedArgs); if (!HandleImmediateArgs(*C)) return C; @@ -388,7 +419,7 @@ // Construct the list of abstract actions to perform for this compilation. On // MachO targets this uses the driver-driver and universal actions. - if (TC.getTriple().isOSBinFormatMachO()) + if (HostToolChain->getTriple().isOSBinFormatMachO()) BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs, C->getActions()); else @@ -1204,9 +1235,14 @@ } } - // Construct the actions to perform. - ActionList LinkerInputs; + // We need an array of actions to trace the actions for the main target + // and each available omp target detected before + unsigned OffloadingTargetsNum = OffloadingTriples.size(); + std::unique_ptr *ActionsForTarget = + new std::unique_ptr[1 + OffloadingTargetsNum]; + ActionList *LinkerInputsForTarget = new ActionList[1 + OffloadingTargetsNum]; + // Construct the actions to perform. llvm::SmallVector PL; for (unsigned i = 0, e = Inputs.size(); i != e; ++i) { types::ID InputType = Inputs[i].first; @@ -1251,7 +1287,17 @@ } // Build the pipeline for this file. - std::unique_ptr Current(new InputAction(*InputArg, InputType)); + + // Initialize with the current input + ActionsForTarget[0].reset(new InputAction(*InputArg, InputType)); + + // The number of targets we should take into account. + // Before the preprocessor phase there is only one and from + // the OffloadingToolChainNum phase on we will have N+1 targets + // where N is all the available omp targets + unsigned PreviousTotalTargets = 1; + unsigned TotalTargets = 1; + for (SmallVectorImpl::iterator i = PL.begin(), e = PL.end(); i != e; ++i) { phases::ID Phase = *i; @@ -1260,33 +1306,224 @@ if (Phase > FinalPhase) break; - // Queue linker inputs. 
- if (Phase == phases::Link) { - assert((i + 1) == e && "linking must be final compilation step."); - LinkerInputs.push_back(Current.release()); - break; + bool NothingElseToDo = false; + + // keep track of the number of targets in the previous phase + PreviousTotalTargets = TotalTargets; + + // if it is the preprocess phase or after, the total targets also contain + // the omp targets + if (Phase >= phases::Preprocess) + TotalTargets = 1 + OffloadingTargetsNum; + + // if this is after a preprocessing phase, and only one input is available + // for all targets, we need to locate the target corresponding inputs. + // We have to make sure we only do this for input files and not other + // options. + if (Phase > phases::Preprocess && PreviousTotalTargets < TotalTargets && + InputArg->getOption().getKind() == llvm::opt::Option::InputClass) { + + // The unique available input has to be an input action + InputAction *IA = cast(ActionsForTarget[0].get()); + + const Arg &HostArg = IA->getInputArg(); + StringRef HostFileName(HostArg.getValue(0)); + + // Lets find out if we have an input for each target + bool HaveDifferentInputsForEachTarget = true; + llvm::SmallVector, 4> TargetFileNames( + OffloadingTargetsNum); + + for (unsigned tgt = 0; tgt < OffloadingTargetsNum; ++tgt) { + // Name of the file + TargetFileNames[tgt] += HostFileName; + + // Target suffix + TargetFileNames[tgt] += ".tgt-"; + TargetFileNames[tgt] += OffloadingTriples[tgt]; + + // Check if the file exists. If not, don't bother trying to find other + // files as will not be able to use them. 
+ if (!llvm::sys::fs::exists(TargetFileNames[tgt].c_str())) { + HaveDifferentInputsForEachTarget = false; + break; + } + } + + for (unsigned tgt = 0; tgt < OffloadingTargetsNum; ++tgt) { + + // If we do not have an input file for each target, we use null to + // indicate it and potential + if (!HaveDifferentInputsForEachTarget) { + ActionsForTarget[1 + tgt].reset(nullptr); + continue; + } + + // Notify the user we found a target file that we will be using + if (Args.hasArg(options::OPT_Womp_implicit_target_files)) + Diag(clang::diag::warn_drv_target_file_found) + << TargetFileNames[tgt].c_str() << OffloadingTriples[tgt]; + + Arg *TargetArg = Args.MakePositionalArg( + &HostArg, HostArg.getOption(), + Args.MakeArgString(TargetFileNames[tgt].c_str())); + + std::unique_ptr TgtIA; + TgtIA.reset(new InputAction(*TargetArg, IA->getType())); + TgtIA.get()->setOffloadingDevice(OffloadingTriples[tgt]); + + BindTargetAction *BindedTgtIA = + new BindTargetAction(std::move(TgtIA), OffloadingTriples[tgt]); + + ActionsForTarget[1 + tgt].reset(BindedTgtIA); + } + + // If we have an input for each target, this is like supporting multiple + // targets in a previous phase. If we are ahead of the compiling phase + // we cannot use the host files anymore, so we need to have target + // files already. If the input is set to null this will result in the + // target actions not to be generated at all in the code below. + if (HaveDifferentInputsForEachTarget || Phase > phases::Compile) + PreviousTotalTargets = TotalTargets; } - // Some types skip the assembler phase (e.g., llvm-bc), but we can't - // encode this in the steps because the intermediate type depends on - // arguments. Just special case here. 
- if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm) - continue; + // If we have multiple OpenMP targets we use the host compile output + // as a target input so that host information can be used during + // target codegen + Action *LastHostInput = nullptr; + Action *LastHostOutput = nullptr; + + for (unsigned tgt = 0; tgt < TotalTargets; ++tgt) { + std::unique_ptr CurrentInput; + + // Do we need to reuse the host target file for all other targets? + if (PreviousTotalTargets < TotalTargets) { + // Each target picks its input file from the host + if (tgt == 0) + CurrentInput.reset(LastHostInput = ActionsForTarget[0].release()); + else + CurrentInput.reset(LastHostInput); + + assert(CurrentInput.get() && "Expecting an input to be defined"); + } else { + // Each target uses its own target input file + CurrentInput.reset(ActionsForTarget[tgt].release()); + + assert((tgt || CurrentInput.get()) && "Host must have an input!"); - // Otherwise construct the appropriate action. - Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); - if (Current->getType() == types::TY_Nothing) + // If no action for this target is defined we just move to the next + // target + if (!CurrentInput.get()) + continue; + } + + // Queue linker inputs. + if (Phase == phases::Link) { + assert((i + 1) == e && "linking must be final compilation step."); + LinkerInputsForTarget[tgt].push_back(CurrentInput.release()); + NothingElseToDo = true; + continue; + } + + // Some types skip the assembler phase (e.g., llvm-bc), but we can't + // encode this in the steps because the intermediate type depends on + // arguments. Just special case here. + if (Phase == phases::Assemble && + CurrentInput->getType() != types::TY_PP_Asm) { + // Just store the current input to be used directly in the next phase + ActionsForTarget[tgt].reset(CurrentInput.release()); + continue; + } + + // Build the phase action. 
We pass the last llvm IR file that was + // produced for the host for a compile phase for the OpenMP targets + std::unique_ptr DepAction; + if (Phase == phases::Compile && tgt > 0 && + LastHostOutput->getType() == types::TY_LLVM_BC) { + DepAction.reset(LastHostOutput); + // Bind action to the host toolchain - nullptr + DepAction.reset(new BindTargetAction(std::move(DepAction), nullptr)); + DepAction.get()->setOwnsInputs(false); + } + ActionsForTarget[tgt].reset( + ConstructPhaseAction(TC, Args, Phase, std::move(CurrentInput), + std::move(DepAction)) + .release()); + + // Save the last host output as it may be required for some OpenMP + // host-target dependency + if (tgt == 0) + LastHostOutput = ActionsForTarget[0].get(); + + if (ActionsForTarget[tgt]->getType() == types::TY_Nothing) + NothingElseToDo = true; + + // If we are coming from a single to multiple target phase set the input + // ownership flag to false. + // Also, if this is a compile phase for the OpenMP host and we are + // supporting multiple targets, we also need to set this to false + ActionsForTarget[tgt]->setOwnsInputs( + !((Phase == phases::Compile && !tgt && TotalTargets > 1 && + ActionsForTarget[0].get()->getType() == types::TY_LLVM_BC) || + (PreviousTotalTargets < TotalTargets))); + + // If we are processing a target action, we need to bind it to the + // target it refers to + if (tgt > 0) { + ActionsForTarget[tgt]->setOffloadingDevice( + OffloadingTriples[tgt - 1]); + ActionsForTarget[tgt].reset(new BindTargetAction( + std::move(ActionsForTarget[tgt]), OffloadingTriples[tgt - 1])); + } + } + + if (NothingElseToDo) break; } // If we ended with something, add to the output list. 
- if (Current) - Actions.push_back(Current.release()); + for (unsigned tgt = 0; tgt < TotalTargets; ++tgt) { + if (ActionsForTarget[tgt]) + Actions.push_back(ActionsForTarget[tgt].release()); + } + } + + // release the Actions-For-Target array + delete[] ActionsForTarget; + + // Create link action for each target if any + for (unsigned tgt = 0; tgt < OffloadingTargetsNum; ++tgt) { + + if (LinkerInputsForTarget[tgt + 1].empty()) + continue; + + std::unique_ptr TgtLinkAction; + // Link target action: produces a shared library + TgtLinkAction.reset( + new LinkJobAction(LinkerInputsForTarget[tgt + 1], types::TY_SObject)); + TgtLinkAction.get()->setOffloadingDevice(OffloadingTriples[tgt]); + + // if the target link phase takes an input that is not bound to it, it + // means it does not own it, as it may be used by other targets and host too + for (const auto &II : LinkerInputsForTarget[tgt + 1]) { + if (!isa(II)) + TgtLinkAction.get()->setOwnsInputs(false); + } + + // Bind action to target + TgtLinkAction.reset( + new BindTargetAction(std::move(TgtLinkAction), OffloadingTriples[tgt])); + // Include the resulting object as part of the host linking + LinkerInputsForTarget[0].push_back(TgtLinkAction.release()); } // Add a link action if necessary. - if (!LinkerInputs.empty()) - Actions.push_back(new LinkJobAction(LinkerInputs, types::TY_Image)); + if (!LinkerInputsForTarget[0].empty()) + Actions.push_back( + new LinkJobAction(LinkerInputsForTarget[0], types::TY_Image)); + + // Release the linking input arrays + delete[] LinkerInputsForTarget; // If we are linking, claim any options which are obviously only used for // compilation. 
@@ -1301,8 +1538,8 @@ std::unique_ptr Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, - phases::ID Phase, - std::unique_ptr Input) const { + phases::ID Phase, std::unique_ptr Input, + std::unique_ptr OffloadingDepInput) const { llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); // Build the appropriate action. switch (Phase) { @@ -1356,8 +1593,13 @@ if (Args.hasArg(options::OPT_verify_pch)) return llvm::make_unique(std::move(Input), types::TY_Nothing); - return llvm::make_unique(std::move(Input), - types::TY_LLVM_BC); + // In a compile phase, an offloading dependency file may be provided. If so, + // we add it to the input list. + ActionList AL; + AL.push_back(Input.release()); + if (OffloadingDepInput.get()) + AL.push_back(OffloadingDepInput.release()); + return llvm::make_unique(AL, types::TY_LLVM_BC); } case phases::Backend: { if (IsUsingLTO(TC, Args)) { @@ -1391,17 +1633,26 @@ return false; } +//// cache that records the outputs obtained for a given action in a given +//// toolchain to avoid computing that multiple times +// typedef llvm::DenseMap +// OutputsForActionPerToolChainTy; +// typedef llvm::DenseMap +// OutputsForActionCacheTy; +// static OutputsForActionCacheTy OutputsForActionCache; + void Driver::BuildJobs(Compilation &C) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o); // It is an error to provide a -o option if we are making multiple output - // files. + // files, except if the action producing the output is an offloading + // one. 
if (FinalOutput) { unsigned NumOutputs = 0; for (const Action *A : C.getActions()) - if (A->getType() != types::TY_Nothing) + if (A->getType() != types::TY_Nothing && !A->getOffloadingDevice()) ++NumOutputs; if (NumOutputs > 1) { @@ -1417,6 +1668,10 @@ if (A->getOption().matches(options::OPT_arch)) ArchNames.insert(A->getValue()); + // Make sure the outputs cache is empty prior starting computing jobs and + // inputs. + OutputsForActionCache.clear(); + for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output . Unfortunately, this @@ -1485,49 +1740,79 @@ } } +static Action *GetPreviousSingleAction(const ActionList *&Inputs) { + // Determine the single action that generated the current input, if any + Action *PrevSingleAction = nullptr; + if (Inputs->size() == 1) { + PrevSingleAction = dyn_cast(*Inputs->begin()); + // Bypass the binding of a target + if (PrevSingleAction && isa(PrevSingleAction)) + PrevSingleAction = *PrevSingleAction->getInputs().begin(); + } + return PrevSingleAction; +} + static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps, + bool isLegalToMergeCompilerAndBackend, const ToolChain *TC, const JobAction *JA, const ActionList *&Inputs) { const Tool *ToolForJob = nullptr; + Action *PrevSingleAction = GetPreviousSingleAction(Inputs); // See if we should look for a compiler with an integrated assembler. We match // bottom up, so what we are actually looking for is an assembler job with a // compiler input. 
- if (TC->useIntegratedAs() && - !SaveTemps && + if (TC->useIntegratedAs() && !SaveTemps && !C.getArgs().hasArg(options::OPT_via_file_asm) && !C.getArgs().hasArg(options::OPT__SLASH_FA) && !C.getArgs().hasArg(options::OPT__SLASH_Fa) && - isa(JA) && - Inputs->size() == 1 && isa(*Inputs->begin())) { + isa(JA) && PrevSingleAction && + isa(PrevSingleAction)) { // A BackendJob is always preceded by a CompileJob, and without // -save-temps they will always get combined together, so instead of - // checking the backend tool, check if the tool for the CompileJob - // has an integrated assembler. - const ActionList *BackendInputs = &(*Inputs)[0]->getInputs(); - JobAction *CompileJA = cast(*BackendInputs->begin()); - const Tool *Compiler = TC->SelectTool(*CompileJA); - if (!Compiler) - return nullptr; - if (Compiler->hasIntegratedAssembler()) { - Inputs = &(*BackendInputs)[0]->getInputs(); - ToolForJob = Compiler; + // checking the backend tool, check if the tool for the CompileJob + // has an integrated assembler. The isLegalToMergeCompilerAndBackend flag + // has to be set so that Backend and Compile phases can be integrated into a + // single one though.
+ if (isLegalToMergeCompilerAndBackend) { + const ActionList *BackendInputs = &PrevSingleAction->getInputs(); + Action *PrevPrevSingleAction = GetPreviousSingleAction(BackendInputs); + + JobAction *CompileJA = cast(PrevPrevSingleAction); + const Tool *Compiler = TC->SelectTool(*CompileJA); + if (!Compiler) + return nullptr; + if (Compiler->hasIntegratedAssembler()) { + Inputs = &PrevPrevSingleAction->getInputs(); + PrevSingleAction = GetPreviousSingleAction(Inputs); + ToolForJob = Compiler; + } + } else { + JobAction *BackendJA = cast(PrevSingleAction); + const Tool *Backend = TC->SelectTool(*BackendJA); + if (!Backend) + return nullptr; + if (Backend->hasIntegratedAssembler()) { + Inputs = &PrevSingleAction->getInputs(); + return Backend; + } } } // A backend job should always be combined with the preceding compile job // unless OPT_save_temps is enabled and the compiler is capable of emitting // LLVM IR as an intermediate output. - if (isa(JA)) { + if (isa(JA) && isLegalToMergeCompilerAndBackend) { // Check if the compiler supports emitting LLVM IR. - assert(Inputs->size() == 1); - JobAction *CompileJA = cast(*Inputs->begin()); + assert(PrevSingleAction); + JobAction *CompileJA = cast(PrevSingleAction); const Tool *Compiler = TC->SelectTool(*CompileJA); if (!Compiler) return nullptr; if (!Compiler->canEmitIR() || !SaveTemps) { - Inputs = &(*Inputs)[0]->getInputs(); + Inputs = &PrevSingleAction->getInputs(); + PrevSingleAction = GetPreviousSingleAction(Inputs); ToolForJob = Compiler; } } @@ -1539,13 +1824,12 @@ // See if we should use an integrated preprocessor. We do so when we have // exactly one input, since this is the only use case we care about // (irrelevant since we don't support combine yet). 
- if (Inputs->size() == 1 && isa(*Inputs->begin()) && + if (PrevSingleAction && isa(PrevSingleAction) && !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && - !C.getArgs().hasArg(options::OPT_traditional_cpp) && - !SaveTemps && + !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && !C.getArgs().hasArg(options::OPT_rewrite_objc) && ToolForJob->hasIntegratedCPP()) - Inputs = &(*Inputs)[0]->getInputs(); + Inputs = &PrevSingleAction->getInputs(); return ToolForJob; } @@ -1560,16 +1844,39 @@ InputInfo &Result) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); + // Find out whether we already created a job for this exact action and + // toolchain. + { + auto PrevOutputsPerToolchainIt = + OutputsForActionCache.find(const_cast(A)); + // Any toolchains used to generate outputs for this action? + if (PrevOutputsPerToolchainIt != OutputsForActionCache.end()) { + auto &PrevOutputsPerToolchain = PrevOutputsPerToolchainIt->getSecond(); + auto PrevOutputsIt = + PrevOutputsPerToolchain.find(const_cast(TC)); + // Is there any output for the requested toolchain? + if (PrevOutputsIt != PrevOutputsPerToolchain.end()) { + Result = PrevOutputsIt->getSecond(); + return; + } + } + } + if (const InputAction *IA = dyn_cast(A)) { // FIXME: It would be nice to not claim this here; maybe the old scheme of // just using Args was better? const Arg &Input = IA->getInputArg(); Input.claim(); + + // If the input action has an offloading device associated, it means that + // the file is already a target file and therefore has the target suffix + // already appended to it.
if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - Result = InputInfo(Name, A->getType(), Name); + Result = InputInfo(Name, A, Name, !IA->getOffloadingDevice()); } else - Result = InputInfo(&Input, A->getType(), ""); + Result = InputInfo(&Input, A, "", !IA->getOffloadingDevice()); + return; } @@ -1587,10 +1894,36 @@ return; } + if (const BindTargetAction *BTA = dyn_cast(A)) { + const ToolChain *TC; + const char *OffloadingTargetName = BTA->getTargetName(); + + if (OffloadingTargetName) { + TC = &getToolChain(C.getArgs(), "", OffloadingTargetName); + // If the toolchain does not have isOffloadingTargetToolchain set it means + // there was some problem creating the toolchain + if (!TC->isOffloadingTargetToolchain()) { + Diag(clang::diag::err_drv_omp_target_toolchain_not_available) + << OffloadingTargetName; + return; + } + } else { + TC = HostToolChain; + OffloadingTargetName = nullptr; + } + + BuildJobsForAction(C, *BTA->begin(), TC, OffloadingTargetName, AtTopLevel, + false, LinkingOutput, Result); + return; + } + const ActionList *Inputs = &A->getInputs(); const JobAction *JA = cast(A); - const Tool *T = SelectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs); + const Tool *T = SelectToolForJob( + C, isSaveTempsEnabled(), + /*isLegalToMergeCompilerAndBackend=*/OffloadingTriples.empty(), TC, JA, + Inputs); if (!T) return; @@ -1613,6 +1946,11 @@ // Always use the first input as the base input. const char *BaseInput = InputInfos[0].getBaseInput(); + // We need to propagate the information on whether a target suffix has to be + // added to the name of the files. + bool HasTargetSuffixApended = + InputInfos[0].hasOffloadingTargetSuffixApended(); + // ... except dsymutil actions, which use their actual input as the base // input. if (JA->getType() == types::TY_dSYM) @@ -1620,11 +1958,15 @@ // Determine the place to write output to, if any. 
if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A->getType(), BaseInput); + Result = InputInfo(A, BaseInput, HasTargetSuffixApended); else - Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs), - A->getType(), BaseInput); + Result = + InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, + MultipleArchs, HasTargetSuffixApended), + A, BaseInput, HasTargetSuffixApended); + + OutputsForActionCache[const_cast(A)][const_cast(TC)] = + Result; if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -1636,8 +1978,20 @@ } llvm::errs() << "], output: " << Result.getAsString() << "\n"; } else { - T->ConstructJob(C, *JA, Result, InputInfos, - C.getArgsForToolChain(TC, BoundArch), LinkingOutput); + + bool isSuccess; + + // Get the derived args for the target + const DerivedArgList &DArgs = C.getArgsForToolChain( + TC, BoundArch, JA->getOffloadingDevice(), isSuccess); + + // If we were unable to successfully get the arguments we cannot create the + // job. This will happen if using an offloading target toolchain that is not + // prepared to translate host arguments into target's. + if (!isSuccess) + return; + + T->ConstructJob(C, *JA, Result, InputInfos, DArgs, LinkingOutput); } } @@ -1678,18 +2032,26 @@ return Args.MakeArgString(Filename.c_str()); } -const char *Driver::GetNamedOutputPath(Compilation &C, - const JobAction &JA, - const char *BaseInput, - const char *BoundArch, - bool AtTopLevel, - bool MultipleArchs) const { +const char * +Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, + const char *BaseInput, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + bool HasOffloadingTargetSuffixApended) const { llvm::PrettyStackTraceString CrashInfo("Computing output path"); // Output to a user requested destination? 
if (AtTopLevel && !isa(JA) && !isa(JA)) { - if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) - return C.addResultFile(FinalOutput->getValue(), &JA); + if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) { + SmallString<128> Suffixed(FinalOutput->getValue()); + + // Append offloading target suffix to output file + if (JA.getOffloadingDevice()) { + Suffixed += ".tgt-"; + Suffixed.append(BoundArch); + } + + return C.addResultFile(C.getArgs().MakeArgString(Suffixed.c_str()), &JA); + } } // For /P, preprocess to file named after BaseInput. @@ -1775,8 +2137,13 @@ assert(Suffix && "All types used for output should have a suffix."); std::string::size_type End = std::string::npos; - if (!types::appendSuffixForType(JA.getType())) + if (!types::appendSuffixForType(JA.getType())) { End = BaseName.rfind('.'); + // If this is an offloading target job that already contains the target + // suffix we need to skip both type and target suffix + if (JA.getOffloadingDevice() && !HasOffloadingTargetSuffixApended) + End = BaseName.substr(0, End).rfind('.'); + } SmallString<128> Suffixed(BaseName.substr(0, End)); if (MultipleArchs && BoundArch) { Suffixed += "-"; @@ -1790,6 +2157,13 @@ Suffixed += ".tmp"; Suffixed += '.'; Suffixed += Suffix; + + // Append offloading target suffix to output file + if (JA.getOffloadingDevice()) { + Suffixed += ".tgt-"; + Suffixed.append(BoundArch); + } + NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str()); } @@ -2023,12 +2397,30 @@ return Target; } -const ToolChain &Driver::getToolChain(const ArgList &Args, - StringRef DarwinArchName) const { - llvm::Triple Target = computeTargetTriple(DefaultTargetTriple, Args, - DarwinArchName); +const ToolChain & +Driver::getToolChain(const ArgList &Args, StringRef DarwinArchName, + const char *OffloadingTripleString) const { + + llvm::Triple Target; + ToolChain *TC; + bool IsOffloadingTargetToolchain = OffloadingTripleString != nullptr; + + // if a specific triple is passed, that means it 
was already parsed + // before while creating the actions for offloading targets, therefore we + // should + // be able to get the architecture directly from it. + + if (IsOffloadingTargetToolchain) { + Target = llvm::Triple(OffloadingTripleString); + assert(Target.getArch() != llvm::Triple::UnknownArch && + "Target unknown - impossible to infer toolchain!"); + + TC = ToolChainsOffloading[Target.str()]; + } else { + Target = computeTargetTriple(DefaultTargetTriple, Args, DarwinArchName); + TC = ToolChains[Target.str()]; + } - ToolChain *&TC = ToolChains[Target.str()]; if (!TC) { switch (Target.getOS()) { case llvm::Triple::CloudABI: @@ -2061,7 +2453,8 @@ if (Target.getArch() == llvm::Triple::hexagon) TC = new toolchains::Hexagon_TC(*this, Target, Args); else - TC = new toolchains::Linux(*this, Target, Args); + TC = new toolchains::Linux(*this, Target, Args, + IsOffloadingTargetToolchain); break; case llvm::Triple::NaCl: TC = new toolchains::NaCl_TC(*this, Target, Args); @@ -2112,24 +2505,41 @@ break; } if (Target.isOSBinFormatELF()) { - TC = new toolchains::Generic_ELF(*this, Target, Args); + TC = new toolchains::Generic_ELF(*this, Target, Args, + IsOffloadingTargetToolchain); break; } if (Target.isOSBinFormatMachO()) { TC = new toolchains::MachO(*this, Target, Args); break; } - TC = new toolchains::Generic_GCC(*this, Target, Args); + TC = new toolchains::Generic_GCC(*this, Target, Args, + IsOffloadingTargetToolchain); break; } } + + // Store the toolchain in the map in case it is requested later + if (IsOffloadingTargetToolchain) + ToolChainsOffloading[Target.str()] = TC; + else + ToolChains[Target.str()] = TC; + + // If this is not an OffloadingToolchain the information in TC needs to + // be consistent with the flag IsOffloadingTargetToolchain + assert((IsOffloadingTargetToolchain || + IsOffloadingTargetToolchain == TC->isOffloadingTargetToolchain()) && + "Unable to initialize toolchain for offloading!!!"); + return *TC; } bool 
Driver::ShouldUseClangCompiler(const JobAction &JA) const { // Check if user requested no clang, or clang doesn't understand this type (we - // only handle single inputs for now). - if (JA.size() != 1 || + // only handle single inputs for now except for offloading target compile + // phases). + if (!(JA.size() == 1 || (JA.size() == 2 && JA.getOffloadingDevice() && + isa(JA))) || !types::isAcceptedByClang((*JA.begin())->getType())) return false; Index: lib/Driver/InputInfo.h =================================================================== --- lib/Driver/InputInfo.h +++ lib/Driver/InputInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_DRIVER_INPUTINFO_H #define LLVM_CLANG_LIB_DRIVER_INPUTINFO_H +#include "clang/Driver/Action.h" #include "clang/Driver/Types.h" #include "llvm/Option/Arg.h" #include @@ -38,28 +39,40 @@ const llvm::opt::Arg *InputArg; } Data; Class Kind; - types::ID Type; const char *BaseInput; + // Action that originates this info + const Action *OrigAction; + + // True if the filename associated with this info has the offloading target + // suffix appended + bool HasOffloadingTargetSuffixApended; + public: InputInfo() {} - InputInfo(types::ID _Type, const char *_BaseInput) - : Kind(Nothing), Type(_Type), BaseInput(_BaseInput) { - } - InputInfo(const char *_Filename, types::ID _Type, const char *_BaseInput) - : Kind(Filename), Type(_Type), BaseInput(_BaseInput) { + InputInfo(const Action *_OrigAction, const char *_BaseInput, + bool _HasOffloadingTargetSuffixApended) + : Kind(Nothing), BaseInput(_BaseInput), OrigAction(_OrigAction), + HasOffloadingTargetSuffixApended(_HasOffloadingTargetSuffixApended) {} + InputInfo(const char *_Filename, const Action *_OrigAction, + const char *_BaseInput, bool _HasOffloadingTargetSuffixApended) + : Kind(Filename), BaseInput(_BaseInput), OrigAction(_OrigAction), + HasOffloadingTargetSuffixApended(_HasOffloadingTargetSuffixApended) { Data.Filename = _Filename; } - InputInfo(const llvm::opt::Arg *_InputArg, types::ID
_Type, - const char *_BaseInput) - : Kind(InputArg), Type(_Type), BaseInput(_BaseInput) { + InputInfo(const llvm::opt::Arg *_InputArg, const Action *_OrigAction, + const char *_BaseInput, bool _HasOffloadingTargetSuffixApended) + : Kind(InputArg), BaseInput(_BaseInput), OrigAction(_OrigAction), + HasOffloadingTargetSuffixApended(_HasOffloadingTargetSuffixApended) { Data.InputArg = _InputArg; } bool isNothing() const { return Kind == Nothing; } bool isFilename() const { return Kind == Filename; } bool isInputArg() const { return Kind == InputArg; } - types::ID getType() const { return Type; } + types::ID getType() const { + return OrigAction ? OrigAction->getType() : types::TY_Nothing; + } const char *getBaseInput() const { return BaseInput; } const char *getFilename() const { @@ -81,6 +94,16 @@ else return "(nothing)"; } + + /// getOriginalAction - Return the action that produced this input info + const Action *getOriginalAction() const { return OrigAction; } + + /// hasOffloadingTargetSuffixApended - Return true if the target suffix is + /// appended to + /// this input filename + bool hasOffloadingTargetSuffixApended() const { + return HasOffloadingTargetSuffixApended; + } }; } // end namespace driver Index: lib/Driver/ToolChain.cpp =================================================================== --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -60,9 +60,10 @@ } ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, - const ArgList &Args) + const ArgList &Args, bool IsOffloadingTargetToolchain) : D(D), Triple(T), Args(Args), CachedRTTIArg(GetRTTIArgument(Args)), - CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)) { + CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)), + IsOffloadingTargetToolchain(IsOffloadingTargetToolchain) { if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) if (!isThreadModelSupported(A->getValue())) D.Diag(diag::err_drv_invalid_thread_model_for_target) @@ -151,6 +152,7 @@ case
Action::InputClass: case Action::BindArchClass: + case Action::BindTargetClass: case Action::LipoJobClass: case Action::DsymutilJobClass: case Action::VerifyDebugInfoJobClass: @@ -169,6 +171,25 @@ llvm_unreachable("Invalid tool kind."); } +llvm::opt::DerivedArgList * +ToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, + const char *BoundArch, bool isOffloadingTarget, + bool &isSuccess) const { + // In the event this toolchain refers to an offloading target, a translation + // from host to target arguments should be implemented. This can be used, + // among other things, to force the resulting image to be a shared library + // that can be loaded by the offloading runtime libraries. + if (isOffloadingTarget) { + isSuccess = false; + llvm_unreachable( + "Toolchain not prepared to translate offloading arguments."); + return nullptr; + } + + isSuccess = true; + return nullptr; +} + Tool *ToolChain::SelectTool(const JobAction &JA) const { if (getDriver().ShouldUseClangCompiler(JA)) return getClang(); Index: lib/Driver/ToolChains.h =================================================================== --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -101,7 +101,7 @@ public: GCCInstallationDetector() : IsValid(false) {} void init(const Driver &D, const llvm::Triple &TargetTriple, - const llvm::opt::ArgList &Args); + const llvm::opt::ArgList &Args, bool isOffloadingTarget); /// \brief Check whether we detected a valid GCC install.
bool isValid() const { return IsValid; } @@ -144,6 +144,7 @@ const llvm::opt::ArgList &Args, const std::string &LibDir, StringRef CandidateTriple, + bool isOffloadingTarget, bool NeedsBiarchSuffix = false); }; @@ -151,7 +152,8 @@ public: Generic_GCC(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args); + const llvm::opt::ArgList &Args, + bool IsOffloadingTargetToolchain = false); ~Generic_GCC() override; void printVerboseInfo(raw_ostream &OS) const override; @@ -162,6 +164,10 @@ bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + virtual llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + bool isOffloadingTarget, bool &isSuccess) const override; + protected: Tool *getTool(Action::ActionClass AC) const override; Tool *buildAssembler() const override; @@ -258,8 +264,8 @@ bool HasNativeLLVMSupport() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + bool isOffloadingTarget, bool &isSuccess) const override; bool IsBlocksDefault() const override { // Always allow blocks on Apple; users interested in versioning are @@ -443,8 +449,8 @@ bool isCrossCompiling() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + bool isOffloadingTarget, bool &isSuccess) const override; ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override; bool hasBlocksRuntime() const override; @@ -514,8 +520,9 @@ virtual void anchor(); public: Generic_ELF(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args) - : Generic_GCC(D, Triple, Args) {} + const llvm::opt::ArgList &Args, + bool 
IsOffloadingTargetToolchain = false) + : Generic_GCC(D, Triple, Args, IsOffloadingTargetToolchain) {} void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; @@ -668,7 +675,8 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { public: Linux(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args); + const llvm::opt::ArgList &Args, + bool IsOffloadingTargetToolchain = false); bool HasNativeLLVMSupport() const override; Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -658,7 +658,20 @@ } DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, - const char *BoundArch) const { + const char *BoundArch, + bool isOffloadingTarget, + bool &isSuccess) const { + + if (isOffloadingTarget) { + // This translation is not dealing with offloading targets yet. + isSuccess = false; + llvm_unreachable( + "Toolchain not prepared to translate offloading arguments."); + return nullptr; + } + + isSuccess = true; + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); @@ -889,11 +902,22 @@ AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true); } - DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args, - const char *BoundArch) const { + const char *BoundArch, + bool isOffloadingTarget, + bool &isSuccess) const { + + if (isOffloadingTarget) { + // This translation is not dealing with offloading targets yet. + isSuccess = false; + llvm_unreachable( + "Toolchain not prepared to translate offloading arguments."); + return nullptr; + } + // First get the generic Apple args, before moving onto Darwin-specific ones. 
- DerivedArgList *DAL = MachO::TranslateArgs(Args, BoundArch); + DerivedArgList *DAL = + MachO::TranslateArgs(Args, BoundArch, isOffloadingTarget, isSuccess); const OptTable &Opts = getDriver().getOpts(); // If no architecture is bound, none of the translations here are relevant. @@ -1192,9 +1216,9 @@ /// should instead pull the target out of the driver. This is currently /// necessary because the driver doesn't store the final version of the target /// triple. -void -Generic_GCC::GCCInstallationDetector::init( - const Driver &D, const llvm::Triple &TargetTriple, const ArgList &Args) { +void Generic_GCC::GCCInstallationDetector::init( + const Driver &D, const llvm::Triple &TargetTriple, const ArgList &Args, + bool isOffloadingTarget) { llvm::Triple BiarchVariantTriple = TargetTriple.isArch32Bit() ? TargetTriple.get64BitArchVariant() : TargetTriple.get32BitArchVariant(); @@ -1244,7 +1268,7 @@ continue; for (unsigned k = 0, ke = CandidateTripleAliases.size(); k < ke; ++k) ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, - CandidateTripleAliases[k]); + CandidateTripleAliases[k], isOffloadingTarget); } for (unsigned j = 0, je = CandidateBiarchLibDirs.size(); j < je; ++j) { const std::string LibDir = Prefixes[i] + CandidateBiarchLibDirs[j].str(); @@ -1254,7 +1278,8 @@ ++k) ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, CandidateBiarchTripleAliases[k], - /*NeedsBiarchSuffix=*/ true); + isOffloadingTarget, + /*NeedsBiarchSuffix=*/true); } } } @@ -1943,7 +1968,7 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple( const llvm::Triple &TargetTriple, const ArgList &Args, const std::string &LibDir, StringRef CandidateTriple, - bool NeedsBiarchSuffix) { + bool isOffloadingTarget, bool NeedsBiarchSuffix) { llvm::Triple::ArchType TargetArch = TargetTriple.getArch(); // There are various different suffixes involving the triple we // check for. 
We also record what is necessary to walk from each back @@ -2016,9 +2041,10 @@ } } -Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple& Triple, - const ArgList &Args) - : ToolChain(D, Triple, Args), GCCInstallation() { +Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args, bool IsOffloadingTargetToolchain) + : ToolChain(D, Triple, Args, IsOffloadingTargetToolchain), + GCCInstallation() { getProgramPaths().push_back(getDriver().getInstalledDir()); if (getDriver().getInstalledDir() != getDriver().Dir) getProgramPaths().push_back(getDriver().Dir); @@ -2071,6 +2097,53 @@ return false; } +llvm::opt::DerivedArgList * +Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, + const char *BoundArch, bool isOffloadingTarget, + bool &isSuccess) const { + + isSuccess = true; + + // If not a target tool chain we can use the arguments directly without + // translation + if (!isOffloadingTarget) + return 0; + + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); + + // Make sure we always generate a shared library for an offloading target + // regardless of the commands the user passed to the host. + DAL->AddFlagArg(0, Opts.getOption(options::OPT_shared)); + DAL->AddFlagArg(0, Opts.getOption(options::OPT_fPIC)); + + // Filter out the arguments we don't want to pass to the offloading toolchain + // as they can interfere with the creation of a shared library.
+ for (ArgList::const_iterator it = Args.begin(), ie = Args.end(); it != ie; + ++it) { + Arg *A = *it; + + switch ((options::ID)A->getOption().getID()) { + default: + DAL->append(A); + break; + case options::OPT_shared: + case options::OPT_static: + case options::OPT_fPIC: + case options::OPT_fno_PIC: + case options::OPT_fpic: + case options::OPT_fno_pic: + case options::OPT_fPIE: + case options::OPT_fno_PIE: + case options::OPT_fpie: + case options::OPT_fno_pie: + break; + } + } + + return DAL; +} + bool Generic_GCC::IsIntegratedAssemblerDefault() const { return getTriple().getArch() == llvm::Triple::x86 || getTriple().getArch() == llvm::Triple::x86_64 || @@ -3142,9 +3215,10 @@ return Triple.isArch32Bit() ? "lib" : "lib64"; } -Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : Generic_ELF(D, Triple, Args) { - GCCInstallation.init(D, Triple, Args); +Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, + bool IsOffloadingTargetToolchain) + : Generic_ELF(D, Triple, Args, IsOffloadingTargetToolchain) { + GCCInstallation.init(D, Triple, Args, IsOffloadingTargetToolchain); Multilibs = GCCInstallation.getMultilibs(); llvm::Triple::ArchType Arch = Triple.getArch(); std::string SysRoot = computeSysRoot(); Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -191,9 +191,9 @@ } } -static void AddLinkerInputs(const ToolChain &TC, - const InputInfoList &Inputs, const ArgList &Args, - ArgStringList &CmdArgs) { +static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, + const ArgList &Args, ArgStringList &CmdArgs, + bool isOffloadingTargetLinkage = false) { const Driver &D = TC.getDriver(); // Add extra linker input arguments which are not treated as inputs @@ -211,8 +211,15 @@ << TC.getTripleString(); } - // Add filenames immediately. 
+ // Add filenames immediately except if this is a host linker phase and the + // object files (inputs) are produced by a target assembler. Those are + // handled by a linker script. if (II.isFilename()) { + + if (!isOffloadingTargetLinkage && + II.getOriginalAction()->getOffloadingDevice()) + continue; + CmdArgs.push_back(II.getFilename()); continue; } @@ -240,6 +247,101 @@ addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); } +static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, + const JobAction &JA, const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, ArgStringList &CmdArgs) { + + const Arg *OpenMPArg = Args.getLastArg(options::OPT_fopenmp_EQ); + + if (!OpenMPArg) + return; + if (!(StringRef(OpenMPArg->getValue()) == "libiomp5")) + return; + + // This is the linkage for the target + if (JA.getOffloadingDevice()) + return; + + // Add OpenMP target arguments by employing a linker script + + // FIXME: check if the toolchain supports a linker script + + // Gather the pairs (target triple)-(file name) + + std::vector> Targets; + + for (InputInfoList::const_iterator it = Inputs.begin(), ie = Inputs.end(); + it != ie; ++it) { + const InputInfo &II = *it; + + if (const char *tname = II.getOriginalAction()->getOffloadingDevice()) { + Targets.push_back( + std::pair(tname, II.getFilename())); + } + } + + if (Targets.empty()) + return; + + // Create temporary linker script + StringRef Name = llvm::sys::path::filename(Output.getFilename()); + std::pair Split = Name.rsplit('.'); + std::string TmpName = C.getDriver().GetTemporaryPath(Split.first, "lk"); + const char *LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + + // Open script file in order to write contents + std::error_code EC; + llvm::raw_fd_ostream lksf(LKS, EC, llvm::sys::fs::F_None); + + if (EC) { + // FIXME: maybe use a file open failure message here + C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); + return; + } + + // Add commands to
embed target binaries. We ensure that each section and + // image is 16-byte aligned. This is not mandatory, but increases the + // likelihood of data being aligned with a cache block in several main host + // machines. + lksf << "TARGET(binary)\n"; + for (unsigned i = 0; i < Targets.size(); ++i) + lksf << "INPUT(" << Targets[i].second << ")\n"; + + lksf << "SECTIONS\n"; + lksf << "{\n"; + lksf << " .openmptgt :\n"; + lksf << " ALIGN(0x10)\n"; + lksf << " {\n"; + + for (unsigned i = 0; i < Targets.size(); ++i) { + std::string tgt_name(Targets[i].first); + std::replace(tgt_name.begin(), tgt_name.end(), '-', '_'); + lksf << " . = ALIGN(0x10);\n"; + lksf << " PROVIDE_HIDDEN(__omptgt__img_start_" << tgt_name << " = .);\n"; + lksf << " " << Targets[i].second << "\n"; + lksf << " PROVIDE_HIDDEN(__omptgt__img_end_" << tgt_name << " = .);\n"; + } + + lksf << " }\n"; + // Add commands to define host entries begin and end + lksf << " .openmptgt_host_entries :\n"; + lksf << " ALIGN(0x10)\n"; + lksf << " SUBALIGN(0x01)\n"; + lksf << " {\n"; + lksf << " PROVIDE_HIDDEN(__omptgt__host_entries_begin = .);\n"; + lksf << " *(.openmptgt_host_entries)\n"; + lksf << " PROVIDE_HIDDEN(__omptgt__host_entries_end = .);\n"; + lksf << " }\n"; + lksf << "}\n"; + lksf << "INSERT BEFORE .data\n"; + + lksf.close(); + + CmdArgs.push_back("-T"); + CmdArgs.push_back(LKS); +} + /// \brief Determine whether Objective-C automated reference counting is /// enabled. static bool isObjCAutoRefCount(const ArgList &Args) { @@ -2607,7 +2709,10 @@ getToolChain().getTriple().isWindowsCygwinEnvironment(); bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); + assert(((Inputs.size() == 1) || + (Inputs.size() == 2 && JA.getOffloadingDevice() && + isa(JA))) && + "Expecting 1 or 2 inputs (offloading host file)"); const InputInfo &Input = Inputs[0]; // Invoke ourselves in -cc1 mode.
@@ -2615,6 +2720,47 @@ // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1"); + // Add the OpenMP arguments, including the ones related to offloading. + if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ)) { + if (StringRef(A->getValue()) == "libiomp5") { + CmdArgs.push_back("-fopenmp=libiomp5"); + + // Pass the targets we are generating code for + if (Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ)) { + + ArrayRef Vals = Tgts->getValues(); + + if (!Vals.empty()) { + std::string S("-omptargets="); + S += Vals[0]; + for (unsigned i = 1; i < Vals.size(); ++i) { + S += ','; + S += Vals[i]; + } + CmdArgs.push_back(Args.MakeArgString(S)); + } + } + + // Inform the frontend we are generating code for a target. + if (JA.getOffloadingDevice()) { + CmdArgs.push_back("-omp-target-mode"); + // For compile jobs, the resulting host IR is also passed, so that the + // elements that need to be emitted for the target are read + // from the metadata present in it. + if (isa(JA)) { + CmdArgs.push_back("-omp-host-output-file-path"); + CmdArgs.push_back(Args.MakeArgString(Inputs[1].getFilename())); + } + } + + // The frontend components need to know the path of the original source + // file given that the target functions use that to generate unique + // names. + CmdArgs.push_back("-omp-main-file-path"); + CmdArgs.push_back(Args.MakeArgString(Input.getBaseInput())); + } + } + // Add the "effective" target triple.
CmdArgs.push_back("-triple"); std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args); @@ -4679,14 +4825,12 @@ assert(Output.isNothing() && "Invalid output."); } - for (const auto &II : Inputs) { - addDashXForInput(Args, II, CmdArgs); + addDashXForInput(Args, Input, CmdArgs); - if (II.isFilename()) - CmdArgs.push_back(II.getFilename()); - else - II.getInputArg().renderAsInput(Args, CmdArgs); - } + if (Input.isFilename()) + CmdArgs.push_back(Input.getFilename()); + else + Input.getInputArg().renderAsInput(Args, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_undef); @@ -5230,6 +5374,8 @@ const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; + bool isLinkJob = JA.getKind() == Action::LinkJobClass; + for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { // Don't forward any -g arguments to assembly steps. @@ -5309,9 +5455,13 @@ CmdArgs.push_back(types::getTypeName(II.getType())); } - if (II.isFilename()) + if (II.isFilename()) { + bool isTargetLinkage = isLinkJob && JA.getOffloadingDevice(); + if (!isTargetLinkage && II.getOriginalAction()->getOffloadingDevice()) + continue; + CmdArgs.push_back(II.getFilename()); - else { + } else { const Arg &A = II.getInputArg(); // Reverse translate some rewritten options. 
@@ -5334,6 +5484,9 @@ } else GCCName = "gcc"; + if (isLinkJob) + AddOpenMPLinkerScript(getToolChain(), C, JA, Output, Inputs, Args, CmdArgs); + const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs)); @@ -6301,7 +6454,8 @@ break; } - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, + JA.getOffloadingDevice()); // Build the input file for -filelist (list of linker input files) in case we // need it later for (const auto &II : Inputs) { @@ -6364,6 +6518,8 @@ Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_F); + AddOpenMPLinkerScript(getToolChain(), C, JA, Output, Inputs, Args, CmdArgs); + // -iframework should be forwarded as -F. for (auto it = Args.filtered_begin(options::OPT_iframework), ie = Args.filtered_end(); it != ie; ++it) @@ -7976,7 +8132,7 @@ CmdArgs.push_back("--no-demangle"); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); + AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA.getOffloadingDevice()); // The profile runtime also needs access to system libraries. 
addProfileRT(getToolChain(), Args, CmdArgs); @@ -8025,6 +8181,9 @@ break; case LibIOMP5: CmdArgs.push_back("-liomp5"); + if (Args.hasArg(options::OPT_omptargets_EQ) && + !JA.getOffloadingDevice()) + CmdArgs.push_back("-lomptarget"); break; case LibUnknown: break; @@ -8059,6 +8218,8 @@ } } + AddOpenMPLinkerScript(getToolChain(), C, JA, Output, Inputs, Args, CmdArgs); + C.addCommand( llvm::make_unique(JA, *this, ToolChain.Linker.c_str(), CmdArgs)); } @@ -8075,8 +8236,8 @@ const char *LinkingOutput) const { const toolchains::NaCl_TC& ToolChain = static_cast(getToolChain()); - InputInfo NaClMacros(ToolChain.GetNaClArmMacrosPath(), types::TY_PP_Asm, - "nacl-arm-macros.s"); + InputInfo NaClMacros(ToolChain.GetNaClArmMacrosPath(), &JA, + "nacl-arm-macros.s", false); InputInfoList NewInputs; NewInputs.push_back(NaClMacros); NewInputs.append(Inputs.begin(), Inputs.end()); Index: test/OpenMP/target_driver.c =================================================================== --- /dev/null +++ test/OpenMP/target_driver.c @@ -0,0 +1,205 @@ +/// +/// Perform several driver tests for OpenMP offloading +/// + +/// ########################################################################### + +/// Check whether an invalid OpenMP target is specified: +// RUN: %clang -### -fopenmp=libiomp5 -omptargets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' + +/// ########################################################################### + +/// Check warning for empty -omptargets +// RUN: %clang -### -fopenmp=libiomp5 -omptargets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-omptargets=' + +/// ########################################################################### + +/// Check whether we are using a target whose toolchain was not prepared to +/// support offloading - e.g.
x86_64-apple-darwin: +// RUN: %clang -### -fopenmp=libiomp5 -omptargets=x86_64-apple-darwin %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-SUPPORT %s +// CHK-NO-SUPPORT: error: Toolchain for target 'x86_64-apple-darwin' is not supporting OpenMP offloading. + +/// ########################################################################### + +/// Check the phases graph when using a single target, different from the host. +/// Each target phase must be bound to a target and linked into a shared +/// library. The host compiler phase result is used in the compiler phase of the +/// target +// RUN: %clang -ccc-print-phases -fopenmp=libiomp5 -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES %s + +// Host linking +// CHK-PHASES-DAG: {{.*}}: linker, {[[A0:[0-9]+]], [[BL1:[0-9]+]]}, image + +// Target 1 library generation +// CHK-PHASES-DAG: [[BL1]]: bind-target, {[[L1:[0-9]+]]}, shared-object +// CHK-PHASES-DAG: [[L1]]: linker, {[[BA1:[0-9]+]]}, shared-object +// CHK-PHASES-DAG: [[BA1]]: bind-target, {[[A1:[0-9]+]]}, object +// CHK-PHASES-DAG: [[A1]]: assembler, {[[BB1:[0-9]+]]}, object +// CHK-PHASES-DAG: [[BB1]]: bind-target, {[[B1:[0-9]+]]}, assembler +// CHK-PHASES-DAG: [[B1]]: backend, {[[BC1:[0-9]+]]}, assembler +// CHK-PHASES-DAG: [[BC1]]: bind-target, {[[C1:[0-9]+]]}, ir +// CHK-PHASES-DAG: [[C1]]: compiler, {[[BP1:[0-9]+]], [[BC01:[0-9]+]]}, ir +// CHK-PHASES-DAG: [[BC01]]: bind-target, {[[C0:[0-9]+]]}, ir +// CHK-PHASES-DAG: [[BP1]]: bind-target, {[[P1:[0-9]+]]}, cpp-output +// CHK-PHASES-DAG: [[P1]]: preprocessor, {[[I:[0-9]+]]}, cpp-output + +// Host objects generation: +// CHK-PHASES-DAG: [[A0]]: assembler, {[[B0:[0-9]+]]}, object +// CHK-PHASES-DAG: [[B0]]: backend, {[[C0]]}, assembler +// CHK-PHASES-DAG: [[C0]]: compiler, {[[P0:[0-9]+]]}, ir +// CHK-PHASES-DAG: [[P0]]: preprocessor, {[[I]]}, cpp-output + +// Single input file: +// CHK-PHASES-DAG: [[I]]: input, {{.*}}, c + +///
########################################################################### + +/// Check the phases graph when using two targets, and one of them is the same +/// as the host. We also add a library to make sure they are not treated as +/// inputs. +/// Each target phase must be bound to a target and linked into a shared +/// library. The host compiler phase result is used in the compiler phase of the +/// target +// RUN: %clang -ccc-print-phases -lm -fopenmp=libiomp5 -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES2 %s + +// Host linking +// CHK-PHASES2-DAG: {{.*}}: linker, {[[L:[0-9]+]], [[A0:[0-9]+]], [[BL1:[0-9]+]], [[BL2:[0-9]+]]}, image + +// Target 2 library generation +// CHK-PHASES2-DAG: [[BL2]]: bind-target, {[[L2:[0-9]+]]}, shared-object +// CHK-PHASES2-DAG: [[L2]]: linker, {[[L]], [[BA2:[0-9]+]]}, shared-object +// CHK-PHASES2-DAG: [[BA2]]: bind-target, {[[A2:[0-9]+]]}, object +// CHK-PHASES2-DAG: [[A2]]: assembler, {[[BB2:[0-9]+]]}, object +// CHK-PHASES2-DAG: [[BB2]]: bind-target, {[[B2:[0-9]+]]}, assembler +// CHK-PHASES2-DAG: [[B2]]: backend, {[[BC2:[0-9]+]]}, assembler +// CHK-PHASES2-DAG: [[BC2]]: bind-target, {[[C2:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[C2]]: compiler, {[[BP2:[0-9]+]], [[BC02:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[BC02]]: bind-target, {[[C0:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[BP2]]: bind-target, {[[P2:[0-9]+]]}, cpp-output +// CHK-PHASES2-DAG: [[P2]]: preprocessor, {[[I:[0-9]+]]}, cpp-output + +// Target 1 library generation +// CHK-PHASES2-DAG: [[BL1]]: bind-target, {[[L1:[0-9]+]]}, shared-object +// CHK-PHASES2-DAG: [[L1]]: linker, {[[L]], [[BA1:[0-9]+]]}, shared-object +// CHK-PHASES2-DAG: [[BA1]]: bind-target, {[[A1:[0-9]+]]}, object +// CHK-PHASES2-DAG: [[A1]]: assembler, {[[BB1:[0-9]+]]}, object +// CHK-PHASES2-DAG: [[BB1]]: bind-target, {[[B1:[0-9]+]]}, assembler +// CHK-PHASES2-DAG: [[B1]]: backend, {[[BC1:[0-9]+]]}, assembler
+// CHK-PHASES2-DAG: [[BC1]]: bind-target, {[[C1:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[C1]]: compiler, {[[BP1:[0-9]+]], [[BC01:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[BC01]]: bind-target, {[[C0]]}, ir +// CHK-PHASES2-DAG: [[BP1]]: bind-target, {[[P1:[0-9]+]]}, cpp-output +// CHK-PHASES2-DAG: [[P1]]: preprocessor, {[[I:[0-9]+]]}, cpp-output + +// Host objects generation: +// CHK-PHASES2-DAG: [[A0]]: assembler, {[[B0:[0-9]+]]}, object +// CHK-PHASES2-DAG: [[B0]]: backend, {[[C0]]}, assembler +// CHK-PHASES2-DAG: [[C0]]: compiler, {[[P0:[0-9]+]]}, ir +// CHK-PHASES2-DAG: [[P0]]: preprocessor, {[[I]]}, cpp-output + +// Single input file: +// CHK-PHASES2-DAG: [[I]]: input, {{.*}}, c +// CHK-PHASES2-DAG: [[L]]: input, "m", object + +/// ########################################################################### + +/// Check the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the use of integrated preprocessor as +/// host IR is shared by all the compile phases. There are several offloading +/// specific commands: +/// -omp-target-mode: will tell the frontend that it will generate code for a +/// target. +/// -omp-main-file-path: the original source file that relates with that +/// frontend run, will be used to generate unique variable names (IDs) that are +/// the same for all targets. +/// -omp-host-output-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declarations +/// really need to be emitted.
+/// +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS %s +// + +// Final linking - host (ppc64le) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-o" "a.out" {{.*}}"[[HSTOBJ:.+]].o" "-liomp5" "-lomptarget" {{.*}}"-T" "[[LKSCRIPT:.+]].lk" + +// Target 2 commands (x86_64) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[T2LIB:.+]].so" {{.*}}"[[T2OBJ:.+]].o" {{.*}}"-liomp5" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-main-file-path" "[[SRC:.+]].c" {{.*}}"-triple" "x86_64-pc-linux-gnu" {{.*}}"-emit-obj" {{.*}}"-o" "[[T2OBJ]].o" "-x" "ir" "[[T2BC:.+]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-host-output-file-path" "[[HSTBC:.+]].bc" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "x86_64-pc-linux-gnu" {{.*}}"-emit-llvm-bc" {{.*}}"-o" "[[T2BC]].bc" "-x" "cpp-output" "[[T2PP:.+]].i" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "x86_64-pc-linux-gnu" {{.*}}"-E" {{.*}}"-o" "[[T2PP]].i" "-x" "c" "[[SRC]].c" + +// Target 1 commands (ppc64le) +// CHK-COMMANDS-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-shared" {{.*}}"-o" "[[T1LIB:.+]].so" {{.*}}"[[T1OBJ:.+]].o" {{.*}}"-liomp5" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "powerpc64le-ibm-linux-gnu" {{.*}}"-emit-obj" {{.*}}"-o" "[[T1OBJ]].o" "-x" "ir" "[[T1BC:.+]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" 
"-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-host-output-file-path" "[[HSTBC]].bc" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "powerpc64le-ibm-linux-gnu" {{.*}}"-emit-llvm-bc" {{.*}}"-o" "[[T1BC]].bc" "-x" "cpp-output" "[[T1PP:.+]].i" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-target-mode" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "powerpc64le-ibm-linux-gnu" {{.*}}"-E" {{.*}}"-o" "[[T1PP]].i" "-x" "c" "[[SRC]].c" + +// Host object generation +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "powerpc64le--linux" {{.*}}"-emit-obj" {{.*}}"-o" "[[HSTOBJ]].o" "-x" "ir" "[[HSTBC]].bc" +// CHK-COMMANDS-DAG: clang{{.*}}" "-cc1" {{.*}}"-fopenmp=libiomp5" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" "-omp-main-file-path" "[[SRC]].c" {{.*}}"-triple" "powerpc64le--linux" {{.*}}"-emit-llvm-bc" {{.*}}"-o" "[[HSTBC]].bc" "-x" "c" "[[SRC]].c" + +/// ########################################################################### + +/// Check the automatic detection of target files. The driver will automatically +/// detect if a target file is in the same path as the host file and include +/// that in the compilation. The user can choose to have the compiler generating +/// a warning if such file is included. +/// Create dummy host and target files. 
+// RUN: echo ' ' > %t.i +// RUN: echo ' ' > %t.i.tgt-x86_64-pc-linux-gnu +// RUN: echo ' ' > %t.bc +// RUN: echo ' ' > %t.bc.tgt-x86_64-pc-linux-gnu +// RUN: echo ' ' > %t.s +// RUN: echo ' ' > %t.s.tgt-x86_64-pc-linux-gnu +// RUN: echo ' ' > %t.o +// RUN: echo ' ' > %t.o.tgt-x86_64-pc-linux-gnu + +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.i -c -emit-llvm -Womp-implicit-target-files 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-WARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.i -c -emit-llvm 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-NOTWARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.bc -S -Womp-implicit-target-files 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-WARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.bc -S 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-NOTWARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.s -c -Womp-implicit-target-files 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-WARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.s -c 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-NOTWARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.o -Womp-implicit-target-files 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-WARN-IMPLICIT %s +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET-NOTWARN-IMPLICIT %s + +// CHK-TARGET-WARN-IMPLICIT: warning: OpenMP target file '{{.*}}.tgt-x86_64-pc-linux-gnu' is being implicitly used in the 'x86_64-pc-linux-gnu' toolchain. 
+// CHK-TARGET-NOTWARN-IMPLICIT-NOT: warning: OpenMP target file '{{.*}}.tgt-x86_64-pc-linux-gnu' is being implicitly used in the 'x86_64-pc-linux-gnu' toolchain. + +/// ########################################################################### + +/// Check separate compilation feature - the ability of the driver to assign +/// host and target files to different phases. Only the host files are passed +/// to the driver. The driver will detect all the target files +// RUN: echo ' ' > %t.1.s +// RUN: echo ' ' > %t.1.s.tgt-x86_64-pc-linux-gnu +// RUN: echo ' ' > %t.2.o +// RUN: %clang -### -fopenmp=libiomp5 -target powerpc64-linux -omptargets=x86_64-pc-linux-gnu %t.1.s %t.2.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-SEP-COMPILATION %s + +// Final linking +// CHK-SEP-COMPILATION-DAG: ld" {{.*}}"-m" "elf64ppc" {{.*}}"[[HOSTOBJ:.+]].o" "[[HOSTOBJ2:.+]].o" "-liomp5" "-lomptarget" {{.*}}"-T" "[[LKS:.+]].lk" + +// Target image generation +// CHK-SEP-COMPILATION-DAG: ld" {{.*}}"-m" "elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[TGTSO:.+]].so" {{.*}}"[[TGTOBJ:.+]].o" {{.*}}"-liomp5" +// CHK-SEP-COMPILATION-DAG: clang{{.*}}" "-cc1as" {{.*}}"-triple" "x86_64-pc-linux-gnu" {{.*}}"-o" "[[TGTOBJ]].o" "[[TGTASM:.+]].s.tgt-x86_64-pc-linux-gnu" + +// Host object generation +// CHK-SEP-COMPILATION-DAG: clang{{.*}}" "-cc1as" {{.*}}"-triple" "powerpc64--linux" {{.*}}"-o" "[[HOSTOBJ]].o" "[[HOSTASM:.+]].s"