Index: include/clang/Driver/Action.h =================================================================== --- include/clang/Driver/Action.h +++ include/clang/Driver/Action.h @@ -157,9 +157,12 @@ /// Return a string containing the offload kind of the action. std::string getOffloadingKindPrefix() const; /// Return a string that can be used as prefix in order to generate unique - /// files for each offloading kind. - std::string - getOffloadingFileNamePrefix(llvm::StringRef NormalizedTriple) const; + /// files for each offloading kind. By default, no prefix is used for + /// non-device kinds, except if \a CreatePrefixForHost is set. + static std::string + GetOffloadingFileNamePrefix(OffloadKind Kind, + llvm::StringRef NormalizedTriple, + bool CreatePrefixForHost = false); /// Return a string containing a offload kind name. static StringRef GetOffloadKindName(OffloadKind Kind); Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -12,6 +12,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/Action.h" #include "clang/Driver/Phases.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" @@ -42,7 +43,6 @@ namespace driver { - class Action; class Command; class Compilation; class InputInfo; @@ -417,14 +417,14 @@ /// BuildJobsForAction - Construct the jobs to perform for the action \p A and /// return an InputInfo for the result of running \p A. Will only construct - /// jobs for a given (Action, ToolChain, BoundArch) tuple once. + /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once. InputInfo BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const; + Action::OffloadKind TargetDeviceOffloadKind) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; @@ -495,7 +495,7 @@ bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const; + Action::OffloadKind TargetDeviceOffloadKind) const; public: /// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and Index: include/clang/Driver/Tool.h =================================================================== --- include/clang/Driver/Tool.h +++ include/clang/Driver/Tool.h @@ -129,6 +129,20 @@ const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const = 0; + /// Construct jobs to perform the action \p JA, writing to the \p Outputs and + /// with \p Inputs, and add the jobs to \p C. The default implementation + /// assumes a single output and is expected to be overloaded for the tools + /// that support multiple inputs. + /// + /// \param TCArgs The argument list for this toolchain, with any + /// tool chain specific translations applied. + /// \param LinkingOutput If this output will eventually feed the + /// linker, then this is the final output name of the linked image. + virtual void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const; }; } // end namespace driver Index: lib/Driver/Action.cpp =================================================================== --- lib/Driver/Action.cpp +++ lib/Driver/Action.cpp @@ -115,15 +115,18 @@ return Res; } +/// Return a string that can be used as prefix in order to generate unique files +/// for each offloading kind. std::string -Action::getOffloadingFileNamePrefix(llvm::StringRef NormalizedTriple) const { - // A file prefix is only generated for device actions and consists of the - // offload kind and triple. - if (!OffloadingDeviceKind) +Action::GetOffloadingFileNamePrefix(OffloadKind Kind, + llvm::StringRef NormalizedTriple, + bool CreatePrefixForHost) { + // Don't generate prefix for host actions unless required. + if (!CreatePrefixForHost && (Kind == OFK_None || Kind == OFK_Host)) return ""; std::string Res("-"); - Res += getOffloadingKindPrefix(); + Res += GetOffloadKindName(Kind); Res += "-"; Res += NormalizedTriple; return Res; Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -2593,7 +2593,7 @@ /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, /*LinkingOutput*/ LinkingOutput, CachedResults, - /*BuildForOffloadDevice*/ false); + /*TargetDeviceOffloadKind*/ Action::OFK_None); } // If the user passed -Qunused-arguments or there were errors, don't warn @@ -2926,31 +2926,38 @@ }; } -InputInfo Driver::BuildJobsForAction( - Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, - bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const { - // The bound arch is not necessarily represented in the toolchain's triple -- - // for example, armv7 and armv7s both map to the same triple -- so we need - // both in our map. Also, we need to add the offloading device kind, as the - // same tool chain can be used for host and device for some programming - // models, e.g. OpenMP. +/// Return a string that uniquely identifies the result of a job. The bound arch +/// is not necessarily represented in the toolchain's triple -- for example, +/// armv7 and armv7s both map to the same triple -- so we need both in our map. +/// Also, we need to add the offloading device kind, as the same tool chain can +/// be used for host and device for some programming models, e.g. OpenMP. +static std::string GetTriplePlusArchString(const ToolChain *TC, + StringRef BoundArch, + Action::OffloadKind OffloadKind) { std::string TriplePlusArch = TC->getTriple().normalize(); if (!BoundArch.empty()) { TriplePlusArch += "-"; TriplePlusArch += BoundArch; } TriplePlusArch += "-"; - TriplePlusArch += A->getOffloadingKindPrefix(); - std::pair ActionTC = {A, TriplePlusArch}; + TriplePlusArch += Action::GetOffloadKindName(OffloadKind); + return TriplePlusArch; +} + +InputInfo Driver::BuildJobsForAction( + Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, + std::map, InputInfo> &CachedResults, + Action::OffloadKind TargetDeviceOffloadKind) const { + std::pair ActionTC = { + A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; auto CachedResult = CachedResults.find(ActionTC); if (CachedResult != CachedResults.end()) { return CachedResult->second; } InputInfo Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, BuildForOffloadDevice); + CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } @@ -2959,10 +2966,11 @@ Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const { + Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList OffloadDependencesInputInfo; + bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None; if (const OffloadAction *OA = dyn_cast(A)) { // The offload action is expected to be used in four different situations. // @@ -2995,7 +3003,7 @@ DevA = BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, - CachedResults, /*BuildForOffloadDevice=*/true); + CachedResults, DepA->getOffloadingDeviceKind()); }); return DevA; } @@ -3005,16 +3013,15 @@ // generate the host dependences and override the action with the device // dependence. The dependences can't therefore be a top-level action. OA->doOnEachDependence( - /*IsHostDependence=*/BuildForOffloadDevice, + /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.push_back(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, - /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != - Action::OFK_None)); + DepA->getOffloadingDeviceKind())); }); - A = BuildForOffloadDevice + A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); } @@ -3044,7 +3051,7 @@ return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, - BuildForOffloadDevice); + TargetDeviceOffloadKind); } @@ -3063,13 +3070,12 @@ // need to build jobs for host/device-side inputs it may have held. for (const auto *OA : CollapsedOffloadActions) cast(OA)->doOnEachDependence( - /*IsHostDependence=*/BuildForOffloadDevice, + /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.push_back(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, - /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != - Action::OFK_None)); + DepA->getOffloadingDeviceKind())); }); // Only use pipes when there is exactly one input. @@ -3082,7 +3088,7 @@ AtTopLevel && (isa(A) || isa(A)); InputInfos.push_back(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, BuildForOffloadDevice)); + CachedResults, A->getOffloadingDeviceKind())); } // Always use the first input as the base input. @@ -3114,13 +3120,59 @@ // Determine the place to write output to, if any. InputInfo Result; - if (JA->getType() == types::TY_Nothing) + InputInfoList UnbundlingResults; + if (auto *UA = dyn_cast(JA)) { + // If we have an unbundling job, we need to create results for all the + // outputs. We also update the results cache so that other actions using + // this unbundling action can get the right results. + for (auto &UI : UA->getDependentActionsInfo()) { + assert(UI.DependentOffloadKind != Action::OFK_None && + "Unbundling with no offloading??"); + + // Unbundling actions are never at the top level. When we generate the + // offloading prefix, we also do that for the host file because the + // unbundling action does not change the type of the output which can + // cause a overwrite. + std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + UI.DependentOffloadKind, + UI.DependentToolChain->getTriple().normalize(), + /*CreatePrefixForHost=*/true); + auto CurI = InputInfo( + UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, + /*AtTopLevel=*/false, MultipleArchs, + OffloadingPrefix), + BaseInput); + // Save the unbundling result. + UnbundlingResults.push_back(CurI); + + // Get the unique string identifier for this dependence and cache the + // result. + CachedResults[{A, GetTriplePlusArchString( + UI.DependentToolChain, UI.DependentBoundArch, + UI.DependentOffloadKind)}] = CurI; + } + + // Now that we have all the results generated, select the one that should be + // returned for the current depending action. + std::pair ActionTC = { + A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + assert(CachedResults.find(ActionTC) != CachedResults.end() && + "Result does not exist??"); + Result = CachedResults[ActionTC]; + } else if (JA->getType() == types::TY_Nothing) Result = InputInfo(A, BaseInput); - else + else { + // We only have to generate a prefix for the host if this is not a top-level + // action. + std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + A->getOffloadingDeviceKind(), TC->getTriple().normalize(), + /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && + !AtTopLevel); Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, - TC->getTriple().normalize()), + OffloadingPrefix), BaseInput); + } if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -3130,12 +3182,28 @@ if (i + 1 != e) llvm::errs() << ", "; } - llvm::errs() << "], output: " << Result.getAsString() << "\n"; + if (UnbundlingResults.empty()) + llvm::errs() << "], output: " << Result.getAsString() << "\n"; + else { + llvm::errs() << "], outputs: ["; + for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) { + llvm::errs() << UnbundlingResults[i].getAsString(); + if (i + 1 != e) + llvm::errs() << ", "; + } + llvm::errs() << "] \n"; + } } else { - T->ConstructJob( - C, *JA, Result, InputInfos, - C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), - LinkingOutput); + if (UnbundlingResults.empty()) + T->ConstructJob( + C, *JA, Result, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); + else + T->ConstructJob( + C, *JA, UnbundlingResults, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); } return Result; } @@ -3182,7 +3250,7 @@ const char *BaseInput, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, - StringRef NormalizedTriple) const { + StringRef OffloadingPrefix) const { llvm::PrettyStackTraceString CrashInfo("Computing output path"); // Output to a user requested destination? if (AtTopLevel && !isa(JA) && !isa(JA)) { @@ -3268,7 +3336,7 @@ MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image); } else { SmallString<128> Output(getDefaultImageName()); - Output += JA.getOffloadingFileNamePrefix(NormalizedTriple); + Output += OffloadingPrefix; if (MultipleArchs && !BoundArch.empty()) { Output += "-"; Output.append(BoundArch); @@ -3285,7 +3353,7 @@ if (!types::appendSuffixForType(JA.getType())) End = BaseName.rfind('.'); SmallString<128> Suffixed(BaseName.substr(0, End)); - Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple); + Suffixed += OffloadingPrefix; if (MultipleArchs && !BoundArch.empty()) { Suffixed += "-"; Suffixed.append(BoundArch); Index: lib/Driver/Tool.cpp =================================================================== --- lib/Driver/Tool.cpp +++ lib/Driver/Tool.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "clang/Driver/Tool.h" +#include "InputInfo.h" using namespace clang::driver; @@ -21,3 +22,12 @@ Tool::~Tool() { } + +void Tool::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const { + assert(Outputs.size() == 1 && "Expected only one output by default!"); + ConstructJob(C, JA, Outputs.front(), Inputs, TCArgs, LinkingOutput); +}; Index: lib/Driver/Tools.h =================================================================== --- lib/Driver/Tools.h +++ lib/Driver/Tools.h @@ -148,6 +148,10 @@ const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override; + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; }; /// \brief Base class for all GNU tools that provide the same behavior when Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -6222,7 +6222,8 @@ if (!JA.isDeviceOffloading(Action::OFK_None) && !JA.isDeviceOffloading(Action::OFK_Host)) { llvm::sys::path::replace_extension(F, ""); - F += JA.getOffloadingFileNamePrefix(Triple.normalize()); + F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(), + Triple.normalize()); F += "-"; F += JA.getOffloadingArch(); } @@ -7058,6 +7059,7 @@ const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const { + // The version with only one output is expected to refer to a bundling job. assert(isa(JA) && "Expecting bundling job!"); // The bundling command looks like this: @@ -7119,6 +7121,68 @@ CmdArgs, None)); } +void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const { + // The version with multiple outputs is expected to refer to a unbundling job. + auto &UA = cast(JA); + + // The unbundling command looks like this: + // clang-offload-bundler -type=bc + // -targets=host-triple,openmp-triple1,openmp-triple2 + // -inputs=input_file + // -outputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2" + // -unbundle + + ArgStringList CmdArgs; + + assert(Inputs.size() == 1 && "Expecting to unbundle a single file!"); + InputInfo Input = Inputs.front(); + + // Get the type. + CmdArgs.push_back(TCArgs.MakeArgString( + Twine("-type=") + types::getTypeTempSuffix(Input.getType()))); + + // Get the targets. + SmallString<128> Triples; + Triples += "-targets="; + auto DepInfo = UA.getDependentActionsInfo(); + for (unsigned I = 0; I < DepInfo.size(); ++I) { + if (I) + Triples += ','; + + auto &Dep = DepInfo[I]; + Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind); + Triples += '-'; + Triples += Dep.DependentToolChain->getTriple().normalize(); + } + + CmdArgs.push_back(TCArgs.MakeArgString(Triples)); + + // Get bundled file command. + CmdArgs.push_back( + TCArgs.MakeArgString(Twine("-inputs=") + Input.getFilename())); + + // Get unbundled files command. + SmallString<128> UB; + UB += "-outputs="; + for (unsigned I = 0; I < Outputs.size(); ++I) { + if (I) + UB += ','; + UB += Outputs[I].getFilename(); + } + CmdArgs.push_back(TCArgs.MakeArgString(UB)); + CmdArgs.push_back("-unbundle"); + + // All the inputs are encoded as commands. + C.addCommand(llvm::make_unique( + JA, *this, + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, None)); +} + void GnuTool::anchor() {} void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, Index: test/Driver/cuda-bindings.cu =================================================================== --- test/Driver/cuda-bindings.cu +++ test/Driver/cuda-bindings.cu @@ -34,7 +34,7 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM %s -// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -62,8 +62,8 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM2 %s -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_35.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s" // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -101,7 +101,7 @@ // RUN: | FileCheck -check-prefix=DBIN %s // DBIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: // DBIN-NOT: cuda-bindings-device-cuda-nvptx64 -// DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.o" +// DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o" // // Test single gpu architecture up to the assemble phase in device-only @@ -110,7 +110,7 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \ // RUN: | FileCheck -check-prefix=DASM %s -// DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" +// DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" // // Test two gpu architectures with complete compilation in device-only @@ -121,10 +121,10 @@ // RUN: | FileCheck -check-prefix=DBIN2 %s // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64 -// DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.o" +// DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o" // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64 -// DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_35.o" +// DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.o" // // Test two gpu architectures up to the assemble phase in device-only @@ -133,5 +133,5 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \ // RUN: | FileCheck -check-prefix=DASM2 %s -// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" -// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_35.s" +// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" +// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s" Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -210,8 +210,8 @@ // CHK-LKS: TARGET(binary) // CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) // CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) -// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) -// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]) // CHK-LKS: SECTIONS // CHK-LKS: { // CHK-LKS: .omp_offloading : @@ -389,3 +389,92 @@ // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" // CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" // CHK-BUJOBS-ST: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" + +/// ########################################################################### + +/// Check separate compilation with offloading - unbundling jobs construct +// RUN: touch %t.i +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS-ST %s +// RUN: touch %t.o +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2 %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2-ST %s + +// Unbundle and create host BC. +// CHK-UBJOBS: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-UBJOBS-ST: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// Create target 1 object. +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[T1BIN:.+\.out]]" {{.*}}"[[T1OBJ]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" +// CHK-UBJOBS-ST: ld" {{.*}}"-o" "[[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"[[T1OBJ]]" + +// Create target 2 object. +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[T2BIN:.+\.out]]" {{.*}}"[[T2OBJ]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" +// CHK-UBJOBS-ST: ld" {{.*}}"-o" "[[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"[[T2OBJ]]" + +// Create binary. +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "-x" "ir" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[HOSTBIN:.+\.out]]" {{.*}}"[[HOSTOBJ]]" {{.*}}"-T" "[[LKS:.+\.lk]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" +// CHK-UBJOBS-ST: ld" {{.*}}"-o" "[[HOSTBIN:.+\.out]]" {{.*}}"[[HOSTOBJ]]" {{.*}}"-T" "[[LKS:.+\.lk]]" + +// Unbundle object file. +// CHK-UBJOBS2: clang-offload-bundler" "-type=o" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.o]]" "-outputs=[[HOSTOBJ:.+\.o]],[[T1OBJ:.+\.o]],[[T2OBJ:.+\.o]]" "-unbundle" +// CHK-UBJOBS2: ld" {{.*}}"-o" "[[T1BIN:.+\.out]]" {{.*}}"[[T1OBJ]]" +// CHK-UBJOBS2: ld" {{.*}}"-o" "[[T2BIN:.+\.out]]" {{.*}}"[[T2OBJ]]" +// CHK-UBJOBS2: ld" {{.*}}"-o" "[[HOSTBIN:.+\.out]]" {{.*}}"[[HOSTOBJ]]" {{.*}}"-T" "[[LKS:.+\.lk]]" +// CHK-UBJOBS2-ST: clang-offload-bundler" "-type=o" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.o]]" "-outputs=[[HOSTOBJ:.+\.o]],[[T1OBJ:.+\.o]],[[T2OBJ:.+\.o]]" "-unbundle" +// CHK-UBJOBS2-ST: ld" {{.*}}"-o" "[[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"[[T1OBJ]]" +// CHK-UBJOBS2-ST: ld" {{.*}}"-o" "[[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"[[T2OBJ]]" +// CHK-UBJOBS2-ST: ld" {{.*}}"-o" "[[HOSTBIN:.+\.out]]" {{.*}}"[[HOSTOBJ]]" {{.*}}"-T" "[[LKS:.+\.lk]]" + +/// ########################################################################### + +/// Check separate compilation with offloading - unbundling/bundling jobs +/// construct +// RUN: touch %t.i +// RUN: %clang -### -fopenmp -c %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBUJOBS %s +// RUN: %clang -### -fopenmp -c %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBUJOBS-ST %s + +// Unbundle and create host BC. +// CHK-UBUJOBS: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// CHK-UBUJOBS-ST: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// Create target 1 object. +// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" + +// Create target 2 object. +// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" + +// Create binary. +// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "-x" "ir" "[[HOSTBC]]" +// CHK-UBUJOBS: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" +// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" +// CHK-UBUJOBS-ST: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" Index: test/Driver/opt-record.c =================================================================== --- test/Driver/opt-record.c +++ test/Driver/opt-record.c @@ -11,7 +11,7 @@ // CHECK-NO-O: "-cc1" // CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml" -// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-device-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml" +// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml" // CHECK-EQ: "-cc1" // CHECK-EQ: "-opt-record-file" "BAR.txt"