Index: cfe/trunk/include/clang/Driver/Action.h =================================================================== --- cfe/trunk/include/clang/Driver/Action.h +++ cfe/trunk/include/clang/Driver/Action.h @@ -139,17 +139,15 @@ virtual void anchor(); /// GPU architecture to bind -- e.g 'sm_35'. const char *GpuArchName; - const char *DeviceTriple; /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: CudaDeviceAction(std::unique_ptr Input, const char *ArchName, - const char *DeviceTriple, bool AtTopLevel); + bool AtTopLevel); const char *getGpuArchName() const { return GpuArchName; } - const char *getDeviceTriple() const { return DeviceTriple; } bool isAtTopLevel() const { return AtTopLevel; } static bool classof(const Action *A) { @@ -160,16 +158,13 @@ class CudaHostAction : public Action { virtual void anchor(); ActionList DeviceActions; - const char *DeviceTriple; public: - CudaHostAction(std::unique_ptr Input, const ActionList &DeviceActions, - const char *DeviceTriple); + CudaHostAction(std::unique_ptr Input, + const ActionList &DeviceActions); ~CudaHostAction() override; - ActionList &getDeviceActions() { return DeviceActions; } const ActionList &getDeviceActions() const { return DeviceActions; } - const char *getDeviceTriple() const { return DeviceTriple; } static bool classof(const Action *A) { return A->getKind() == CudaHostClass; } }; Index: cfe/trunk/include/clang/Driver/Compilation.h =================================================================== --- cfe/trunk/include/clang/Driver/Compilation.h +++ cfe/trunk/include/clang/Driver/Compilation.h @@ -38,6 +38,9 @@ /// The default tool chain. const ToolChain &DefaultToolChain; + const ToolChain *CudaHostToolChain; + const ToolChain *CudaDeviceToolChain; + /// The original (untranslated) input argument list. llvm::opt::InputArgList *Args; @@ -81,6 +84,17 @@ const Driver &getDriver() const { return TheDriver; } const ToolChain &getDefaultToolChain() const { return DefaultToolChain; } + const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; } + const ToolChain *getCudaDeviceToolChain() const { + return CudaDeviceToolChain; + } + + void setCudaHostToolChain(const ToolChain *HostToolChain) { + CudaHostToolChain = HostToolChain; + } + void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) { + CudaDeviceToolChain = DeviceToolChain; + } const llvm::opt::InputArgList &getInputArgs() const { return *Args; } Index: cfe/trunk/include/clang/Driver/Driver.h =================================================================== --- cfe/trunk/include/clang/Driver/Driver.h +++ cfe/trunk/include/clang/Driver/Driver.h @@ -297,22 +297,23 @@ /// BuildActions - Construct the list of actions to perform for the /// given arguments, which are only done for a single architecture. /// + /// \param C - The compilation that is being built. /// \param TC - The default host tool chain. /// \param Args - The input arguments. /// \param Actions - The list to store the resulting actions onto. - void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args, - const InputList &Inputs, ActionList &Actions) const; + void BuildActions(Compilation &C, const ToolChain &TC, + llvm::opt::DerivedArgList &Args, const InputList &Inputs, + ActionList &Actions) const; /// BuildUniversalActions - Construct the list of actions to perform /// for the given arguments, which may require a universal build. /// + /// \param C - The compilation that is being built. /// \param TC - The default host tool chain. /// \param Args - The input arguments. /// \param Actions - The list to store the resulting actions onto. - void BuildUniversalActions(const ToolChain &TC, - llvm::opt::DerivedArgList &Args, - const InputList &BAInputs, - ActionList &Actions) const; + void BuildUniversalActions(Compilation &C, const ToolChain &TC, + const InputList &BAInputs) const; /// BuildJobs - Bind actions to concrete tools and translate /// arguments to form the list of jobs to run. Index: cfe/trunk/lib/Driver/Action.cpp =================================================================== --- cfe/trunk/lib/Driver/Action.cpp +++ cfe/trunk/lib/Driver/Action.cpp @@ -58,18 +58,15 @@ void CudaDeviceAction::anchor() {} CudaDeviceAction::CudaDeviceAction(std::unique_ptr Input, - const char *ArchName, - const char *DeviceTriple, bool AtTopLevel) + const char *ArchName, bool AtTopLevel) : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), - DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {} + AtTopLevel(AtTopLevel) {} void CudaHostAction::anchor() {} CudaHostAction::CudaHostAction(std::unique_ptr Input, - const ActionList &DeviceActions, - const char *DeviceTriple) - : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions), - DeviceTriple(DeviceTriple) {} + const ActionList &DeviceActions) + : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {} CudaHostAction::~CudaHostAction() { for (auto &DA : DeviceActions) Index: cfe/trunk/lib/Driver/Compilation.cpp =================================================================== --- cfe/trunk/lib/Driver/Compilation.cpp +++ cfe/trunk/lib/Driver/Compilation.cpp @@ -24,8 +24,9 @@ Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain, InputArgList *_Args, DerivedArgList *_TranslatedArgs) - : TheDriver(D), DefaultToolChain(_DefaultToolChain), Args(_Args), - TranslatedArgs(_TranslatedArgs), Redirects(nullptr), + : TheDriver(D), DefaultToolChain(_DefaultToolChain), + CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr), + Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr), ForDiagnostics(false) {} Compilation::~Compilation() { Index: cfe/trunk/lib/Driver/Driver.cpp =================================================================== --- cfe/trunk/lib/Driver/Driver.cpp +++ cfe/trunk/lib/Driver/Driver.cpp @@ -491,6 +491,10 @@ // The compilation takes ownership of Args. Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs); + C->setCudaDeviceToolChain( + &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"))); if (!HandleImmediateArgs(*C)) return C; @@ -501,10 +505,9 @@ // Construct the list of abstract actions to perform for this compilation. On // MachO targets this uses the driver-driver and universal actions. if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs, - C->getActions()); + BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs); else - BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs, + BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs, C->getActions()); if (CCCPrintPhases) { @@ -617,9 +620,9 @@ // Darwin OSes this uses the driver-driver and builds universal actions. const ToolChain &TC = C.getDefaultToolChain(); if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions()); + BuildUniversalActions(C, TC, Inputs); else - BuildActions(TC, C.getArgs(), Inputs, C.getActions()); + BuildActions(C, TC, C.getArgs(), Inputs, C.getActions()); BuildJobs(C); @@ -948,7 +951,7 @@ os << '"' << CDA->getGpuArchName() << '"' << ", {" << PrintActions1(C, *CDA->begin(), Ids) << "}"; } else { - ActionList *AL; + const ActionList *AL; if (CudaHostAction *CHA = dyn_cast(A)) { os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}" << ", gpu binaries "; @@ -997,9 +1000,10 @@ return false; } -void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args, - const InputList &BAInputs, - ActionList &Actions) const { +void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, + const InputList &BAInputs) const { + DerivedArgList &Args = C.getArgs(); + ActionList &Actions = C.getActions(); llvm::PrettyStackTraceString CrashInfo("Building universal build actions"); // Collect the list of architectures. Duplicates are allowed, but should only // be handled once (in the order seen). @@ -1028,7 +1032,7 @@ Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName())); ActionList SingleActions; - BuildActions(TC, Args, BAInputs, SingleActions); + BuildActions(C, TC, Args, BAInputs, SingleActions); // Add in arch bindings for every top level action, as well as lipo and // dsymutil steps if needed. @@ -1279,21 +1283,15 @@ // and returns a new CudaHostAction which wraps /p Current and device // side actions. static std::unique_ptr -buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, - const Arg *InputArg, std::unique_ptr HostAction, - ActionList &Actions) { - // Figure out which NVPTX triple to use for device-side compilation based on - // whether host is 64-bit. - const char *DeviceTriple = TC.getTriple().isArch64Bit() - ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"; +buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, + std::unique_ptr HostAction, ActionList &Actions) { Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only, options::OPT_cuda_device_only); // Host-only compilation case. if (PartialCompilationArg && PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only)) return std::unique_ptr( - new CudaHostAction(std::move(HostAction), {}, DeviceTriple)); + new CudaHostAction(std::move(HostAction), {})); // Collect all cuda_gpu_arch parameters, removing duplicates. SmallVector GpuArchList; @@ -1317,8 +1315,11 @@ CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg)); // Build actions for all device inputs. + assert(C.getCudaDeviceToolChain() && + "Missing toolchain for device-side compilation."); ActionList CudaDeviceActions; - D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); + C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args, + CudaDeviceInputs, CudaDeviceActions); assert(GpuArchList.size() == CudaDeviceActions.size() && "Failed to create actions for all devices"); @@ -1342,14 +1343,15 @@ // -o is ambiguous if we have more than one top-level action. if (Args.hasArg(options::OPT_o) && (!DeviceOnlyCompilation || GpuArchList.size() > 1)) { - D.Diag(clang::diag::err_drv_output_argument_with_multiple_files); + C.getDriver().Diag( + clang::diag::err_drv_output_argument_with_multiple_files); return nullptr; } for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) Actions.push_back(new CudaDeviceAction( std::unique_ptr(CudaDeviceActions[I]), GpuArchList[I], - DeviceTriple, /* AtTopLevel */ true)); + /* AtTopLevel */ true)); // Kill host action in case of device-only compilation. if (DeviceOnlyCompilation) HostAction.reset(nullptr); @@ -1362,15 +1364,16 @@ for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) DeviceActions.push_back(new CudaDeviceAction( std::unique_ptr(CudaDeviceActions[I]), GpuArchList[I], - DeviceTriple, /* AtTopLevel */ false)); + /* AtTopLevel */ false)); // Return a new host action that incorporates original host action and all // device actions. return std::unique_ptr( - new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple)); + new CudaHostAction(std::move(HostAction), DeviceActions)); } -void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, - const InputList &Inputs, ActionList &Actions) const { +void Driver::BuildActions(Compilation &C, const ToolChain &TC, + DerivedArgList &Args, const InputList &Inputs, + ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); if (!SuppressMissingInputWarning && Inputs.empty()) { @@ -1500,8 +1503,8 @@ Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { - Current = buildCudaActions(*this, TC, Args, InputArg, - std::move(Current), Actions); + Current = + buildCudaActions(C, Args, InputArg, std::move(Current), Actions); if (!Current) break; } @@ -1803,7 +1806,7 @@ InputInfo II; // Append outputs of device jobs to the input list. for (const Action *DA : CHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, "", AtTopLevel, + BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, /*MultipleArchs*/ false, LinkingOutput, II); CudaDeviceInputInfos.push_back(II); } @@ -1843,11 +1846,12 @@ } if (const CudaDeviceAction *CDA = dyn_cast(A)) { - BuildJobsForAction( - C, *CDA->begin(), - &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); + // Initial processing of CudaDeviceAction carries host params. + // Call BuildJobsForAction() again, now with correct device parameters. + assert(CDA->getGpuArchName() && "No GPU name in device action."); + BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput, Result); return; } Index: cfe/trunk/lib/Driver/Tools.cpp =================================================================== --- cfe/trunk/lib/Driver/Tools.cpp +++ cfe/trunk/lib/Driver/Tools.cpp @@ -3262,6 +3262,23 @@ CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); + if (IsCuda) { + // FIXME: We need a (better) way to pass information about + // particular compilation pass we're constructing here. For now we + // can check which toolchain we're using and pick the other one to + // extract the triple. + const ToolChain *AuxToolChain; + if (&getToolChain() == C.getCudaDeviceToolChain()) + AuxToolChain = C.getCudaHostToolChain(); + else if (&getToolChain() == C.getCudaHostToolChain()) + AuxToolChain = C.getCudaDeviceToolChain(); + else + llvm_unreachable("Can't figure out CUDA compilation mode."); + assert(AuxToolChain != nullptr && "No aux toolchain."); + CmdArgs.push_back("-aux-triple"); + CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str())); + } + if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6; Index: cfe/trunk/test/Driver/cuda-options.cu =================================================================== --- cfe/trunk/test/Driver/cuda-options.cu +++ cfe/trunk/test/Driver/cuda-options.cu @@ -111,14 +111,6 @@ // Make sure we don't link anything. // RUN: -check-prefix CUDA-NL %s -// Match device-side preprocessor, and compiler phases with -save-temps -// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" -// CUDA-D1S-SAME: "-fcuda-is-device" -// CUDA-D1S-SAME: "-x" "cuda" -// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" -// CUDA-D1S-SAME: "-fcuda-is-device" -// CUDA-D1S-SAME: "-x" "cuda-cpp-output" - // --cuda-host-only should never trigger unused arg warning. // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \ // RUN: FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s @@ -133,34 +125,47 @@ // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \ // RUN: FileCheck -check-prefix CUDA-UNUSED-CDO %s +// Match device-side preprocessor, and compiler phases with -save-temps +// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu" +// CUDA-D1S-SAME: "-fcuda-is-device" +// CUDA-D1S-SAME: "-x" "cuda" + +// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu" +// CUDA-D1S-SAME: "-fcuda-is-device" +// CUDA-D1S-SAME: "-x" "cuda-cpp-output" + // Match the job that produces PTX assembly -// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu" // CUDA-D1-SAME: "-fcuda-is-device" // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35" // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]" // CUDA-D1NS-SAME: "-x" "cuda" // CUDA-D1S-SAME: "-x" "ir" -// Match anothe device-side compilation -// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// Match another device-side compilation +// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu" // CUDA-D2-SAME: "-fcuda-is-device" // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30" // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" // CUDA-D2-SAME: "-x" "cuda" // Match no device-side compilation -// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" // CUDA-ND-SAME-NOT: "-fcuda-is-device" // Match host-side preprocessor job with -save-temps -// CUDA-HS: "-cc1" "-triple" -// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu" +// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda" // CUDA-HS-SAME-NOT: "-fcuda-is-device" // CUDA-HS-SAME: "-x" "cuda" // Match host-side compilation -// CUDA-H: "-cc1" "-triple" -// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu" +// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda" // CUDA-H-SAME-NOT: "-fcuda-is-device" // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" // CUDA-HNS-SAME: "-x" "cuda" Index: cfe/trunk/test/SemaCUDA/function-target-hd.cu =================================================================== --- cfe/trunk/test/SemaCUDA/function-target-hd.cu +++ cfe/trunk/test/SemaCUDA/function-target-hd.cu @@ -8,9 +8,9 @@ // host device functions are not allowed to call device functions. // RUN: %clang_cc1 -fsyntax-only -verify %s -// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD -// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD #include "Inputs/cuda.h"