Index: include/clang/Driver/Compilation.h =================================================================== --- include/clang/Driver/Compilation.h +++ include/clang/Driver/Compilation.h @@ -38,6 +38,9 @@ /// The default tool chain. const ToolChain &DefaultToolChain; + const ToolChain *CudaHostToolChain; + const ToolChain *CudaDeviceToolChain; + /// The original (untranslated) input argument list. llvm::opt::InputArgList *Args; @@ -81,6 +84,17 @@ const Driver &getDriver() const { return TheDriver; } const ToolChain &getDefaultToolChain() const { return DefaultToolChain; } + const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; } + const ToolChain *getCudaDeviceToolChain() const { + return CudaDeviceToolChain; + } + + void setCudaHostToolChain(const ToolChain *HostToolChain) { + CudaHostToolChain = HostToolChain; + } + void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) { + CudaDeviceToolChain = DeviceToolChain; + } const llvm::opt::InputArgList &getInputArgs() const { return *Args; } Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -297,22 +297,23 @@ /// BuildActions - Construct the list of actions to perform for the /// given arguments, which are only done for a single architecture. /// + /// \param C - The compilation that is being built. /// \param TC - The default host tool chain. /// \param Args - The input arguments. /// \param Actions - The list to store the resulting actions onto. - void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args, - const InputList &Inputs, ActionList &Actions) const; + void BuildActions(Compilation &C, const ToolChain &TC, + llvm::opt::DerivedArgList &Args, const InputList &Inputs, + ActionList &Actions) const; /// BuildUniversalActions - Construct the list of actions to perform /// for the given arguments, which may require a universal build. 
/// + /// \param C - The compilation that is being built. /// \param TC - The default host tool chain. /// \param Args - The input arguments. /// \param Actions - The list to store the resulting actions onto. - void BuildUniversalActions(const ToolChain &TC, - llvm::opt::DerivedArgList &Args, - const InputList &BAInputs, - ActionList &Actions) const; + void BuildUniversalActions(Compilation &C, const ToolChain &TC, + const InputList &BAInputs) const; /// BuildJobs - Bind actions to concrete tools and translate /// arguments to form the list of jobs to run. @@ -433,6 +434,13 @@ /// compilation based on which -f(no-)?lto(=.*)? option occurs last. void setLTOMode(const llvm::opt::ArgList &Args); + /// @} + + /// \brief Get bitmasks for which option flags to include and exclude based on + /// the driver mode. + std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks() const; + +public: /// \brief Retrieves a ToolChain for a particular \p Target triple. /// /// Will cache ToolChains for the life of the driver object, and create them @@ -440,13 +448,6 @@ const ToolChain &getToolChain(const llvm::opt::ArgList &Args, const llvm::Triple &Target) const; - /// @} - - /// \brief Get bitmasks for which option flags to include and exclude based on - /// the driver mode. - std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks() const; - -public: /// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and /// return the grouped values as integers. Numbers which are not /// provided are set to 0. 
Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -24,8 +24,9 @@ Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain, InputArgList *_Args, DerivedArgList *_TranslatedArgs) - : TheDriver(D), DefaultToolChain(_DefaultToolChain), Args(_Args), - TranslatedArgs(_TranslatedArgs), Redirects(nullptr), + : TheDriver(D), DefaultToolChain(_DefaultToolChain), + CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr), + Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr), ForDiagnostics(false) {} Compilation::~Compilation() { Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -500,10 +500,9 @@ // Construct the list of abstract actions to perform for this compilation. On // MachO targets this uses the driver-driver and universal actions. if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs, - C->getActions()); + BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs); else - BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs, + BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs, C->getActions()); if (CCCPrintPhases) { @@ -616,9 +615,9 @@ // Darwin OSes this uses the driver-driver and builds universal actions. 
const ToolChain &TC = C.getDefaultToolChain(); if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions()); + BuildUniversalActions(C, TC, Inputs); else - BuildActions(TC, C.getArgs(), Inputs, C.getActions()); + BuildActions(C, TC, C.getArgs(), Inputs, C.getActions()); BuildJobs(C); @@ -996,9 +995,10 @@ return false; } -void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args, - const InputList &BAInputs, - ActionList &Actions) const { +void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, + const InputList &BAInputs) const { + DerivedArgList &Args = C.getArgs(); + ActionList &Actions = C.getActions(); llvm::PrettyStackTraceString CrashInfo("Building universal build actions"); // Collect the list of architectures. Duplicates are allowed, but should only // be handled once (in the order seen). @@ -1027,7 +1027,7 @@ Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName())); ActionList SingleActions; - BuildActions(TC, Args, BAInputs, SingleActions); + BuildActions(C, TC, Args, BAInputs, SingleActions); // Add in arch bindings for every top level action, as well as lipo and // dsymutil steps if needed. @@ -1278,14 +1278,16 @@ // and returns a new CudaHostAction which wraps /p Current and device // side actions. static std::unique_ptr<Action> -buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, - const Arg *InputArg, std::unique_ptr<Action> HostAction, - ActionList &Actions) { +buildCudaActions(Compilation &C, const Driver &D, const ToolChain &TC, + DerivedArgList &Args, const Arg *InputArg, + std::unique_ptr<Action> HostAction, ActionList &Actions) { // Figure out which NVPTX triple to use for device-side compilation based on // whether host is 64-bit. const char *DeviceTriple = TC.getTriple().isArch64Bit() ? 
"nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda"; + C.setCudaDeviceToolChain( + &D.getToolChain(C.getArgs(), llvm::Triple(DeviceTriple))); Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only, options::OPT_cuda_device_only); // Host-only compilation case. @@ -1317,7 +1319,7 @@ // Build actions for all device inputs. ActionList CudaDeviceActions; - D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); + D.BuildActions(C, TC, Args, CudaDeviceInputs, CudaDeviceActions); assert(GpuArchList.size() == CudaDeviceActions.size() && "Failed to create actions for all devices"); @@ -1368,8 +1370,9 @@ new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple)); } -void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, - const InputList &Inputs, ActionList &Actions) const { +void Driver::BuildActions(Compilation &C, const ToolChain &TC, + DerivedArgList &Args, const InputList &Inputs, + ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); if (!SuppressMissingInputWarning && Inputs.empty()) { @@ -1499,7 +1502,7 @@ Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { - Current = buildCudaActions(*this, TC, Args, InputArg, + Current = buildCudaActions(C, *this, TC, Args, InputArg, std::move(Current), Actions); if (!Current) break; @@ -1802,7 +1805,7 @@ InputInfo II; // Append outputs of device jobs to the input list. 
for (const Action *DA : CHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, "", AtTopLevel, + BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, /*MultipleArchs*/ false, LinkingOutput, II); CudaDeviceInputInfos.push_back(II); } @@ -1842,11 +1845,12 @@ } if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { - BuildJobsForAction( - C, *CDA->begin(), - &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); + // Initial processing of CudaDeviceAction carries host params. + // Call BuildJobsForAction() again, now with correct device parameters. + assert(CDA->getGpuArchName() && "No GPU name in device action."); + BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput, Result); return; } Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3215,6 +3215,20 @@ CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); + if (IsCuda) { + const ToolChain *AuxToolChain; + if (&getToolChain() == C.getCudaDeviceToolChain()) + AuxToolChain = C.getCudaHostToolChain(); + else if (&getToolChain() == C.getCudaHostToolChain()) + AuxToolChain = C.getCudaDeviceToolChain(); + else + llvm_unreachable("Can't figure out CUDA compilation mode."); + if (AuxToolChain) { + CmdArgs.push_back("-aux-triple"); + CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str())); + } + } + if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 
4 : 6; Index: test/Driver/cuda-options.cu =================================================================== --- test/Driver/cuda-options.cu +++ test/Driver/cuda-options.cu @@ -111,14 +111,6 @@ // Make sure we don't link anything. // RUN: -check-prefix CUDA-NL %s -// Match device-side preprocessor, and compiler phases with -save-temps -// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" -// CUDA-D1S-SAME: "-fcuda-is-device" -// CUDA-D1S-SAME: "-x" "cuda" -// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" -// CUDA-D1S-SAME: "-fcuda-is-device" -// CUDA-D1S-SAME: "-x" "cuda-cpp-output" - // --cuda-host-only should never trigger unused arg warning. // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \ // RUN: FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s @@ -133,34 +125,47 @@ // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \ // RUN: FileCheck -check-prefix CUDA-UNUSED-CDO %s +// Match device-side preprocessor, and compiler phases with -save-temps +// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu" +// CUDA-D1S-SAME: "-fcuda-is-device" +// CUDA-D1S-SAME: "-x" "cuda" + +// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu" +// CUDA-D1S-SAME: "-fcuda-is-device" +// CUDA-D1S-SAME: "-x" "cuda-cpp-output" + // Match the job that produces PTX assembly -// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu" // CUDA-D1-SAME: "-fcuda-is-device" // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35" // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]" // CUDA-D1NS-SAME: "-x" "cuda" // CUDA-D1S-SAME: "-x" "ir" -// Match anothe device-side compilation -// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// Match another device-side compilation +// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// 
CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu" // CUDA-D2-SAME: "-fcuda-is-device" // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30" // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" // CUDA-D2-SAME: "-x" "cuda" // Match no device-side compilation -// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" // CUDA-ND-SAME-NOT: "-fcuda-is-device" // Match host-side preprocessor job with -save-temps -// CUDA-HS: "-cc1" "-triple" -// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu" +// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda" // CUDA-HS-SAME-NOT: "-fcuda-is-device" // CUDA-HS-SAME: "-x" "cuda" // Match host-side compilation -// CUDA-H: "-cc1" "-triple" -// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda" +// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu" +// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda" // CUDA-H-SAME-NOT: "-fcuda-is-device" // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" // CUDA-HNS-SAME: "-x" "cuda" Index: test/SemaCUDA/function-target-hd.cu =================================================================== --- test/SemaCUDA/function-target-hd.cu +++ test/SemaCUDA/function-target-hd.cu @@ -8,9 +8,9 @@ // host device functions are not allowed to call device functions. // RUN: %clang_cc1 -fsyntax-only -verify %s -// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD -// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD #include "Inputs/cuda.h"