Index: include/clang/Driver/Action.h =================================================================== --- include/clang/Driver/Action.h +++ include/clang/Driver/Action.h @@ -139,15 +139,17 @@ virtual void anchor(); /// GPU architecture to bind -- e.g 'sm_35'. const char *GpuArchName; + const char *DeviceTriple; /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: CudaDeviceAction(std::unique_ptr Input, const char *ArchName, - bool AtTopLevel); + const char *DeviceTriple, bool AtTopLevel); const char *getGpuArchName() const { return GpuArchName; } + const char *getDeviceTriple() const { return DeviceTriple; } bool isAtTopLevel() const { return AtTopLevel; } static bool classof(const Action *A) { Index: lib/Driver/Action.cpp =================================================================== --- lib/Driver/Action.cpp +++ lib/Driver/Action.cpp @@ -58,9 +58,10 @@ void CudaDeviceAction::anchor() {} CudaDeviceAction::CudaDeviceAction(std::unique_ptr Input, - const char *ArchName, bool AtTopLevel) + const char *ArchName, + const char *DeviceTriple, bool AtTopLevel) : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), - AtTopLevel(AtTopLevel) {} + DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {} void CudaHostAction::anchor() {} Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -1237,11 +1237,8 @@ // side actions. static std::unique_ptr buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, - const Arg *InputArg, const types::ID InputType, - std::unique_ptr Current, ActionList &Actions) { - - assert(InputType == types::TY_CUDA && - "CUDA Actions only apply to CUDA inputs."); + const Arg *InputArg, std::unique_ptr HostAction, + ActionList &Actions) { // Collect all cuda_gpu_arch parameters, removing duplicates. SmallVector GpuArchList; @@ -1279,6 +1276,12 @@ } } + // Figure out which NVPTX triple to use for device-side compilation based on + // whether host is 64-bit. + const char *DeviceTriple = TC.getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + // Figure out what to do with device actions -- pass them as inputs to the // host action or run each of them independently. bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only); @@ -1295,26 +1298,26 @@ } for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - Actions.push_back( - new CudaDeviceAction(std::unique_ptr(CudaDeviceActions[I]), - GpuArchList[I], /* AtTopLevel */ true)); + Actions.push_back(new CudaDeviceAction( + std::unique_ptr(CudaDeviceActions[I]), GpuArchList[I], + DeviceTriple, /* AtTopLevel */ true)); // Kill host action in case of device-only compilation. if (DeviceOnlyCompilation) - Current.reset(nullptr); - return Current; + HostAction.reset(nullptr); + return HostAction; } // Outputs of device actions during complete CUDA compilation get created // with AtTopLevel=false and become inputs for the host action. ActionList DeviceActions; for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - DeviceActions.push_back( - new CudaDeviceAction(std::unique_ptr(CudaDeviceActions[I]), - GpuArchList[I], /* AtTopLevel */ false)); + DeviceActions.push_back(new CudaDeviceAction( + std::unique_ptr(CudaDeviceActions[I]), GpuArchList[I], + DeviceTriple, /* AtTopLevel */ false)); // Return a new host action that incorporates original host action and all // device actions. return std::unique_ptr( - new CudaHostAction(std::move(Current), DeviceActions)); + new CudaHostAction(std::move(HostAction), DeviceActions)); } void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, @@ -1451,7 +1454,7 @@ Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); if (InjectCuda && Phase == CudaInjectionPhase) { - Current = buildCudaActions(*this, TC, Args, InputArg, InputType, + Current = buildCudaActions(*this, TC, Args, InputArg, std::move(Current), Actions); if (!Current) break; @@ -1794,15 +1797,11 @@ } if (const CudaDeviceAction *CDA = dyn_cast(A)) { - // Figure out which NVPTX triple to use for device-side compilation based on - // whether host is 64-bit. - llvm::Triple DeviceTriple(TC->getTriple().isArch64Bit() - ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - BuildJobsForAction(C, *CDA->begin(), - &getToolChain(C.getArgs(), DeviceTriple), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); + BuildJobsForAction( + C, *CDA->begin(), + &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput, Result); return; }