Index: cfe/trunk/include/clang/Driver/Action.h =================================================================== --- cfe/trunk/include/clang/Driver/Action.h +++ cfe/trunk/include/clang/Driver/Action.h @@ -139,15 +139,17 @@ virtual void anchor(); /// GPU architecture to bind -- e.g 'sm_35'. const char *GpuArchName; + const char *DeviceTriple; /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, - bool AtTopLevel); + const char *DeviceTriple, bool AtTopLevel); const char *getGpuArchName() const { return GpuArchName; } + const char *getDeviceTriple() const { return DeviceTriple; } bool isAtTopLevel() const { return AtTopLevel; } static bool classof(const Action *A) { Index: cfe/trunk/lib/Driver/Action.cpp =================================================================== --- cfe/trunk/lib/Driver/Action.cpp +++ cfe/trunk/lib/Driver/Action.cpp @@ -58,9 +58,10 @@ void CudaDeviceAction::anchor() {} CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, - const char *ArchName, bool AtTopLevel) + const char *ArchName, + const char *DeviceTriple, bool AtTopLevel) : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), - AtTopLevel(AtTopLevel) {} + DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {} void CudaHostAction::anchor() {} Index: cfe/trunk/lib/Driver/Driver.cpp =================================================================== --- cfe/trunk/lib/Driver/Driver.cpp +++ cfe/trunk/lib/Driver/Driver.cpp @@ -1238,11 +1238,8 @@ // CudaHostAction which combines both host and device side actions.
static std::unique_ptr<Action> buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, - const Arg *InputArg, const types::ID InputType, - std::unique_ptr<Action> Current, ActionList &Actions) { - - assert(InputType == types::TY_CUDA && - "CUDA Actions only apply to CUDA inputs."); + const Arg *InputArg, std::unique_ptr<Action> HostAction, + ActionList &Actions) { // Collect all cuda_gpu_arch parameters, removing duplicates. SmallVector<const char *, 4> GpuArchList; @@ -1280,6 +1277,12 @@ } } + // Figure out which NVPTX triple to use for device-side compilation based on + // whether host is 64-bit. + const char *DeviceTriple = TC.getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + // Figure out what to do with device actions -- pass them as inputs to the // host action or run each of them independently. bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only); @@ -1296,26 +1299,26 @@ } for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - Actions.push_back( - new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[I]), - GpuArchList[I], /* AtTopLevel */ true)); + Actions.push_back(new CudaDeviceAction( + std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], + DeviceTriple, /* AtTopLevel */ true)); // Kill host action in case of device-only compilation. if (DeviceOnlyCompilation) - Current.reset(nullptr); - return Current; + HostAction.reset(nullptr); + return HostAction; } // Outputs of device actions during complete CUDA compilation get created // with AtTopLevel=false and become inputs for the host action.
ActionList DeviceActions; for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - DeviceActions.push_back( - new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[I]), - GpuArchList[I], /* AtTopLevel */ false)); + DeviceActions.push_back(new CudaDeviceAction( + std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], + DeviceTriple, /* AtTopLevel */ false)); // Return a new host action that incorporates original host action and all // device actions. return std::unique_ptr<Action>( - new CudaHostAction(std::move(Current), DeviceActions)); + new CudaHostAction(std::move(HostAction), DeviceActions)); } void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, @@ -1461,7 +1464,7 @@ if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase && !Args.hasArg(options::OPT_cuda_host_only)) { - Current = buildCudaActions(*this, TC, Args, InputArg, InputType, + Current = buildCudaActions(*this, TC, Args, InputArg, std::move(Current), Actions); if (!Current) break; @@ -1791,15 +1794,11 @@ } if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { - // Figure out which NVPTX triple to use for device-side compilation based on - // whether host is 64-bit. - llvm::Triple DeviceTriple(TC->getTriple().isArch64Bit() - ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - BuildJobsForAction(C, *CDA->begin(), - &getToolChain(C.getArgs(), DeviceTriple), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); + BuildJobsForAction( + C, *CDA->begin(), + &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput, Result); return; }