Index: clang/include/clang/Driver/ToolChain.h =================================================================== --- clang/include/clang/Driver/ToolChain.h +++ clang/include/clang/Driver/ToolChain.h @@ -38,6 +38,7 @@ namespace driver { class Compilation; + class CudaInstallationDetector; class Driver; class JobAction; class RegisterEffectiveTriple; Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -470,14 +470,18 @@ if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) { return types::isCuda(I.first); })) { - const ToolChain &TC = getToolChain( - C.getInputArgs(), - llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>() - ->getTriple() - .isArch64Bit() - ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda")); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); + const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); + const llvm::Triple &HostTriple = HostTC->getTriple(); + llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + // Use the CUDA and host triples as the key into the ToolChains map, because + // the device toolchain we create depends on both. + ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; + if (!CudaTC) { + CudaTC = new toolchains::CudaToolChain(*this, CudaTriple, *HostTC, + C.getInputArgs()); + } + C.addOffloadDeviceToolChain(CudaTC, Action::OFK_Cuda); } // @@ -3602,9 +3606,6 @@ break; } break; - case llvm::Triple::CUDA: - TC = new toolchains::CudaToolChain(*this, Target, Args); - break; case llvm::Triple::PS4: TC = new toolchains::PS4CPU(*this, Target, Args); break; @@ -3646,6 +3647,12 @@ } } } + + // Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA + // compiles always need two toolchains, the CUDA toolchain and the host + // toolchain. So the only valid way to create a CUDA toolchain is via + // CreateOffloadingDeviceToolChains. 
+ return *TC; } Index: clang/lib/Driver/ToolChains.h =================================================================== --- clang/lib/Driver/ToolChains.h +++ clang/lib/Driver/ToolChains.h @@ -24,6 +24,60 @@ namespace clang { namespace driver { + +/// A class to find a viable CUDA installation +class CudaInstallationDetector { +private: + const Driver &D; + bool IsValid = false; + CudaVersion Version = CudaVersion::UNKNOWN; + std::string InstallPath; + std::string BinPath; + std::string LibPath; + std::string LibDevicePath; + std::string IncludePath; + llvm::StringMap<std::string> LibDeviceMap; + + // CUDA architectures for which we have raised an error in + // CheckCudaVersionSupportsArch. + mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors; + +public: + CudaInstallationDetector(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + + /// \brief Emit an error if Version does not support the given Arch. + /// + /// If either Version or Arch is unknown, does not emit an error. Emits at + /// most one error per Arch. + void CheckCudaVersionSupportsArch(CudaArch Arch) const; + + /// \brief Check whether we detected a valid Cuda install. + bool isValid() const { return IsValid; } + /// \brief Print information about the detected CUDA installation. + void print(raw_ostream &OS) const; + + /// \brief Get the detected Cuda install's version. + CudaVersion version() const { return Version; } + /// \brief Get the detected Cuda installation path. + StringRef getInstallPath() const { return InstallPath; } + /// \brief Get the detected path to Cuda's bin directory. + StringRef getBinPath() const { return BinPath; } + /// \brief Get the detected Cuda Include path. + StringRef getIncludePath() const { return IncludePath; } + /// \brief Get the detected Cuda library path. 
+ StringRef getLibPath() const { return LibPath; } + /// \brief Get the detected Cuda device library path. + StringRef getLibDevicePath() const { return LibDevicePath; } + /// \brief Get libdevice file for given architecture + std::string getLibDeviceFile(StringRef Gpu) const { + return LibDeviceMap.lookup(Gpu); + } +}; + namespace toolchains { /// Generic_GCC - A tool chain using the 'gcc' command to perform @@ -157,57 +211,6 @@ protected: GCCInstallationDetector GCCInstallation; - - // \brief A class to find a viable CUDA installation - class CudaInstallationDetector { - private: - const Driver &D; - bool IsValid = false; - CudaVersion Version = CudaVersion::UNKNOWN; - std::string InstallPath; - std::string BinPath; - std::string LibPath; - std::string LibDevicePath; - std::string IncludePath; - llvm::StringMap<std::string> LibDeviceMap; - - // CUDA architectures for which we have raised an error in - // CheckCudaVersionSupportsArch. - mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors; - - public: - CudaInstallationDetector(const Driver &D) : D(D) {} - void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args); - - /// \brief Emit an error if Version does not support the given Arch. - /// - /// If either Version or Arch is unknown, does not emit an error. Emits at - /// most one error per Arch. - void CheckCudaVersionSupportsArch(CudaArch Arch) const; - - /// \brief Check whether we detected a valid Cuda install. - bool isValid() const { return IsValid; } - /// \brief Print information about the detected CUDA installation. - void print(raw_ostream &OS) const; - - /// \brief Get the detected Cuda install's version. - CudaVersion version() const { return Version; } - /// \brief Get the detected Cuda installation path. - StringRef getInstallPath() const { return InstallPath; } - /// \brief Get the detected path to Cuda's bin directory. - StringRef getBinPath() const { return BinPath; } - /// \brief Get the detected Cuda Include path. 
- StringRef getIncludePath() const { return IncludePath; } - /// \brief Get the detected Cuda library path. - StringRef getLibPath() const { return LibPath; } - /// \brief Get the detected Cuda device library path. - StringRef getLibDevicePath() const { return LibDevicePath; } - /// \brief Get libdevice file for given architecture - std::string getLibDeviceFile(StringRef Gpu) const { - return LibDeviceMap.lookup(Gpu); - } - }; - CudaInstallationDetector CudaInstallation; public: @@ -403,6 +406,8 @@ /// The OS version we are targeting. mutable VersionTuple TargetVersion; + CudaInstallationDetector CudaInstallation; + private: void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const; @@ -543,6 +548,9 @@ ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override; bool hasBlocksRuntime() const override; + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + bool UseObjCMixedDispatch() const override { // This is only used with the non-fragile ABI and non-legacy dispatch. @@ -572,6 +580,8 @@ bool SupportsEmbeddedBitcode() const override; SanitizerMask getSupportedSanitizers() const override; + + void printVerboseInfo(raw_ostream &OS) const override; }; /// DarwinClang - The Darwin toolchain used by Clang. @@ -867,10 +877,10 @@ Tool *buildLinker() const override; }; -class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux { +class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain { public: CudaToolChain(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args); + const ToolChain &HostTC, const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, @@ -881,16 +891,29 @@ // Never try to use the integrated assembler with CUDA; always fork out to // ptxas. 
bool useIntegratedAs() const override { return false; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault() const override { return false; } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + bool SupportsObjCGC() const override { return false; } void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - const Generic_GCC::CudaInstallationDetector &cudaInstallation() const { - return CudaInstallation; - } - Generic_GCC::CudaInstallationDetector &cudaInstallation() { - return CudaInstallation; - } + void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; + CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + void AddClangCXXStdlibIncludeArgs( + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CC1Args) const override; + void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + const ToolChain &HostTC; + CudaInstallationDetector CudaInstallation; protected: Tool *buildAssembler() const override; // ptxas Index: clang/lib/Driver/ToolChains.cpp =================================================================== --- clang/lib/Driver/ToolChains.cpp +++ clang/lib/Driver/ToolChains.cpp @@ -52,7 +52,8 @@ /// Darwin - Darwin tool chain for i386 and x86_64. 
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : MachO(D, Triple, Args), TargetInitialized(false) {} + : MachO(D, Triple, Args), TargetInitialized(false), + CudaInstallation(D, Triple, Args) {} types::ID MachO::LookupTypeForExtension(StringRef Ext) const { types::ID Ty = types::lookupTypeForExtension(Ext); @@ -99,6 +100,11 @@ } } +void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); +} + // This is just a MachO name translation routine and there's no // way to join this into ARMTargetParser without breaking all // other assumptions. Maybe MachO should consider standardising @@ -1296,6 +1302,10 @@ return Res; } +void Darwin::printVerboseInfo(raw_ostream &OS) const { + CudaInstallation.print(OS); +} + /// Generic_GCC - A tool chain using the 'gcc' command to perform /// all subcommands; this relies on gcc translating the majority of /// command line options. @@ -1811,10 +1821,10 @@ return CudaVersion::UNKNOWN; } -// \brief -- try common CUDA installation paths looking for files we need for -// CUDA compilation. -void Generic_GCC::CudaInstallationDetector::init( - const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args) { +CudaInstallationDetector::CudaInstallationDetector( + const Driver &D, const llvm::Triple &TargetTriple, + const llvm::opt::ArgList &Args) + : D(D) { SmallVector<std::string, 4> CudaPathCandidates; if (Args.hasArg(options::OPT_cuda_path_EQ)) @@ -1835,13 +1845,25 @@ BinPath = CudaPath + "/bin"; IncludePath = InstallPath + "/include"; LibDevicePath = InstallPath + "/nvvm/libdevice"; - LibPath = InstallPath + (TargetTriple.isArch64Bit() ? 
"/lib64" : "/lib"); auto &FS = D.getVFS(); - if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibPath) && + if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibDevicePath))) continue; + // On Linux, we have both lib and lib64 directories, and we need to choose + // based on our triple. On MacOS, we have only a lib directory. + // + // It's sufficient for our purposes to be flexible: If both lib and lib64 + // exist, we choose whichever one matches our triple. Otherwise, if only + // lib exists, we use it. + if (TargetTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) + LibPath = InstallPath + "/lib64"; + else if (FS.exists(InstallPath + "/lib")) + LibPath = InstallPath + "/lib"; + else + continue; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"); if (!VersionFile) { @@ -1898,7 +1920,33 @@ } } -void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch( +void CudaInstallationDetector::AddCudaIncludeArgs( + const ArgList &DriverArgs, ArgStringList &CC1Args) const { + if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { + // Add cuda_wrappers/* to our system include path. This lets us wrap + // standard library headers. 
+ SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include"); + llvm::sys::path::append(P, "cuda_wrappers"); + CC1Args.push_back("-internal-isystem"); + CC1Args.push_back(DriverArgs.MakeArgString(P)); + } + + if (DriverArgs.hasArg(options::OPT_nocudainc)) + return; + + if (!isValid()) { + D.Diag(diag::err_drv_no_cuda_installation); + return; + } + + CC1Args.push_back("-internal-isystem"); + CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); + CC1Args.push_back("-include"); + CC1Args.push_back("__clang_cuda_runtime_wrapper.h"); +} + +void CudaInstallationDetector::CheckCudaVersionSupportsArch( CudaArch Arch) const { if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN || ArchsWithVersionTooLowErrors.count(Arch) > 0) @@ -1913,7 +1961,7 @@ } } -void Generic_GCC::CudaInstallationDetector::print(raw_ostream &OS) const { +void CudaInstallationDetector::print(raw_ostream &OS) const { if (isValid()) OS << "Found CUDA installation: " << InstallPath << ", version " << CudaVersionToString(Version) << "\n"; @@ -2756,7 +2804,8 @@ Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : ToolChain(D, Triple, Args), GCCInstallation(D), CudaInstallation(D) { + : ToolChain(D, Triple, Args), GCCInstallation(D), + CudaInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); if (getDriver().getInstalledDir() != getDriver().Dir) getProgramPaths().push_back(getDriver().Dir); @@ -4162,7 +4211,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { GCCInstallation.init(Triple, Args); - CudaInstallation.init(Triple, Args); Multilibs = GCCInstallation.getMultilibs(); llvm::Triple::ArchType Arch = Triple.getArch(); std::string SysRoot = computeSysRoot(); @@ -4767,26 +4815,7 @@ void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { - 
// Add cuda_wrappers/* to our system include path. This lets us wrap - // standard library headers. - SmallString<128> P(getDriver().ResourceDir); - llvm::sys::path::append(P, "include"); - llvm::sys::path::append(P, "cuda_wrappers"); - addSystemInclude(DriverArgs, CC1Args, P); - } - - if (DriverArgs.hasArg(options::OPT_nocudainc)) - return; - - if (!CudaInstallation.isValid()) { - getDriver().Diag(diag::err_drv_no_cuda_installation); - return; - } - - addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath()); - CC1Args.push_back("-include"); - CC1Args.push_back("__clang_cuda_runtime_wrapper.h"); + CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs, @@ -4968,16 +4997,18 @@ /// together object files from the assembler into a single blob. CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args) - : Linux(D, Triple, Args) { + const ToolChain &HostTC, const ArgList &Args) + : ToolChain(D, Triple, Args), HostTC(HostTC), + CudaInstallation(D, Triple, Args) { if (CudaInstallation.isValid()) getProgramPaths().push_back(CudaInstallation.getBinPath()); } -void -CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const { - Linux::addClangTargetOptions(DriverArgs, CC1Args); +void CudaToolChain::addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + HostTC.addClangTargetOptions(DriverArgs, CC1Args); + CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, @@ -5019,13 +5050,18 @@ assert(!Arch.empty() && "Must have an explicit GPU arch."); CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch)); } - Linux::AddCudaIncludeArgs(DriverArgs, CC1Args); + CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } llvm::opt::DerivedArgList * CudaToolChain::TranslateArgs(const 
llvm::opt::DerivedArgList &Args, - StringRef BoundArch, Action::OffloadKind) const { - DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { + DerivedArgList *DAL = + HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); + if (!DAL) + DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); for (Arg *A : Args) { @@ -5077,6 +5113,30 @@ return new tools::NVPTX::Linker(*this); } +void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { + HostTC.addClangWarningOptions(CC1Args); +} + +ToolChain::CXXStdlibType +CudaToolChain::GetCXXStdlibType(const ArgList &Args) const { + return HostTC.GetCXXStdlibType(Args); +} + +void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); +} + +void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, + ArgStringList &CC1Args) const { + HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); +} + +void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, + ArgStringList &CC1Args) const { + HostTC.AddIAMCUIncludeArgs(Args, CC1Args); +} + /// XCore tool chain XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) Index: clang/lib/Driver/Tools.cpp =================================================================== --- clang/lib/Driver/Tools.cpp +++ clang/lib/Driver/Tools.cpp @@ -11981,7 +11981,7 @@ // Check that our installation's ptxas supports gpu_arch. 
if (!Args.hasArg(options::OPT_no_cuda_version_check)) { - TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch); + TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); } ArgStringList CmdArgs; Index: clang/test/Driver/cuda-detect.cu =================================================================== --- clang/test/Driver/cuda-detect.cu +++ clang/test/Driver/cuda-detect.cu @@ -5,10 +5,18 @@ // # Check that we properly detect CUDA installation. // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA +// RUN: %clang -v --target=i386-apple-macosx \ +// RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA + // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s +// RUN: %clang -v --target=i386-apple-macosx \ +// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s + // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s +// RUN: %clang -v --target=i386-apple-macosx \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s // Make sure we map libdevice bitcode files to proper GPUs. These // tests use Inputs/CUDA_80 which has full set of libdevice files. @@ -51,33 +59,51 @@ // RUN: | FileCheck %s -check-prefix COMMON \ // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50 - // Verify that -nocudainc prevents adding include path to CUDA headers. 
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 + // We should not add any CUDA include paths if there's no valid CUDA installation // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC // Verify that we get an error if there's no libdevice library to link with. // NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_20 for this purpose. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_20 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE // Verify that -nocudalib prevents linking libdevice bitcode in. 
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE + // Verify that we don't add include paths, link with libdevice or // -include __clang_cuda_runtime_wrapper.h without valid CUDA installation. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ // RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON \ +// RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE // Verify that C++ include paths are passed for both host and device frontends. // RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \ Index: clang/test/Driver/cuda-external-tools.cu =================================================================== --- clang/test/Driver/cuda-external-tools.cu +++ clang/test/Driver/cuda-external-tools.cu @@ -1,4 +1,5 @@ -// Tests that ptxas and fatbinary are correctly during CUDA compilation. +// Tests that ptxas and fatbinary are invoked correctly during CUDA +// compilation. 
// // REQUIRES: clang-driver // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target // @@ -56,6 +57,14 @@ // RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \ // RUN: -check-prefix FATBINARY-EXTRA %s +// MacOS spot-checks +// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s +// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s +// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s + // Match clang job that produces PTX assembly. // CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" // SM20: "-target-cpu" "sm_20" Index: clang/test/Driver/cuda-macosx.cu =================================================================== --- /dev/null +++ clang/test/Driver/cuda-macosx.cu @@ -0,0 +1,8 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target +// +// RUN: %clang -v --target=i386-apple-macosx \ +// RUN: --sysroot=%S/Inputs/CUDA-macosx 2>&1 | FileCheck %s + +// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-macosx/usr/local/cuda