Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -405,7 +405,6 @@ bool IsUsingLTO(const llvm::opt::ArgList &Args) const; -private: /// \brief Retrieves a ToolChain for a particular \p Target triple. /// /// Will cache ToolChains for the life of the driver object, and create them @@ -415,6 +414,7 @@ /// @} +private: /// \brief Get bitmasks for which option flags to include and exclude based on /// the driver mode. std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks() const; Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1558,6 +1558,8 @@ def no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_terms">; def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option]>, HelpText<"Disable builtin #include directories">; +def nocudainc : Flag<["-"], "nocudainc">; +def nocudalib : Flag<["-"], "nocudalib">; def nodefaultlibs : Flag<["-"], "nodefaultlibs">; def nofixprebinding : Flag<["-"], "nofixprebinding">; def nolibc : Flag<["-"], "nolibc">; Index: include/clang/Driver/ToolChain.h =================================================================== --- include/clang/Driver/ToolChain.h +++ include/clang/Driver/ToolChain.h @@ -350,6 +350,10 @@ AddFastMathRuntimeIfAvailable(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// \brief Add arguments to use system-specific CUDA includes. + virtual void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + /// \brief Return sanitizers which are available in this toolchain.
virtual SanitizerMask getSupportedSanitizers() const; }; Index: lib/Driver/ToolChain.cpp =================================================================== --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -501,3 +501,6 @@ Res |= CFIICall; return Res; } + +void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const {} Index: lib/Driver/ToolChains.h =================================================================== --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -165,6 +165,7 @@ std::string CudaLibPath; std::string CudaLibDevicePath; std::string CudaIncludePath; + llvm::StringMap<std::string> CudaLibDeviceMap; public: CudaInstallationDetector() : IsValid(false) {} @@ -185,6 +186,9 @@ /// \brief Get the detected Cuda device library path. StringRef getLibDevicePath() const { return CudaLibDevicePath; } /// \brief Get libdevice file for given architecture + std::string getLibDeviceFile(StringRef Gpu) const { + return CudaLibDeviceMap.lookup(Gpu); + } }; CudaInstallationDetector CudaInstallation; @@ -722,6 +726,8 @@ void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; bool isPIEDefault() const override; SanitizerMask getSupportedSanitizers() const override; Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -1512,6 +1512,31 @@ llvm::sys::fs::exists(CudaLibDevicePath))) continue; + const StringRef LibDeviceName = "libdevice."; + std::error_code EC; + for (llvm::sys::fs::directory_iterator LI(CudaLibDevicePath, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef FilePath = LI->path(); + StringRef FileName = llvm::sys::path::filename(FilePath); + // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc + 
if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) + continue; + StringRef GpuArch = FileName.slice( + LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); + CudaLibDeviceMap[GpuArch] = FilePath.str(); + // Insert map entries for specific devices with this compute capability. + if (GpuArch == "compute_20") { + CudaLibDeviceMap["sm_20"] = FilePath; + CudaLibDeviceMap["sm_21"] = FilePath; + } else if (GpuArch == "compute_30") { + CudaLibDeviceMap["sm_30"] = FilePath; + CudaLibDeviceMap["sm_32"] = FilePath; + } else if (GpuArch == "compute_35") { + CudaLibDeviceMap["sm_35"] = FilePath; + CudaLibDeviceMap["sm_37"] = FilePath; + } + } + IsValid = true; break; } @@ -3785,6 +3810,15 @@ } } +void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_nocudainc)) + return; + + if (CudaInstallation.isValid()) + addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath()); +} + bool Linux::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); } SanitizerMask Linux::getSupportedSanitizers() const { @@ -3855,6 +3889,23 @@ llvm::opt::ArgStringList &CC1Args) const { Linux::addClangTargetOptions(DriverArgs, CC1Args); CC1Args.push_back("-fcuda-is-device"); + + if (DriverArgs.hasArg(options::OPT_nocudalib)) + return; + + std::string LibDeviceFile = CudaInstallation.getLibDeviceFile( + DriverArgs.getLastArgValue(options::OPT_march_EQ)); + if (!LibDeviceFile.empty()) { + CC1Args.push_back("-mlink-bitcode-file"); + CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); + CC1Args.push_back("-fcuda-uses-libdevice"); + + // Libdevice in CUDA-7.0 requires PTX version that's more recent + // than LLVM defaults to. Use PTX4.2 which is the PTX version that + // came with CUDA-7.0. 
+ CC1Args.push_back("-target-feature"); + CC1Args.push_back("+ptx42"); + } } llvm::opt::DerivedArgList * Index: lib/Driver/Tools.h =================================================================== --- lib/Driver/Tools.h +++ lib/Driver/Tools.h @@ -55,7 +55,8 @@ const Driver &D, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const InputInfo &Output, - const InputInfoList &Inputs) const; + const InputInfoList &Inputs, + const char *AuxTriple) const; void AddAArch64TargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -248,7 +248,8 @@ const Driver &D, const ArgList &Args, ArgStringList &CmdArgs, const InputInfo &Output, - const InputInfoList &Inputs) const { + const InputInfoList &Inputs, + const char *AuxTriple) const { Arg *A; CheckPreprocessingOptions(D, Args); @@ -446,6 +447,16 @@ // Add system include arguments. getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); + + // Add CUDA include arguments + if (types::isCuda(Inputs[0].getType())) { + getToolChain().AddCudaIncludeArgs(Args, CmdArgs); + if (AuxTriple) { + const ToolChain &AuxTC = D.getToolChain(Args, llvm::Triple(AuxTriple)); + AuxTC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs); + AuxTC.AddClangSystemIncludeArgs(Args, CmdArgs); + } + } } // FIXME: Move to target hook. 
@@ -3202,9 +3213,12 @@ CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); - if (IsCuda && AuxTriple != nullptr) { - CmdArgs.push_back("-aux-triple"); - CmdArgs.push_back(AuxTriple); + if (IsCuda) { + if (AuxTriple != nullptr) { + CmdArgs.push_back("-aux-triple"); + CmdArgs.push_back(AuxTriple); + } + CmdArgs.push_back("-fcuda-target-overloads"); } if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || @@ -3981,7 +3995,7 @@ // // FIXME: Support -fpreprocessed if (types::getPreprocessedType(InputType) != types::TY_INVALID) - AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs); + AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs, AuxTriple); // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes // that "The compiler can only warn and ignore the option if not recognized". Index: test/Driver/cuda-detect.cu =================================================================== --- test/Driver/cuda-detect.cu +++ test/Driver/cuda-detect.cu @@ -1,10 +1,56 @@ // REQUIRES: clang-driver // REQUIRES: x86-registered-target // +// # Check that we properly detect CUDA installation. // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --sysroot=/tmp/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA // RUN: %clang -v --target=i386-unknown-linux \ +// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s +// RUN: %clang -v --target=i386-unknown-linux \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s +// Make sure we map libdevice bitcode files to proper GPUs. 
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE21 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 \ +// RUN: -check-prefix CUDAINC +// Verify that -nocudainc prevents adding include path to CUDA headers. +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 \ +// RUN: -check-prefix NOCUDAINC + +// Verify that no options related to bitcode linking are passed if +// there's no bitcode file. +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix NOLIBDEVICE +// .. 
or if we explicitly passed -nocudalib +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix NOLIBDEVICE + // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda // NOCUDA-NOT: Found CUDA installation: + +// LIBDEVICE: "-triple" "nvptx-nvidia-cuda" +// LIBDEVICE-SAME: "-fcuda-is-device" +// LIBDEVICE-SAME: "-mlink-bitcode-file" +// LIBDEVICE21-SAME: libdevice.compute_20.10.bc +// LIBDEVICE35-SAME: libdevice.compute_35.10.bc +// LIBDEVICE-SAME: "-fcuda-uses-libdevice" +// LIBDEVICE-SAME: "-target-feature" "+ptx42" +// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include" +// NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include" +// LIBDEVICE-SAME: "-x" "cuda" + +// NOLIBDEVICE: "-triple" "nvptx-nvidia-cuda" +// NOLIBDEVICE-SAME: "-fcuda-is-device" +// NOLIBDEVICE-NOT: "-mlink-bitcode-file" +// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc +// NOLIBDEVICE-NOT: "-fcuda-uses-libdevice" +// NOLIBDEVICE-NOT: "-target-feature" +// NOLIBDEVICE-SAME: "-x" "cuda"