Index: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td @@ -26,6 +26,9 @@ def err_drv_no_cuda_installation : Error< "cannot find CUDA installation. Provide its path via --cuda-path, or pass " "-nocudainc to build without CUDA includes.">; +def err_drv_no_cuda_libdevice : Error< + "cannot find libdevice for %0. Provide path to different CUDA installation " + "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">; def err_drv_cuda_version_too_low : Error< "GPU arch %1 requires CUDA version at least %3, but installation at %0 is %2. " "Use --cuda-path to specify a different CUDA install, or pass " Index: cfe/trunk/lib/Driver/ToolChains.cpp =================================================================== --- cfe/trunk/lib/Driver/ToolChains.cpp +++ cfe/trunk/lib/Driver/ToolChains.cpp @@ -1791,22 +1791,32 @@ LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); LibDeviceMap[GpuArch] = FilePath.str(); // Insert map entries for specifc devices with this compute capability. + // NVCC's choice of libdevice library version is rather peculiar: + // http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#version-selection + // TODO: this will need to be updated once CUDA-8 is released. if (GpuArch == "compute_20") { LibDeviceMap["sm_20"] = FilePath; LibDeviceMap["sm_21"] = FilePath; + LibDeviceMap["sm_32"] = FilePath; } else if (GpuArch == "compute_30") { LibDeviceMap["sm_30"] = FilePath; - LibDeviceMap["sm_32"] = FilePath; - } else if (GpuArch == "compute_35") { - LibDeviceMap["sm_35"] = FilePath; - LibDeviceMap["sm_37"] = FilePath; - } else if (GpuArch == "compute_50") { + // compute_30 is the fallback libdevice variant for sm_30+, + // unless CUDA specifies different version for specific GPU + // arch. LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_52"] = FilePath; LibDeviceMap["sm_53"] = FilePath; + // sm_6? are currently all aliases for sm_53 in LLVM and + // should use compute_30. LibDeviceMap["sm_60"] = FilePath; LibDeviceMap["sm_61"] = FilePath; LibDeviceMap["sm_62"] = FilePath; + } else if (GpuArch == "compute_35") { + LibDeviceMap["sm_35"] = FilePath; + LibDeviceMap["sm_37"] = FilePath; + } else if (GpuArch == "compute_50") { + // NVCC does not use compute_50 libdevice at all at the moment. + // The version that's shipped with CUDA-7.5 is a copy of compute_30. } } @@ -4759,18 +4769,23 @@ if (DriverArgs.hasArg(options::OPT_nocudalib)) return; - std::string LibDeviceFile = CudaInstallation.getLibDeviceFile( - DriverArgs.getLastArgValue(options::OPT_march_EQ)); - if (!LibDeviceFile.empty()) { - CC1Args.push_back("-mlink-cuda-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); - - // Libdevice in CUDA-7.0 requires PTX version that's more recent - // than LLVM defaults to. Use PTX4.2 which is the PTX version that - // came with CUDA-7.0. - CC1Args.push_back("-target-feature"); - CC1Args.push_back("+ptx42"); + StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + assert(!GpuArch.empty() && "Must have an explicit GPU arch."); + std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); + + if (LibDeviceFile.empty()) { + getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; + return; } + + CC1Args.push_back("-mlink-cuda-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); + + // Libdevice in CUDA-7.0 requires PTX version that's more recent + // than LLVM defaults to. Use PTX4.2 which is the PTX version that + // came with CUDA-7.0. + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+ptx42"); } void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, Index: cfe/trunk/test/Driver/cuda-detect.cu =================================================================== --- cfe/trunk/test/Driver/cuda-detect.cu +++ cfe/trunk/test/Driver/cuda-detect.cu @@ -10,15 +10,41 @@ // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s -// Make sure we map libdevice bitcode files to proper GPUs. +// Make sure we map libdevice bitcode files to proper GPUs. These +// tests use Inputs/CUDA_80 which has full set of libdevice files. +// However, libdevice mapping only matches CUDA-7.x at the moment. +// sm_2x, sm_32 -> compute_20 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_32 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20 +// sm_30, sm_5x and sm_6x map to compute_30 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ -// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE21 +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30 +// sm_35 and sm_37 -> compute_35 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \ // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_37 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \ +// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 + // Verify that -nocudainc prevents adding include path to CUDA headers. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ @@ -29,12 +55,13 @@ // RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC -// Verify that no options related to bitcode linking are passes if -// there's no bitcode file. +// Verify that we get an error if there's no libdevice library to link with. +// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_30 for this purpose. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ -// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE -// .. or if we explicitly passed -nocudalib +// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE + +// Verify that -nocudalib prevents linking libdevice bitcode in. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE @@ -48,16 +75,19 @@ // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda // NOCUDA-NOT: Found CUDA installation: +// MISSINGLIBDEVICE: error: cannot find libdevice for sm_30. + // COMMON: "-triple" "nvptx-nvidia-cuda" // COMMON-SAME: "-fcuda-is-device" // LIBDEVICE-SAME: "-mlink-cuda-bitcode" // NOLIBDEVICE-NOT: "-mlink-cuda-bitcode" -// LIBDEVICE21-SAME: libdevice.compute_20.10.bc +// LIBDEVICE20-SAME: libdevice.compute_20.10.bc +// LIBDEVICE30-SAME: libdevice.compute_30.10.bc // LIBDEVICE35-SAME: libdevice.compute_35.10.bc // NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc // LIBDEVICE-SAME: "-target-feature" "+ptx42" // NOLIBDEVICE-NOT: "-target-feature" "+ptx42" -// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include" +// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include" // NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/cuda/include" // CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h" // NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"