Index: clang/include/clang/Basic/Cuda.h =================================================================== --- clang/include/clang/Basic/Cuda.h +++ clang/include/clang/Basic/Cuda.h @@ -21,6 +21,7 @@ CUDA_70, CUDA_75, CUDA_80, + CUDA_90, }; const char *CudaVersionToString(CudaVersion V); @@ -41,6 +42,7 @@ SM_60, SM_61, SM_62, + SM_70, }; const char *CudaArchToString(CudaArch A); @@ -60,6 +62,7 @@ COMPUTE_60, COMPUTE_61, COMPUTE_62, + COMPUTE_70, }; const char *CudaVirtualArchToString(CudaVirtualArch A); Index: clang/lib/Basic/Cuda.cpp =================================================================== --- clang/lib/Basic/Cuda.cpp +++ clang/lib/Basic/Cuda.cpp @@ -16,6 +16,8 @@ return "7.5"; case CudaVersion::CUDA_80: return "8.0"; + case CudaVersion::CUDA_90: + return "9.0"; } llvm_unreachable("invalid enum"); } @@ -48,6 +50,8 @@ return "sm_61"; case CudaArch::SM_62: return "sm_62"; + case CudaArch::SM_70: + return "sm_70"; } llvm_unreachable("invalid enum"); } @@ -66,6 +70,7 @@ .Case("sm_60", CudaArch::SM_60) .Case("sm_61", CudaArch::SM_61) .Case("sm_62", CudaArch::SM_62) + .Case("sm_70", CudaArch::SM_70) .Default(CudaArch::UNKNOWN); } @@ -95,6 +100,8 @@ return "compute_61"; case CudaVirtualArch::COMPUTE_62: return "compute_62"; + case CudaVirtualArch::COMPUTE_70: + return "compute_70"; } llvm_unreachable("invalid enum"); } @@ -112,6 +119,7 @@ .Case("compute_60", CudaVirtualArch::COMPUTE_60) .Case("compute_61", CudaVirtualArch::COMPUTE_61) .Case("compute_62", CudaVirtualArch::COMPUTE_62) + .Case("compute_70", CudaVirtualArch::COMPUTE_70) .Default(CudaVirtualArch::UNKNOWN); } @@ -142,6 +150,8 @@ return CudaVirtualArch::COMPUTE_61; case CudaArch::SM_62: return CudaVirtualArch::COMPUTE_62; + case CudaArch::SM_70: + return CudaVirtualArch::COMPUTE_70; } llvm_unreachable("invalid enum"); } @@ -164,6 +174,8 @@ case CudaArch::SM_61: case CudaArch::SM_62: return CudaVersion::CUDA_80; + case CudaArch::SM_70: + return CudaVersion::CUDA_90; } llvm_unreachable("invalid enum"); 
} Index: clang/lib/Basic/Targets/NVPTX.cpp =================================================================== --- clang/lib/Basic/Targets/NVPTX.cpp +++ clang/lib/Basic/Targets/NVPTX.cpp @@ -183,6 +183,8 @@ return "610"; case CudaArch::SM_62: return "620"; + case CudaArch::SM_70: + return "700"; } llvm_unreachable("unhandled CudaArch"); }(); Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -49,6 +49,8 @@ return CudaVersion::CUDA_75; if (Major == 8 && Minor == 0) return CudaVersion::CUDA_80; + if (Major == 9 && Minor == 0) + return CudaVersion::CUDA_90; return CudaVersion::UNKNOWN; } @@ -112,43 +114,55 @@ Version = ParseCudaVersionFile((*VersionFile)->getBuffer()); } - std::error_code EC; - for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef FilePath = LI->path(); - StringRef FileName = llvm::sys::path::filename(FilePath); - // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc - const StringRef LibDeviceName = "libdevice."; - if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) - continue; - StringRef GpuArch = FileName.slice( - LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); - LibDeviceMap[GpuArch] = FilePath.str(); - // Insert map entries for specifc devices with this compute - // capability. NVCC's choice of the libdevice library version is - // rather peculiar and depends on the CUDA version. 
- if (GpuArch == "compute_20") { - LibDeviceMap["sm_20"] = FilePath; - LibDeviceMap["sm_21"] = FilePath; - LibDeviceMap["sm_32"] = FilePath; - } else if (GpuArch == "compute_30") { - LibDeviceMap["sm_30"] = FilePath; - if (Version < CudaVersion::CUDA_80) { - LibDeviceMap["sm_50"] = FilePath; - LibDeviceMap["sm_52"] = FilePath; - LibDeviceMap["sm_53"] = FilePath; - } - LibDeviceMap["sm_60"] = FilePath; - LibDeviceMap["sm_61"] = FilePath; - LibDeviceMap["sm_62"] = FilePath; - } else if (GpuArch == "compute_35") { - LibDeviceMap["sm_35"] = FilePath; - LibDeviceMap["sm_37"] = FilePath; - } else if (GpuArch == "compute_50") { - if (Version >= CudaVersion::CUDA_80) { - LibDeviceMap["sm_50"] = FilePath; - LibDeviceMap["sm_52"] = FilePath; - LibDeviceMap["sm_53"] = FilePath; + if (Version == CudaVersion::CUDA_90) { + // CUDA-9 uses a single libdevice file for all GPU variants. + std::string FilePath = LibDevicePath + "/libdevice.10.bc"; + if (FS.exists(FilePath)) { + for (const char *GpuArch : + {"sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", + "sm_52", "sm_53", "sm_60", "sm_61", "sm_62", "sm_70"}) + LibDeviceMap[GpuArch] = FilePath; + } + } else { + std::error_code EC; + for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef FilePath = LI->path(); + StringRef FileName = llvm::sys::path::filename(FilePath); + // Process all bitcode filenames that look like + // libdevice.compute_XX.YY.bc + const StringRef LibDeviceName = "libdevice."; + if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) + continue; + StringRef GpuArch = FileName.slice( + LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); + LibDeviceMap[GpuArch] = FilePath.str(); + // Insert map entries for specific devices with this compute + // capability. NVCC's choice of the libdevice library version is + // rather peculiar and depends on the CUDA version.
+ if (GpuArch == "compute_20") { + LibDeviceMap["sm_20"] = FilePath; + LibDeviceMap["sm_21"] = FilePath; + LibDeviceMap["sm_32"] = FilePath; + } else if (GpuArch == "compute_30") { + LibDeviceMap["sm_30"] = FilePath; + if (Version < CudaVersion::CUDA_80) { + LibDeviceMap["sm_50"] = FilePath; + LibDeviceMap["sm_52"] = FilePath; + LibDeviceMap["sm_53"] = FilePath; + } + LibDeviceMap["sm_60"] = FilePath; + LibDeviceMap["sm_61"] = FilePath; + LibDeviceMap["sm_62"] = FilePath; + } else if (GpuArch == "compute_35") { + LibDeviceMap["sm_35"] = FilePath; + LibDeviceMap["sm_37"] = FilePath; + } else if (GpuArch == "compute_50") { + if (Version >= CudaVersion::CUDA_80) { + LibDeviceMap["sm_50"] = FilePath; + LibDeviceMap["sm_52"] = FilePath; + LibDeviceMap["sm_53"] = FilePath; + } } } } Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h =================================================================== --- clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -62,7 +62,7 @@ #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" -#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000 +#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000 #error "Unsupported CUDA version!" #endif @@ -86,7 +86,11 @@ #define __COMMON_FUNCTIONS_H__ #undef __CUDACC__ +#if CUDA_VERSION < 9000 #define __CUDABE__ +#else +#define __CUDA_LIBDEVICE__ +#endif // Disables definitions of device-side runtime support stubs in // cuda_device_runtime_api.h #include "driver_types.h" @@ -94,6 +98,7 @@ #include "host_defines.h" #undef __CUDABE__ +#undef __CUDA_LIBDEVICE__ #define __CUDACC__ #include "cuda_runtime.h" @@ -105,7 +110,9 @@ #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) +#if CUDA_VERSION < 9000 #include "crt/device_runtime.h" +#endif #include "crt/host_runtime.h" // device_runtime.h defines __cxa_* macros that will conflict with // cxxabi.h. 
Index: clang/test/Driver/cuda-arch-translation.cu =================================================================== --- clang/test/Driver/cuda-arch-translation.cu +++ clang/test/Driver/cuda-arch-translation.cu @@ -5,26 +5,36 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// CHECK:fatbinary - // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM20 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_21 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM21 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM21 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_30 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM30 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM30 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_32 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM32 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM32 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_35 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM35 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_37 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM37 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM37 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_50 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM50 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM50 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_52 %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM52 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM52 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_53 %s 2>&1 \ -// RUN: | FileCheck 
-check-prefix ARCH64 -check-prefix SM53 %s +// RUN: | FileCheck -check-prefixes=COMMON,SM53 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_60 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,SM60 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_61 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,SM61 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_62 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,SM62 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,SM70 %s + +// COMMON: ptxas +// COMMON-SAME: -m64 +// COMMON: fatbinary // SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20 // SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20 @@ -35,3 +45,7 @@ // SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50 // SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52 // SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53 +// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60 +// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61 +// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62 +// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70 Index: llvm/lib/Target/NVPTX/NVPTX.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTX.td +++ llvm/lib/Target/NVPTX/NVPTX.td @@ -50,6 +50,8 @@ "Target SM 6.1">; def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62", "Target SM 6.2">; +def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70", + "Target SM 7.0">; def SATOM : SubtargetFeature<"satom", "HasAtomScope", "true", "Atomic operations with scope">; @@ -67,6 +69,8 @@ "Use PTX version 4.3">; def PTX50 : SubtargetFeature<"ptx50", "PTXVersion", "50", "Use PTX version 5.0">; +def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60", + "Use PTX version 6.0">; 
//===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -87,6 +91,7 @@ def : Proc<"sm_60", [SM60, PTX50, SATOM]>; def : Proc<"sm_61", [SM61, PTX50, SATOM]>; def : Proc<"sm_62", [SM62, PTX50, SATOM]>; +def : Proc<"sm_70", [SM70, PTX60, SATOM]>; def NVPTXInstrInfo : InstrInfo { } Index: llvm/test/CodeGen/NVPTX/sm-version-70.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/NVPTX/sm-version-70.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_70 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s + +; CHECK: .version 6.0 +; CHECK: .target sm_70