diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -444,7 +444,8 @@ CUDA Support in Clang --------------------- -- ... +- Clang now supports CUDA SDK up to 11.8. +- Added support for targeting sm_{87,89,90} GPUs. RISC-V Support in Clang ----------------------- diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -23,7 +23,13 @@ #pragma push_macro("SM_75") #pragma push_macro("SM_80") #pragma push_macro("SM_86") -#define SM_86 "sm_86" +#pragma push_macro("SM_87") +#pragma push_macro("SM_89") +#pragma push_macro("SM_90") +#define SM_90 "sm_90" +#define SM_89 "sm_89|" SM_90 +#define SM_87 "sm_87|" SM_89 +#define SM_86 "sm_86|" SM_87 #define SM_80 "sm_80|" SM_86 #define SM_75 "sm_75|" SM_80 #define SM_72 "sm_72|" SM_75 @@ -45,7 +51,13 @@ #pragma push_macro("PTX73") #pragma push_macro("PTX74") #pragma push_macro("PTX75") -#define PTX75 "ptx75" +#pragma push_macro("PTX76") +#pragma push_macro("PTX77") +#pragma push_macro("PTX78") +#define PTX78 "ptx78" +#define PTX77 "ptx77|" PTX78 +#define PTX76 "ptx76|" PTX77 +#define PTX75 "ptx75|" PTX76 #define PTX74 "ptx74|" PTX75 #define PTX73 "ptx73|" PTX74 #define PTX72 "ptx72|" PTX73 @@ -944,6 +956,9 @@ #pragma pop_macro("SM_75") #pragma pop_macro("SM_80") #pragma pop_macro("SM_86") +#pragma pop_macro("SM_87") +#pragma pop_macro("SM_89") +#pragma pop_macro("SM_90") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -956,3 +971,6 @@ #pragma pop_macro("PTX73") #pragma pop_macro("PTX74") #pragma pop_macro("PTX75") +#pragma pop_macro("PTX76") +#pragma pop_macro("PTX77") +#pragma pop_macro("PTX78") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -34,9 
+34,12 @@ CUDA_113, CUDA_114, CUDA_115, + CUDA_116, + CUDA_117, + CUDA_118, FULLY_SUPPORTED = CUDA_115, PARTIALLY_SUPPORTED = - CUDA_115, // Partially supported. Proceed with a warning. + CUDA_118, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -63,6 +66,9 @@ SM_75, SM_80, SM_86, + SM_87, + SM_89, + SM_90, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -34,6 +34,9 @@ CUDA_ENTRY(11, 3), CUDA_ENTRY(11, 4), CUDA_ENTRY(11, 5), + CUDA_ENTRY(11, 6), + CUDA_ENTRY(11, 7), + CUDA_ENTRY(11, 8), {"", CudaVersion::NEW, std::numeric_limits<int>::max()}, {"unknown", CudaVersion::UNKNOWN, 0} // End of list tombstone. }; @@ -86,6 +89,9 @@ SM(70), SM(72), // Volta SM(75), // Turing SM(80), SM(86), // Ampere + SM(87), // Jetson/Drive AGX Orin + SM(89), // Ada Lovelace + SM(90), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -191,6 +197,11 @@ return CudaVersion::CUDA_110; case CudaArch::SM_86: return CudaVersion::CUDA_111; + case CudaArch::SM_87: + return CudaVersion::CUDA_114; + case CudaArch::SM_89: + case CudaArch::SM_90: + return CudaVersion::CUDA_118; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -246,6 +246,12 @@ return "800"; case CudaArch::SM_86: return "860"; + case CudaArch::SM_87: + return "870"; + case CudaArch::SM_89: + return "890"; + case CudaArch::SM_90: + return "900"; } llvm_unreachable("unhandled CudaArch"); }(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3634,6 +3634,9 @@ case CudaArch::SM_75: case 
CudaArch::SM_80: case CudaArch::SM_86: + case CudaArch::SM_87: + case CudaArch::SM_89: + case CudaArch::SM_90: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -67,6 +67,12 @@ return CudaVersion::CUDA_114; if (raw_version < 11060) return CudaVersion::CUDA_115; + if (raw_version < 11070) + return CudaVersion::CUDA_116; + if (raw_version < 11080) + return CudaVersion::CUDA_117; + if (raw_version < 11090) + return CudaVersion::CUDA_118; return CudaVersion::NEW; } @@ -572,6 +578,9 @@ case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(118, 78); + CASE_CUDA_VERSION(117, 77); + CASE_CUDA_VERSION(116, 76); CASE_CUDA_VERSION(115, 75); CASE_CUDA_VERSION(114, 74); CASE_CUDA_VERSION(113, 73); diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -59,6 +59,12 @@ "Target SM 8.0">; def SM86 : SubtargetFeature<"sm_86", "SmVersion", "86", "Target SM 8.6">; +def SM87 : SubtargetFeature<"sm_87", "SmVersion", "87", + "Target SM 8.7">; +def SM89 : SubtargetFeature<"sm_89", "SmVersion", "89", + "Target SM 8.9">; +def SM90 : SubtargetFeature<"sm_90", "SmVersion", "90", + "Target SM 9.0">; // PTX Versions def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", @@ -95,6 +101,12 @@ "Use PTX version 7.4">; def PTX75 : SubtargetFeature<"ptx75", "PTXVersion", "75", "Use PTX version 7.5">; +def PTX76 : SubtargetFeature<"ptx76", "PTXVersion", "76", + "Use PTX version 7.6">; +def PTX77 : SubtargetFeature<"ptx77", "PTXVersion", "77", + "Use PTX version 7.7">; +def PTX78 : SubtargetFeature<"ptx78", "PTXVersion", "78", + "Use PTX version 7.8">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
@@ -120,6 +132,9 @@ def : Proc<"sm_75", [SM75, PTX63]>; def : Proc<"sm_80", [SM80, PTX70]>; def : Proc<"sm_86", [SM86, PTX71]>; +def : Proc<"sm_87", [SM87, PTX74]>; +def : Proc<"sm_89", [SM89, PTX78]>; +def : Proc<"sm_90", [SM90, PTX78]>; def NVPTXInstrInfo : InstrInfo { }