diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -369,6 +369,10 @@
   case llvm::Triple::amdgcn:
     amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features);
     break;
+  case llvm::Triple::nvptx:
+  case llvm::Triple::nvptx64:
+    NVPTX::getNVPTXTargetFeatures(D, Triple, Args, Features);
+    break;
   case llvm::Triple::m68k:
     m68k::getM68kTargetFeatures(D, Triple, Args, Features);
     break;
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -124,6 +124,18 @@
                     const char *LinkingOutput) const override;
 };
 
+/// Append the PTX-version target feature ("+ptxNN") matching a CUDA release to
+/// \p Features.
+///
+/// \param CudaVersion CUDA version to map to a PTX level. When not provided,
+/// a CudaInstallationDetector is constructed from \p D, \p Triple and
+/// \p Args and the version it reports is used instead. Versions without an
+/// explicit mapping fall back to "+ptx42".
+void getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
+                            const llvm::opt::ArgList &Args,
+                            std::vector<StringRef> &Features,
+                            Optional<clang::CudaVersion> CudaVersion = None);
+
 } // end namespace NVPTX
 } // end namespace tools
 
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -630,6 +630,45 @@
       Exec, CmdArgs, Inputs, Output));
 }
 
+void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
+                                   const llvm::opt::ArgList &Args,
+                                   std::vector<StringRef> &Features,
+                                   Optional<clang::CudaVersion> CudaVersion) {
+  // The caller did not supply a version: detect the CUDA installation here.
+  if (!CudaVersion) {
+    CudaInstallationDetector CudaInstallation(D, Triple, Args);
+    CudaVersion = CudaInstallation.version();
+  }
+
+  // New CUDA versions often introduce new instructions that are only supported
+  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
+  // back-end.
+  const char *PtxFeature = nullptr;
+  switch (*CudaVersion) {
+#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
+  case CudaVersion::CUDA_##CUDA_VER:                                           \
+    PtxFeature = "+ptx" #PTX_VER;                                              \
+    break;
+    CASE_CUDA_VERSION(115, 75);
+    CASE_CUDA_VERSION(114, 74);
+    CASE_CUDA_VERSION(113, 73);
+    CASE_CUDA_VERSION(112, 72);
+    CASE_CUDA_VERSION(111, 71);
+    CASE_CUDA_VERSION(110, 70);
+    CASE_CUDA_VERSION(102, 65);
+    CASE_CUDA_VERSION(101, 64);
+    CASE_CUDA_VERSION(100, 63);
+    CASE_CUDA_VERSION(92, 61);
+    CASE_CUDA_VERSION(91, 61);
+    CASE_CUDA_VERSION(90, 60);
+#undef CASE_CUDA_VERSION
+  default:
+    // Every version without an explicit entry above gets PTX 4.2.
+    PtxFeature = "+ptx42";
+  }
+  Features.push_back(PtxFeature);
+}
+
 /// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
 /// which isn't properly a linker but nonetheless performs the step of stitching
 /// together object files from the assembler into a single blob.
@@ -701,32 +740,11 @@
 
   clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
 
-  // New CUDA versions often introduce new instructions that are only supported
-  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
-  // back-end.
-  const char *PtxFeature = nullptr;
-  switch (CudaInstallationVersion) {
-#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
-  case CudaVersion::CUDA_##CUDA_VER:                                           \
-    PtxFeature = "+ptx" #PTX_VER;                                              \
-    break;
-    CASE_CUDA_VERSION(115, 75);
-    CASE_CUDA_VERSION(114, 74);
-    CASE_CUDA_VERSION(113, 73);
-    CASE_CUDA_VERSION(112, 72);
-    CASE_CUDA_VERSION(111, 71);
-    CASE_CUDA_VERSION(110, 70);
-    CASE_CUDA_VERSION(102, 65);
-    CASE_CUDA_VERSION(101, 64);
-    CASE_CUDA_VERSION(100, 63);
-    CASE_CUDA_VERSION(92, 61);
-    CASE_CUDA_VERSION(91, 61);
-    CASE_CUDA_VERSION(90, 60);
-#undef CASE_CUDA_VERSION
-  default:
-    PtxFeature = "+ptx42";
-  }
-  CC1Args.append({"-target-feature", PtxFeature});
+  std::vector<StringRef> Features;
+  NVPTX::getNVPTXTargetFeatures(getDriver(), getTriple(), DriverArgs, Features,
+                                CudaInstallationVersion);
+  for (StringRef PtxFeature : Features)
+    CC1Args.append({"-target-feature", DriverArgs.MakeArgString(PtxFeature)});
   if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
                          options::OPT_fno_cuda_short_ptr, false))
     CC1Args.append({"-mllvm", "--nvptx-short-ptr"});