// Patch summary: drops the CudaVirtualArch enum and its string helpers in
// favour of a single arch_names table mapping each CudaArch to its
// "sm_XX"/"gfxNNN" name and its "compute_XX"/"compute_amdgcn" virtual-arch
// name; adds the IsNVIDIAGpuArch()/IsAMDGpuArch() enum-range predicates; and
// has the driver fill LibDeviceMap by iterating over the CudaArch enum.
// - arch_names is declared `static const`: the visible code only reads it.
// - Template arguments are spelled out on static_cast<CudaArch> and on the
//   removed llvm::StringSwitch<...> lines.
// NOTE(review): MaxVersionForCudaArch now returns LATEST for every AMD arch,
// where several gfx targets previously returned CUDA_80, and the libdevice
// loop no longer consults Min/MaxVersionForCudaArch -- confirm both are
// intended.
// NOTE(review): std::find_if/std::begin need <algorithm>/<iterator>; the
// include block of Cuda.cpp is not part of this patch -- confirm it covers
// them.
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -72,36 +72,20 @@ GFX1012, LAST, }; -const char *CudaArchToString(CudaArch A); -// The input should have the form "sm_20". -CudaArch StringToCudaArch(llvm::StringRef S); +static inline bool IsNVIDIAGpuArch(CudaArch A) { + return A >= CudaArch::SM_20 && A < CudaArch::GFX600; +} -enum class CudaVirtualArch { - UNKNOWN, - COMPUTE_20, - COMPUTE_30, - COMPUTE_32, - COMPUTE_35, - COMPUTE_37, - COMPUTE_50, - COMPUTE_52, - COMPUTE_53, - COMPUTE_60, - COMPUTE_61, - COMPUTE_62, - COMPUTE_70, - COMPUTE_72, - COMPUTE_75, - COMPUTE_AMDGCN, -}; -const char *CudaVirtualArchToString(CudaVirtualArch A); +static inline bool IsAMDGpuArch(CudaArch A) { + return A >= CudaArch::GFX600 && A < CudaArch::LAST; +} -// The input should have the form "compute_20". -CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S); +const char *CudaArchToString(CudaArch A); +const char *CudaArchToVirtualArchString(CudaArch A); -/// Get the compute_xx corresponding to an sm_yy. -CudaVirtualArch VirtualArchForCudaArch(CudaArch A); +// The input should have the form "sm_20". +CudaArch StringToCudaArch(llvm::StringRef S); /// Get the earliest CudaVersion that supports the given CudaArch. 
CudaVersion MinVersionForCudaArch(CudaArch A); diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -45,250 +45,81 @@ .Default(CudaVersion::UNKNOWN); } -const char *CudaArchToString(CudaArch A) { - switch (A) { - case CudaArch::LAST: - break; - case CudaArch::UNKNOWN: - return "unknown"; - case CudaArch::SM_20: - return "sm_20"; - case CudaArch::SM_21: - return "sm_21"; - case CudaArch::SM_30: - return "sm_30"; - case CudaArch::SM_32: - return "sm_32"; - case CudaArch::SM_35: - return "sm_35"; - case CudaArch::SM_37: - return "sm_37"; - case CudaArch::SM_50: - return "sm_50"; - case CudaArch::SM_52: - return "sm_52"; - case CudaArch::SM_53: - return "sm_53"; - case CudaArch::SM_60: - return "sm_60"; - case CudaArch::SM_61: - return "sm_61"; - case CudaArch::SM_62: - return "sm_62"; - case CudaArch::SM_70: - return "sm_70"; - case CudaArch::SM_72: - return "sm_72"; - case CudaArch::SM_75: - return "sm_75"; - case CudaArch::GFX600: // tahiti - return "gfx600"; - case CudaArch::GFX601: // pitcairn, verde, oland,hainan - return "gfx601"; - case CudaArch::GFX700: // kaveri - return "gfx700"; - case CudaArch::GFX701: // hawaii - return "gfx701"; - case CudaArch::GFX702: // 290,290x,R390,R390x - return "gfx702"; - case CudaArch::GFX703: // kabini mullins - return "gfx703"; - case CudaArch::GFX704: // bonaire - return "gfx704"; - case CudaArch::GFX801: // carrizo - return "gfx801"; - case CudaArch::GFX802: // tonga,iceland - return "gfx802"; - case CudaArch::GFX803: // fiji,polaris10 - return "gfx803"; - case CudaArch::GFX810: // stoney - return "gfx810"; - case CudaArch::GFX900: // vega, instinct - return "gfx900"; - case CudaArch::GFX902: // TBA - return "gfx902"; - case CudaArch::GFX904: // TBA - return "gfx904"; - case CudaArch::GFX906: // TBA - return "gfx906"; - case CudaArch::GFX908: // TBA - return "gfx908"; - case CudaArch::GFX909: // TBA - return "gfx909"; - case CudaArch::GFX1010: // 
TBA - return "gfx1010"; - case CudaArch::GFX1011: // TBA - return "gfx1011"; - case CudaArch::GFX1012: // TBA - return "gfx1012"; - } - llvm_unreachable("invalid enum"); -} +struct CudaArchToStringMap { + CudaArch arch; + const char *arch_name; + const char *virtual_arch_name; +}; -CudaArch StringToCudaArch(llvm::StringRef S) { - return llvm::StringSwitch<CudaArch>(S) - .Case("sm_20", CudaArch::SM_20) - .Case("sm_21", CudaArch::SM_21) - .Case("sm_30", CudaArch::SM_30) - .Case("sm_32", CudaArch::SM_32) - .Case("sm_35", CudaArch::SM_35) - .Case("sm_37", CudaArch::SM_37) - .Case("sm_50", CudaArch::SM_50) - .Case("sm_52", CudaArch::SM_52) - .Case("sm_53", CudaArch::SM_53) - .Case("sm_60", CudaArch::SM_60) - .Case("sm_61", CudaArch::SM_61) - .Case("sm_62", CudaArch::SM_62) - .Case("sm_70", CudaArch::SM_70) - .Case("sm_72", CudaArch::SM_72) - .Case("sm_75", CudaArch::SM_75) - .Case("gfx600", CudaArch::GFX600) - .Case("gfx601", CudaArch::GFX601) - .Case("gfx700", CudaArch::GFX700) - .Case("gfx701", CudaArch::GFX701) - .Case("gfx702", CudaArch::GFX702) - .Case("gfx703", CudaArch::GFX703) - .Case("gfx704", CudaArch::GFX704) - .Case("gfx801", CudaArch::GFX801) - .Case("gfx802", CudaArch::GFX802) - .Case("gfx803", CudaArch::GFX803) - .Case("gfx810", CudaArch::GFX810) - .Case("gfx900", CudaArch::GFX900) - .Case("gfx902", CudaArch::GFX902) - .Case("gfx904", CudaArch::GFX904) - .Case("gfx906", CudaArch::GFX906) - .Case("gfx908", CudaArch::GFX908) - .Case("gfx909", CudaArch::GFX909) - .Case("gfx1010", CudaArch::GFX1010) - .Case("gfx1011", CudaArch::GFX1011) - .Case("gfx1012", CudaArch::GFX1012) - .Default(CudaArch::UNKNOWN); -} +#define SM2(sm, ca) \ + { CudaArch::SM_##sm, "sm_" #sm, ca } +#define SM(sm) SM2(sm, "compute_" #sm) +#define GFX(gpu) \ + { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" } +static const CudaArchToStringMap arch_names[] = { + // clang-format off + SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi + SM(30), SM(32), SM(35), SM(37), // Kepler + SM(50), 
SM(52), SM(53), // 
Maxwell + SM(60), SM(61), SM(62), // Pascal + SM(70), SM(72), // Volta + SM(75), // Turing + GFX(600), // tahiti + GFX(601), // pitcairn, verde, oland,hainan + GFX(700), // kaveri + GFX(701), // hawaii + GFX(702), // 290,290x,R390,R390x + GFX(703), // kabini mullins + GFX(704), // bonaire + GFX(801), // carrizo + GFX(802), // tonga,iceland + GFX(803), // fiji,polaris10 + GFX(810), // stoney + GFX(900), // vega, instinct + GFX(902), GFX(904), GFX(906), GFX(908), GFX(909), + GFX(1010), GFX(1011), GFX(1012), + // clang-format on +}; +#undef SM +#undef SM2 +#undef GFX -const char *CudaVirtualArchToString(CudaVirtualArch A) { - switch (A) { - case CudaVirtualArch::UNKNOWN: +const char *CudaArchToString(CudaArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const CudaArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) return "unknown"; - case CudaVirtualArch::COMPUTE_20: - return "compute_20"; - case CudaVirtualArch::COMPUTE_30: - return "compute_30"; - case CudaVirtualArch::COMPUTE_32: - return "compute_32"; - case CudaVirtualArch::COMPUTE_35: - return "compute_35"; - case CudaVirtualArch::COMPUTE_37: - return "compute_37"; - case CudaVirtualArch::COMPUTE_50: - return "compute_50"; - case CudaVirtualArch::COMPUTE_52: - return "compute_52"; - case CudaVirtualArch::COMPUTE_53: - return "compute_53"; - case CudaVirtualArch::COMPUTE_60: - return "compute_60"; - case CudaVirtualArch::COMPUTE_61: - return "compute_61"; - case CudaVirtualArch::COMPUTE_62: - return "compute_62"; - case CudaVirtualArch::COMPUTE_70: - return "compute_70"; - case CudaVirtualArch::COMPUTE_72: - return "compute_72"; - case CudaVirtualArch::COMPUTE_75: - return "compute_75"; - case CudaVirtualArch::COMPUTE_AMDGCN: - return "compute_amdgcn"; - } - llvm_unreachable("invalid enum"); + return result->arch_name; } -CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) { - return llvm::StringSwitch<CudaVirtualArch>(S) - 
.Case("compute_20", 
CudaVirtualArch::COMPUTE_20) - .Case("compute_30", CudaVirtualArch::COMPUTE_30) - .Case("compute_32", CudaVirtualArch::COMPUTE_32) - .Case("compute_35", CudaVirtualArch::COMPUTE_35) - .Case("compute_37", CudaVirtualArch::COMPUTE_37) - .Case("compute_50", CudaVirtualArch::COMPUTE_50) - .Case("compute_52", CudaVirtualArch::COMPUTE_52) - .Case("compute_53", CudaVirtualArch::COMPUTE_53) - .Case("compute_60", CudaVirtualArch::COMPUTE_60) - .Case("compute_61", CudaVirtualArch::COMPUTE_61) - .Case("compute_62", CudaVirtualArch::COMPUTE_62) - .Case("compute_70", CudaVirtualArch::COMPUTE_70) - .Case("compute_72", CudaVirtualArch::COMPUTE_72) - .Case("compute_75", CudaVirtualArch::COMPUTE_75) - .Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN) - .Default(CudaVirtualArch::UNKNOWN); +const char *CudaArchToVirtualArchString(CudaArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const CudaArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) + return "unknown"; + return result->virtual_arch_name; } -CudaVirtualArch VirtualArchForCudaArch(CudaArch A) { - switch (A) { - case CudaArch::LAST: - break; - case CudaArch::UNKNOWN: - return CudaVirtualArch::UNKNOWN; - case CudaArch::SM_20: - case CudaArch::SM_21: - return CudaVirtualArch::COMPUTE_20; - case CudaArch::SM_30: - return CudaVirtualArch::COMPUTE_30; - case CudaArch::SM_32: - return CudaVirtualArch::COMPUTE_32; - case CudaArch::SM_35: - return CudaVirtualArch::COMPUTE_35; - case CudaArch::SM_37: - return CudaVirtualArch::COMPUTE_37; - case CudaArch::SM_50: - return CudaVirtualArch::COMPUTE_50; - case CudaArch::SM_52: - return CudaVirtualArch::COMPUTE_52; - case CudaArch::SM_53: - return CudaVirtualArch::COMPUTE_53; - case CudaArch::SM_60: - return CudaVirtualArch::COMPUTE_60; - case CudaArch::SM_61: - return CudaVirtualArch::COMPUTE_61; - case CudaArch::SM_62: - return CudaVirtualArch::COMPUTE_62; - case CudaArch::SM_70: - return 
CudaVirtualArch::COMPUTE_70; - case CudaArch::SM_72: - return CudaVirtualArch::COMPUTE_72; - case CudaArch::SM_75: - return CudaVirtualArch::COMPUTE_75; - case CudaArch::GFX600: - case CudaArch::GFX601: - case CudaArch::GFX700: - case CudaArch::GFX701: - case CudaArch::GFX702: - case CudaArch::GFX703: - case CudaArch::GFX704: - case CudaArch::GFX801: - case CudaArch::GFX802: - case CudaArch::GFX803: - case CudaArch::GFX810: - case CudaArch::GFX900: - case CudaArch::GFX902: - case CudaArch::GFX904: - case CudaArch::GFX906: - case CudaArch::GFX908: - case CudaArch::GFX909: - case CudaArch::GFX1010: - case CudaArch::GFX1011: - case CudaArch::GFX1012: - return CudaVirtualArch::COMPUTE_AMDGCN; - } - llvm_unreachable("invalid enum"); +CudaArch StringToCudaArch(llvm::StringRef S) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [S](const CudaArchToStringMap &map) { return S == map.arch_name; }); + if (result == std::end(arch_names)) + return CudaArch::UNKNOWN; + return result->arch; } CudaVersion MinVersionForCudaArch(CudaArch A) { - switch (A) { - case CudaArch::LAST: - break; - case CudaArch::UNKNOWN: + if (A == CudaArch::UNKNOWN) return CudaVersion::UNKNOWN; + + // AMD GPUs do not depend on CUDA versions. 
+ if (IsAMDGpuArch(A)) + return CudaVersion::CUDA_70; + + switch (A) { case CudaArch::SM_20: case CudaArch::SM_21: case CudaArch::SM_30: @@ -309,53 +140,21 @@ return CudaVersion::CUDA_91; case CudaArch::SM_75: return CudaVersion::CUDA_100; - case CudaArch::GFX600: - case CudaArch::GFX601: - case CudaArch::GFX700: - case CudaArch::GFX701: - case CudaArch::GFX702: - case CudaArch::GFX703: - case CudaArch::GFX704: - case CudaArch::GFX801: - case CudaArch::GFX802: - case CudaArch::GFX803: - case CudaArch::GFX810: - case CudaArch::GFX900: - case CudaArch::GFX902: - case CudaArch::GFX904: - case CudaArch::GFX906: - case CudaArch::GFX908: - case CudaArch::GFX909: - case CudaArch::GFX1010: - case CudaArch::GFX1011: - case CudaArch::GFX1012: - return CudaVersion::CUDA_70; + default: + llvm_unreachable("invalid enum"); } - llvm_unreachable("invalid enum"); } CudaVersion MaxVersionForCudaArch(CudaArch A) { + // AMD GPUs do not depend on CUDA versions. + if (IsAMDGpuArch(A)) + return CudaVersion::LATEST; + switch (A) { case CudaArch::UNKNOWN: return CudaVersion::UNKNOWN; case CudaArch::SM_20: case CudaArch::SM_21: - case CudaArch::GFX600: - case CudaArch::GFX601: - case CudaArch::GFX700: - case CudaArch::GFX701: - case CudaArch::GFX702: - case CudaArch::GFX703: - case CudaArch::GFX704: - case CudaArch::GFX801: - case CudaArch::GFX802: - case CudaArch::GFX803: - case CudaArch::GFX810: - case CudaArch::GFX900: - case CudaArch::GFX902: - case CudaArch::GFX1010: - case CudaArch::GFX1011: - case CudaArch::GFX1012: return CudaVersion::CUDA_80; default: return CudaVersion::LATEST; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -161,13 +161,13 @@ // CUDA-9+ uses single libdevice file for all GPU variants. 
std::string FilePath = LibDevicePath + "/libdevice.10.bc"; if (FS.exists(FilePath)) { - for (const char *GpuArchName : - {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53", - "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) { - const CudaArch GpuArch = StringToCudaArch(GpuArchName); - if (Version >= MinVersionForCudaArch(GpuArch) && - Version <= MaxVersionForCudaArch(GpuArch)) - LibDeviceMap[GpuArchName] = FilePath; + for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E; + ++Arch) { + CudaArch GpuArch = static_cast<CudaArch>(Arch); + if (!IsNVIDIAGpuArch(GpuArch)) + continue; + std::string GpuArchName(CudaArchToString(GpuArch)); + LibDeviceMap[GpuArchName] = FilePath; } } } else { @@ -471,10 +471,9 @@ continue; // We need to pass an Arch of the form "sm_XX" for cubin files and // "compute_XX" for ptx. - const char *Arch = - (II.getType() == types::TY_PP_Asm) - ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch)) - : gpu_arch_str; + const char *Arch = (II.getType() == types::TY_PP_Asm) + ? CudaArchToVirtualArchString(gpu_arch) + : gpu_arch_str; CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + ",file=" + II.getFilename())); }