Index: cfe/trunk/include/clang/Basic/Cuda.h =================================================================== --- cfe/trunk/include/clang/Basic/Cuda.h +++ cfe/trunk/include/clang/Basic/Cuda.h @@ -0,0 +1,77 @@ +//===--- Cuda.h - Utilities for compiling CUDA code ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_CUDA_H +#define LLVM_CLANG_BASIC_CUDA_H + +namespace llvm { +class StringRef; +} // namespace llvm + +namespace clang { + +enum class CudaVersion { + UNKNOWN, + CUDA_70, + CUDA_75, + CUDA_80, +}; +const char *CudaVersionToString(CudaVersion V); + +// No string -> CudaVersion conversion function because there's no canonical +// spelling of the various CUDA versions. + +enum class CudaArch { + UNKNOWN, + SM_20, + SM_21, + SM_30, + SM_32, + SM_35, + SM_37, + SM_50, + SM_52, + SM_53, + SM_60, + SM_61, + SM_62, +}; +const char *CudaArchToString(CudaArch A); + +// The input should have the form "sm_20". +CudaArch StringToCudaArch(llvm::StringRef S); + +enum class CudaVirtualArch { + UNKNOWN, + COMPUTE_20, + COMPUTE_30, + COMPUTE_32, + COMPUTE_35, + COMPUTE_37, + COMPUTE_50, + COMPUTE_52, + COMPUTE_53, + COMPUTE_60, + COMPUTE_61, + COMPUTE_62, +}; +const char *CudaVirtualArchToString(CudaVirtualArch A); + +// The input should have the form "compute_20". +CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S); + +/// Get the compute_xx corresponding to an sm_yy. +CudaVirtualArch VirtualArchForCudaArch(CudaArch A); + +/// Get the earliest CudaVersion that supports the given CudaArch. +CudaVersion MinVersionForCudaArch(CudaArch A); + +} // namespace clang + +#endif Index: cfe/trunk/include/clang/Driver/Action.h =================================================================== --- cfe/trunk/include/clang/Driver/Action.h +++ cfe/trunk/include/clang/Driver/Action.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_DRIVER_ACTION_H #define LLVM_CLANG_DRIVER_ACTION_H +#include "clang/Basic/Cuda.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" #include "llvm/ADT/SmallVector.h" @@ -157,26 +158,22 @@ class CudaDeviceAction : public Action { virtual void anchor(); - /// GPU architecture to bind. Always of the form /sm_\d+/ or null (when the - /// action applies to multiple architectures). - const char *GpuArchName; + + const CudaArch GpuArch; + /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: - CudaDeviceAction(Action *Input, const char *ArchName, bool AtTopLevel); + CudaDeviceAction(Action *Input, CudaArch Arch, bool AtTopLevel); - const char *getGpuArchName() const { return GpuArchName; } - - /// Gets the compute_XX that corresponds to getGpuArchName(). Returns null - /// when getGpuArchName() is null. - const char *getComputeArchName() const; + /// Get the CUDA GPU architecture to which this Action corresponds. Returns + /// UNKNOWN if this Action corresponds to multiple architectures. + CudaArch getGpuArch() const { return GpuArch; } bool isAtTopLevel() const { return AtTopLevel; } - static bool IsValidGpuArchName(llvm::StringRef ArchName); - static bool classof(const Action *A) { return A->getKind() == CudaDeviceClass; } Index: cfe/trunk/lib/Basic/CMakeLists.txt =================================================================== --- cfe/trunk/lib/Basic/CMakeLists.txt +++ cfe/trunk/lib/Basic/CMakeLists.txt @@ -66,6 +66,7 @@ Attributes.cpp Builtins.cpp CharInfo.cpp + Cuda.cpp Diagnostic.cpp DiagnosticIDs.cpp DiagnosticOptions.cpp Index: cfe/trunk/lib/Basic/Cuda.cpp =================================================================== --- cfe/trunk/lib/Basic/Cuda.cpp +++ cfe/trunk/lib/Basic/Cuda.cpp @@ -0,0 +1,165 @@ +#include "clang/Basic/Cuda.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" + +namespace clang { + +const char *CudaVersionToString(CudaVersion V) { + switch (V) { + case CudaVersion::UNKNOWN: + return "unknown"; + case CudaVersion::CUDA_70: + return "7.0"; + case CudaVersion::CUDA_75: + return "7.5"; + case CudaVersion::CUDA_80: + return "8.0"; + } +} + +const char *CudaArchToString(CudaArch A) { + switch (A) { + case CudaArch::UNKNOWN: + return "unknown"; + case CudaArch::SM_20: + return "sm_20"; + case CudaArch::SM_21: + return "sm_21"; + case CudaArch::SM_30: + return "sm_30"; + case CudaArch::SM_32: + return "sm_32"; + case CudaArch::SM_35: + return "sm_35"; + case CudaArch::SM_37: + return "sm_37"; + case CudaArch::SM_50: + return "sm_50"; + case CudaArch::SM_52: + return "sm_52"; + case CudaArch::SM_53: + return "sm_53"; + case CudaArch::SM_60: + return "sm_60"; + case CudaArch::SM_61: + return "sm_61"; + case CudaArch::SM_62: + return "sm_62"; + } +} + +CudaArch StringToCudaArch(llvm::StringRef S) { + return llvm::StringSwitch(S) + .Case("sm_20", CudaArch::SM_20) + .Case("sm_21", CudaArch::SM_21) + .Case("sm_30", CudaArch::SM_30) + .Case("sm_32", CudaArch::SM_32) + .Case("sm_35", CudaArch::SM_35) + .Case("sm_37", CudaArch::SM_37) + .Case("sm_50", CudaArch::SM_50) + .Case("sm_52", CudaArch::SM_52) + .Case("sm_53", CudaArch::SM_53) + .Case("sm_60", CudaArch::SM_60) + .Case("sm_61", CudaArch::SM_61) + .Case("sm_62", CudaArch::SM_62) + .Default(CudaArch::UNKNOWN); +} + +const char *CudaVirtualArchToString(CudaVirtualArch A) { + switch (A) { + case CudaVirtualArch::UNKNOWN: + return "unknown"; + case CudaVirtualArch::COMPUTE_20: + return "compute_20"; + case CudaVirtualArch::COMPUTE_30: + return "compute_30"; + case CudaVirtualArch::COMPUTE_32: + return "compute_32"; + case CudaVirtualArch::COMPUTE_35: + return "compute_35"; + case CudaVirtualArch::COMPUTE_37: + return "compute_37"; + case CudaVirtualArch::COMPUTE_50: + return "compute_50"; + case CudaVirtualArch::COMPUTE_52: + return "compute_52"; + case CudaVirtualArch::COMPUTE_53: + return "compute_53"; + case CudaVirtualArch::COMPUTE_60: + return "compute_60"; + case CudaVirtualArch::COMPUTE_61: + return "compute_61"; + case CudaVirtualArch::COMPUTE_62: + return "compute_62"; + } +} + +CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) { + return llvm::StringSwitch(S) + .Case("compute_20", CudaVirtualArch::COMPUTE_20) + .Case("compute_30", CudaVirtualArch::COMPUTE_30) + .Case("compute_32", CudaVirtualArch::COMPUTE_32) + .Case("compute_35", CudaVirtualArch::COMPUTE_35) + .Case("compute_37", CudaVirtualArch::COMPUTE_37) + .Case("compute_50", CudaVirtualArch::COMPUTE_50) + .Case("compute_52", CudaVirtualArch::COMPUTE_52) + .Case("compute_53", CudaVirtualArch::COMPUTE_53) + .Case("compute_60", CudaVirtualArch::COMPUTE_60) + .Case("compute_61", CudaVirtualArch::COMPUTE_61) + .Case("compute_62", CudaVirtualArch::COMPUTE_62) + .Default(CudaVirtualArch::UNKNOWN); +} + +CudaVirtualArch VirtualArchForCudaArch(CudaArch A) { + switch (A) { + case CudaArch::UNKNOWN: + return CudaVirtualArch::UNKNOWN; + case CudaArch::SM_20: + case CudaArch::SM_21: + return CudaVirtualArch::COMPUTE_20; + case CudaArch::SM_30: + return CudaVirtualArch::COMPUTE_30; + case CudaArch::SM_32: + return CudaVirtualArch::COMPUTE_32; + case CudaArch::SM_35: + return CudaVirtualArch::COMPUTE_35; + case CudaArch::SM_37: + return CudaVirtualArch::COMPUTE_37; + case CudaArch::SM_50: + return CudaVirtualArch::COMPUTE_50; + case CudaArch::SM_52: + return CudaVirtualArch::COMPUTE_52; + case CudaArch::SM_53: + return CudaVirtualArch::COMPUTE_53; + case CudaArch::SM_60: + return CudaVirtualArch::COMPUTE_60; + case CudaArch::SM_61: + return CudaVirtualArch::COMPUTE_61; + case CudaArch::SM_62: + return CudaVirtualArch::COMPUTE_62; + } +} + +CudaVersion MinVersionForCudaArch(CudaArch A) { + switch (A) { + case CudaArch::UNKNOWN: + return CudaVersion::UNKNOWN; + case CudaArch::SM_20: + case CudaArch::SM_21: + case CudaArch::SM_30: + case CudaArch::SM_32: + case CudaArch::SM_35: + case CudaArch::SM_37: + case CudaArch::SM_50: + case CudaArch::SM_52: + case CudaArch::SM_53: + return CudaVersion::CUDA_70; + case CudaArch::SM_60: + case CudaArch::SM_61: + case CudaArch::SM_62: + return CudaVersion::CUDA_80; + } +} + +} // namespace clang Index: cfe/trunk/lib/Basic/Targets.cpp =================================================================== --- cfe/trunk/lib/Basic/Targets.cpp +++ cfe/trunk/lib/Basic/Targets.cpp @@ -12,12 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/Basic/TargetInfo.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/Cuda.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" #include "llvm/ADT/APFloat.h" @@ -1694,23 +1695,7 @@ class NVPTXTargetInfo : public TargetInfo { static const char *const GCCRegNames[]; static const Builtin::Info BuiltinInfo[]; - - // The GPU profiles supported by the NVPTX backend - enum GPUKind { - GK_NONE, - GK_SM20, - GK_SM21, - GK_SM30, - GK_SM32, - GK_SM35, - GK_SM37, - GK_SM50, - GK_SM52, - GK_SM53, - GK_SM60, - GK_SM61, - GK_SM62, - } GPU; + CudaArch GPU; public: NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) @@ -1723,8 +1708,7 @@ // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - // Set the default GPU to sm20 - GPU = GK_SM20; + GPU = CudaArch::SM_20; // If possible, get a TargetInfo for our host triple, so we can match its // types. @@ -1793,32 +1777,32 @@ // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { switch (GPU) { - case GK_NONE: + case CudaArch::UNKNOWN: assert(false && "No GPU arch when compiling CUDA device code."); return ""; - case GK_SM20: + case CudaArch::SM_20: return "200"; - case GK_SM21: + case CudaArch::SM_21: return "210"; - case GK_SM30: + case CudaArch::SM_30: return "300"; - case GK_SM32: + case CudaArch::SM_32: return "320"; - case GK_SM35: + case CudaArch::SM_35: return "350"; - case GK_SM37: + case CudaArch::SM_37: return "370"; - case GK_SM50: + case CudaArch::SM_50: return "500"; - case GK_SM52: + case CudaArch::SM_52: return "520"; - case GK_SM53: + case CudaArch::SM_53: return "530"; - case GK_SM60: + case CudaArch::SM_60: return "600"; - case GK_SM61: + case CudaArch::SM_61: return "610"; - case GK_SM62: + case CudaArch::SM_62: return "620"; } }(); @@ -1862,22 +1846,8 @@ return TargetInfo::CharPtrBuiltinVaList; } bool setCPU(const std::string &Name) override { - GPU = llvm::StringSwitch(Name) - .Case("sm_20", GK_SM20) - .Case("sm_21", GK_SM21) - .Case("sm_30", GK_SM30) - .Case("sm_32", GK_SM32) - .Case("sm_35", GK_SM35) - .Case("sm_37", GK_SM37) - .Case("sm_50", GK_SM50) - .Case("sm_52", GK_SM52) - .Case("sm_53", GK_SM53) - .Case("sm_60", GK_SM60) - .Case("sm_61", GK_SM61) - .Case("sm_62", GK_SM62) - .Default(GK_NONE); - - return GPU != GK_NONE; + GPU = StringToCudaArch(Name); + return GPU != CudaArch::UNKNOWN; } void setSupportedOpenCLOpts() override { auto &Opts = getSupportedOpenCLOpts(); Index: cfe/trunk/lib/Driver/Action.cpp =================================================================== --- cfe/trunk/lib/Driver/Action.cpp +++ cfe/trunk/lib/Driver/Action.cpp @@ -51,43 +51,11 @@ BindArchAction::BindArchAction(Action *Input, const char *_ArchName) : Action(BindArchClass, Input), ArchName(_ArchName) {} -// Converts CUDA GPU architecture, e.g. "sm_21", to its corresponding virtual -// compute arch, e.g. "compute_20". Returns null if the input arch is null or -// doesn't match an existing arch. -static const char* GpuArchToComputeName(const char *ArchName) { - if (!ArchName) - return nullptr; - return llvm::StringSwitch(ArchName) - .Cases("sm_20", "sm_21", "compute_20") - .Case("sm_30", "compute_30") - .Case("sm_32", "compute_32") - .Case("sm_35", "compute_35") - .Case("sm_37", "compute_37") - .Case("sm_50", "compute_50") - .Case("sm_52", "compute_52") - .Case("sm_53", "compute_53") - .Case("sm_60", "compute_60") - .Case("sm_61", "compute_61") - .Case("sm_62", "compute_62") - .Default(nullptr); -} - void CudaDeviceAction::anchor() {} -CudaDeviceAction::CudaDeviceAction(Action *Input, const char *ArchName, +CudaDeviceAction::CudaDeviceAction(Action *Input, CudaArch Arch, bool AtTopLevel) - : Action(CudaDeviceClass, Input), GpuArchName(ArchName), - AtTopLevel(AtTopLevel) { - assert(!GpuArchName || IsValidGpuArchName(GpuArchName)); -} - -const char *CudaDeviceAction::getComputeArchName() const { - return GpuArchToComputeName(GpuArchName); -} - -bool CudaDeviceAction::IsValidGpuArchName(llvm::StringRef ArchName) { - return GpuArchToComputeName(ArchName.data()) != nullptr; -} + : Action(CudaDeviceClass, Input), GpuArch(Arch), AtTopLevel(AtTopLevel) {} void CudaHostAction::anchor() {} Index: cfe/trunk/lib/Driver/Driver.cpp =================================================================== --- cfe/trunk/lib/Driver/Driver.cpp +++ cfe/trunk/lib/Driver/Driver.cpp @@ -23,6 +23,7 @@ #include "clang/Driver/ToolChain.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" @@ -1022,9 +1023,10 @@ os << '"' << BIA->getArchName() << '"' << ", {" << PrintActions1(C, *BIA->input_begin(), Ids) << "}"; } else if (CudaDeviceAction *CDA = dyn_cast(A)) { - os << '"' - << (CDA->getGpuArchName() ? CDA->getGpuArchName() : "(multiple archs)") - << '"' << ", {" << PrintActions1(C, *CDA->input_begin(), Ids) << "}"; + CudaArch Arch = CDA->getGpuArch(); + if (Arch != CudaArch::UNKNOWN) + os << "'" << CudaArchToString(Arch) << "', "; + os << "{" << PrintActions1(C, *CDA->input_begin(), Ids) << "}"; } else { const ActionList *AL; if (CudaHostAction *CHA = dyn_cast(A)) { @@ -1380,24 +1382,25 @@ return C.MakeAction(HostAction, ActionList()); // Collect all cuda_gpu_arch parameters, removing duplicates. - SmallVector GpuArchList; - llvm::StringSet<> GpuArchNames; + SmallVector GpuArchList; + llvm::SmallSet GpuArchs; for (Arg *A : Args) { if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) continue; A->claim(); - const auto& Arch = A->getValue(); - if (!CudaDeviceAction::IsValidGpuArchName(Arch)) - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch; - else if (GpuArchNames.insert(Arch).second) + const auto &ArchStr = A->getValue(); + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + else if (GpuArchs.insert(Arch).second) GpuArchList.push_back(Arch); } // Default to sm_20 which is the lowest common denominator for supported GPUs. // sm_20 code should work correctly, if suboptimally, on all newer GPUs. if (GpuArchList.empty()) - GpuArchList.push_back("sm_20"); + GpuArchList.push_back(CudaArch::SM_20); // Replicate inputs for each GPU architecture. Driver::InputList CudaDeviceInputs; @@ -1463,7 +1466,7 @@ } auto FatbinAction = C.MakeAction( C.MakeAction(DeviceActions, types::TY_CUDA_FATBIN), - /* GpuArchName = */ nullptr, + CudaArch::UNKNOWN, /* AtTopLevel = */ false); // Return a new host action that incorporates original host action and all // device actions. @@ -2047,8 +2050,8 @@ // Call BuildJobsForAction() again, now with correct device parameters. InputInfo II = BuildJobsForAction( C, *CDA->input_begin(), C.getSingleOffloadToolChain(), - CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true, - LinkingOutput, CachedResults); + CudaArchToString(CDA->getGpuArch()), CDA->isAtTopLevel(), + /*MultipleArchs=*/true, LinkingOutput, CachedResults); // Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so // that one can retrieve II's GPU arch. II.setAction(A); Index: cfe/trunk/lib/Driver/Tools.cpp =================================================================== --- cfe/trunk/lib/Driver/Tools.cpp +++ cfe/trunk/lib/Driver/Tools.cpp @@ -11222,9 +11222,10 @@ auto* A = cast(II.getAction()); // We need to pass an Arch of the form "sm_XX" for cubin files and // "compute_XX" for ptx. - const char *Arch = (II.getType() == types::TY_PP_Asm) - ? A->getComputeArchName() - : A->getGpuArchName(); + const char *Arch = + (II.getType() == types::TY_PP_Asm) + ? CudaVirtualArchToString(VirtualArchForCudaArch(A->getGpuArch())) + : CudaArchToString(A->getGpuArch()); CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + ",file=" + II.getFilename())); }