Index: include/clang/Basic/Cuda.h =================================================================== --- include/clang/Basic/Cuda.h +++ include/clang/Basic/Cuda.h @@ -46,6 +46,19 @@ SM_62, SM_70, SM_72, + GFX600, + GFX601, + GFX700, + GFX701, + GFX702, + GFX703, + GFX704, + GFX801, + GFX802, + GFX803, + GFX810, + GFX900, + GFX902, LAST, }; const char *CudaArchToString(CudaArch A); @@ -68,6 +81,7 @@ COMPUTE_62, COMPUTE_70, COMPUTE_72, + COMPUTE_AMDGCN, }; const char *CudaVirtualArchToString(CudaVirtualArch A); Index: lib/Basic/Cuda.cpp =================================================================== --- lib/Basic/Cuda.cpp +++ lib/Basic/Cuda.cpp @@ -58,6 +58,32 @@ return "sm_70"; case CudaArch::SM_72: return "sm_72"; + case CudaArch::GFX600: // tahiti + return "gfx600"; + case CudaArch::GFX601: // pitcairn, verde, oland,hainan + return "gfx601"; + case CudaArch::GFX700: // kaveri + return "gfx700"; + case CudaArch::GFX701: // hawaii + return "gfx701"; + case CudaArch::GFX702: // 290,290x,R390,R390x + return "gfx702"; + case CudaArch::GFX703: // kabini mullins + return "gfx703"; + case CudaArch::GFX704: // bonaire + return "gfx704"; + case CudaArch::GFX801: // carrizo + return "gfx801"; + case CudaArch::GFX802: // tonga,iceland + return "gfx802"; + case CudaArch::GFX803: // fiji,polaris10 + return "gfx803"; + case CudaArch::GFX810: // stoney + return "gfx810"; + case CudaArch::GFX900: // vega, instinct + return "gfx900"; + case CudaArch::GFX902: // TBA + return "gfx902"; } llvm_unreachable("invalid enum"); } @@ -78,6 +104,19 @@ .Case("sm_62", CudaArch::SM_62) .Case("sm_70", CudaArch::SM_70) .Case("sm_72", CudaArch::SM_72) + .Case("gfx600", CudaArch::GFX600) + .Case("gfx601", CudaArch::GFX601) + .Case("gfx700", CudaArch::GFX700) + .Case("gfx701", CudaArch::GFX701) + .Case("gfx702", CudaArch::GFX702) + .Case("gfx703", CudaArch::GFX703) + .Case("gfx704", CudaArch::GFX704) + .Case("gfx801", CudaArch::GFX801) + .Case("gfx802", CudaArch::GFX802) + .Case("gfx803", 
CudaArch::GFX803) + .Case("gfx810", CudaArch::GFX810) + .Case("gfx900", CudaArch::GFX900) + .Case("gfx902", CudaArch::GFX902) .Default(CudaArch::UNKNOWN); } @@ -111,6 +150,8 @@ return "compute_70"; case CudaVirtualArch::COMPUTE_72: return "compute_72"; + case CudaVirtualArch::COMPUTE_AMDGCN: + return "compute_amdgcn"; } llvm_unreachable("invalid enum"); } @@ -130,6 +171,7 @@ .Case("compute_62", CudaVirtualArch::COMPUTE_62) .Case("compute_70", CudaVirtualArch::COMPUTE_70) .Case("compute_72", CudaVirtualArch::COMPUTE_72) + .Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN) .Default(CudaVirtualArch::UNKNOWN); } @@ -166,6 +208,20 @@ return CudaVirtualArch::COMPUTE_70; case CudaArch::SM_72: return CudaVirtualArch::COMPUTE_72; + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + return CudaVirtualArch::COMPUTE_AMDGCN; } llvm_unreachable("invalid enum"); } @@ -194,6 +250,20 @@ return CudaVersion::CUDA_90; case CudaArch::SM_72: return CudaVersion::CUDA_91; + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + return CudaVersion::CUDA_70; } llvm_unreachable("invalid enum"); } @@ -204,6 +274,19 @@ return CudaVersion::UNKNOWN; case CudaArch::SM_20: case CudaArch::SM_21: + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case 
CudaArch::GFX810:
+  case CudaArch::GFX900:
+  case CudaArch::GFX902:
     return CudaVersion::CUDA_80;
   default:
     return CudaVersion::LATEST;
Index: lib/Basic/Targets.h
===================================================================
--- lib/Basic/Targets.h
+++ lib/Basic/Targets.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_H
 #define LLVM_CLANG_LIB_BASIC_TARGETS_H
 
+#include "clang/Basic/Cuda.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetInfo.h"
@@ -46,6 +47,10 @@
 LLVM_LIBRARY_VISIBILITY
 void addCygMingDefines(const clang::LangOptions &Opts,
                        clang::MacroBuilder &Builder);
+
+/// Define the __CUDA_ARCH__ macro for the device architecture \p GPU.
+LLVM_LIBRARY_VISIBILITY
+void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder);
 } // namespace targets
 } // namespace clang
 #endif // LLVM_CLANG_LIB_BASIC_TARGETS_H
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -112,6 +112,69 @@
   addCygMingDefines(Opts, Builder);
 }
 
+/// Define the __CUDA_ARCH__ macro for the device architecture \p GPU.
+///
+/// NVPTX architectures yield their SM version times ten (sm_35 -> "350");
+/// every AMDGCN gfx* architecture currently reports "320".
+void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder) {
+  std::string CUDAArchCode = [GPU] {
+    switch (GPU) {
+    case CudaArch::LAST:
+      break;
+    case CudaArch::UNKNOWN:
+      // Same handling as the NVPTX code this was factored out of: assert in
+      // asserts-enabled builds, but still return a value in release builds
+      // rather than executing llvm_unreachable.
+      assert(false && "No GPU arch when compiling CUDA device code.");
+      return "";
+    case CudaArch::SM_20:
+      return "200";
+    case CudaArch::SM_21:
+      return "210";
+    case CudaArch::SM_30:
+      return "300";
+    case CudaArch::SM_32:
+      return "320";
+    case CudaArch::SM_35:
+      return "350";
+    case CudaArch::SM_37:
+      return "370";
+    case CudaArch::SM_50:
+      return "500";
+    case CudaArch::SM_52:
+      return "520";
+    case CudaArch::SM_53:
+      return "530";
+    case CudaArch::SM_60:
+      return "600";
+    case CudaArch::SM_61:
+      return "610";
+    case CudaArch::SM_62:
+      return "620";
+    case CudaArch::SM_70:
+      return "700";
+    case CudaArch::SM_72:
+      return "720";
+    case CudaArch::GFX600:
+    case CudaArch::GFX601:
+    case CudaArch::GFX700:
+    case CudaArch::GFX701:
+    case CudaArch::GFX702:
+    case CudaArch::GFX703:
+    case CudaArch::GFX704:
+    case CudaArch::GFX801:
+    case CudaArch::GFX802:
+    case CudaArch::GFX803:
+    case CudaArch::GFX810:
+    case CudaArch::GFX900:
+    case CudaArch::GFX902:
+      return "320";
+    }
+    llvm_unreachable("unhandled Cuda/HIP Arch");
+  }();
+  Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+}
+
 //===----------------------------------------------------------------------===//
 // Driver code
 //===----------------------------------------------------------------------===//
Index: lib/Basic/Targets/AMDGPU.h
===================================================================
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 
+#include "clang/Basic/Cuda.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/ADT/StringSet.h"
@@ -174,6 +175,7 @@
   static bool isAMDGCN(const llvm::Triple &TT) {
     return TT.getArch() == llvm::Triple::amdgcn;
   }
+  CudaArch GCN_Subarch;
 
 public:
   AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
@@ -330,6 +332,12 @@
     else
       GPU = parseR600Name(Name);
 
+    // StringToCudaArch yields CudaArch::UNKNOWN for every name outside its
+    // gfxNNN list; keep the previously selected subarch in that case so that
+    // defineCudaArchMacro never receives UNKNOWN from this target.
+    const CudaArch ParsedArch = StringToCudaArch(Name);
+    if (ParsedArch != CudaArch::UNKNOWN)
+      GCN_Subarch = ParsedArch;
     return GK_NONE != GPU.Kind;
   }
 
Index: lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- lib/Basic/Targets/AMDGPU.cpp
+++ lib/Basic/Targets/AMDGPU.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "Targets.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MacroBuilder.h"
@@ -263,6 +264,7 @@
   resetDataLayout(isAMDGCN(getTriple()) ?
DataLayoutStringAMDGCN : DataLayoutStringR600); assert(DataLayout->getAllocaAddrSpace() == Private); + GCN_Subarch = CudaArch::GFX803; // Default to fiji setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); @@ -307,6 +309,9 @@ if (GPU.Kind != GK_NONE) Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__")); + if (Opts.CUDAIsDevice) + defineCudaArchMacro(GCN_Subarch, Builder); + // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be // removed in the near future. if (GPU.HasFMAF) Index: lib/Basic/Targets/NVPTX.cpp =================================================================== --- lib/Basic/Targets/NVPTX.cpp +++ lib/Basic/Targets/NVPTX.cpp @@ -153,48 +153,8 @@ MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); - if (Opts.CUDAIsDevice) { - // Set __CUDA_ARCH__ for the GPU specified. - std::string CUDAArchCode = [this] { - switch (GPU) { - case CudaArch::LAST: - break; - case CudaArch::UNKNOWN: - assert(false && "No GPU arch when compiling CUDA device code."); - return ""; - case CudaArch::SM_20: - return "200"; - case CudaArch::SM_21: - return "210"; - case CudaArch::SM_30: - return "300"; - case CudaArch::SM_32: - return "320"; - case CudaArch::SM_35: - return "350"; - case CudaArch::SM_37: - return "370"; - case CudaArch::SM_50: - return "500"; - case CudaArch::SM_52: - return "520"; - case CudaArch::SM_53: - return "530"; - case CudaArch::SM_60: - return "600"; - case CudaArch::SM_61: - return "610"; - case CudaArch::SM_62: - return "620"; - case CudaArch::SM_70: - return "700"; - case CudaArch::SM_72: - return "720"; - } - llvm_unreachable("unhandled CudaArch"); - }(); - Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); - } + if (Opts.CUDAIsDevice) + defineCudaArchMacro(GPU, Builder); } ArrayRef NVPTXTargetInfo::getTargetBuiltins() const { Index: test/Driver/cuda-arch-translation.cu 
=================================================================== --- test/Driver/cuda-arch-translation.cu +++ test/Driver/cuda-arch-translation.cu @@ -31,6 +31,32 @@ // RUN: | FileCheck -check-prefixes=COMMON,SM62 %s // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \ // RUN: | FileCheck -check-prefixes=COMMON,SM70 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx600 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX600 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx601 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX601 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx700 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX700 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx701 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX701 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx702 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX702 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx703 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX703 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx704 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX704 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx801 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX801 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx802 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX802 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx803 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX803 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx810 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX810 %s +// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx900 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX900 %s +// RUN: %clang 
-### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902 %s // COMMON: ptxas // COMMON-SAME: -m64 @@ -49,3 +75,16 @@ // SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61 // SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62 // SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70 +// GFX600:--image=profile=gfx600{{.*}}--image=profile=compute_amdgcn +// GFX601:--image=profile=gfx601{{.*}}--image=profile=compute_amdgcn +// GFX700:--image=profile=gfx700{{.*}}--image=profile=compute_amdgcn +// GFX701:--image=profile=gfx701{{.*}}--image=profile=compute_amdgcn +// GFX702:--image=profile=gfx702{{.*}}--image=profile=compute_amdgcn +// GFX703:--image=profile=gfx703{{.*}}--image=profile=compute_amdgcn +// GFX704:--image=profile=gfx704{{.*}}--image=profile=compute_amdgcn +// GFX801:--image=profile=gfx801{{.*}}--image=profile=compute_amdgcn +// GFX802:--image=profile=gfx802{{.*}}--image=profile=compute_amdgcn +// GFX803:--image=profile=gfx803{{.*}}--image=profile=compute_amdgcn +// GFX810:--image=profile=gfx810{{.*}}--image=profile=compute_amdgcn +// GFX900:--image=profile=gfx900{{.*}}--image=profile=compute_amdgcn +// GFX902:--image=profile=gfx902{{.*}}--image=profile=compute_amdgcn