Index: include/clang/Basic/Cuda.h =================================================================== --- include/clang/Basic/Cuda.h +++ include/clang/Basic/Cuda.h @@ -64,6 +64,7 @@ GFX902, GFX904, GFX906, + GFX906_SRAM_ECC, GFX909, LAST, }; Index: lib/Basic/Cuda.cpp =================================================================== --- lib/Basic/Cuda.cpp +++ lib/Basic/Cuda.cpp @@ -109,6 +109,8 @@ return "gfx904"; case CudaArch::GFX906: // TBA return "gfx906"; + case CudaArch::GFX906_SRAM_ECC: // TBA + return "gfx906+sram-ecc"; case CudaArch::GFX909: // TBA return "gfx909"; } @@ -147,6 +149,7 @@ .Case("gfx902", CudaArch::GFX902) .Case("gfx904", CudaArch::GFX904) .Case("gfx906", CudaArch::GFX906) + .Case("gfx906+sram-ecc", CudaArch::GFX906_SRAM_ECC) .Case("gfx909", CudaArch::GFX909) .Default(CudaArch::UNKNOWN); } @@ -259,6 +262,7 @@ case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX906_SRAM_ECC: case CudaArch::GFX909: return CudaVirtualArch::COMPUTE_AMDGCN; } @@ -306,6 +310,7 @@ case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX906_SRAM_ECC: case CudaArch::GFX909: return CudaVersion::CUDA_70; } Index: lib/Basic/Targets/NVPTX.cpp =================================================================== --- lib/Basic/Targets/NVPTX.cpp +++ lib/Basic/Targets/NVPTX.cpp @@ -189,6 +189,7 @@ case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX906_SRAM_ECC: case CudaArch::GFX909: case CudaArch::LAST: break; Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4951,6 +4951,7 @@ case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX906_SRAM_ECC: case CudaArch::GFX909: case CudaArch::UNKNOWN: break; @@ -5004,6 +5005,7 @@ case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + 
case CudaArch::GFX906_SRAM_ECC: case CudaArch::GFX909: case CudaArch::UNKNOWN: break; Index: lib/Driver/ToolChains/HIP.cpp =================================================================== --- lib/Driver/ToolChains/HIP.cpp +++ lib/Driver/ToolChains/HIP.cpp @@ -31,6 +31,14 @@ namespace { +void stripFeatureString(StringRef &Arch) { + Arch = Arch.take_until([](char c) { return c == '+'; }); +} + +StringRef getFeatureString(StringRef Arch) { + return Arch.drop_until([](char c) { return c == '+'; }); +} + static void addBCLib(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, ArgStringList LibraryPaths, StringRef BCName) { @@ -114,6 +122,7 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + stripFeatureString(SubArchName); // Construct opt command. ArgStringList OptArgs; // The input to opt is the output from llvm-link. @@ -162,11 +171,24 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + + // Only two gpu archs for gfx906 are accepted: gfx906 and gfx906+sram-ecc. + // gfx906 implies sram-ecc off, whereas gfx906+sram-ecc implies sram-ecc on. + StringRef Feature = getFeatureString(SubArchName); + stripFeatureString(SubArchName); + assert(Feature.empty() || + (SubArchName == "gfx906" && Feature == "+sram-ecc")); + if (SubArchName == "gfx906" && Feature.empty()) + Feature = "-sram-ecc"; + // Construct llc command. 
ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa", "-filetype=obj", "-mattr=-code-object-v3", Args.MakeArgString("-mcpu=" + SubArchName)}; + if (!Feature.empty()) + LlcArgs.push_back(Args.MakeArgString("-mattr=" + Feature)); + // Extract all the -m options std::vector<llvm::StringRef> Features; handleTargetFeaturesGroup( @@ -372,6 +394,7 @@ } if (!BoundArch.empty()) { + stripFeatureString(BoundArch); DAL->eraseArg(options::OPT_march_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch); } Index: test/Driver/hip-toolchain-sram-ecc.hip =================================================================== --- /dev/null +++ test/Driver/hip-toolchain-sram-ecc.hip @@ -0,0 +1,38 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --cuda-gpu-arch=gfx906 --cuda-gpu-arch=gfx906+sram-ecc \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx906" + +// CHECK: [[OPT:".*opt"]] {{".*-gfx906-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx906" +// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx906-optimized.*bc"]] + +// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj" +// CHECK-SAME: {{.*}} "-mcpu=gfx906" +// CHECK-SAME: "-mattr=-sram-ecc" "-o" {{".*-gfx906-.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx906" + +// CHECK: [[OPT]] {{".*-gfx906\+sram-ecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx906" +// CHECK-SAME: "-o" [[OPT_906ECC_BC:".*-gfx906\+sram-ecc.*bc"]] + +// CHECK: [[LLC]] [[OPT_906ECC_BC]] +// CHECK-SAME: 
"-mtriple=amdgcn-amd-amdhsa" "-filetype=obj" +// CHECK-SAME: {{.*}} "-mcpu=gfx906" +// CHECK-SAME: "-mattr=+sram-ecc" "-o" {{".*-gfx906\+sram-ecc.*o"}} + +// CHECK: {{".*clang-offload-bundler"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx906+sram-ecc"