Index: lib/Driver/ToolChains/HIP.cpp
===================================================================
--- lib/Driver/ToolChains/HIP.cpp
+++ lib/Driver/ToolChains/HIP.cpp
@@ -162,9 +162,31 @@
   // AMDGPUPromoteAlloca pass which cause invalid memory access in PyTorch.
   // Remove this once the issue is fixed.
   ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
-                        "-filetype=obj", "-mattr=-code-object-v3",
+                        "-filetype=obj",
                         "-disable-promote-alloca-to-lds",
-                        Args.MakeArgString("-mcpu=" + SubArchName), "-o"};
+                        Args.MakeArgString("-mcpu=" + SubArchName)};
+
+  // Extract all the -m options
+  std::vector<llvm::StringRef> Features;
+  handleTargetFeaturesGroup(
+    Args, Features, options::OPT_m_amdgpu_Features_Group);
+
+  // Add features to mattr such as code-object-v3 and xnack. Separators are
+  // placed by position rather than by comparing against Features.back(), so
+  // that a repeated option (e.g. -mxnack ... -mxnack) still yields a
+  // well-formed comma-separated list.
+  std::string MAttrString = "-mattr=";
+  for (size_t I = 0, E = Features.size(); I != E; ++I) {
+    if (I != 0)
+      MAttrString.append(",");
+    MAttrString.append(Features[I].str());
+  }
+  // Do not pass an empty "-mattr=" when no feature option was given.
+  if (!Features.empty())
+    LlcArgs.push_back(Args.MakeArgString(MAttrString));
+
+  // Add output filename
+  LlcArgs.push_back("-o");
   std::string LlcOutputFileName =
       C.getDriver().GetTemporaryPath(OutputFilePrefix, "o");
   const char *LlcOutputFile =
Index: test/Driver/hip-toolchain-features.hip
===================================================================
--- /dev/null
+++ test/Driver/hip-toolchain-features.hip
@@ -0,0 +1,48 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mcode-object-v3 2>&1 | FileCheck %s -check-prefix=COV3
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mno-code-object-v3 2>&1 | FileCheck %s -check-prefix=NOCOV3
+
+// COV3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3"
+// NOCOV3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3"
+
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mxnack 2>&1 | FileCheck %s -check-prefix=XNACK
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mno-xnack 2>&1 | FileCheck %s -check-prefix=NOXNACK
+
+// XNACK: {{.*}}clang{{.*}}"-target-feature" "+xnack"
+// NOXNACK: {{.*}}clang{{.*}}"-target-feature" "-xnack"
+
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -msram-ecc 2>&1 | FileCheck %s -check-prefix=SRAM
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mno-sram-ecc 2>&1 | FileCheck %s -check-prefix=NOSRAM
+
+// SRAM: {{.*}}clang{{.*}}"-target-feature" "+sram-ecc"
+// NOSRAM: {{.*}}clang{{.*}}"-target-feature" "-sram-ecc"
+
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mcode-object-v3 -mxnack -msram-ecc \
+// RUN: 2>&1 | FileCheck %s -check-prefix=ALL3
+// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -mno-code-object-v3 -mno-xnack -mno-sram-ecc \
+// RUN: 2>&1 | FileCheck %s -check-prefix=NOALL3
+
+// ALL3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3" "-target-feature" "+xnack" "-target-feature" "+sram-ecc"
+// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3" "-target-feature" "-xnack" "-target-feature" "-sram-ecc"