diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,6 +67,8 @@ "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; +def err_drv_undetermined_amdgpu_arch : Error< + "Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,6 +924,8 @@ HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, + HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -100,12 +100,20 @@ /// Should skip argument. bool shouldSkipArgument(const llvm::opt::Arg *Arg) const; + /// Uses amdgpu_arch tool to get arch of the system GPU. Will return error + /// if unable to find one. 
+  llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
+                               std::string &GPUArch) const;
+
 protected:
   /// Check and diagnose invalid target ID specified by -mcpu.
   void checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
 
   /// Get GPU arch from -mcpu without checking.
   StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
+
+  llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
+                               SmallVector<std::string, 1> &GPUArchs) const;
 };
 
 class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,9 +12,16 @@
 #include "clang/Basic/TargetID.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/LineIterator.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include <system_error>
+
+#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
 
 using namespace clang::driver;
 using namespace clang::driver::tools;
@@ -715,6 +722,96 @@
   }
 }
 
+/// Run the amdgpu-arch helper and collect the GPU architectures it reports.
+///
+/// The helper named by --amdgpu-arch-tool= is used when given; otherwise it is
+/// looked up with GetProgramPath. Its stdout is captured in a temporary file
+/// and each line read back from that file is appended to \p GPUArchs.
+/// \returns an error if the temporary file cannot be created, the helper
+/// cannot be run or exits non-zero, or its output cannot be read.
+llvm::Error
+AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
+                                  SmallVector<std::string, 1> &GPUArchs) const {
+  std::string Program;
+  if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
+    Program = A->getValue();
+  else
+    Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
+
+  // Bail out early: without the file there is nowhere to capture stdout.
+  llvm::SmallString<64> OutputFile;
+  if (std::error_code EC = llvm::sys::fs::createTemporaryFile(
+          "print-system-gpus", "" /* No Suffix */, OutputFile))
+    return llvm::createStringError(
+        EC, "Failed to create temporary file for " + Program + ": " +
+                EC.message());
+  llvm::FileRemover OutputRemover(OutputFile.c_str());
+  // stdin and stderr get an empty path, stdout goes to the temporary file.
+  llvm::Optional<llvm::StringRef> Redirects[] = {
+      {""},
+      StringRef(OutputFile),
+      {""},
+  };
+  // ExecuteAndWait expects the program name as the first argument.
+  const llvm::StringRef ProgramArgs[] = {Program};
+
+  std::string ErrorMessage;
+  if (int Result = llvm::sys::ExecuteAndWait(
+          Program.c_str(), ProgramArgs, {}, Redirects, /* SecondsToWait */ 0,
+          /*MemoryLimit*/ 0, &ErrorMessage)) {
+    if (Result > 0) {
+      ErrorMessage = "Exited with error code " + std::to_string(Result);
+    } else if (Result == -1) {
+      ErrorMessage = "Execute failed: " + ErrorMessage;
+    } else {
+      ErrorMessage = "Crashed: " + ErrorMessage;
+    }
+
+    return llvm::createStringError(std::error_code(),
+                                   Program + ": " + ErrorMessage);
+  }
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
+      llvm::MemoryBuffer::getFile(OutputFile.c_str());
+  if (!OutputBuf) {
+    return llvm::createStringError(OutputBuf.getError(),
+                                   "Failed to read stdout of " + Program +
+                                       ": " + OutputBuf.getError().message());
+  }
+
+  for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
+    GPUArchs.push_back(LineIt->str());
+  }
+  return llvm::Error::success();
+}
+
+/// Determine the one GPU architecture shared by all GPUs in the system.
+///
+/// \p GPUArch is written only on success. An error is returned when detection
+/// fails, no GPU is reported, or the reported architectures differ.
+llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
+                                              std::string &GPUArch) const {
+  // detect the AMDGPU installed in system
+  SmallVector<std::string, 1> GPUArchs;
+  if (llvm::Error Err = detectSystemGPUs(Args, GPUArchs))
+    return Err;
+  if (GPUArchs.empty())
+    return llvm::createStringError(std::error_code(),
+                                   "No AMD GPU detected in the system");
+
+  // Validate before touching the out-parameter: every entry must match.
+  const std::string &First = GPUArchs.front();
+  const bool AllSame =
+      std::all_of(GPUArchs.begin(), GPUArchs.end(),
+                  [&First](const std::string &Arch) { return Arch == First; });
+  if (!AllSame)
+    return llvm::createStringError(
+        std::error_code(), "Multiple AMD GPUs found with different archs");
+
+  GPUArch = First;
+  return llvm::Error::success();
+}
+
 void ROCMToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
     Action::OffloadKind DeviceOffloadingKind) const {
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -10,11 +10,14 @@
 #include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
+#include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 
 using namespace clang::driver;
@@ -66,6 +69,18 @@
     CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
   }
 }
+
+/// Query the system GPU arch into \p GPUArch; diagnose and fail if unknown.
+static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
+                                 std::string &GPUArch) {
+  llvm::Error DetectErr = TC.getSystemGPUArch(Args, GPUArch);
+  if (!DetectErr)
+    return true;
+  const std::string Reason =
+      llvm::formatv("{0}", llvm::fmt_consume(std::move(DetectErr)));
+  TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << Reason;
+  return false;
+}
 } // namespace
 
 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
@@ -145,17 +160,23 @@
                                         const InputInfoList &Inputs,
                                         const ArgList &Args,
                                         const char *LinkingOutput) const {
+  const ToolChain &TC = getToolChain();
   assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
 
-  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
+  const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
+      static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
+
+  std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
+  if (GPUArch.empty()) {
+    if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
+      return;
+  }
 
   // Prefix for temporary file name.
   std::string Prefix;
   for (const auto &II : Inputs)
     if (II.isFilename())
-      Prefix =
-          llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch.str();
+      Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
   assert(Prefix.length() && "no linker inputs are files ");
 
   // Each command outputs different files.
@@ -186,18 +207,22 @@
     Action::OffloadKind DeviceOffloadingKind) const {
   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
-  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
-  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
+  std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
+  if (GPUArch.empty()) {
+    if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
+      return;
+  }
+
   assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
          "Only OpenMP offloading kinds are supported.");
 
   CC1Args.push_back("-target-cpu");
-  CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
+  CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
   CC1Args.push_back("-fcuda-is-device");
 
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
-  std::string BitcodeSuffix = "amdgcn-" + GpuArch.str();
+  std::string BitcodeSuffix = "amdgcn-" + GPUArch;
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                      getTriple());
 }
diff --git a/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
new file mode 100755
--- /dev/null
+++ b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
@@ -0,0 +1,5 @@
+#!/usr/bin/env sh
+# Fake amdgpu-arch for driver tests: reports two GPUs with different archs.
+echo gfx908
+echo gfx906
+exit 0
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
new file mode 100755
--- /dev/null
+++ b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+# Fake amdgpu-arch for driver tests: prints nothing and exits with status 1.
+exit 1
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
new file mode 100755
--- /dev/null
+++ b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+# Fake amdgpu-arch for driver tests: reports a single gfx906 GPU.
+echo "gfx906"
+exit 0
diff --git a/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
new file mode 100755
--- /dev/null
+++ b/clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
@@ -0,0 +1,5 @@
+#!/usr/bin/env sh
+# Fake amdgpu-arch for driver tests: reports two GPUs of the same arch.
+echo gfx908
+echo gfx908
+exit 0
diff --git a/clang/test/Driver/amdgpu-openmp-system-arch-fail.c b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c
@@ -0,0 +1,28 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_fail %t/amdgpu_arch_different
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_different %t/
+// RUN: echo '#!/usr/bin/env sh' > %t/amdgpu_arch_empty
+// RUN: chmod +x %t/amdgpu_arch_fail
+// RUN: chmod +x %t/amdgpu_arch_different
+// RUN: chmod +x %t/amdgpu_arch_empty
+
+// case when amdgpu_arch returns nothing or fails
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// NO-OUTPUT-ERROR: error: Cannot determine AMDGPU architecture{{.*}}Exited with error code 1. Consider passing it via --march
+
+// case when amdgpu_arch returns multiple gpus but all are different
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_different %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=MULTIPLE-OUTPUT-ERROR
+// MULTIPLE-OUTPUT-ERROR: error: Cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs.
Consider passing it via --march + +// case when amdgpu_arch does not return anything with successful execution +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT +// EMPTY-OUTPUT: error: Cannot determine AMDGPU architecture: No AMD GPU detected in the system. Consider passing it via --march diff --git a/clang/test/Driver/amdgpu-openmp-system-arch.c b/clang/test/Driver/amdgpu-openmp-system-arch.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/amdgpu-openmp-system-arch.c @@ -0,0 +1,24 @@ +// REQUIRES: system-linux +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target +// REQUIRES: shell + +// RUN: mkdir -p %t +// RUN: rm -f %t/amdgpu_arch_gfx906 +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/ +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/ +// RUN: chmod +x %t/amdgpu_arch_gfx906 +// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908 + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s +// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]" +// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" +// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o" + +// case when amdgpu_arch returns multiple gpus but of same arch +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx908_gfx908 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-MULTIPLE +// CHECK-MULTIPLE: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx908]]" +// CHECK-MULTIPLE: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" +// CHECK-MULTIPLE: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o" diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt --- a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -43,3 +43,5 @@ # libclang may require clang-tidy in clang-tools-extra. add_clang_subdirectory(libclang) + +add_clang_subdirectory(amdgpu-arch) diff --git a/clang/tools/amdgpu-arch/AMDGPUArch.cpp b/clang/tools/amdgpu-arch/AMDGPUArch.cpp new file mode 100644 --- /dev/null +++ b/clang/tools/amdgpu-arch/AMDGPUArch.cpp @@ -0,0 +1,59 @@ +//===- AMDGPUArch.cpp - list AMDGPU installed ----------*- C++ -*---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a tool for detecting name of AMDGPU installed in system +// using HSA. This tool is used by AMDGPU OpenMP driver. 
+//
+//===----------------------------------------------------------------------===//
+
+#include <hsa.h>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+/// Agent-iteration callback: appends the name of each GPU agent to the
+/// std::vector<std::string> passed through \p Data; other agents are skipped.
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+      hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU)
+    return Status;
+
+  std::vector<std::string> *GPUs =
+      static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS)
+    return Status;
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Prints the name of every GPU agent found through HSA, one per line.
+/// Returns 0 only when HSA initialised, iteration succeeded and a GPU exists.
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS)
+    return 1;
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  // hsa_init() succeeded above, so shut the runtime down on every exit path
+  // rather than only when at least one GPU was found.
+  hsa_shut_down();
+  if (Status != HSA_STATUS_SUCCESS)
+    return 1;
+
+  for (const auto &GPU : GPUs)
+    printf("%s\n", GPU.c_str());
+
+  return GPUs.empty() ? 1 : 0;
+}
diff --git a/clang/tools/amdgpu-arch/CMakeLists.txt b/clang/tools/amdgpu-arch/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===----------------------------------------------------------------------===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===----------------------------------------------------------------------===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)