diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -77,9 +77,9 @@ "'--hip-path' must be specified when offloading to " "SPIR-V%select{| unless %1 is given}0.">; -def err_drv_undetermined_amdgpu_arch : Error< - "cannot determine AMDGPU architecture: %0; consider passing it via " - "'--march'">; +def err_drv_undetermined_gpu_arch : Error< + "cannot determine %0 architecture: %1; consider passing it via " + "'%2'">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4; use '--cuda-path' to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1000,6 +1000,8 @@ HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, HelpText<"Tool used for detecting AMD GPU arch in the system.">; +def nvptx_arch_tool_EQ : Joined<["--"], "nvptx-arch-tool=">, Group, + HelpText<"Tool used for detecting NVIDIA GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -192,6 +192,10 @@ ToolChain(const Driver &D, const llvm::Triple &T, const llvm::opt::ArgList &Args); + /// Executes the given \p Executable and returns the stdout. 
+ llvm::Expected> + executeToolChainProgram(StringRef Executable) const; + void setTripleEnvironment(llvm::Triple::EnvironmentType Env); virtual Tool *buildAssembler() const; @@ -704,6 +708,10 @@ bool addFastMathRuntimeIfAvailable( const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// getSystemGPUArchs - Use a tool to detect the user's available GPUs. + virtual Expected> + getSystemGPUArchs(const llvm::opt::ArgList &Args) const; + /// addProfileRTLibs - When -fprofile-instr-profile is specified, try to pass /// a suitable profile runtime library to the linker. virtual void addProfileRTLibs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3067,17 +3067,19 @@ if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && ArchStr == "all") { GpuArchs.clear(); - } else if (ArchStr == "native" && - ToolChains.front()->getTriple().isAMDGPU()) { - auto *TC = static_cast( - ToolChains.front()); - SmallVector GPUs; - auto Err = TC->detectSystemGPUs(Args, GPUs); - if (!Err) { - for (auto GPU : GPUs) - GpuArchs.insert(Args.MakeArgString(GPU)); - } else - llvm::consumeError(std::move(Err)); + } else if (ArchStr == "native") { + const ToolChain &TC = *ToolChains.front(); + auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); + if (!GPUsOrErr) { + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << (TC.getTriple().isNVPTX() ? 
"NVPTX" : "AMDGPU") + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + continue; + } + + for (auto GPU : *GPUsOrErr) { + GpuArchs.insert(Args.MakeArgString(GPU)); + } } else { ArchStr = getCanonicalOffloadArch(ArchStr); if (ArchStr.empty()) { diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -36,6 +36,7 @@ #include "llvm/Option/Option.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/VersionTuple.h" @@ -88,6 +89,33 @@ addIfExists(getFilePaths(), getArchSpecificLibPath()); } +llvm::Expected> +ToolChain::executeToolChainProgram(StringRef Executable) const { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + std::optional Redirects[] = { + {""}, + OutputFile.str(), + {""}, + }; + + std::string ErrorMessage; + if (int Result = llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, + /* SecondsToWait */ 0, + /*MemoryLimit*/ 0, &ErrorMessage)) + return llvm::createStringError(std::error_code(), + Executable + ": " + ErrorMessage); + + llvm::ErrorOr> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return llvm::createStringError(OutputBuf.getError(), + "Failed to read stdout of " + Executable + + ": " + OutputBuf.getError().message()); + return std::move(*OutputBuf); +} + void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { Triple.setEnvironment(Env); if (EffectiveTriple != llvm::Triple()) @@ -1086,6 +1114,11 @@ return false; } +Expected> +ToolChain::getSystemGPUArchs(const llvm::opt::ArgList &Args) const { + return SmallVector(); +} + SanitizerMask ToolChain::getSupportedSanitizers() const { // Return sanitizers which 
don't require runtime support and are not // platform dependent. diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -100,13 +100,10 @@ /// Should skip argument. bool shouldSkipArgument(const llvm::opt::Arg *Arg) const; - /// Uses amdgpu_arch tool to get arch of the system GPU. Will return error + /// Uses amdgpu-arch tool to get arch of the system GPU. Will return error /// if unable to find one. - llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args, - std::string &GPUArch) const; - - llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args, - SmallVector &GPUArchs) const; + virtual Expected> + getSystemGPUArchs(const llvm::opt::ArgList &Args) const override; protected: /// Check and diagnose invalid target ID specified by -mcpu. diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -16,7 +16,6 @@ #include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" -#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Host.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" @@ -25,8 +24,6 @@ #include #include -#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" - using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; @@ -767,73 +764,29 @@ } } -llvm::Error -AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, - SmallVector &GPUArchs) const { +Expected> +AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { + // Detect AMD GPUs available on the system. 
std::string Program; if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) Program = A->getValue(); else - Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, - OutputFile); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - std::optional Redirects[] = { - {""}, - OutputFile.str(), - {""}, - }; - - std::string ErrorMessage; - if (int Result = llvm::sys::ExecuteAndWait( - Program, {}, {}, Redirects, /* SecondsToWait */ 0, - /*MemoryLimit*/ 0, &ErrorMessage)) { - if (Result > 0) { - ErrorMessage = "Exited with error code " + std::to_string(Result); - } else if (Result == -1) { - ErrorMessage = "Execute failed: " + ErrorMessage; - } else { - ErrorMessage = "Crashed: " + ErrorMessage; - } + Program = GetProgramPath("amdgpu-arch"); - return llvm::createStringError(std::error_code(), - Program + ": " + ErrorMessage); - } - - llvm::ErrorOr> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) { - return llvm::createStringError(OutputBuf.getError(), - "Failed to read stdout of " + Program + - ": " + OutputBuf.getError().message()); - } + auto StdoutOrErr = executeToolChainProgram(Program); + if (!StdoutOrErr) + return StdoutOrErr.takeError(); - for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) { - GPUArchs.push_back(LineIt->str()); - } - return llvm::Error::success(); -} - -llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args, - std::string &GPUArch) const { - // detect the AMDGPU installed in system SmallVector GPUArchs; - auto Err = detectSystemGPUs(Args, GPUArchs); - if (Err) { - return Err; - } - if (GPUArchs.empty()) { + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + + if (GPUArchs.empty()) return llvm::createStringError(std::error_code(), "No AMD GPU detected in the system"); - } - GPUArch = 
GPUArchs[0]; - if (GPUArchs.size() > 1) { - if (!llvm::all_equal(GPUArchs)) - return llvm::createStringError( - std::error_code(), "Multiple AMD GPUs found with different archs"); - } - return llvm::Error::success(); + + return GPUArchs; } void ROCMToolChain::addClangTargetOptions( diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -33,13 +33,24 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, std::string &GPUArch) { - if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { + auto CheckError = [&](llvm::Error Err) -> bool { std::string ErrMsg = llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); - TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << "AMDGPU" << ErrMsg << "-march"; return false; - } + }; + + auto ArchsOrErr = TC.getSystemGPUArchs(Args); + if (!ArchsOrErr) + return CheckError(ArchsOrErr.takeError()); + + if (ArchsOrErr->size() > 1) + if (!llvm::all_equal(*ArchsOrErr)) + return CheckError(llvm::createStringError( + std::error_code(), "Multiple AMD GPUs found with different archs")); + GPUArch = ArchsOrErr->front(); return true; } } // namespace diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -183,6 +183,11 @@ const ToolChain &HostTC; CudaInstallationDetector CudaInstallation; + /// Uses nvptx-arch tool to get arch of the system GPU. Will return error + /// if unable to find one. 
+ virtual Expected> + getSystemGPUArchs(const llvm::opt::ArgList &Args) const override; + protected: Tool *buildAssembler() const override; // ptxas Tool *buildLinker() const override; // fatbinary (ok, not really a linker) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -785,6 +785,31 @@ return DAL; } +Expected> +CudaToolChain::getSystemGPUArchs(const ArgList &Args) const { + // Detect NVIDIA GPUs available on the system. + std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ)) + Program = A->getValue(); + else + Program = GetProgramPath("nvptx-arch"); + + auto StdoutOrErr = executeToolChainProgram(Program); + if (!StdoutOrErr) + return StdoutOrErr.takeError(); + + SmallVector GPUArchs; + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + + if (GPUArchs.empty()) + return llvm::createStringError(std::error_code(), + "No NVIDIA GPU detected in the system"); + + return GPUArchs; +} + Tool *CudaToolChain::buildAssembler() const { return new tools::NVPTX::Assembler(*this); } diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail new file mode 100755 --- /dev/null +++ b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail @@ -0,0 +1,2 @@ +#!/bin/sh +exit 1 diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70 b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70 new file mode 100755 --- /dev/null +++ b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70 @@ -0,0 +1,3 @@ +#!/bin/sh +echo sm_70 +exit 0 diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -0,0 +1,27 @@ +// REQUIRES: system-linux +// REQUIRES: x86-registered-target 
+// REQUIRES: amdgpu-registered-target +// REQUIRES: shell + +// RUN: mkdir -p %t +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/ +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/ +// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty +// RUN: chmod +x %t/amdgpu_arch_fail +// RUN: chmod +x %t/amdgpu_arch_gfx906 +// RUN: chmod +x %t/amdgpu_arch_empty + +// case when amdgpu-arch returns nothing or fails +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// NO-OUTPUT-ERROR: error: cannot determine AMDGPU architecture{{.*}}; consider passing it via '--offload-arch' + +// case when amdgpu_arch does not return anything with successful execution +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT +// EMPTY-OUTPUT: error: cannot determine AMDGPU architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch' + +// case when amdgpu_arch reports gfx906 with successful execution +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-GFX906 +// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" diff --git a/clang/test/Driver/amdgpu-openmp-system-arch-fail.c b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c --- a/clang/test/Driver/amdgpu-openmp-system-arch-fail.c +++ b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c @@ -15,14 +15,14 @@ // case when amdgpu_arch returns nothing or fails // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine AMDGPU architecture{{.*}}Exited with error code 1; consider passing it via '--march' +// NO-OUTPUT-ERROR: error: cannot determine AMDGPU architecture{{.*}}; consider passing it via '-march' // case when amdgpu_arch returns multiple gpus but all are different // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_different %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=MULTIPLE-OUTPUT-ERROR -// MULTIPLE-OUTPUT-ERROR: error: cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs; consider passing it via '--march' +// MULTIPLE-OUTPUT-ERROR: error: cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs; consider passing it via '-march' // case when amdgpu_arch does not return anything with successful execution // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine AMDGPU architecture: No AMD GPU detected in the system; consider passing it via '--march' +// EMPTY-OUTPUT: error: cannot determine AMDGPU architecture: No AMD GPU detected in the system; consider passing it via '-march' diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -0,0 +1,27 @@ +// REQUIRES: system-linux +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target +// REQUIRES: shell + +// RUN: mkdir -p %t +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/ +// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty +// RUN: chmod +x %t/nvptx_arch_fail +// RUN: chmod +x %t/nvptx_arch_sm_70 +// RUN: chmod 
+x %t/nvptx_arch_empty + +// case when nvptx-arch returns nothing or fails +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// NO-OUTPUT-ERROR: error: cannot determine NVPTX architecture{{.*}}; consider passing it via '--offload-arch' + +// case when nvptx_arch does not return anything with successful execution +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT +// EMPTY-OUTPUT: error: cannot determine NVPTX architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch' + +// case when nvptx_arch reports sm_70 with successful execution +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-sm_70 +// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"