diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -481,10 +481,11 @@ /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the - /// empty string. + /// empty string. The \p Query option is used to suppress errors on failure. llvm::DenseSet getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const; + Action::OffloadKind Kind, const ToolChain *TC, + bool Query = false) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -848,9 +848,30 @@ HostTC->getTriple()); // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - llvm::DenseSet Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr); + // We can only correctly deduce NVPTX / AMDGPU triples currently. We need + // to temporarily create these toolchains so that we can access tools for + // inferring architectures. 
+ llvm::DenseSet Archs; + if (NVPTXTriple) { + auto TempTC = std::make_unique( + *this, *NVPTXTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (AMDTriple) { + auto TempTC = std::make_unique( + *this, *AMDTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (!AMDTriple && !NVPTXTriple) { + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr, true)) + Archs.insert(Arch); + } + for (StringRef Arch : Archs) { if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch( getProcessorFromTargetID(*NVPTXTriple, Arch)))) { @@ -865,6 +886,13 @@ } } + // If the set is empty then we failed to find a native architecture. + if (Archs.empty()) { + Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << "native"; + return; + } + for (const auto &TripleAndArchs : DerivedArchs) OpenMPTriples.push_back(TripleAndArchs.first()); } @@ -4182,16 +4210,17 @@ static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple) { + const llvm::Triple &Triple, + bool Query = false) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. 
CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr)); - if (Triple.isNVPTX() && + if (!Query && Triple.isNVPTX() && (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "CUDA" << ArchStr; return StringRef(); - } else if (Triple.isAMDGPU() && + } else if (!Query && Triple.isAMDGPU() && (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; @@ -4234,7 +4263,8 @@ llvm::DenseSet Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const { + Action::OffloadKind Kind, const ToolChain *TC, + bool Query) const { if (!TC) TC = &C.getDefaultToolChain(); @@ -4271,18 +4301,22 @@ if (Arch == "native") { auto GPUsOrErr = TC->getSystemGPUArchs(Args); if (!GPUsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + if (Query) + llvm::consumeError(GPUsOrErr.takeError()); + else + TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC->getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } - for (auto ArchStr : *GPUsOrErr) - Archs.insert( - getCanonicalArchString(C, Args, ArchStr, TC->getTriple())); + for (auto ArchStr : *GPUsOrErr) { + Archs.insert(getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC->getTriple(), Query)); + } } else { StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + getCanonicalArchString(C, Args, Arch, TC->getTriple(), Query); if (ArchStr.empty()) return Archs; Archs.insert(ArchStr); @@ -4294,7 +4328,7 @@ Archs.clear(); } else { StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + getCanonicalArchString(C, Args, Arch, TC->getTriple(), Query); if 
(ArchStr.empty()) return Archs; Archs.erase(ArchStr); @@ -4309,6 +4343,10 @@ C.setContainsError(); } + // Skip filling defaults if we're just querying what is available. + if (Query) + return Archs; + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) Archs.insert(CudaArchToString(CudaArch::CudaDefault)); diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/openmp-system-arch.c @@ -0,0 +1,50 @@ +// RUN: mkdir -p %t +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/ +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/ +// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty +// RUN: chmod +x %t/amdgpu_arch_fail +// RUN: chmod +x %t/amdgpu_arch_gfx906 +// RUN: chmod +x %t/amdgpu_arch_empty +// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty +// RUN: chmod +x %t/nvptx_arch_fail +// RUN: chmod +x %t/nvptx_arch_sm_70 +// RUN: chmod +x %t/nvptx_arch_empty + +// case when nvptx-arch and amdgpu-arch return nothing or fail +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead. + +// case when amdgpu-arch succeeds. 
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-GFX906 +// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" + +// case when nvptx-arch succeeds. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-SM_70 +// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when both nvptx-arch and amdgpu-arch succeed. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-SM_70-GFX906 +// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" +// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when both nvptx-arch and amdgpu-arch succeed with other archs. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native,sm_75,gfx1030 \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-MULTIPLE +// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030" +// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" +// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" +// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"