diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -56,6 +56,7 @@ "compiler runtime routines will be linked">, InGroup; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; +def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; @@ -317,6 +318,9 @@ def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< "'-fopenmp-targets' must be used in conjunction with a '-fopenmp' option " "compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5'">; +def err_drv_failed_to_deduce_target_from_arch : Error< + "failed to deduce triple for target architecture '%0'; specify the triple " + "using '-fopenmp-targets' and '-Xopenmp-target' instead.">; def err_drv_omp_offload_target_missingbcruntime : Error< "no library '%0' found in the default clang lib directory or in LIBRARY_PATH" "; use '--libomptarget-%1-bc-path' to specify %1 bitcode library">; diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -297,6 +297,11 @@ /// stored in it, and will clean them up when torn down. mutable llvm::StringMap> ToolChains; + /// Cache of known offloading architectures for the ToolChain already derived. + /// This should only be modified when we first initialize the offloading + /// toolchains. + llvm::DenseMap> KnownArchs; + private: /// TranslateInputArgs - Create a new derived argument list from the input /// arguments, after applying the standard argument translations. 
@@ -450,6 +455,13 @@ const InputTy &Input, Action *HostAction) const; + /// Returns the set of bound architectures active for this offload kind. + /// If there are no bound architectures we return a set containing only the + /// empty string. + llvm::DenseSet + getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, + Action::OffloadKind Kind, const ToolChain *TC) const; + /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. /// If TypoCorrect is true and the file does not exist, see if it looks diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -781,76 +781,117 @@ // OpenMP // // We need to generate an OpenMP toolchain if the user specified targets with - // the -fopenmp-targets option. - if (Arg *OpenMPTargets = - C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { - if (OpenMPTargets->getNumValues()) { - // We expect that -fopenmp-targets is always used in conjunction with the - // option -fopenmp specifying a valid runtime with offloading support, - // i.e. libomp or libiomp. - bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag( - options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false); - if (HasValidOpenMPRuntime) { - OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs()); - HasValidOpenMPRuntime = - OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5; + // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. 
+ bool IsOpenMPOffloading = + C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || + C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)); + if (IsOpenMPOffloading) { + // We expect that -fopenmp-targets is always used in conjunction with the + // option -fopenmp specifying a valid runtime with offloading support, i.e. + // libomp or libiomp. + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; + } + + llvm::StringMap> DerivedArchs; + llvm::StringMap FoundNormalizedTriples; + llvm::SmallVector OpenMPTriples; + + // If the user specified -fopenmp-targets= we create a toolchain for each + // valid triple. Otherwise, if only --offload-arch= was specified we instead + // attempt to derive the appropriate toolchains from the arguments. + if (Arg *OpenMPTargets = + C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { + if (OpenMPTargets && !OpenMPTargets->getNumValues()) { + Diag(clang::diag::warn_drv_empty_joined_argument) + << OpenMPTargets->getAsString(C.getInputArgs()); + return; + } + llvm::copy(OpenMPTargets->getValues(), std::back_inserter(OpenMPTriples)); + } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && + !IsHIP && !IsCuda) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); + auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), + HostTC->getTriple()); + + // Attempt to deduce the offloading triple from the set of architectures. + // We can only correctly deduce NVPTX / AMDGPU triples currently. 
+ llvm::DenseSet Archs = + getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr); + for (StringRef Arch : Archs) { + if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch( + getProcessorFromTargetID(*NVPTXTriple, Arch)))) { + DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); + } else if (AMDTriple && + IsAMDGpuArch(StringToCudaArch( + getProcessorFromTargetID(*AMDTriple, Arch)))) { + DerivedArchs[AMDTriple->getTriple()].insert(Arch); + } else { + Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; + return; + } } - if (HasValidOpenMPRuntime) { - llvm::StringMap FoundNormalizedTriples; - for (const char *Val : OpenMPTargets->getValues()) { - llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << Val << Duplicate->second; - continue; - } + for (const auto &TripleAndArchs : DerivedArchs) + OpenMPTriples.push_back(TripleAndArchs.first()); + } - // Store the current triple so that we can check for duplicates in the - // following iterations. - FoundNormalizedTriples[NormalizedName] = Val; - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) - Diag(clang::diag::err_drv_invalid_omp_target) << Val; - else { - const ToolChain *TC; - // Device toolchains have to be selected differently. They pair host - // and device in their implementation. 
- if (TT.isNVPTX() || TT.isAMDGCN()) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - assert(HostTC && "Host toolchain should be always defined."); - auto &DeviceTC = - ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; - if (!DeviceTC) { - if (TT.isNVPTX()) - DeviceTC = std::make_unique( - *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); - else if (TT.isAMDGCN()) - DeviceTC = - std::make_unique( - *this, TT, *HostTC, C.getInputArgs()); - else - assert(DeviceTC && "Device toolchain not defined."); - } - - TC = DeviceTC.get(); - } else - TC = &getToolChain(C.getInputArgs(), TT); - C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); + for (StringRef Val : OpenMPTriples) { + llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); + std::string NormalizedName = TT.normalize(); + + // Make sure we don't have a duplicate triple. + auto Duplicate = FoundNormalizedTriples.find(NormalizedName); + if (Duplicate != FoundNormalizedTriples.end()) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Val << Duplicate->second; + continue; + } + + // Store the current triple so that we can check for duplicates in the + // following iterations. + FoundNormalizedTriples[NormalizedName] = Val; + + // If the specified target is invalid, emit a diagnostic. + if (TT.getArch() == llvm::Triple::UnknownArch) + Diag(clang::diag::err_drv_invalid_omp_target) << Val; + else { + const ToolChain *TC; + // Device toolchains have to be selected differently. They pair host + // and device in their implementation. 
+ if (TT.isNVPTX() || TT.isAMDGCN()) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain(); + assert(HostTC && "Host toolchain should be always defined."); + auto &DeviceTC = + ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; + if (!DeviceTC) { + if (TT.isNVPTX()) + DeviceTC = std::make_unique( + *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); + else if (TT.isAMDGCN()) + DeviceTC = std::make_unique( + *this, TT, *HostTC, C.getInputArgs()); + else + assert(DeviceTC && "Device toolchain not defined."); } - } - } else - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - } else - Diag(clang::diag::warn_drv_empty_joined_argument) - << OpenMPTargets->getAsString(C.getInputArgs()); + + TC = DeviceTC.get(); + } else + TC = &getToolChain(C.getInputArgs(), TT); + C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); + if (DerivedArchs.find(TT.getTriple()) != DerivedArchs.end()) + KnownArchs[TC] = DerivedArchs[TT.getTriple()]; + } + } + } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; } // @@ -4210,29 +4251,36 @@ Args.ClaimAllArgs(options::OPT_offload_host_device); } -/// Returns the canonical name for the offloading architecture when using HIP or -/// CUDA. +/// Returns the canonical name for the offloading architecture when using a HIP +/// or CUDA architecture. static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - Action::OffloadKind Kind, - const ToolChain *TC) { - if (Kind == Action::OFK_Cuda || - (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) { - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - return StringRef(); - } + const llvm::Triple &Triple) { + // Lookup the CUDA / HIP architecture string. 
Only report an error if we were + // expecting the triple to be only NVPTX / AMDGPU. + CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr)); + if (Triple.isNVPTX() && + (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << ArchStr; + return StringRef(); + } else if (Triple.isAMDGPU() && + (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << ArchStr; + return StringRef(); + } + + if (IsNVIDIAGpuArch(Arch)) return Args.MakeArgStringRef(CudaArchToString(Arch)); - } else if (Kind == Action::OFK_HIP || - (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) { + + if (IsAMDGpuArch(Arch)) { llvm::StringMap Features; - // getHIPOffloadTargetTriple() is known to return valid value as it has - // been called successfully in the CreateOffloadingDeviceToolChains(). - auto Arch = parseTargetID( - *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr, - &Features); + auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); + if (!HIPTriple) + return StringRef(); + auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features); if (!Arch) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; C.setContainsError(); @@ -4241,6 +4289,7 @@ return Args.MakeArgStringRef( getCanonicalTargetID(Arch.getValue(), Features)); } + // If the input isn't CUDA or HIP just return the architecture. return ArchStr; } @@ -4258,12 +4307,11 @@ return getConflictTargetIDCombination(ArchSet); } -/// Returns the set of bound architectures active for this compilation kind. -/// This function returns a set of bound architectures, if there are no bound -/// architctures we return a set containing only the empty string. 
-static llvm::DenseSet -getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) { +llvm::DenseSet +Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, + Action::OffloadKind Kind, const ToolChain *TC) const { + if (!TC) + TC = &C.getDefaultToolChain(); // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && @@ -4276,15 +4324,20 @@ : "--no-offload-arch"); } + if (KnownArchs.find(TC) != KnownArchs.end()) + return KnownArchs.lookup(TC); + llvm::DenseSet Archs; for (auto &Arg : Args) { if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { - Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); + Archs.insert( + getCanonicalArchString(C, Args, Arg->getValue(), TC->getTriple())); } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { if (Arg->getValue() == StringRef("all")) Archs.clear(); else - Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); + Archs.erase( + getCanonicalArchString(C, Args, Arg->getValue(), TC->getTriple())); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7003,20 +7003,13 @@ // For all the host OpenMP offloading compile jobs we need to pass the targets // information using -fopenmp-targets= option. if (JA.isHostOffloading(Action::OFK_OpenMP)) { - SmallString<128> TargetInfo("-fopenmp-targets="); - - Arg *Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); - assert(Tgts && Tgts->getNumValues() && - "OpenMP offloading has to have targets specified."); - for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { - if (i) - TargetInfo += ','; - // We need to get the string from the triple because it may be not exactly - // the same as the one we get directly from the arguments. 
- llvm::Triple T(Tgts->getValue(i)); - TargetInfo += T.getTriple(); - } - CmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); + SmallString<128> Targets("-fopenmp-targets="); + + SmallVector Triples; + auto TCRange = C.getOffloadToolChains(); + std::transform(TCRange.first, TCRange.second, std::back_inserter(Triples), + [](auto TC) { return TC.second->getTripleString(); }); + CmdArgs.push_back(Args.MakeArgString(Targets + llvm::join(Triples, ","))); } bool VirtualFunctionElimination = diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/openmp-offload-infer.c @@ -0,0 +1,50 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \ +// RUN: --offload-arch=sm_52 --offload-arch=gfx803 \ +// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck %s + +// verify the tools invocations +// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" +// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx803" +// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" +// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" +// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ +// RUN: --offload-arch=sm_70 --offload-arch=gfx908:sramecc+:xnack- \ +// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU + +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// 
CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ +// RUN: --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS + +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ +// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=native \ +// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED + +// 
CHECK-FAILED: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead. + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ +// RUN: --offload-arch=sm_70 --offload-arch=gfx908 -fno-openmp \ +// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLED + +// CHECK-DISABLED-NOT: "nvptx64-nvidia-cuda" - "clang",