diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -73,6 +73,11 @@ InGroup; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; +def err_drv_bad_target_id : Error<"Invalid target ID: %0 (A target ID is a processor name " + "followed by an optional list of predefined features post-fixed by a plus or minus sign deliminated " + "by colon, e.g. 'gfx908:sram-ecc+:xnack-')">; +def err_drv_bad_offload_arch_combo : Error<"Invalid offload arch combinations: %0 and %1 (For a specific " + "processor, a feature should either exist in all offload archs, or not exist in any offload archs)">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< diff --git a/clang/include/clang/Basic/TargetID.h b/clang/include/clang/Basic/TargetID.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/TargetID.h @@ -0,0 +1,56 @@ +//===--- TargetID.h - Utilities for target ID -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_TARGET_ID_H +#define LLVM_CLANG_BASIC_TARGET_ID_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include + +namespace clang { + +/// Get all feature strings that can be used in target ID for \p Processor. +/// Target ID is a processor name with optional feature strings +/// postfixed by a plus or minus sign delimited by colons, e.g. +/// gfx908:xnack+:sram-ecc-. Each processor have a limited +/// number of predefined features when showing up in a target ID. +const llvm::SmallVector +getAllPossibleTargetIDFeatures(const llvm::Triple &T, + llvm::StringRef Processor); + +/// Get processor name from target ID. +/// Returns canonical processor name or empty if the processor name is invalid. +llvm::StringRef getProcessorFromTargetID(const llvm::Triple &T, + llvm::StringRef OffloadArch); + +/// Parse a target ID to get processor and feature map. +/// Returns canonicalized processor name or None if the target ID is invalid. +/// Returns target ID features in \p FeatureMap if it is not null pointer. +/// This function assumes \p OffloadArch is a valid target ID. +/// If the target ID contains feature+, map it to true. +/// If the target ID contains feature-, map it to false. +/// If the target ID does not contain a feature (default), do not map it. +llvm::Optional +parseTargetID(const llvm::Triple &T, llvm::StringRef OffloadArch, + llvm::StringMap *FeatureMap); + +/// Returns canonical target ID, assuming \p Processor is canonical and all +/// entries in \p Features are valid. +std::string getCanonicalTargetID(llvm::StringRef Processor, + const llvm::StringMap &Features); + +/// Get the conflicted pair of target IDs for a compilation or a bundled code +/// object, assuming \p TargetIDs are canonicalized. If there is no conflicts, +/// returns None. +llvm::Optional> +getConflictTargetIDCombination(const std::set &TargetIDs); +} // namespace clang + +#endif diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1061,6 +1061,9 @@ return Triple; } + /// Returns the target ID if supported. + virtual llvm::Optional getTargetID() const { return llvm::None; } + const llvm::DataLayout &getDataLayout() const { assert(DataLayout && "Uninitialized DataLayout!"); return *DataLayout; diff --git a/clang/include/clang/Driver/Compilation.h b/clang/include/clang/Driver/Compilation.h --- a/clang/include/clang/Driver/Compilation.h +++ b/clang/include/clang/Driver/Compilation.h @@ -297,6 +297,10 @@ /// Return whether an error during the parsing of the input args. bool containsError() const { return ContainsError; } + /// Force driver to fail before toolchain is created. This is necessary when + /// error happens in action builder. + void setContainsError() { ContainsError = true; } + /// Redirect - Redirect output of this compilation. Can only be done once. /// /// \param Redirects - array of optional paths. The array should have a size diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -609,7 +609,10 @@ def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>, HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[DriverOption]>, - HelpText<"CUDA/HIP offloading device architecture (e.g. sm_35, gfx906). May be specified more than once.">; + HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a " + "device architecture followed by target ID features delimited by a colon. Each target ID feature " + "is a pre-defined string followed by a plus or minus sign (e.g. gfx908:xnack+:sram-ecc-). May be " + "specified more than once.">; def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>, Alias; def hip_link : Flag<["--"], "hip-link">, diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -62,6 +62,7 @@ SourceLocation.cpp SourceManager.cpp Stack.cpp + TargetID.cpp TargetInfo.cpp Targets.cpp Targets/AArch64.cpp diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Basic/TargetID.cpp @@ -0,0 +1,169 @@ +//===--- TargetID.cpp - Utilities for parsing target ID -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TargetID.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace clang { + +static const llvm::SmallVector +getAllPossibleAMDGPUTargetIDFeatures(const llvm::Triple &T, + llvm::StringRef Proc) { + // Entries in returned vector should be in alphabetical order. + llvm::SmallVector Ret; + auto ProcKind = T.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(Proc) + : llvm::AMDGPU::parseArchR600(Proc); + if (ProcKind == llvm::AMDGPU::GK_NONE) + return Ret; + auto Features = T.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(ProcKind) + : llvm::AMDGPU::getArchAttrR600(ProcKind); + if (Features & llvm::AMDGPU::FEATURE_SRAM_ECC) + Ret.push_back("sram-ecc"); + if (Features & llvm::AMDGPU::FEATURE_XNACK) + Ret.push_back("xnack"); + return Ret; +} + +const llvm::SmallVector +getAllPossibleTargetIDFeatures(const llvm::Triple &T, + llvm::StringRef Processor) { + llvm::SmallVector Ret; + if (T.isAMDGPU()) + return getAllPossibleAMDGPUTargetIDFeatures(T, Processor); + return Ret; +} + +/// Returns canonical processor name or empty string if \p Processor is invalid. +static llvm::StringRef getCanonicalProcessorName(const llvm::Triple &T, + llvm::StringRef Processor) { + if (T.isAMDGPU()) + return llvm::AMDGPU::getCanonicalArchName(T, Processor); + return Processor; +} + +llvm::StringRef getProcessorFromTargetID(const llvm::Triple &T, + llvm::StringRef TargetID) { + auto Split = TargetID.split(':'); + return getCanonicalProcessorName(T, Split.first); +} + +// Parse a target ID with format checking only. Do not check whether processor +// name or features are valid for the processor. +// +// A target ID is a processor name followed by a list of target features +// delimited by colon. Each target feature is a string post-fixed by a plus +// or minus sign, e.g. gfx908:sram-ecc+:xnack-. +static llvm::Optional +parseTargetIDWithFormatCheckingOnly(llvm::StringRef TargetID, + llvm::StringMap *FeatureMap) { + llvm::StringRef Processor; + + if (TargetID.empty()) + return llvm::StringRef(); + + auto Split = TargetID.split(':'); + Processor = Split.first; + if (Processor.empty()) + return llvm::None; + + auto Features = Split.second; + if (Features.empty()) + return Processor; + + llvm::StringMap LocalFeatureMap; + if (!FeatureMap) + FeatureMap = &LocalFeatureMap; + + while (!Features.empty()) { + auto Splits = Features.split(':'); + auto Sign = Splits.first.back(); + auto Feature = Splits.first.drop_back(); + if (Sign != '+' && Sign != '-') + return llvm::None; + bool IsOn = Sign == '+'; + auto Loc = FeatureMap->find(Feature); + // Each feature can only show up at most once in target ID. + if (Loc != FeatureMap->end()) + return llvm::None; + (*FeatureMap)[Feature] = IsOn; + Features = Splits.second; + } + return Processor; +}; + +llvm::Optional +parseTargetID(const llvm::Triple &T, llvm::StringRef TargetID, + llvm::StringMap *FeatureMap) { + auto OptionalProcessor = + parseTargetIDWithFormatCheckingOnly(TargetID, FeatureMap); + + if (!OptionalProcessor) + return llvm::None; + + llvm::StringRef Processor = + getCanonicalProcessorName(T, OptionalProcessor.getValue()); + if (Processor.empty()) + return llvm::None; + + llvm::SmallSet AllFeatures; + for (auto &&F : getAllPossibleTargetIDFeatures(T, Processor)) + AllFeatures.insert(F); + + for (auto &&F : *FeatureMap) + if (!AllFeatures.count(F.first())) + return llvm::None; + + return Processor; +}; + +// A canonical target ID is a target ID containing a canonical processor name +// and features in alphabetical order. +std::string getCanonicalTargetID(llvm::StringRef Processor, + const llvm::StringMap &Features) { + std::string TargetID = Processor.str(); + std::map OrderedMap; + for (const auto &F : Features) + OrderedMap[F.first()] = F.second; + for (auto F : OrderedMap) + TargetID = TargetID + ':' + F.first.str() + (F.second ? "+" : "-"); + return TargetID; +} + +// For a specific processor, a feature either shows up in all target IDs, or +// does not show up in any target IDs. Otherwise the target ID combination +// is invalid. +llvm::Optional> +getConflictTargetIDCombination(const std::set &TargetIDs) { + struct Info { + llvm::StringRef TargetID; + llvm::StringMap Features; + }; + llvm::StringMap FeatureMap; + for (auto &&ID : TargetIDs) { + llvm::StringMap Features; + llvm::StringRef Proc = + parseTargetIDWithFormatCheckingOnly(ID, &Features).getValue(); + auto Loc = FeatureMap.find(Proc); + if (Loc == FeatureMap.end()) + FeatureMap[Proc] = Info{ID, Features}; + else { + auto &ExistingFeatures = Loc->second.Features; + if (llvm::any_of(Features, [&](auto &F) { + return ExistingFeatures.count(F.first()) == 0; + })) + return std::make_pair(Loc->second.TargetID, ID); + } + } + return llvm::None; +} + +} // namespace clang diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H +#include "clang/Basic/TargetID.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/StringSet.h" @@ -41,6 +42,14 @@ llvm::AMDGPU::GPUKind GPUKind; unsigned GPUFeatures; + /// Target ID is device name followed by optional feature name postfixed + /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sram-ecc-. + /// If the target ID contains feature+, map it to true. + /// If the target ID contains feature-, map it to false. + /// If the target ID does not contain a feature (default), do not map it. + llvm::StringMap OffloadArchFeatures; + std::string TargetID; + bool hasFP64() const { return getTriple().getArch() == llvm::Triple::amdgcn || !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); @@ -389,6 +398,35 @@ void setAuxTarget(const TargetInfo *Aux) override; bool hasExtIntType() const override { return true; } + + // Record offload arch features since they are needed for defining the + // pre-defined macros. + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + auto TargetIDFeatures = + getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind)); + llvm::for_each(Features, [&](const auto &F) { + assert(F.front() == '+' || F.front() == '-'); + bool IsOn = F.front() == '+'; + StringRef Name = StringRef(F).drop_front(); + if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end()) + return; + assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end()); + OffloadArchFeatures[Name] = IsOn; + }); + return true; + } + + Optional getTargetID() const override { + if (!isAMDGCN(getTriple())) + return llvm::None; + // When -target-cpu is not set, we assume generic code that it is valid + // for all GPU and use an empty string as target ID to represent that. + if (GPUKind == llvm::AMDGPU::GK_NONE) + return std::string(""); + return getCanonicalTargetID(getArchNameAMDGCN(GPUKind), + OffloadArchFeatures); + } }; } // namespace targets diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -357,6 +357,23 @@ StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); + if (isAMDGCN(getTriple())) { + Builder.defineMacro("__amdgcn_processor__", + Twine("\"") + Twine(CanonName) + Twine("\"")); + Builder.defineMacro("__amdgcn_target_id__", + Twine("\"") + Twine(getTargetID().getValue()) + + Twine("\"")); + for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { + auto Loc = OffloadArchFeatures.find(F); + if (Loc != OffloadArchFeatures.end()) { + std::string NewF = F.str(); + std::replace(NewF.begin(), NewF.end(), '-', '_'); + Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + + Twine("__"), + Loc->second ? "1" : "0"); + } + } + } } // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -46,6 +46,7 @@ #include "ToolChains/VEToolchain.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/TargetID.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -93,6 +94,11 @@ using namespace clang; using namespace llvm::opt; +static llvm::Triple getHIPOffloadTargetTriple() { + static const llvm::Triple T("amdgcn-amd-amdhsa"); + return T; +} + // static std::string Driver::GetResourcesPath(StringRef BinaryPath, StringRef CustomResourceDir) { @@ -672,10 +678,8 @@ } else if (IsHIP) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); const llvm::Triple &HostTriple = HostTC->getTriple(); - StringRef DeviceTripleStr; auto OFK = Action::OFK_HIP; - DeviceTripleStr = "amdgcn-amd-amdhsa"; - llvm::Triple HIPTriple(DeviceTripleStr); + llvm::Triple HIPTriple = getHIPOffloadTargetTriple(); // Use the HIP and host triples as the key into the ToolChains map, // because the device toolchain we create depends on both. auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()]; @@ -2390,8 +2394,20 @@ bool EmitLLVM = false; bool EmitAsm = false; + /// ID to identify each device compilation. For CUDA it is simply the + /// GPU arch string. For HIP it is either the GPU arch string or GPU + /// arch string plus feature strings delimited by a plus sign, e.g. + /// gfx906+xnack. + struct TargetID { + /// Target ID string which is persistent throughout the compilation. + const char *ID; + TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); } + TargetID(const char *ID) : ID(ID) {} + operator const char *() { return ID; } + operator StringRef() { return StringRef(ID); } + }; /// List of GPU architectures to use in this compilation. - SmallVector GpuArchList; + SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; @@ -2474,7 +2490,7 @@ for (auto Arch : GpuArchList) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch), + UA->registerDependentActionInfo(ToolChains[0], Arch, AssociatedOffloadKind); } return ABRT_Success; @@ -2485,10 +2501,9 @@ void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + auto AddTopLevel = [&](Action *A, TargetID TargetID) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), - AssociatedOffloadKind); + Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind); AL.push_back(C.MakeAction(Dep, A->getType())); }; @@ -2516,6 +2531,13 @@ CudaDeviceActions.clear(); } + /// Get canonicalized offload arch option. \returns empty StringRef if the + /// option is invalid. + virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; + + virtual llvm::Optional> + getConflictOffloadArchCombination(const std::set &GpuArchs) = 0; + bool initialize() override { assert(AssociatedOffloadKind == Action::OFK_Cuda || AssociatedOffloadKind == Action::OFK_HIP); @@ -2563,7 +2585,7 @@ EmitAsm = Args.getLastArg(options::OPT_S); // Collect all cuda_gpu_arch parameters, removing duplicates. - std::set GpuArchs; + std::set GpuArchs; bool Error = false; for (Arg *A : Args) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || @@ -2571,27 +2593,35 @@ continue; A->claim(); - const StringRef ArchStr = A->getValue(); + StringRef ArchStr = A->getValue(); if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && ArchStr == "all") { GpuArchs.clear(); continue; } - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + ArchStr = getCanonicalOffloadArch(ArchStr); + if (ArchStr.empty()) { Error = true; } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(Arch); + GpuArchs.insert(ArchStr); else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(Arch); + GpuArchs.erase(ArchStr); else llvm_unreachable("Unexpected option."); } + auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); + if (ConflictingArchs) { + C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) + << ConflictingArchs.getValue().first + << ConflictingArchs.getValue().second; + C.setContainsError(); + return true; + } + // Collect list of GPUs remaining in the set. - for (CudaArch Arch : GpuArchs) - GpuArchList.push_back(Arch); + for (auto Arch : GpuArchs) + GpuArchList.push_back(Arch.data()); // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if @@ -2613,6 +2643,21 @@ DefaultCudaArch = CudaArch::SM_20; } + StringRef getCanonicalOffloadArch(StringRef ArchStr) override { + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return StringRef(); + } + return CudaArchToString(Arch); + } + + llvm::Optional> + getConflictOffloadArchCombination( + const std::set &GpuArchs) override { + return llvm::None; + } + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2672,8 +2717,7 @@ for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); + DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction(DDep, A->getType())); } @@ -2732,6 +2776,24 @@ bool canUseBundlerUnbundler() const override { return true; } + StringRef getCanonicalOffloadArch(StringRef IdStr) override { + llvm::StringMap Features; + auto ArchStr = + parseTargetID(getHIPOffloadTargetTriple(), IdStr, &Features); + if (!ArchStr) { + C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; + return StringRef(); + } + auto CanId = getCanonicalTargetID(ArchStr.getValue(), Features); + return Args.MakeArgStringRef(CanId); + }; + + llvm::Optional> + getConflictOffloadArchCombination( + const std::set &GpuArchs) override { + return getConflictTargetIDCombination(GpuArchs); + } + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2776,8 +2838,8 @@ // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } @@ -2844,7 +2906,7 @@ // LI contains all the inputs for the linker. OffloadAction::DeviceDependences DeviceLinkDeps; DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + GpuArchList[I], AssociatedOffloadKind); AL.push_back(C.MakeAction(DeviceLinkDeps, DeviceLinkAction->getType())); ++I; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -11,6 +11,7 @@ #include "Gnu.h" #include "ROCm.h" +#include "clang/Basic/TargetID.h" #include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" @@ -36,7 +37,8 @@ const char *LinkingOutput) const override; }; -void getAMDGPUTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args, +void getAMDGPUTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args, std::vector &Features); } // end namespace amdgpu @@ -87,6 +89,14 @@ /// Needed for translating LTO options. const char *getDefaultLinker() const override { return "ld.lld"; } + +protected: + /// Translate -mcpu option containing target ID to cc1 options. + /// Returns the GPU name. + StringRef translateTargetID(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + + StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const; }; class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -9,6 +9,7 @@ #include "AMDGPU.h" #include "CommonArgs.h" #include "InputInfo.h" +#include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" #include "llvm/Option/ArgList.h" @@ -360,11 +361,34 @@ } void amdgpu::getAMDGPUTargetFeatures(const Driver &D, + const llvm::Triple &Triple, const llvm::opt::ArgList &Args, std::vector &Features) { if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); + // Add target ID features to -target-feature options. No diagnostics should + // be emitted here since invalid target ID is diagnosed at other places. + StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); + if (!TargetID.empty()) { + llvm::StringMap FeatureMap; + auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); + if (OptionalGpuArch) { + StringRef GpuArch = OptionalGpuArch.getValue(); + // Iterate through all possible target ID features for the given GPU. + // If it is mapped to true, add +feature. + // If it is mapped to false, add -feature. + // If it is not in the map (default), do not add it + for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { + auto Pos = FeatureMap.find(Feature); + if (Pos == FeatureMap.end()) + continue; + Features.push_back(Args.MakeArgStringRef( + (Twine(Pos->second ? "+" : "-") + Feature).str())); + } + } + } + if (Args.getLastArg(options::OPT_mwavefrontsize64)) { Features.push_back("-wavefrontsize16"); Features.push_back("-wavefrontsize32"); @@ -398,16 +422,15 @@ DerivedArgList *DAL = Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); - // Do nothing if not OpenCL (-x cl) - if (!Args.getLastArgValue(options::OPT_x).equals("cl")) - return DAL; + const OptTable &Opts = getDriver().getOpts(); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); for (auto *A : Args) DAL->append(A); - const OptTable &Opts = getDriver().getOpts(); + if (!Args.getLastArgValue(options::OPT_x).equals("cl")) + return DAL; // Phase 1 (.cl -> .bc) if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { @@ -452,7 +475,8 @@ if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { - auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch()); + auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); + auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, @@ -462,7 +486,7 @@ return llvm::DenormalMode::getIEEE(); } - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); // TODO: There are way too many flags that change this. Do we need to check @@ -497,6 +521,8 @@ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { + // Allow using target ID in -mcpu. + translateTargetID(DriverArgs, CC1Args); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, @@ -507,6 +533,29 @@ } } +StringRef +AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { + return getProcessorFromTargetID( + getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); +} + +StringRef +AMDGPUToolChain::translateTargetID(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + if (TargetID.empty()) + return StringRef(); + + llvm::StringMap FeatureMap; + auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); + if (!OptionalGpuArch) { + getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID; + return StringRef(); + } + + return OptionalGpuArch.getValue(); +} + void ROCMToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { @@ -528,7 +577,7 @@ } // Get the device name and canonicalize it - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -366,7 +366,7 @@ break; case llvm::Triple::r600: case llvm::Triple::amdgcn: - amdgpu::getAMDGPUTargetFeatures(D, Args, Features); + amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::msp430: msp430::getMSP430TargetFeatures(D, Args, Features); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -226,11 +226,12 @@ } } -/// Get the (LLVM) name of the R600 gpu we are targeting. -static std::string getR600TargetGPU(const ArgList &Args) { +/// Get the (LLVM) name of the AMDGPU gpu we are targeting. +static std::string getAMDGPUTargetGPU(const llvm::Triple &T, + const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - const char *GPUName = A->getValue(); - return llvm::StringSwitch(GPUName) + auto GPUName = getProcessorFromTargetID(T, A->getValue()); + return llvm::StringSwitch(GPUName) .Cases("rv630", "rv635", "r600") .Cases("rv610", "rv620", "rs780", "rs880") .Case("rv740", "rv770") @@ -238,7 +239,7 @@ .Cases("sumo", "sumo2", "sumo") .Case("hemlock", "cypress") .Case("aruba", "cayman") - .Default(GPUName); + .Default(GPUName.str()); } return ""; } @@ -364,7 +365,7 @@ case llvm::Triple::r600: case llvm::Triple::amdgcn: - return getR600TargetGPU(Args); + return getAMDGPUTargetGPU(T, Args); case llvm::Triple::wasm32: case llvm::Triple::wasm64: diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -11,6 +11,7 @@ #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" +#include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -68,7 +69,7 @@ // Extract all the -m options std::vector Features; - amdgpu::getAMDGPUTargetFeatures(D, Args, Features); + amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features); // Add features to mattr such as cumode std::string MAttrString = "-plugin-opt=-mattr="; @@ -232,7 +233,8 @@ Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + // Allow using target ID in --offload-arch. + StringRef GpuArch = translateTargetID(DriverArgs, CC1Args); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && diff --git a/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_isa_version_908.bc b/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_isa_version_908.bc new file mode 100644 diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -12,19 +12,19 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-code-object-v3 %s 2>&1 | FileCheck --check-prefix=NO-CODE-OBJECT-V3 %s // NO-CODE-OBJECT-V3: "-target-feature" "-code-object-v3" -// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mxnack %s 2>&1 | FileCheck --check-prefix=XNACK %s +// RUN: %clang -### -target amdgcn-amdhsa -mcpu=gfx900:xnack+ %s 2>&1 | FileCheck --check-prefix=XNACK %s // XNACK: "-target-feature" "+xnack" -// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-xnack %s 2>&1 | FileCheck --check-prefix=NO-XNACK %s +// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx900:xnack- %s 2>&1 | FileCheck --check-prefix=NO-XNACK %s // NO-XNACK: "-target-feature" "-xnack" -// RUN: %clang -### -target amdgcn -mcpu=gfx700 -msram-ecc %s 2>&1 | FileCheck --check-prefix=SRAM-ECC %s +// RUN: %clang -### -target amdgcn-mesa3d -mcpu=gfx908:sram-ecc+ %s 2>&1 | FileCheck --check-prefix=SRAM-ECC %s // SRAM-ECC: "-target-feature" "+sram-ecc" -// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-sram-ecc %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s +// RUN: %clang -### -target amdgcn-amdhsa -mcpu=gfx908:sram-ecc- %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s // NO-SRAM-ECC: "-target-feature" "-sram-ecc" -// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s // WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64" // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -324,3 +324,26 @@ // GFX1012-DAG: #define __gfx1012__ 1 // GFX1030-DAG: #define __gfx1030__ 1 // GFX1031-DAG: #define __gfx1031__ 1 + +// GFX600-DAG: #define __amdgcn_processor__ "gfx600" +// GFX601-DAG: #define __amdgcn_processor__ "gfx601" +// GFX700-DAG: #define __amdgcn_processor__ "gfx700" +// GFX701-DAG: #define __amdgcn_processor__ "gfx701" +// GFX702-DAG: #define __amdgcn_processor__ "gfx702" +// GFX703-DAG: #define __amdgcn_processor__ "gfx703" +// GFX704-DAG: #define __amdgcn_processor__ "gfx704" +// GFX801-DAG: #define __amdgcn_processor__ "gfx801" +// GFX802-DAG: #define __amdgcn_processor__ "gfx802" +// GFX803-DAG: #define __amdgcn_processor__ "gfx803" +// GFX810-DAG: #define __amdgcn_processor__ "gfx810" +// GFX900-DAG: #define __amdgcn_processor__ "gfx900" +// GFX902-DAG: #define __amdgcn_processor__ "gfx902" +// GFX904-DAG: #define __amdgcn_processor__ "gfx904" +// GFX906-DAG: #define __amdgcn_processor__ "gfx906" +// GFX908-DAG: #define __amdgcn_processor__ "gfx908" +// GFX909-DAG: #define __amdgcn_processor__ "gfx909" +// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010" +// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011" +// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012" +// GFX1030-DAG: #define __amdgcn_processor__ "gfx1030" +// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031" \ No newline at end of file diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -54,33 +54,33 @@ // RUN: %clang -### -target amdgcn %s 2>&1 | FileCheck --check-prefix=GCNDEFAULT %s // RUN: %clang -### -target amdgcn -mcpu=gfx600 %s 2>&1 | FileCheck --check-prefix=GFX600 %s -// RUN: %clang -### -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=TAHITI %s +// RUN: %clang -### -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=GFX600 %s // RUN: %clang -### -target amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefix=GFX601 %s -// RUN: %clang -### -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefix=HAINAN %s -// RUN: %clang -### -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefix=OLAND %s -// RUN: %clang -### -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefix=PITCAIRN %s -// RUN: %clang -### -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefix=VERDE %s +// RUN: %clang -### -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefix=GFX601 %s +// RUN: %clang -### -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefix=GFX601 %s +// RUN: %clang -### -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefix=GFX601 %s +// RUN: %clang -### -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefix=GFX601 %s // RUN: %clang -### -target amdgcn -mcpu=gfx700 %s 2>&1 | FileCheck --check-prefix=GFX700 %s -// RUN: %clang -### -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefix=KAVERI %s +// RUN: %clang -### -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefix=GFX700 %s // RUN: %clang -### -target amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefix=GFX701 %s -// RUN: %clang -### -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefix=HAWAII %s +// RUN: %clang -### -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefix=GFX701 %s // RUN: %clang -### -target amdgcn -mcpu=gfx702 %s 2>&1 | FileCheck --check-prefix=GFX702 %s // RUN: %clang -### -target amdgcn -mcpu=gfx703 %s 2>&1 | FileCheck --check-prefix=GFX703 %s -// RUN: %clang -### -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefix=KABINI %s -// RUN: %clang -### -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefix=MULLINS %s +// RUN: %clang -### -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefix=GFX703 %s +// RUN: %clang -### -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefix=GFX703 %s // RUN: %clang -### -target amdgcn -mcpu=gfx704 %s 2>&1 | FileCheck --check-prefix=GFX704 %s -// RUN: %clang -### -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=BONAIRE %s +// RUN: %clang -### -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=GFX704 %s // RUN: %clang -### -target amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefix=GFX801 %s -// RUN: %clang -### -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefix=CARRIZO %s +// RUN: %clang -### -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefix=GFX801 %s // RUN: %clang -### -target amdgcn -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=GFX802 %s -// RUN: %clang -### -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=ICELAND %s -// RUN: %clang -### -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefix=TONGA %s +// RUN: %clang -### -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=GFX802 %s +// RUN: %clang -### -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefix=GFX802 %s // RUN: %clang -### -target amdgcn -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=GFX803 %s -// RUN: %clang -### -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=FIJI %s -// RUN: %clang -### -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefix=POLARIS10 %s -// RUN: %clang -### -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefix=POLARIS11 %s +// RUN: %clang -### -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=GFX803 %s +// RUN: %clang -### -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefix=GFX803 %s +// RUN: %clang -### -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefix=GFX803 %s // RUN: %clang -### -target amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --check-prefix=GFX810 %s -// RUN: %clang -### -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefix=STONEY %s +// RUN: %clang -### -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefix=GFX810 %s // RUN: %clang -### -target amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=GFX900 %s // RUN: %clang -### -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefix=GFX902 %s // RUN: %clang -### -target amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck --check-prefix=GFX904 %s @@ -95,33 +95,16 @@ // GCNDEFAULT-NOT: -target-cpu // GFX600: "-target-cpu" "gfx600" -// TAHITI: "-target-cpu" "tahiti" // GFX601: "-target-cpu" "gfx601" -// HAINAN: "-target-cpu" "hainan" -// OLAND: "-target-cpu" "oland" -// PITCAIRN: "-target-cpu" "pitcairn" -// VERDE: "-target-cpu" "verde" // GFX700: "-target-cpu" "gfx700" -// KAVERI: "-target-cpu" "kaveri" // GFX701: "-target-cpu" "gfx701" -// HAWAII: "-target-cpu" "hawaii" // GFX702: "-target-cpu" "gfx702" // GFX703: "-target-cpu" "gfx703" -// KABINI: "-target-cpu" "kabini" -// MULLINS: "-target-cpu" "mullins" // GFX704: "-target-cpu" "gfx704" -// BONAIRE: "-target-cpu" "bonaire" // GFX801: "-target-cpu" "gfx801" -// CARRIZO: "-target-cpu" "carrizo" // GFX802: "-target-cpu" "gfx802" -// ICELAND: "-target-cpu" "iceland" -// TONGA: "-target-cpu" "tonga" // GFX803: "-target-cpu" "gfx803" -// FIJI: "-target-cpu" "fiji" -// POLARIS10: "-target-cpu" "polaris10" -// POLARIS11: "-target-cpu" "polaris11" // GFX810: "-target-cpu" "gfx810" -// STONEY: "-target-cpu" "stoney" // GFX900: "-target-cpu" "gfx900" // GFX902: "-target-cpu" "gfx902" // GFX904: "-target-cpu" "gfx904" diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip new file mode 100644 --- /dev/null +++ b/clang/test/Driver/hip-invalid-target-id.hip @@ -0,0 +1,70 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908xnack \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Invalid target ID: gfx908xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx908:xnack+:xnack+ \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target ID: gfx908:xnack+:xnack+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:unknown+ \ +// RUN: --offload-arch=gfx908+sram-ecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target ID: gfx908:unknown+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:sram-ecc+:unknown+ \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target ID: gfx908:sram-ecc+:unknown+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900:sram-ecc+ \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target ID: gfx900:sram-ecc+ + +/ RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900:xnack \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s + +// NOSIGN: error: Invalid target ID: gfx900:xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s + +// NOCOLON: error: Invalid target ID: gfx900+xnack + +// RUN: not %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:xnack+ \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=COMBO %s + +// COMBO: error: Invalid offload arch combinations: gfx908 and gfx908:xnack+ diff --git a/clang/test/Driver/hip-target-id.hip b/clang/test/Driver/hip-target-id.hip new file mode 100644 --- /dev/null +++ b/clang/test/Driver/hip-target-id.hip @@ -0,0 +1,74 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: -save-temps \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK,TMP %s + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: -fgpu-rdc \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +// CHECK: [[CLANG:"[^"]*clang[^"]*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-target-cpu" "gfx908" +// CHECK-SAME: "-target-feature" "+sram-ecc" +// CHECK-SAME: "-target-feature" "+xnack" + +// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-triple" "amdgcn-amd-amdhsa" +// TMP-SAME: "-target-cpu" "gfx908" +// TMP-SAME: "-target-feature" "+sram-ecc" +// TMP-SAME: "-target-feature" "+xnack" + +// CHECK: [[LLD:"[^"]*lld[^"]*"]] +// CHECK-SAME: "-plugin-opt=mcpu=gfx908" +// CHECK-SAME: "-plugin-opt=-mattr=+sram-ecc,+xnack" + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-target-cpu" "gfx908" +// CHECK-SAME: "-target-feature" "-sram-ecc" +// CHECK-SAME: "-target-feature" "+xnack" + +// CHECK: [[LLD]] +// CHECK-SAME: "-plugin-opt=mcpu=gfx908" +// CHECK-SAME: "-plugin-opt=-mattr=-sram-ecc,+xnack" + +// CHECK: {{"[^"]*clang-offload-bundler[^"]*"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc+:xnack+,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc-:xnack+" + +// Check canonicalization and repeating of target ID. + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip \ +// RUN: --offload-arch=fiji \ +// RUN: --offload-arch=gfx803 \ +// RUN: --offload-arch=fiji \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=FIJI %s +// FIJI: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx803" + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip \ +// RUN: --offload-arch=gfx900:xnack- \ +// RUN: --offload-arch=gfx900:xnack+ \ +// RUN: --offload-arch=gfx908:sram-ecc+ \ +// RUN: --offload-arch=gfx908:sram-ecc- \ +// RUN: --offload-arch=gfx906 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MULTI %s +// MULTI: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx900:xnack+,hip-amdgcn-amd-amdhsa-gfx900:xnack-,hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc+,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc-" diff --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip --- a/clang/test/Driver/hip-toolchain-features.hip +++ b/clang/test/Driver/hip-toolchain-features.hip @@ -3,44 +3,34 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mxnack 2>&1 | FileCheck %s -check-prefix=XNACK +// RUN: --cuda-gpu-arch=gfx906:xnack+ --cuda-gpu-arch=gfx900:xnack+ %s \ +// RUN: 2>&1 | FileCheck %s -check-prefix=XNACK // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mno-xnack 2>&1 | FileCheck %s -check-prefix=NOXNACK +// RUN: --cuda-gpu-arch=gfx906:xnack- --cuda-gpu-arch=gfx900:xnack- %s \ +// RUN: 2>&1 | FileCheck %s -check-prefix=NOXNACK // XNACK: {{.*}}clang{{.*}}"-target-feature" "+xnack" -// XNACK: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+xnack" // NOXNACK: {{.*}}clang{{.*}}"-target-feature" "-xnack" -// NOXNACK: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-xnack" - // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -msram-ecc 2>&1 | FileCheck %s -check-prefix=SRAM +// RUN: --cuda-gpu-arch=gfx908:sram-ecc+ %s \ +// RUN: 2>&1 | FileCheck %s -check-prefix=SRAM // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mno-sram-ecc 2>&1 | FileCheck %s -check-prefix=NOSRAM +// RUN: --cuda-gpu-arch=gfx908:sram-ecc- %s \ +// RUN: 2>&1 | FileCheck %s -check-prefix=NOSRAM // SRAM: {{.*}}clang{{.*}}"-target-feature" "+sram-ecc" -// SRAM: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+sram-ecc" // NOSRAM: {{.*}}clang{{.*}}"-target-feature" "-sram-ecc" -// NOSRAM: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-sram-ecc" - // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mxnack -msram-ecc \ +// RUN: --cuda-gpu-arch=gfx908:xnack+:sram-ecc+ %s \ // RUN: 2>&1 | FileCheck %s -check-prefix=ALL3 // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mno-xnack -mno-sram-ecc \ +// RUN: --cuda-gpu-arch=gfx908:xnack-:sram-ecc- %s \ // RUN: 2>&1 | FileCheck %s -check-prefix=NOALL3 -// ALL3: {{.*}}clang{{.*}}"-target-feature" "+xnack" "-target-feature" "+sram-ecc" -// ALL3: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+xnack,+sram-ecc" -// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-xnack" "-target-feature" "-sram-ecc" -// NOALL3: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-xnack,-sram-ecc" +// ALL3: {{.*}}clang{{.*}}"-target-feature" "+sram-ecc" "-target-feature" "+xnack" +// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-sram-ecc" "-target-feature" "-xnack" // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ // RUN: --cuda-gpu-arch=gfx1010 %s \ diff --git a/clang/test/Driver/invalid-target-id.cl b/clang/test/Driver/invalid-target-id.cl new file mode 100644 --- /dev/null +++ b/clang/test/Driver/invalid-target-id.cl @@ -0,0 +1,45 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Invalid target ID: gfx908xnack + +// RUN: not %clang -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target ID: gfx908:xnack+:xnack+ + +// RUN: not %clang -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target ID: gfx908:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:sram-ecc+:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target ID: gfx908:sram-ecc+:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:sram-ecc+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target ID: gfx900:sram-ecc+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s + +// NOSIGN: error: Invalid target ID: gfx900:xnack + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900+xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s + +// NOCOLON: error: Invalid target ID: gfx900+xnack diff --git a/clang/test/Driver/target-id-macros.hip b/clang/test/Driver/target-id-macros.hip new file mode 100644 --- /dev/null +++ b/clang/test/Driver/target-id-macros.hip @@ -0,0 +1,12 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \ +// RUN: --offload-arch=gfx908:xnack+:sram-ecc- -nogpuinc -nogpulib \ +// RUN: -o - %s 2>&1 | FileCheck %s + +// CHECK-DAG: #define __amdgcn_processor__ "gfx908" +// CHECK-DAG: #define __amdgcn_feature_xnack__ 1 +// CHECK-DAG: #define __amdgcn_feature_sram_ecc__ 0 +// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:sram-ecc-:xnack+" diff --git a/clang/test/Driver/target-id-macros.cl b/clang/test/Driver/target-id-macros.cl new file mode 100644 --- /dev/null +++ b/clang/test/Driver/target-id-macros.cl @@ -0,0 +1,38 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=PROC,ID1 %s + +// RUN: %clang -E -dM -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=PROC,ID1 %s + +// RUN: %clang -E -dM -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=PROC,ID1 %s + +// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908 -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=PROC,ID2 %s + +// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \ +// RUN: -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=NONE %s + +// PROC-DAG: #define __amdgcn_processor__ "gfx908" + +// ID1-DAG: #define __amdgcn_feature_xnack__ 1 +// ID1-DAG: #define __amdgcn_feature_sram_ecc__ 0 +// ID1-DAG: #define __amdgcn_target_id__ "gfx908:sram-ecc-:xnack+" + +// ID2-DAG: #define __amdgcn_target_id__ "gfx908" +// ID2-NOT: #define __amdgcn_feature_xnack__ +// ID2-NOT: #define __amdgcn_feature_sram_ecc__ + +// NONE-NOT: #define __amdgcn_processor__ +// NONE-NOT: #define __amdgcn_feature_xnack__ +// NONE-NOT: #define __amdgcn_feature_sram_ecc__ +// NONE-NOT: #define __amdgcn_target_id__ diff --git a/clang/test/Driver/target-id.cl b/clang/test/Driver/target-id.cl new file mode 100644 --- /dev/null +++ b/clang/test/Driver/target-id.cl @@ -0,0 +1,33 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- \ +// RUN: -nostdlib -x ir %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- \ +// RUN: -nostdlib -x assembler %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:xnack+:sram-ecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -nostdlib %s 2>&1 | FileCheck -check-prefix=NONE %s + +// CHECK: "-target-cpu" "gfx908" +// CHECK-SAME: "-target-feature" "-sram-ecc" +// CHECK-SAME: "-target-feature" "+xnack" + +// NONE-NOT: "-target-cpu" +// NONE-NOT: "-target-feature" diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -113,12 +113,18 @@ FEATURE_FAST_DENORMAL_F32 = 1 << 5, // Wavefront 32 is available. - FEATURE_WAVE32 = 1 << 6 + FEATURE_WAVE32 = 1 << 6, + + // Xnack is available. + FEATURE_XNACK = 1 << 7, + + // Sram-ecc is available. + FEATURE_SRAM_ECC = 1 << 8, }; StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); -StringRef getCanonicalArchName(StringRef Arch); +StringRef getCanonicalArchName(const Triple &T, StringRef Arch); GPUKind parseArchAMDGCN(StringRef CPU); GPUKind parseArchR600(StringRef CPU); unsigned getArchAttrAMDGCN(GPUKind AK); diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -83,26 +83,26 @@ {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, - {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, - {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, - {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, - {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, - {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, - {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, - {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, - {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32}, - {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32}, - {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, - {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, - {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAM_ECC}, + {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAM_ECC}, + {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, + {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, + {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, }; @@ -212,6 +212,15 @@ } } +StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) { + assert(T.isAMDGPU()); + auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch); + if (ProcKind == GK_NONE) + return StringRef(); + + return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); +} + namespace llvm { namespace RISCV {