diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3116,6 +3116,9 @@ /// valid feature names. ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD) const; + std::vector + filterFunctionTargetVersionAttrs(const TargetVersionAttr *TV) const; + void getFunctionFeatureMap(llvm::StringMap &FeatureMap, const FunctionDecl *) const; void getFunctionFeatureMap(llvm::StringMap &FeatureMap, diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1891,7 +1891,8 @@ Target, CPUSpecific, CPUDispatch, - TargetClones + TargetClones, + TargetVersion }; /// Represents a function declaration or definition. diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2740,6 +2740,31 @@ }]; } +def TargetVersion : InheritableAttr { + let Spellings = [GCC<"target_version">]; + let Args = [StringArgument<"NamesStr">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [TargetVersionDocs]; + let AdditionalMembers = [{ + StringRef getName() const { return getNamesStr().trim(); } + bool isDefaultVersion() const { + return getName() == "default"; + } + void getFeatures(llvm::SmallVectorImpl &Out) const { + if (isDefaultVersion()) return; + StringRef Features = getName(); + + SmallVector AttrFeatures; + Features.split(AttrFeatures, "+"); + + for (auto &Feature : AttrFeatures) { + Feature = Feature.trim(); + Out.push_back(Feature); + } + } + }]; +} + def TargetClones : InheritableAttr { let Spellings = [GCC<"target_clones">]; let Args = [VariadicStringArgument<"featuresStrs">]; @@ -2773,11 +2798,12 @@ return 0 == std::count_if( featuresStrs_begin(), featuresStrs_begin() + Index, [FeatureStr](StringRef S) { return S == FeatureStr; }); 
+ } }]; } -def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>; +def : MutualExclusions<[TargetClones, TargetVersion, Target, CPUDispatch, CPUSpecific]>; def MinVectorWidth : InheritableAttr { let Spellings = [Clang<"min_vector_width">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2377,6 +2377,19 @@ }]; } +def TargetVersionDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +For the AArch64 target, clang supports function multiversioning via the +``__attribute__((target_version("OPTIONS")))`` attribute. When applied to a +function, it instructs the compiler to emit multiple function versions based on +``target_version`` attribute strings; the version to call is resolved at runtime +depending on priority and target feature availability. One of the versions is +always (implicitly or explicitly) the ``default`` (fallback). Attribute strings +can contain dependent feature names joined by the "+" sign. +}]; +} + def TargetClonesDocs : Documentation { let Category = DocCatFunction; let Content = [{ @@ -2387,6 +2400,19 @@ based on the priority of their attribute options. All ``target_clone`` functions are considered multiversioned functions. +For AArch64 target: +The attribute contains comma-separated strings of target features joined by "+" +sign. For example: + + .. code-block:: c++ + + __attribute__((target_clones("sha2+memtag2", "fcma+sve2-pmull128"))) + void foo() {} + +For every multiversioned function a ``default`` (fallback) implementation is +always generated if not specified directly. + +For x86/x86-64 targets: All multiversioned functions must contain a ``default`` (fallback) implementation, otherwise usages of the function are considered invalid. Additionally, a function may not become multiversioned after its first use. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3056,8 +3056,8 @@ def warn_unsupported_target_attribute : Warning<"%select{unsupported|duplicate|unknown}0%select{| CPU|" - " tune CPU}1 '%2' in the '%select{target|target_clones}3' " - "attribute string; '%select{target|target_clones}3' " + " tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' " + "attribute string; '%select{target|target_clones|target_version}3' " "attribute ignored">, InGroup; def err_attribute_unsupported @@ -11513,7 +11513,7 @@ def err_multiversion_required_in_redecl : Error< "function declaration is missing %select{'target'|'cpu_specific' or " - "'cpu_dispatch'}0 attribute in a multiversioned function">; + "'cpu_dispatch'|'target_version'}0 attribute in a multiversioned function">; def note_multiversioning_caused_here : Note< "function multiversioning caused by this declaration">; def err_multiversion_after_used : Error< @@ -11528,7 +11528,7 @@ "multiversioned function must have a prototype">; def err_multiversion_disallowed_other_attr : Error<"attribute " - "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' " "multiversioning cannot be combined" " with attribute %1">; def err_multiversion_diff : Error< @@ -11537,7 +11537,7 @@ "language linkage}0">; def err_multiversion_doesnt_support : Error<"attribute " - "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' " "multiversioned functions do not " "yet support %select{function templates|virtual functions|" "deduced return types|constructors|destructors|deleted functions|" @@ -11572,6 +11572,9 @@ def warn_target_clone_duplicate_options : Warning<"version list contains duplicate 
entries">, InGroup; +def warn_target_clone_no_impact_options + : Warning<"version list contains entries that don't impact code generation">, + InGroup; // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1340,6 +1340,13 @@ return true; } + /// Returns true if the feature has an impact on target code + /// generation and returns its dependent options in the second argument. + virtual bool getFeatureDepOptions(StringRef Feature, + std::string &Options) const { + return true; + } + struct BranchProtectionInfo { LangOptions::SignReturnAddressScopeKind SignReturnAddr = LangOptions::SignReturnAddressScopeKind::None; @@ -1386,7 +1393,9 @@ /// Identify whether this target supports multiversioning of functions, /// which requires support for cpu_supports and cpu_is functionality. - bool supportsMultiVersioning() const { return getTriple().isX86(); } + bool supportsMultiVersioning() const { + return getTriple().isX86() || getTriple().isAArch64(); + } /// Identify whether this target supports IFuncs. bool supportsIFunc() const { @@ -1403,6 +1412,10 @@ return 0; } + // Return the target-specific cost of a feature + // that is taken into account in priority sorting. + virtual unsigned multiVersionFeatureCost() const { return 0; } + // Validate the contents of the __builtin_cpu_is(const char*) // argument. 
virtual bool validateCpuIs(StringRef Name) const { return false; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3858,6 +3858,8 @@ def msoft_float : Flag<["-"], "msoft-float">, Group, Flags<[CC1Option]>, HelpText<"Use software floating point">, MarshallingInfoFlag>; +def mno_fmv : Flag<["-"], "mno-fmv">, Group, Flags<[CC1Option]>, + HelpText<"Disable function multiversioning">; def moutline_atomics : Flag<["-"], "moutline-atomics">, Group, Flags<[CC1Option]>, HelpText<"Generate local calls to out-of-line atomic operations">; def mno_outline_atomics : Flag<["-"], "mno-outline-atomics">, Group, Flags<[CC1Option]>, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4657,10 +4657,13 @@ llvm::Error isValidSectionSpecifier(StringRef Str); bool checkSectionName(SourceLocation LiteralLoc, StringRef Str); bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); - bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, - const StringLiteral *Literal, - bool &HasDefault, bool &HasCommas, - SmallVectorImpl &Strings); + bool checkTargetVersionAttr(SourceLocation LiteralLoc, StringRef &Str, + bool &isDefault); + bool + checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, + const StringLiteral *Literal, bool &HasDefault, + bool &HasCommas, bool &HasNotDefault, + SmallVectorImpl> &StringsBuffer); bool checkMSInheritanceAttrOnDefinition( CXXRecordDecl *RD, SourceRange Range, bool BestCase, MSInheritanceModel SemanticSpelling); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -13291,6 +13291,18 @@ } } +std::vector ASTContext::filterFunctionTargetVersionAttrs( + const TargetVersionAttr *TV) const { + 
assert(TV != nullptr); + llvm::SmallVector Feats; + std::vector ResFeats; + TV->getFeatures(Feats); + for (auto &Feature : Feats) + if (Target->validateCpuSupports(Feature.str())) + ResFeats.push_back("?" + Feature.str()); + return ResFeats; +} + ParsedTargetAttr ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const { assert(TD != nullptr); @@ -13349,12 +13361,32 @@ } else if (const auto *TC = FD->getAttr()) { std::vector Features; StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); - if (VersionStr.startswith("arch=")) - TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); - else if (VersionStr != "default") - Features.push_back((StringRef{"+"} + VersionStr).str()); - + if (Target->getTriple().isAArch64()) { + // TargetClones for AArch64 + if (VersionStr != "default") { + SmallVector VersionFeatures; + VersionStr.split(VersionFeatures, "+"); + for (auto &VFeature : VersionFeatures) { + VFeature = VFeature.trim(); + Features.push_back((StringRef{"?"} + VFeature).str()); + } + } + Features.insert(Features.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); + } else { + if (VersionStr.startswith("arch=")) + TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); + else if (VersionStr != "default") + Features.push_back((StringRef{"+"} + VersionStr).str()); + } Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (const auto *TV = FD->getAttr()) { + std::vector Feats = filterFunctionTargetVersionAttrs(TV); + Feats.insert(Feats.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Feats); } else { FeatureMap = Target->getTargetOpts().FeatureMap; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3347,6 +3347,8 @@ MultiVersionKind 
FunctionDecl::getMultiVersionKind() const { if (hasAttr()) return MultiVersionKind::Target; + if (hasAttr()) + return MultiVersionKind::TargetVersion; if (hasAttr()) return MultiVersionKind::CPUDispatch; if (hasAttr()) @@ -3365,7 +3367,8 @@ } bool FunctionDecl::isTargetMultiVersion() const { - return isMultiVersion() && hasAttr(); + return isMultiVersion() && + (hasAttr() || hasAttr()); } bool FunctionDecl::isTargetClonesMultiVersion() const { diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -43,6 +43,7 @@ bool HasLS64 = false; bool HasRandGen = false; bool HasMatMul = false; + bool HasBFloat16 = false; bool HasSVE2 = false; bool HasSVE2AES = false; bool HasSVE2SHA3 = false; @@ -52,9 +53,28 @@ bool HasMatmulFP32 = false; bool HasLSE = false; bool HasFlagM = false; + bool HasAlternativeNZCV = false; bool HasMOPS = false; bool HasD128 = false; bool HasRCPC = false; + bool HasRDM = false; + bool HasDIT = false; + bool HasCCPP = false; + bool HasCCDP = false; + bool HasFRInt3264 = false; + bool HasSME = false; + bool HasSMEF64 = false; + bool HasSMEI64 = false; + bool HasSB = false; + bool HasPredRes = false; + bool HasSSBS = false; + bool HasBTI = false; + bool HasWFxT = false; + bool HasJSCVT = false; + bool HasFCMA = false; + bool HasNoNeon = false; + bool HasNoSVE = false; + bool HasFMV = true; llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::INVALID; @@ -77,10 +97,19 @@ void fillValidCPUList(SmallVectorImpl &Values) const override; bool setCPU(const std::string &Name) override; + unsigned multiVersionSortPriority(StringRef Name) const override; + unsigned multiVersionFeatureCost() const override; + + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override; bool useFP16ConversionIntrinsics() const override { return false; } + void setArchFeatures(); + void 
getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, @@ -117,15 +146,14 @@ Optional> getVScaleRange(const LangOptions &LangOpts) const override; + bool getFeatureDepOptions(StringRef Feature, + std::string &Options) const override; + bool validateCpuSupports(StringRef FeatureStr) const override; bool hasFeature(StringRef Feature) const override; void setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const override; bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; - bool - initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, - StringRef CPU, - const std::vector &FeaturesVec) const override; ParsedTargetAttr parseTargetAttr(StringRef Str) const override; bool supportsTargetAttributeTune() const override { return true; } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -58,6 +58,91 @@ } } +void AArch64TargetInfo::setArchFeatures() { + switch (ArchKind) { + case llvm::AArch64::ArchKind::ARMV8_9A: + case llvm::AArch64::ArchKind::ARMV8_8A: + case llvm::AArch64::ArchKind::ARMV8_7A: + HasWFxT = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_6A: + HasBFloat16 = true; + HasMatMul = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_5A: + HasAlternativeNZCV = true; + HasFRInt3264 = true; + HasSSBS = true; + HasSB = true; + HasPredRes = true; + HasBTI = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_4A: + HasDotProd = true; + HasDIT = true; + HasFlagM = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_3A: + HasRCPC = true; + FPU |= NeonMode; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_2A: + HasCCPP = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV8_1A: + HasCRC = true; + HasLSE = true; + HasRDM = true; + return; 
+ default: + break; + } + switch (ArchKind) { + case llvm::AArch64::ArchKind::ARMV9_4A: + case llvm::AArch64::ArchKind::ARMV9_3A: + case llvm::AArch64::ArchKind::ARMV9_2A: + HasWFxT = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV9_1A: + HasBFloat16 = true; + HasMatMul = true; + LLVM_FALLTHROUGH; + case llvm::AArch64::ArchKind::ARMV9A: + FPU |= SveMode; + HasSVE2 = true; + HasFullFP16 = true; + HasAlternativeNZCV = true; + HasFRInt3264 = true; + HasSSBS = true; + HasSB = true; + HasPredRes = true; + HasBTI = true; + HasDotProd = true; + HasDIT = true; + HasFlagM = true; + HasRCPC = true; + FPU |= NeonMode; + HasCCPP = true; + HasCRC = true; + HasLSE = true; + HasRDM = true; + return; + default: + break; + } + if (ArchKind == llvm::AArch64::ArchKind::ARMV8R) { + HasDotProd = true; + HasDIT = true; + HasFlagM = true; + HasRCPC = true; + FPU |= NeonMode; + HasCCPP = true; + HasCRC = true; + HasLSE = true; + HasRDM = true; + } +} + StringRef AArch64TargetInfo::getArchProfile() const { switch (ArchKind) { case llvm::AArch64::ArchKind::ARMV8R: @@ -366,6 +451,9 @@ if (HasRCPC) Builder.defineMacro("__ARM_FEATURE_RCPC", "1"); + if (HasFMV) + Builder.defineMacro("__HAVE_FUNCTION_MULTI_VERSIONING", "1"); + // The __ARM_FEATURE_CRYPTO is deprecated in favor of finer grained feature // macros for AES, SHA2, SHA3 and SM4 if (HasAES && HasSHA2) @@ -552,13 +640,95 @@ return std::nullopt; } +unsigned AArch64TargetInfo::multiVersionSortPriority(StringRef Name) const { + if (Name == "default") + return 0; + unsigned Priority = llvm::StringSwitch(Name) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ + .Case(NAME, FMV_PRIORITY) +#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def" + ; + assert((Name == "none" || Priority < multiVersionFeatureCost()) && + "FMV priority is out of bounds!"); + return Priority; +} + +unsigned AArch64TargetInfo::multiVersionFeatureCost() const { + // Take the 
maximum priority as per feature cost, so more features win. + // AARCH64_ARCH_EXT_NAME "none" feature must have top priority, use it. + return multiVersionSortPriority("none"); +} + +bool AArch64TargetInfo::getFeatureDepOptions(StringRef Name, + std::string &FeatureVec) const { + FeatureVec = llvm::StringSwitch(Name) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ + .Case(NAME, DEP_FEATURES) +#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def" + .Default(""); + return FeatureVec != ""; +} + +bool AArch64TargetInfo::validateCpuSupports(StringRef FeatureStr) const { + unsigned Feat = llvm::StringSwitch(FeatureStr) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ + .Case(NAME, llvm::AArch64::FEAT_##FMV_ID) +#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def" + .Default(llvm::AArch64::FEAT_MAX); + return Feat != llvm::AArch64::FEAT_MAX; +} + bool AArch64TargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) - .Cases("aarch64", "arm64", "arm", true) - .Case("neon", FPU & NeonMode) - .Cases("sve", "sve2", "sve2-bitperm", "sve2-aes", "sve2-sha3", "sve2-sm4", "f64mm", "f32mm", "i8mm", "bf16", FPU & SveMode) - .Case("ls64", HasLS64) - .Default(false); + .Cases("aarch64", "arm64", "arm", true) + .Case("fmv", HasFMV) + .Cases("neon", "fp", "simd", FPU & NeonMode) + .Case("jscvt", HasJSCVT) + .Case("fcma", HasFCMA) + .Case("rng", HasRandGen) + .Case("flagm", HasFlagM) + .Case("flagm2", HasAlternativeNZCV) + .Case("fp16fml", HasFP16FML) + .Case("dotprod", HasDotProd) + .Case("sm4", HasSM4) + .Case("rdm", HasRDM) + .Case("lse", HasLSE) + .Case("crc", HasCRC) + .Case("sha2", HasSHA2) + .Case("sha3", HasSHA3) + .Cases("aes", "pmull", HasAES) + .Cases("fp16", "fullfp16", HasFullFP16) + .Case("dit", HasDIT) + .Case("dpb", HasCCPP) + .Case("dpb2", HasCCDP) + .Case("rcpc", HasRCPC) + 
.Case("frintts", HasFRInt3264) + .Case("i8mm", HasMatMul) + .Case("bf16", HasBFloat16) + .Case("sve", FPU & SveMode) + .Case("sve-bf16", FPU & SveMode && HasBFloat16) + .Case("sve-i8mm", FPU & SveMode && HasMatMul) + .Case("f32mm", FPU & SveMode && HasMatmulFP32) + .Case("f64mm", FPU & SveMode && HasMatmulFP64) + .Case("sve2", FPU & SveMode && HasSVE2) + .Case("sve2-pmull128", FPU & SveMode && HasSVE2AES) + .Case("sve2-bitperm", FPU & SveMode && HasSVE2BitPerm) + .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3) + .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4) + .Case("sme", HasSME) + .Case("sme-f64f64", HasSMEF64) + .Case("sme-i16i64", HasSMEI64) + .Cases("memtag", "memtag2", HasMTE) + .Case("sb", HasSB) + .Case("predres", HasPredRes) + .Cases("ssbs", "ssbs2", HasSSBS) + .Case("bti", HasBTI) + .Cases("ls64", "ls64_v", "ls64_accdata", HasLS64) + .Case("wfxt", HasWFxT) + .Default(false); } void AArch64TargetInfo::setFeatureEnabled(llvm::StringMap &Features, @@ -580,63 +750,134 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { for (const auto &Feature : Features) { - if (Feature == "+neon") + if (Feature == "-neon") + HasNoNeon = true; + if (Feature == "-sve") + HasNoSVE = true; + + if (Feature == "+neon" || Feature == "+fp-armv8") + FPU |= NeonMode; + if (Feature == "+jscvt") { + HasJSCVT = true; FPU |= NeonMode; + } + if (Feature == "+fcma") { + HasFCMA = true; + FPU |= NeonMode; + } + if (Feature == "+sve") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; } if (Feature == "+sve2") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; HasSVE2 = true; } if (Feature == "+sve2-aes") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; HasSVE2 = true; HasSVE2AES = true; } if (Feature == "+sve2-sha3") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; HasSVE2 = true; HasSVE2SHA3 = true; } if (Feature == "+sve2-sm4") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; HasSVE2 = true; HasSVE2SM4 = 
true; } if (Feature == "+sve2-bitperm") { + FPU |= NeonMode; FPU |= SveMode; HasFullFP16 = true; HasSVE2 = true; HasSVE2BitPerm = true; } if (Feature == "+f32mm") { + FPU |= NeonMode; FPU |= SveMode; + HasFullFP16 = true; HasMatmulFP32 = true; } if (Feature == "+f64mm") { + FPU |= NeonMode; FPU |= SveMode; + HasFullFP16 = true; HasMatmulFP64 = true; } + if (Feature == "+sme") { + HasSME = true; + HasBFloat16 = true; + } + if (Feature == "+sme-f64f64") { + HasSME = true; + HasSMEF64 = true; + HasBFloat16 = true; + } + if (Feature == "+sme-i16i64") { + HasSME = true; + HasSMEI64 = true; + HasBFloat16 = true; + } + if (Feature == "+sb") + HasSB = true; + if (Feature == "+predres") + HasPredRes = true; + if (Feature == "+ssbs") + HasSSBS = true; + if (Feature == "+bti") + HasBTI = true; + if (Feature == "+wfxt") + HasWFxT = true; + if (Feature == "-fmv") + HasFMV = false; if (Feature == "+crc") HasCRC = true; if (Feature == "+rcpc") HasRCPC = true; - if (Feature == "+aes") + if (Feature == "+aes") { + FPU |= NeonMode; HasAES = true; - if (Feature == "+sha2") + } + if (Feature == "+sha2") { + FPU |= NeonMode; HasSHA2 = true; + } if (Feature == "+sha3") { + FPU |= NeonMode; HasSHA2 = true; HasSHA3 = true; } - if (Feature == "+sm4") + if (Feature == "+rdm") { + FPU |= NeonMode; + HasRDM = true; + } + if (Feature == "+dit") + HasDIT = true; + if (Feature == "+ccpp") + HasCCPP = true; + if (Feature == "+ccdp") { + HasCCPP = true; + HasCCDP = true; + } + if (Feature == "+fptoint") + HasFRInt3264 = true; + if (Feature == "+sm4") { + FPU |= NeonMode; HasSM4 = true; + } if (Feature == "+strict-align") HasUnaligned = false; // All predecessor archs are added but select the latest one for ArchKind. 
@@ -672,12 +913,19 @@ ArchKind = llvm::AArch64::ArchKind::ARMV9_4A; if (Feature == "+v8r") ArchKind = llvm::AArch64::ArchKind::ARMV8R; - if (Feature == "+fullfp16") + if (Feature == "+fullfp16") { + FPU |= NeonMode; HasFullFP16 = true; - if (Feature == "+dotprod") + } + if (Feature == "+dotprod") { + FPU |= NeonMode; HasDotProd = true; - if (Feature == "+fp16fml") + } + if (Feature == "+fp16fml") { + FPU |= NeonMode; + HasFullFP16 = true; HasFP16FML = true; + } if (Feature == "+mte") HasMTE = true; if (Feature == "+tme") @@ -696,6 +944,10 @@ HasRandGen = true; if (Feature == "+flagm") HasFlagM = true; + if (Feature == "+altnzcv") { + HasFlagM = true; + HasAlternativeNZCV = true; + } if (Feature == "+mops") HasMOPS = true; if (Feature == "+d128") @@ -711,6 +963,14 @@ } setDataLayout(); + setArchFeatures(); + + if (HasNoNeon) { + FPU &= ~NeonMode; + FPU &= ~SveMode; + } + if (HasNoSVE) + FPU &= ~SveMode; return true; } @@ -718,6 +978,7 @@ bool AArch64TargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { + std::vector UpdatedFeaturesVec; // Parse the CPU and add any implied features. llvm::AArch64::ArchKind Arch = llvm::AArch64::parseCPUArch(CPU); if (Arch != llvm::AArch64::ArchKind::INVALID) { @@ -726,11 +987,33 @@ llvm::AArch64::getExtensionFeatures(Exts, CPUFeats); for (auto F : CPUFeats) { assert((F[0] == '+' || F[0] == '-') && "Expected +/- in target feature!"); - setFeatureEnabled(Features, F.drop_front(), F[0] == '+'); + UpdatedFeaturesVec.push_back(F.str()); } } - return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); + // Process target and dependent features. This is done in two loops collecting + // them into UpdatedFeaturesVec: first to add dependent '+'features, + // second to add target '+/-'features that can later disable some of + // features added on the first loop. + for (const auto &Feature : FeaturesVec) + if ((Feature[0] == '?' 
|| Feature[0] == '+')) { + std::string Options; + if (AArch64TargetInfo::getFeatureDepOptions(Feature.substr(1), Options)) { + SmallVector AttrFeatures; + StringRef(Options).split(AttrFeatures, ","); + for (auto F : AttrFeatures) + UpdatedFeaturesVec.push_back(F.str()); + } + } + for (const auto &Feature : FeaturesVec) + if (Feature[0] == '+') { + std::string F; + llvm::AArch64::getFeatureOption(Feature, F); + UpdatedFeaturesVec.push_back(F); + } else if (Feature[0] != '?') + UpdatedFeaturesVec.push_back(Feature); + + return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec); } // Parse AArch64 Target attributes, which are a comma separated list of: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/X86TargetParser.h" @@ -13038,6 +13039,16 @@ return Result; } +Value *CodeGenFunction::EmitAArch64CpuInit() { + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver"); + cast(Func.getCallee())->setDSOLocal(true); + cast(Func.getCallee()) + ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + return Builder.CreateCall(Func); +} + Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); @@ -13049,6 +13060,32 @@ return Builder.CreateCall(Func); } +llvm::Value * +CodeGenFunction::EmitAArch64CpuSupports(ArrayRef FeaturesStrs) { + uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); + Value *Result = Builder.getTrue(); + if (FeaturesMask != 0) { + // Get features from structure in runtime library + 
// struct { + // unsigned long long features; + // } __aarch64_cpu_features; + llvm::Type *STy = llvm::StructType::get(Int64Ty); + llvm::Constant *AArch64CPUFeatures = + CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features"); + cast(AArch64CPUFeatures)->setDSOLocal(true); + llvm::Value *CpuFeatures = Builder.CreateGEP( + STy, AArch64CPUFeatures, + {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)}); + Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures, + CharUnits::fromQuantity(8)); + Value *Mask = Builder.getInt64(FeaturesMask); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } + return Result; +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_cpu_is) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -11501,7 +11501,7 @@ if (CGM.getTarget().hasFeature("sve")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 's', 128, Fn, ExprLoc); - if (CGM.getTarget().hasFeature("neon")) + else if (CGM.getTarget().hasFeature("neon")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 'n', 128, Fn, ExprLoc); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4813,6 +4813,12 @@ // last (if it exists). 
void EmitMultiVersionResolver(llvm::Function *Resolver, ArrayRef Options); + void + EmitX86MultiVersionResolver(llvm::Function *Resolver, + ArrayRef Options); + void + EmitAArch64MultiVersionResolver(llvm::Function *Resolver, + ArrayRef Options); private: QualType getVarArgType(const Expr *Arg); @@ -4831,7 +4837,11 @@ llvm::Value *EmitX86CpuSupports(ArrayRef FeatureStrs); llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); - llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *EmitAArch64CpuInit(); + llvm::Value * + FormAArch64ResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *EmitAArch64CpuSupports(ArrayRef FeatureStrs); }; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2629,8 +2629,22 @@ Bundles.emplace_back("kcfi", CGM.CreateKCFITypeId(FP->desugar())); } -llvm::Value * -CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { +llvm::Value *CodeGenFunction::FormAArch64ResolverCondition( + const MultiVersionResolverOption &RO) { + llvm::SmallVector CondFeatures; + for (const StringRef &Feature : RO.Conditions.Features) { + // Form condition for features which are not yet enabled in target + if (!getContext().getTargetInfo().hasFeature(Feature)) + CondFeatures.push_back(Feature); + } + if (!CondFeatures.empty()) { + return EmitAArch64CpuSupports(CondFeatures); + } + return nullptr; +} + +llvm::Value *CodeGenFunction::FormX86ResolverCondition( + const MultiVersionResolverOption &RO) { llvm::Value *Condition = nullptr; if (!RO.Conditions.Architecture.empty()) @@ -2668,8 +2682,72 @@ void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef Options) { - assert(getContext().getTargetInfo().getTriple().isX86() && - "Only 
implemented for x86 targets"); + + llvm::Triple::ArchType ArchType = + getContext().getTargetInfo().getTriple().getArch(); + + switch (ArchType) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + EmitX86MultiVersionResolver(Resolver, Options); + return; + case llvm::Triple::aarch64: + EmitAArch64MultiVersionResolver(Resolver, Options); + return; + + default: + assert(false && "Only implemented for x86 and AArch64 targets"); + } +} + +void CodeGenFunction::EmitAArch64MultiVersionResolver( + llvm::Function *Resolver, ArrayRef Options) { + assert(!Options.empty() && "No multiversion resolver options found"); + assert(Options.back().Conditions.Features.size() == 0 && + "Default case must be last"); + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + assert(SupportsIFunc && + "Multiversion resolver requires target IFUNC support"); + bool AArch64CpuInitialized = false; + llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + + for (const MultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + llvm::Value *Condition = FormAArch64ResolverCondition(RO); + + // The 'default' or 'all features enabled' case. + if (!Condition) { + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); + return; + } + + if (!AArch64CpuInitialized) { + Builder.SetInsertPoint(CurBlock, CurBlock->begin()); + EmitAArch64CpuInit(); + AArch64CpuInitialized = true; + Builder.SetInsertPoint(CurBlock); + } + + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); + CurBlock = createBasicBlock("resolver_else", Resolver); + Builder.CreateCondBr(Condition, RetBlock, CurBlock); + } + + // If no default, emit an unreachable. 
+ Builder.SetInsertPoint(CurBlock); + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); +} + +void CodeGenFunction::EmitX86MultiVersionResolver( + llvm::Function *Resolver, ArrayRef Options) { bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); @@ -2680,7 +2758,7 @@ for (const MultiVersionResolverOption &RO : Options) { Builder.SetInsertPoint(CurBlock); - llvm::Value *Condition = FormResolverCondition(RO); + llvm::Value *Condition = FormX86ResolverCondition(RO); // The 'default' or 'generic' case. if (!Condition) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1337,6 +1337,20 @@ Out << ".resolver"; } +static void AppendTargetVersionMangling(const CodeGenModule &CGM, + const TargetVersionAttr *Attr, + raw_ostream &Out) { + if (Attr->isDefaultVersion()) + return; + Out << "._"; + llvm::SmallVector Feats; + Attr->getFeatures(Feats); + for (const auto &Feat : Feats) { + Out << 'M'; + Out << Feat; + } +} + static void AppendTargetMangling(const CodeGenModule &CGM, const TargetAttr *Attr, raw_ostream &Out) { if (Attr->isDefaultVersion()) @@ -1382,14 +1396,27 @@ const TargetClonesAttr *Attr, unsigned VersionIndex, raw_ostream &Out) { - Out << '.'; - StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); - if (FeatureStr.startswith("arch=")) - Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); - else - Out << FeatureStr; + if (CGM.getTarget().getTriple().isAArch64()) { + StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); + if (FeatureStr == "default") + return; + Out << "._"; + SmallVector Features; + FeatureStr.split(Features, "+"); + for (auto &Feat : Features) { + Out << 'M'; + Out << Feat; + } + } else { + Out << '.'; + StringRef FeatureStr = 
Attr->getFeatureStr(VersionIndex); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; - Out << '.' << Attr->getMangledIndex(VersionIndex); + Out << '.' << Attr->getMangledIndex(VersionIndex); + } } static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, @@ -1445,6 +1472,9 @@ case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr(), Out); break; + case MultiVersionKind::TargetVersion: + AppendTargetVersionMangling(CGM, FD->getAttr(), Out); + break; case MultiVersionKind::TargetClones: AppendTargetClonesMangling(CGM, FD->getAttr(), GD.getMultiVersionIndex(), Out); @@ -2166,10 +2196,12 @@ const auto *FD = dyn_cast_or_null(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr() : nullptr; + const auto *TV = FD ? FD->getAttr() : nullptr; + assert((!TD || !TV) && "both target_version and target specified"); const auto *SD = FD ? FD->getAttr() : nullptr; const auto *TC = FD ? 
FD->getAttr() : nullptr; bool AddedAttr = false; - if (TD || SD || TC) { + if (TD || TV || SD || TC) { llvm::StringMap FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); @@ -3623,12 +3655,18 @@ TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; - for (StringRef Feat : RO.Conditions.Features) + unsigned NumFeatures = 0; + for (StringRef Feat : RO.Conditions.Features) { Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); + NumFeatures++; + } if (!RO.Conditions.Architecture.empty()) Priority = std::max( Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); + + Priority += TI.multiVersionFeatureCost() * NumFeatures; + return Priority; } @@ -3673,13 +3711,19 @@ } assert(Func && "This should have just been created"); } - - const auto *TA = CurFD->getAttr(); - llvm::SmallVector Feats; - TA->getAddedFeatures(Feats); - - Options.emplace_back(cast(Func), - TA->getArchitecture(), Feats); + if (CurFD->getMultiVersionKind() == MultiVersionKind::Target) { + const auto *TA = CurFD->getAttr(); + llvm::SmallVector Feats; + TA->getAddedFeatures(Feats); + Options.emplace_back(cast(Func), + TA->getArchitecture(), Feats); + } else { + const auto *TVA = CurFD->getAttr(); + llvm::SmallVector Feats; + TVA->getFeatures(Feats); + Options.emplace_back(cast(Func), + /*Architecture*/ "", Feats); + } }); } else if (FD->isTargetClonesMultiVersion()) { const auto *TC = FD->getAttr(); @@ -3709,10 +3753,19 @@ StringRef Architecture; llvm::SmallVector Feature; - if (Version.startswith("arch=")) - Architecture = Version.drop_front(sizeof("arch=") - 1); - else if (Version != "default") - Feature.push_back(Version); + if (getTarget().getTriple().isAArch64()) { + if (Version != "default") { + llvm::SmallVector VerFeats; + Version.split(VerFeats, "+"); + for (auto &CurFeat : VerFeats) + Feature.push_back(CurFeat.trim()); + } + } else { + if (Version.startswith("arch=")) + Architecture = 
Version.drop_front(sizeof("arch=") - 1); + else if (Version != "default") + Feature.push_back(Version); + } Options.emplace_back(cast(Func), Architecture, Feature); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7228,6 +7228,14 @@ CmdArgs.push_back("+outline-atomics"); } + if (Triple.isAArch64() && + (Args.hasArg(options::OPT_mno_fmv) || + getToolChain().GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT)) { + // Disable Function Multiversioning on AArch64 target. + CmdArgs.push_back("-target-feature"); + CmdArgs.push_back("-fmv"); + } + if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig, (TC.getTriple().isOSBinFormatELF() || TC.getTriple().isOSBinFormatCOFF()) && diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2539,6 +2539,9 @@ if (FD->isMultiVersion() && FD->hasAttr() && !FD->getAttr()->isDefaultVersion()) continue; + if (FD->isMultiVersion() && FD->hasAttr() && + !FD->getAttr()->isDefaultVersion()) + continue; } S.Diag(Fn->getLocation(), diag::note_possible_target_of_call); ++ShownOverloads; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10143,6 +10143,13 @@ // Handle attributes. 
ProcessDeclAttributes(S, NewFD, D); + const auto *NewTVA = NewFD->getAttr(); + if (NewTVA && !NewTVA->isDefaultVersion() && + !Context.getTargetInfo().hasFeature("fmv")) { + // Don't add to scope fmv functions declarations if fmv disabled + AddToScope = false; + return NewFD; + } if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return @@ -10351,7 +10358,8 @@ D.setRedeclaration(true); } - assert((NewFD->isInvalidDecl() || !D.isRedeclaration() || + assert((NewFD->isInvalidDecl() || NewFD->isMultiVersion() || + !D.isRedeclaration() || Previous.getResultKind() != LookupResult::FoundOverloaded) && "previous declaration set still overloaded"); @@ -10823,37 +10831,53 @@ PrevVD->getType()); } -/// Check the target attribute of the function for MultiVersion -/// validity. +/// Check the target or target_version attribute of the function for +/// MultiVersion validity. /// /// Returns true if there was an error, false otherwise. static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) { const auto *TA = FD->getAttr(); - assert(TA && "MultiVersion Candidate requires a target attribute"); - ParsedTargetAttr ParseInfo = - S.getASTContext().getTargetInfo().parseTargetAttr(TA->getFeaturesStr()); + const auto *TVA = FD->getAttr(); + assert( + (TA || TVA) && + "MultiVersion candidate requires a target or target_version attribute"); const TargetInfo &TargetInfo = S.Context.getTargetInfo(); enum ErrType { Feature = 0, Architecture = 1 }; - if (!ParseInfo.CPU.empty() && !TargetInfo.validateCpuIs(ParseInfo.CPU)) { - S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) - << Architecture << ParseInfo.CPU; - return true; - } - - for (const auto &Feat : ParseInfo.Features) { - auto BareFeat = StringRef{Feat}.substr(1); - if (Feat[0] == '-') { + if (TA) { + ParsedTargetAttr ParseInfo = + S.getASTContext().getTargetInfo().parseTargetAttr(TA->getFeaturesStr()); + if (!ParseInfo.CPU.empty() && 
!TargetInfo.validateCpuIs(ParseInfo.CPU)) { S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) - << Feature << ("no-" + BareFeat).str(); + << Architecture << ParseInfo.CPU; return true; } + for (const auto &Feat : ParseInfo.Features) { + auto BareFeat = StringRef{Feat}.substr(1); + if (Feat[0] == '-') { + S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) + << Feature << ("no-" + BareFeat).str(); + return true; + } + + if (!TargetInfo.validateCpuSupports(BareFeat) || + !TargetInfo.isValidFeatureName(BareFeat)) { + S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) + << Feature << BareFeat; + return true; + } + } + } - if (!TargetInfo.validateCpuSupports(BareFeat) || - !TargetInfo.isValidFeatureName(BareFeat)) { - S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) - << Feature << BareFeat; - return true; + if (TVA) { + llvm::SmallVector Feats; + TVA->getFeatures(Feats); + for (const auto &Feat : Feats) { + if (!TargetInfo.validateCpuSupports(Feat)) { + S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) + << Feature << Feat; + return true; + } } } return false; @@ -10900,6 +10924,10 @@ if (MVKind != MultiVersionKind::Target) return Diagnose(S, A); break; + case attr::TargetVersion: + if (MVKind != MultiVersionKind::TargetVersion) + return Diagnose(S, A); + break; case attr::TargetClones: if (MVKind != MultiVersionKind::TargetClones) return Diagnose(S, A); @@ -11072,18 +11100,18 @@ /// This sets NewFD->isInvalidDecl() to true if there was an error. /// /// Returns true if there was an error, false otherwise. 
-static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD, - MultiVersionKind MVKind, - const TargetAttr *TA) { +static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD) { + MultiVersionKind MVKind = FD->getMultiVersionKind(); assert(MVKind != MultiVersionKind::None && "Function lacks multiversion attribute"); - - // Target only causes MV if it is default, otherwise this is a normal - // function. - if (MVKind == MultiVersionKind::Target && !TA->isDefaultVersion()) + const auto *TA = FD->getAttr(); + const auto *TVA = FD->getAttr(); + // Target and target_version only causes MV if it is default, otherwise this + // is a normal function. + if ((TA && !TA->isDefaultVersion()) || (TVA && !TVA->isDefaultVersion())) return false; - if (MVKind == MultiVersionKind::Target && CheckMultiVersionValue(S, FD)) { + if ((TA || TVA) && CheckMultiVersionValue(S, FD)) { FD->setInvalidDecl(); return true; } @@ -11106,25 +11134,27 @@ return false; } -static bool CheckTargetCausesMultiVersioning( - Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, const TargetAttr *NewTA, - bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) { +static bool CheckTargetCausesMultiVersioning(Sema &S, FunctionDecl *OldFD, + FunctionDecl *NewFD, + bool &Redeclaration, + NamedDecl *&OldDecl, + LookupResult &Previous) { + const auto *NewTA = NewFD->getAttr(); + const auto *NewTVA = NewFD->getAttr(); const auto *OldTA = OldFD->getAttr(); - ParsedTargetAttr NewParsed = - S.getASTContext().getTargetInfo().parseTargetAttr( - NewTA->getFeaturesStr()); - // Sort order doesn't matter, it just needs to be consistent. - llvm::sort(NewParsed.Features); - + const auto *OldTVA = OldFD->getAttr(); // If the old decl is NOT MultiVersioned yet, and we don't cause that // to change, this is a simple redeclaration. 
- if (!NewTA->isDefaultVersion() && - (!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr())) + if ((NewTA && !NewTA->isDefaultVersion() && + (!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr())) || + (NewTVA && !NewTVA->isDefaultVersion() && + (!OldTVA || OldTVA->getName() == NewTVA->getName()))) return false; // Otherwise, this decl causes MultiVersioning. if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD, true, - MultiVersionKind::Target)) { + NewTVA ? MultiVersionKind::TargetVersion + : MultiVersionKind::Target)) { NewFD->setInvalidDecl(); return true; } @@ -11135,7 +11165,9 @@ } // If this is 'default', permit the forward declaration. - if (!OldFD->isMultiVersion() && !OldTA && NewTA->isDefaultVersion()) { + if (!OldFD->isMultiVersion() && + ((NewTA && NewTA->isDefaultVersion() && !OldTA) || + (NewTVA && NewTVA->isDefaultVersion() && !OldTVA))) { Redeclaration = true; OldDecl = OldFD; OldFD->setIsMultiVersion(); @@ -11149,26 +11181,50 @@ return true; } - ParsedTargetAttr OldParsed = - S.getASTContext().getTargetInfo().parseTargetAttr( - OldTA->getFeaturesStr()); - llvm::sort(OldParsed.Features); + if (NewTA) { + ParsedTargetAttr OldParsed = + S.getASTContext().getTargetInfo().parseTargetAttr( + OldTA->getFeaturesStr()); + llvm::sort(OldParsed.Features); + ParsedTargetAttr NewParsed = + S.getASTContext().getTargetInfo().parseTargetAttr( + NewTA->getFeaturesStr()); + // Sort order doesn't matter, it just needs to be consistent. 
+ llvm::sort(NewParsed.Features); + if (OldParsed == NewParsed) { + S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate); + S.Diag(OldFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + } + + if (NewTVA) { + llvm::SmallVector Feats; + OldTVA->getFeatures(Feats); + llvm::sort(Feats); + llvm::SmallVector NewFeats; + NewTVA->getFeatures(NewFeats); + llvm::sort(NewFeats); - if (OldParsed == NewParsed) { - S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate); - S.Diag(OldFD->getLocation(), diag::note_previous_declaration); - NewFD->setInvalidDecl(); - return true; + if (Feats == NewFeats) { + S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate); + S.Diag(OldFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } } for (const auto *FD : OldFD->redecls()) { const auto *CurTA = FD->getAttr(); + const auto *CurTVA = FD->getAttr(); // We allow forward declarations before ANY multiversioning attributes, but // nothing after the fact. if (PreviousDeclsHaveMultiVersionAttribute(FD) && - (!CurTA || CurTA->isInherited())) { + ((NewTA && (!CurTA || CurTA->isInherited())) || + (NewTVA && (!CurTVA || CurTVA->isInherited())))) { S.Diag(FD->getLocation(), diag::err_multiversion_required_in_redecl) - << 0; + << (NewTA ? 0 : 2); S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); NewFD->setInvalidDecl(); return true; @@ -11199,11 +11255,11 @@ /// multiversioned declaration collection. 
static bool CheckMultiVersionAdditionalDecl( Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, - MultiVersionKind NewMVKind, const TargetAttr *NewTA, - const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl, - LookupResult &Previous) { - + MultiVersionKind NewMVKind, const CPUDispatchAttr *NewCPUDisp, + const CPUSpecificAttr *NewCPUSpec, const TargetClonesAttr *NewClones, + bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) { + const auto *NewTA = NewFD->getAttr(); + const auto *NewTVA = NewFD->getAttr(); MultiVersionKind OldMVKind = OldFD->getMultiVersionKind(); // Disallow mixing of multiversioning types. if (!MultiVersionTypesCompatible(OldMVKind, NewMVKind)) { @@ -11219,6 +11275,11 @@ NewTA->getFeaturesStr()); llvm::sort(NewParsed.Features); } + llvm::SmallVector NewFeats; + if (NewTVA) { + NewTVA->getFeatures(NewFeats); + llvm::sort(NewFeats); + } bool UseMemberUsingDeclRules = S.CurContext->isRecord() && !NewFD->getFriendObjectKind(); @@ -11236,6 +11297,20 @@ S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules)) continue; + if (NewMVKind == MultiVersionKind::None && + OldMVKind == MultiVersionKind::TargetVersion) { + NewFD->addAttr(TargetVersionAttr::CreateImplicit( + S.Context, "default", NewFD->getSourceRange(), + AttributeCommonInfo::AS_GNU)); + NewFD->setIsMultiVersion(); + NewMVKind = MultiVersionKind::TargetVersion; + if (!NewTVA) { + NewTVA = NewFD->getAttr(); + NewTVA->getFeatures(NewFeats); + llvm::sort(NewFeats); + } + } + switch (NewMVKind) { case MultiVersionKind::None: assert(OldMVKind == MultiVersionKind::TargetClones && @@ -11262,6 +11337,27 @@ } break; } + case MultiVersionKind::TargetVersion: { + const auto *CurTVA = CurFD->getAttr(); + if (CurTVA->getName() == NewTVA->getName()) { + NewFD->setIsMultiVersion(); + Redeclaration = true; + OldDecl = ND; + return false; + } + llvm::SmallVector CurFeats; + if (CurTVA) { + 
CurTVA->getFeatures(CurFeats); + llvm::sort(CurFeats); + } + if (CurFeats == NewFeats) { + S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate); + S.Diag(CurFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + break; + } case MultiVersionKind::TargetClones: { const auto *CurClones = CurFD->getAttr(); Redeclaration = true; @@ -11344,7 +11440,8 @@ // Else, this is simply a non-redecl case. Checking the 'value' is only // necessary in the Target case, since The CPUSpecific/Dispatch cases are // handled in the attribute adding step. - if (NewMVKind == MultiVersionKind::Target && + if ((NewMVKind == MultiVersionKind::TargetVersion || + NewMVKind == MultiVersionKind::Target) && CheckMultiVersionValue(S, NewFD)) { NewFD->setInvalidDecl(); return true; @@ -11382,16 +11479,20 @@ bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) { const auto *NewTA = NewFD->getAttr(); + const auto *NewTVA = NewFD->getAttr(); const auto *NewCPUDisp = NewFD->getAttr(); const auto *NewCPUSpec = NewFD->getAttr(); const auto *NewClones = NewFD->getAttr(); MultiVersionKind MVKind = NewFD->getMultiVersionKind(); // Main isn't allowed to become a multiversion function, however it IS - // permitted to have 'main' be marked with the 'target' optimization hint. + // permitted to have 'main' be marked with the 'target' optimization hint, + // for 'target_version' only default is allowed. if (NewFD->isMain()) { if (MVKind != MultiVersionKind::None && - !(MVKind == MultiVersionKind::Target && !NewTA->isDefaultVersion())) { + !(MVKind == MultiVersionKind::Target && !NewTA->isDefaultVersion()) && + !(MVKind == MultiVersionKind::TargetVersion && + NewTVA->isDefaultVersion())) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main); NewFD->setInvalidDecl(); return true; @@ -11406,18 +11507,34 @@ // multiversioning, this isn't an error condition. 
if (MVKind == MultiVersionKind::None) return false; - return CheckMultiVersionFirstFunction(S, NewFD, MVKind, NewTA); + return CheckMultiVersionFirstFunction(S, NewFD); } FunctionDecl *OldFD = OldDecl->getAsFunction(); - if (!OldFD->isMultiVersion() && MVKind == MultiVersionKind::None) + if (!OldFD->isMultiVersion() && MVKind == MultiVersionKind::None) { + // No target_version attributes mean default + if (!NewTVA) { + const auto *OldTVA = OldFD->getAttr(); + if (OldTVA) { + NewFD->addAttr(TargetVersionAttr::CreateImplicit( + S.Context, "default", NewFD->getSourceRange(), + AttributeCommonInfo::AS_GNU)); + NewFD->setIsMultiVersion(); + OldFD->setIsMultiVersion(); + OldDecl = OldFD; + Redeclaration = true; + return true; + } + } return false; + } // Multiversioned redeclarations aren't allowed to omit the attribute, except - // for target_clones. + // for target_clones and target_version. if (OldFD->isMultiVersion() && MVKind == MultiVersionKind::None && - OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) { + OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones && + OldFD->getMultiVersionKind() != MultiVersionKind::TargetVersion) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (OldFD->getMultiVersionKind() != MultiVersionKind::Target); NewFD->setInvalidDecl(); @@ -11427,8 +11544,9 @@ if (!OldFD->isMultiVersion()) { switch (MVKind) { case MultiVersionKind::Target: - return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, - Redeclaration, OldDecl, Previous); + case MultiVersionKind::TargetVersion: + return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, Redeclaration, + OldDecl, Previous); case MultiVersionKind::TargetClones: if (OldFD->isUsed(false)) { NewFD->setInvalidDecl(); @@ -11436,6 +11554,7 @@ } OldFD->setIsMultiVersion(); break; + case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: case MultiVersionKind::None: @@ -11446,9 +11565,9 @@ // At this point, we have a multiversion 
function decl (in OldFD) AND an // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. - return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, MVKind, NewTA, - NewCPUDisp, NewCPUSpec, NewClones, - Redeclaration, OldDecl, Previous); + return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, MVKind, NewCPUDisp, + NewCPUSpec, NewClones, Redeclaration, + OldDecl, Previous); } /// Perform semantic checking of a new function declaration. @@ -14993,6 +15112,16 @@ FD->dropAttr(); FD->setInvalidDecl(); } + if (const auto *Attr = FD->getAttr()) { + if (!Context.getTargetInfo().hasFeature("fmv") && + !Attr->isDefaultVersion()) { + // If function multi versioning disabled skip parsing function body + // defined with non-default target_version attribute + if (SkipBody) + SkipBody->ShouldSkip = true; + return nullptr; + } + } if (auto *Ctor = dyn_cast(FD)) { if (Ctor->getTemplateSpecializationKind() == TSK_ExplicitSpecialization && diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3450,6 +3450,42 @@ return false; } +// Check Target Version attrs +bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, StringRef &AttrStr, + bool &isDefault) { + enum FirstParam { Unsupported }; + enum SecondParam { None }; + enum ThirdParam { Target, TargetClones, TargetVersion }; + if (AttrStr.trim() == "default") + isDefault = true; + llvm::SmallVector Features; + AttrStr.split(Features, "+"); + for (auto &CurFeature : Features) { + CurFeature = CurFeature.trim(); + if (CurFeature == "default") + continue; + if (!Context.getTargetInfo().validateCpuSupports(CurFeature)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << CurFeature << TargetVersion; + } + return false; +} + +static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef Str; + 
SourceLocation LiteralLoc; + bool isDefault = false; + if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) || + S.checkTargetVersionAttr(LiteralLoc, Str, isDefault)) + return; + // Do not create default only target_version attribute + if (!isDefault) { + TargetVersionAttr *NewAttr = + ::new (S.Context) TargetVersionAttr(S.Context, AL, Str); + D->addAttr(NewAttr); + } +} + static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef Str; SourceLocation LiteralLoc; @@ -3461,10 +3497,10 @@ D->addAttr(NewAttr); } -bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, - const StringLiteral *Literal, - bool &HasDefault, bool &HasCommas, - SmallVectorImpl &Strings) { +bool Sema::checkTargetClonesAttrString( + SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal, + bool &HasDefault, bool &HasCommas, bool &HasNotDefault, + SmallVectorImpl> &StringsBuffer) { enum FirstParam { Unsupported, Duplicate, Unknown }; enum SecondParam { None, CPU, Tune }; enum ThirdParam { Target, TargetClones }; @@ -3483,29 +3519,75 @@ getLangOpts(), Context.getTargetInfo()); bool DefaultIsDupe = false; + bool HasCodeGenImpact = false; if (Cur.empty()) return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << "" << TargetClones; - if (Cur.startswith("arch=")) { - if (!Context.getTargetInfo().isValidCPUName( - Cur.drop_front(sizeof("arch=") - 1))) + if (Context.getTargetInfo().getTriple().isAArch64()) { + // AArch64 target clones specific + if (Cur == "default") { + DefaultIsDupe = HasDefault; + HasDefault = true; + if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + else + StringsBuffer.push_back(Cur); + } else { + std::pair CurParts = {{}, Cur}; + llvm::SmallVector CurFeatures; + while (!CurParts.second.empty()) { + CurParts = CurParts.second.split('+'); + StringRef CurFeature = CurParts.first.trim(); + if 
(!Context.getTargetInfo().validateCpuSupports(CurFeature)) { + Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << CurFeature << TargetClones; + continue; + } + std::string Options; + if (Context.getTargetInfo().getFeatureDepOptions(CurFeature, Options)) + HasCodeGenImpact = true; + CurFeatures.push_back(CurFeature); + } + // Canonize TargetClones Attributes + llvm::sort(CurFeatures); + SmallString<64> Res; + for (auto &CurFeat : CurFeatures) { + if (!Res.equals("")) + Res.append("+"); + Res.append(CurFeat); + } + if (llvm::is_contained(StringsBuffer, Res) || DefaultIsDupe) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + else if (!HasCodeGenImpact) + // Ignore features in target_clone attribute that don't impact + // code generation + Diag(CurLoc, diag::warn_target_clone_no_impact_options); + else if (!Res.empty()) { + StringsBuffer.push_back(Res); + HasNotDefault = true; + } + } + } else { + // Other targets ( currently X86 ) + if (Cur.startswith("arch=")) { + if (!Context.getTargetInfo().isValidCPUName( + Cur.drop_front(sizeof("arch=") - 1))) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1) + << TargetClones; + } else if (Cur == "default") { + DefaultIsDupe = HasDefault; + HasDefault = true; + } else if (!Context.getTargetInfo().isValidFeatureName(Cur)) return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1) - << TargetClones; - } else if (Cur == "default") { - DefaultIsDupe = HasDefault; - HasDefault = true; - } else if (!Context.getTargetInfo().isValidFeatureName(Cur)) - return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Cur << TargetClones; - - if (llvm::is_contained(Strings, Cur) || DefaultIsDupe) - Diag(CurLoc, diag::warn_target_clone_duplicate_options); - // Note: Add even if there are duplicates, since it changes name mangling. 
- Strings.push_back(Cur); + << Unsupported << None << Cur << TargetClones; + if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + // Note: Add even if there are duplicates, since it changes name mangling. + StringsBuffer.push_back(Cur); + } } - if (Str.rtrim().endswith(",")) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << "" << TargetClones; @@ -3513,6 +3595,10 @@ } static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (S.Context.getTargetInfo().getTriple().isAArch64() && + !S.Context.getTargetInfo().hasFeature("fmv")) + return; + // Ensure we don't combine these with themselves, since that causes some // confusing behavior. if (const auto *Other = D->getAttr()) { @@ -3524,7 +3610,8 @@ return; SmallVector Strings; - bool HasCommas = false, HasDefault = false; + SmallVector, 2> StringsBuffer; + bool HasCommas = false, HasDefault = false, HasNotDefault = false; for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { StringRef CurStr; @@ -3533,13 +3620,21 @@ S.checkTargetClonesAttrString( LiteralLoc, CurStr, cast(AL.getArgAsExpr(I)->IgnoreParenCasts()), - HasDefault, HasCommas, Strings)) + HasDefault, HasCommas, HasNotDefault, StringsBuffer)) return; } + for (auto &SmallStr : StringsBuffer) + Strings.push_back(SmallStr.str()); if (HasCommas && AL.getNumArgs() > 1) S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values); + if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasDefault) { + // Add default attribute if there is no one + HasDefault = true; + Strings.push_back("default"); + } + if (!HasDefault) { S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default); return; @@ -3556,6 +3651,10 @@ } } + // No multiversion if we have default version only. 
+ if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasNotDefault) + return; + cast(D)->setIsMultiVersion(); TargetClonesAttr *NewAttr = ::new (S.Context) TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size()); @@ -8907,6 +9006,9 @@ case ParsedAttr::AT_Target: handleTargetAttr(S, D, AL); break; + case ParsedAttr::AT_TargetVersion: + handleTargetVersionAttr(S, D, AL); + break; case ParsedAttr::AT_TargetClones: handleTargetClonesAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6521,8 +6521,11 @@ } } - if (Function->isMultiVersion() && Function->hasAttr() && - !Function->getAttr()->isDefaultVersion()) { + if (Function->isMultiVersion() && + ((Function->hasAttr() && + !Function->getAttr()->isDefaultVersion()) || + (Function->hasAttr() && + !Function->getAttr()->isDefaultVersion()))) { Candidate.Viable = false; Candidate.FailureKind = ovl_non_default_multiversion_function; return; @@ -7182,8 +7185,11 @@ return; } - if (Method->isMultiVersion() && Method->hasAttr() && - !Method->getAttr()->isDefaultVersion()) { + if (Method->isMultiVersion() && + ((Method->hasAttr() && + !Method->getAttr()->isDefaultVersion()) || + (Method->hasAttr() && + !Method->getAttr()->isDefaultVersion()))) { Candidate.Viable = false; Candidate.FailureKind = ovl_non_default_multiversion_function; } @@ -7636,8 +7642,11 @@ return; } - if (Conversion->isMultiVersion() && Conversion->hasAttr() && - !Conversion->getAttr()->isDefaultVersion()) { + if (Conversion->isMultiVersion() && + ((Conversion->hasAttr() && + !Conversion->getAttr()->isDefaultVersion()) || + (Conversion->hasAttr() && + !Conversion->getAttr()->isDefaultVersion()))) { Candidate.Viable = false; Candidate.FailureKind = ovl_non_default_multiversion_function; } @@ -10563,6 +10572,9 @@ if (Fn->isMultiVersion() && Fn->hasAttr() && !Fn->getAttr()->isDefaultVersion()) return; + if 
(Fn->isMultiVersion() && Fn->hasAttr() && + !Fn->getAttr()->isDefaultVersion()) + return; if (shouldSkipNotingLambdaConversionDecl(Fn)) return; @@ -12372,6 +12384,9 @@ const auto *TA = FunDecl->getAttr(); if (TA && !TA->isDefaultVersion()) return false; + const auto *TVA = FunDecl->getAttr(); + if (TVA && !TVA->isDefaultVersion()) + return false; } // If any candidate has a placeholder return type, trigger its deduction diff --git a/clang/test/AST/attr-target-version.c b/clang/test/AST/attr-target-version.c new file mode 100644 --- /dev/null +++ b/clang/test/AST/attr-target-version.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -ast-dump %s | FileCheck %s + +int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; } +int __attribute__((target_clones(" lse + fp + sha3 "))) fooc(void) { return 2; } +// CHECK: TargetVersionAttr +// CHECK: sve2-bitperm + sha2 +// CHECK: TargetClonesAttr +// CHECK: fp+lse+sha3 default diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -10,100 +10,100 @@ // CHECK-LABEL: @v82sve2() #2 __attribute__((target("arch=armv8.2-a+sve2"))) void v82sve2() {} -// CHECK-LABEL: @v82svesve2() #3 +// CHECK-LABEL: @v82svesve2() #2 __attribute__((target("arch=armv8.2-a+sve+sve2"))) void v82svesve2() {} -// CHECK-LABEL: @v86sve2() #4 +// CHECK-LABEL: @v86sve2() #3 __attribute__((target("arch=armv8.6-a+sve2"))) void v86sve2() {} -// CHECK-LABEL: @a710() #5 +// CHECK-LABEL: @a710() #4 __attribute__((target("cpu=cortex-a710"))) void a710() {} -// CHECK-LABEL: @tunea710() #6 +// CHECK-LABEL: @tunea710() #5 __attribute__((target("tune=cortex-a710"))) void tunea710() {} -// CHECK-LABEL: @generic() #7 +// CHECK-LABEL: @generic() #6 __attribute__((target("cpu=generic"))) void generic() {} -// CHECK-LABEL: @tune() #8 +// CHECK-LABEL: @tune() #7 __attribute__((target("tune=generic"))) 
void tune() {} -// CHECK-LABEL: @n1tunea710() #9 +// CHECK-LABEL: @n1tunea710() #8 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710"))) void n1tunea710() {} -// CHECK-LABEL: @svetunea710() #10 +// CHECK-LABEL: @svetunea710() #9 __attribute__((target("sve,tune=cortex-a710"))) void svetunea710() {} -// CHECK-LABEL: @plussvetunea710() #10 +// CHECK-LABEL: @plussvetunea710() #9 __attribute__((target("+sve,tune=cortex-a710"))) void plussvetunea710() {} -// CHECK-LABEL: @v1plussve2() #11 +// CHECK-LABEL: @v1plussve2() #10 __attribute__((target("cpu=neoverse-v1,+sve2"))) void v1plussve2() {} -// CHECK-LABEL: @v1sve2() #11 +// CHECK-LABEL: @v1sve2() #10 __attribute__((target("cpu=neoverse-v1+sve2"))) void v1sve2() {} -// CHECK-LABEL: @v1minussve() #12 +// CHECK-LABEL: @v1minussve() #11 __attribute__((target("cpu=neoverse-v1,+nosve"))) void v1minussve() {} -// CHECK-LABEL: @v1nosve() #12 +// CHECK-LABEL: @v1nosve() #11 __attribute__((target("cpu=neoverse-v1,no-sve"))) void v1nosve() {} -// CHECK-LABEL: @v1msve() #12 +// CHECK-LABEL: @v1msve() #11 __attribute__((target("cpu=neoverse-v1+nosve"))) void v1msve() {} -// CHECK-LABEL: @plussve() #13 +// CHECK-LABEL: @plussve() #12 __attribute__((target("+sve"))) void plussve() {} -// CHECK-LABEL: @plussveplussve2() #14 +// CHECK-LABEL: @plussveplussve2() #13 __attribute__((target("+sve+nosve2"))) void plussveplussve2() {} -// CHECK-LABEL: @plussveminusnosve2() #14 +// CHECK-LABEL: @plussveminusnosve2() #13 __attribute__((target("sve,no-sve2"))) void plussveminusnosve2() {} -// CHECK-LABEL: @plusfp16() #15 +// CHECK-LABEL: @plusfp16() #14 __attribute__((target("+fp16"))) void plusfp16() {} -// CHECK-LABEL: @all() #16 +// CHECK-LABEL: @all() #15 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2"))) void all() {} -// CHECK-LABEL: @allplusbranchprotection() #17 +// CHECK-LABEL: @allplusbranchprotection() #16 
__attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2,branch-protection=standard"))) void allplusbranchprotection() {} // These tests check that the user facing and internal llvm name are both accepted. -// CHECK-LABEL: @plusnoneon() #18 +// CHECK-LABEL: @plusnoneon() #17 __attribute__((target("+noneon"))) void plusnoneon() {} -// CHECK-LABEL: @plusnosimd() #18 +// CHECK-LABEL: @plusnosimd() #17 __attribute__((target("+nosimd"))) void plusnosimd() {} -// CHECK-LABEL: @noneon() #18 +// CHECK-LABEL: @noneon() #17 __attribute__((target("no-neon"))) void noneon() {} -// CHECK-LABEL: @nosimd() #18 +// CHECK-LABEL: @nosimd() #17 __attribute__((target("no-simd"))) void nosimd() {} // CHECK: attributes #0 = { {{.*}} "target-features"="+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #1 = { {{.*}} "target-features"="+sve,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #2 = { {{.*}} "target-features"="+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #4 = { {{.*}} "target-features"="+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #5 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+i8mm,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" } -// CHECK: attributes #6 = { {{.*}} "tune-cpu"="cortex-a710" } -// CHECK: attributes #7 = { {{.*}} "target-cpu"="generic" } -// CHECK: attributes #8 = { {{.*}} "tune-cpu"="generic" } -// CHECK: attributes #9 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs" "tune-cpu"="cortex-a710" } -// CHECK: attributes #10 = { {{.*}} "target-features"="+sve" "tune-cpu"="cortex-a710" } -// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2" } -// CHECK: attributes #12 = { {{.*}} "target-cpu"="neoverse-v1" 
"target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,-sve" } -// CHECK: attributes #13 = { {{.*}} "target-features"="+sve" } -// CHECK: attributes #14 = { {{.*}} "target-features"="+sve,-sve2" } -// CHECK: attributes #15 = { {{.*}} "target-features"="+fullfp16" } -// CHECK: attributes #16 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #17 = { {{.*}} "branch-target-enforcement"="true" {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #18 = { {{.*}} "target-features"="-neon" } +// CHECK: attributes #1 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+v8.1a,+v8.2a,+v8a" } +// CHECK: attributes #2 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } +// CHECK: attributes #3 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+i8mm,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" } +// CHECK: attributes #5 = { {{.*}} "tune-cpu"="cortex-a710" } +// CHECK: attributes #6 = { {{.*}} "target-cpu"="generic" } +// CHECK: attributes #7 = { {{.*}} "tune-cpu"="generic" } +// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs" "tune-cpu"="cortex-a710" } +// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" "tune-cpu"="cortex-a710" } +// CHECK: 
attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2" } +// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,-sve" } +// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } +// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-sve2" } +// CHECK: attributes #14 = { {{.*}} "target-features"="+fullfp16" } +// CHECK: attributes #15 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #16 = { {{.*}} "branch-target-enforcement"="true" {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #17 = { {{.*}} "target-features"="-neon" } diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -0,0 +1,347 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV + +int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; } +int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) 
ftc_def(void) { return 1; } +int __attribute__((target_clones("sha2", "default"))) ftc_dup1(void) { return 2; } +int __attribute__((target_clones("fp", "crc+dotprod"))) ftc_dup2(void) { return 3; } +int foo() { + return ftc() + ftc_def() + ftc_dup1() + ftc_dup2(); +} + +inline int __attribute__((target_clones("rng+simd", "rcpc+predres", "sve2-aes+wfxt"))) ftc_inline1(void) { return 1; } +inline int __attribute__((target_clones("fp16", "fcma+sve2-bitperm", "default"))) ftc_inline2(void); +inline int __attribute__((target_clones("bti", "sve+sb"))) ftc_inline3(void) { return 3; } + +int __attribute__((target_clones("default"))) ftc_direct(void) { return 4; } + +int __attribute__((target_clones("default"))) main() { + return ftc_inline1() + ftc_inline2() + ftc_inline3() + ftc_direct(); +} +inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))) ftc_inline2(void) { return 2; }; + + +// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } +// CHECK: @ftc.ifunc = weak_odr ifunc i32 (), ptr @ftc.resolver +// CHECK: @ftc_def.ifunc = weak_odr ifunc i32 (), ptr @ftc_def.resolver +// CHECK: @ftc_dup1.ifunc = weak_odr ifunc i32 (), ptr @ftc_dup1.resolver +// CHECK: @ftc_dup2.ifunc = weak_odr ifunc i32 (), ptr @ftc_dup2.resolver +// CHECK: @ftc_inline1.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline1.resolver +// CHECK: @ftc_inline2.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline2.resolver +// CHECK: @ftc_inline3.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline3.resolver + +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc._MaesMlse( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc._Msve2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: @ftc.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call 
void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 16512 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 16512 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc._MaesMlse +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc._Msve2 +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @ftc +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_def._Msha2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_def._Mmemtag2Msha2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_def( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @ftc_def.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_def._Mmemtag2Msha2 +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 
8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc_def._Msha2 +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @ftc_def +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_dup1._Msha2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_dup1( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @ftc_dup1.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_dup1._Msha2 +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @ftc_dup1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_dup2._Mfp( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_dup2._McrcMdotprod( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_dup2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @ftc_dup2.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040 +// CHECK-NEXT: [[TMP3:%.*]] = 
and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_dup2._McrcMdotprod +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 256 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 256 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc_dup2._Mfp +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @ftc_dup2 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 @ftc.ifunc() +// CHECK-NEXT: [[CALL1:%.*]] = call i32 @ftc_def.ifunc() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call i32 @ftc_dup1.ifunc() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2.ifunc() +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]] +// CHECK-NEXT: ret i32 [[ADD5]] +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_direct( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 4 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call i32 @ftc_inline1.ifunc() +// CHECK-NEXT: [[CALL1:%.*]] = call i32 @ftc_inline2.ifunc() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call i32 @ftc_inline3.ifunc() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct() +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]] +// 
CHECK-NEXT: ret i32 [[ADD5]] +// CHECK-LABEL: @ftc_inline1.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014535948435456 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014535948435456 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_inline1._Msve2-aesMwfxt +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 140737492549632 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 140737492549632 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc_inline1._MpredresMrcpc +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @ftc_inline1._MrngMsimd +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @ftc_inline1 +// CHECK-LABEL: @ftc_inline2.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label 
[[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc_inline2._Mfp16 +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @ftc_inline2 +// CHECK-LABEL: @ftc_inline3.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @ftc_inline3._Mbti +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @ftc_inline3 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline1._MrngMsimd( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline1._MpredresMrcpc( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline1( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline2._Mfp16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline3._Mbti( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline3._MsbMsve( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: @ftc_inline3( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @ftc( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 0 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @ftc_def( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 1 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @ftc_dup1( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 2 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @ftc_dup2( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 3 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @foo( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @ftc() +// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @ftc_def() +// CHECK-NOFMV-NEXT: 
[[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @ftc_dup1() +// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2() +// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]] +// CHECK-NOFMV-NEXT: ret i32 [[ADD5]] +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @ftc_direct( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 4 +// CHECK-NOFMV: Function Attrs: noinline nounwind optnone +// CHECK-NOFMV-LABEL: @main( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NOFMV-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @ftc_inline1() +// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @ftc_inline2() +// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @ftc_inline3() +// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct() +// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]] +// CHECK-NOFMV-NEXT: ret i32 [[ADD5]] + +// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" } +// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" } +// CHECK: attributes #2 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #3 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" } +// CHECK: attributes #4 = { noinline nounwind optnone 
"frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" } +// CHECK: attributes #5 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } +// CHECK: attributes #6 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" } +// CHECK: attributes #7 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" } +// CHECK: attributes #8 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" } +// CHECK: attributes #9 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" } +// CHECK: attributes #10 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" } +// CHECK: attributes #11 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" } +// CHECK: attributes #12 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" } +// CHECK: attributes #13 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" } + +// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="-fmv" } +// CHECK-NOFMV: attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-target-version.c @@ -0,0 +1,541 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +ls64 -target-feature +fullfp16 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV + +int __attribute__((target_version("rng+flagm+fp16fml"))) fmv(void) { return 1; } +int __attribute__((target_version("flagm2+sme-i16i64"))) fmv(void) { return 2; } +int __attribute__((target_version("lse+sha2"))) fmv(void) { return 3; } +int __attribute__((target_version("dotprod+ls64_accdata"))) fmv(void) { return 4; } +int __attribute__((target_version("fp16fml+memtag"))) fmv(void) { return 5; } +int __attribute__((target_version("fp+aes"))) fmv(void) { return 6; } +int __attribute__((target_version("crc+ls64_v"))) fmv(void) { return 7; } +int __attribute__((target_version("bti"))) fmv(void) { return 8; } +int __attribute__((target_version("sme2"))) fmv(void) { return 9; } +int __attribute__((target_version("default"))) fmv(void) { return 0; } +int __attribute__((target_version("ls64+simd"))) fmv_one(void) { return 1; } +int __attribute__((target_version("dpb"))) fmv_one(void) { return 2; } +int __attribute__((target_version("default"))) fmv_one(void) { return 0; } +int __attribute__((target_version("fp"))) fmv_two(void) { return 1; } +int __attribute__((target_version("simd"))) fmv_two(void) { return 2; } +int __attribute__((target_version("dgh"))) fmv_two(void) { return 3; } +int 
__attribute__((target_version("fp16+simd"))) fmv_two(void) { return 4; } +int __attribute__((target_version("default"))) fmv_two(void) { return 0; } +int foo() { + return fmv()+fmv_one()+fmv_two(); +} + +inline int __attribute__((target_version("sha1+pmull+f64mm"))) fmv_inline(void) { return 1; } +inline int __attribute__((target_version("fp16+fcma+sme+ fp16 "))) fmv_inline(void) { return 2; } +inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; } +inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; } +inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; } +inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; } +inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; } +inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; } +inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; } +inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; } +inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; } +inline int __attribute__((target_version("memtag3"))) fmv_inline(void) { return 11; } +inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; } + +__attribute__((target_version("ls64"))) int fmv_e(void); +int fmv_e(void) { return 20; } + +static __attribute__((target_version("sb"))) inline int fmv_d(void); +static __attribute__((target_version("default"))) inline int fmv_d(void); + +int __attribute__((target_version("default"))) fmv_default(void) { return 111; } +int fmv_default(void); + +void fmv_c(void); +void __attribute__((target_version("ssbs"))) fmv_c(void){}; +void __attribute__((target_version("default"))) fmv_c(void){}; + +int goo() { + fmv_inline(); + fmv_e(); + fmv_d(); + fmv_c(); + return 
fmv_default(); +} +static inline int __attribute__((target_version("sb"))) fmv_d(void) { return 0; } +static inline int __attribute__((target_version(" default "))) fmv_d(void) { return 1; } + +static void func(void) {} +inline __attribute__((target_version("default"))) void recb(void) { func(); } +inline __attribute__((target_version("default"))) void reca(void) { recb(); } +void recur(void) { reca(); } + +int __attribute__((target_version("default"))) main(void) { + recur(); + return goo(); +} + +typedef int (*Fptr)(); +void f(Fptr); +int hoo(void) { + f(fmv); + Fptr fp1 = &fmv; + Fptr fp2 = fmv; + return fp1() + fp2(); +} + +// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } +// CHECK: @fmv.ifunc = weak_odr ifunc i32 (), ptr @fmv.resolver +// CHECK: @fmv_one.ifunc = weak_odr ifunc i32 (), ptr @fmv_one.resolver +// CHECK: @fmv_two.ifunc = weak_odr ifunc i32 (), ptr @fmv_two.resolver +// CHECK: @fmv_inline.ifunc = weak_odr ifunc i32 (), ptr @fmv_inline.resolver +// CHECK: @fmv_e.ifunc = weak_odr ifunc i32 (), ptr @fmv_e.resolver +// CHECK: @fmv_d.ifunc = internal ifunc i32 (), ptr @fmv_d.resolver +// CHECK: @fmv_c.ifunc = weak_odr ifunc void (), ptr @fmv_c.resolver + +// CHECK-LABEL: @fmv._MrngMflagmMfp16fml( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @fmv._Mflagm2Msme-i16i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @fmv._MlseMsha2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @fmv._MdotprodMls64_accdata( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 4 +// CHECK-LABEL: @fmv._Mfp16fmlMmemtag( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 5 +// CHECK-LABEL: @fmv._MfpMaes( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 6 +// CHECK-LABEL: @fmv._McrcMls64_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 7 +// CHECK-LABEL: @fmv._Mbti( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 8 +// CHECK-LABEL: @fmv._Msme2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 9 +// CHECK-LABEL: 
@fmv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: @fmv_one._Mls64Msimd( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @fmv_one._Mdpb( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @fmv_one( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: @fmv_two._Mfp( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @fmv_two._Msimd( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @fmv_two._Mdgh( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @fmv_two._Mfp16Msimd( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 4 +// CHECK-LABEL: @fmv_two( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv.ifunc() +// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_one.ifunc() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_two.ifunc() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: ret i32 [[ADD3]] +// CHECK-LABEL: @fmv.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @fmv._MrngMflagmMfp16fml +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927940 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72057594037927940 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] 
+// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @fmv._Mflagm2Msme-i16i64 +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @fmv._MdotprodMls64_accdata +// CHECK: resolver_else4: +// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1024 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] +// CHECK: resolver_return5: +// CHECK-NEXT: ret ptr @fmv._McrcMls64_v +// CHECK: resolver_else6: +// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 8796093022216 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 8796093022216 +// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] +// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] +// CHECK: resolver_return7: +// CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag +// CHECK: resolver_else8: +// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 16384 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 16384 +// CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] +// CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] +// CHECK: resolver_return9: +// CHECK-NEXT: ret ptr @fmv._MfpMaes +// CHECK: resolver_else10: +// CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: 
[[TMP25:%.*]] = and i64 [[TMP24]], 4224 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4224 +// CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] +// CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] +// CHECK: resolver_return11: +// CHECK-NEXT: ret ptr @fmv._MlseMsha2 +// CHECK: resolver_else12: +// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144115188075855872 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144115188075855872 +// CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] +// CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] +// CHECK: resolver_return13: +// CHECK-NEXT: ret ptr @fmv._Msme2 +// CHECK: resolver_else14: +// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 1125899906842624 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 1125899906842624 +// CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]] +// CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] +// CHECK: resolver_return15: +// CHECK-NEXT: ret ptr @fmv._Mbti +// CHECK: resolver_else16: +// CHECK-NEXT: ret ptr @fmv +// CHECK-LABEL: @fmv_one.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: ret ptr @fmv_one._Mls64Msimd +// CHECK-LABEL: @fmv_two.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd +// CHECK-LABEL: @fmv_e( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 20 +// CHECK-LABEL: @fmv_default( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 111 +// CHECK-LABEL: @fmv_c._Mssbs( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// CHECK-LABEL: @fmv_c( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// CHECK-LABEL: @goo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv_inline.ifunc() +// CHECK-NEXT: [[CALL1:%.*]] = call i32 
@fmv_e.ifunc() +// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_d.ifunc() +// CHECK-NEXT: call void @fmv_c.ifunc() +// CHECK-NEXT: [[CALL3:%.*]] = call i32 @fmv_default() +// CHECK-NEXT: ret i32 [[CALL3]] +// CHECK-LABEL: @fmv_inline.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398048608256 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398048608256 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @fmv_inline._Mfp16MfcmaMsmeMfp16 +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm +// CHECK: resolver_else4: +// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] 
+// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] +// CHECK: resolver_return5: +// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm +// CHECK: resolver_else6: +// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 19791209299968 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 19791209299968 +// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] +// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] +// CHECK: resolver_return7: +// CHECK-NEXT: ret ptr @fmv_inline._Msve2-sm4Mmemtag2 +// CHECK: resolver_else8: +// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1236950581248 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 1236950581248 +// CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] +// CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] +// CHECK: resolver_return9: +// CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 +// CHECK: resolver_else10: +// CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4295098368 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4295098368 +// CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] +// CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] +// CHECK: resolver_return11: +// CHECK-NEXT: ret ptr @fmv_inline._MditMsve-ebf16 +// CHECK: resolver_else12: +// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 3221225472 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 3221225472 +// CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] +// CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] +// CHECK: 
resolver_return13: +// CHECK-NEXT: ret ptr @fmv_inline._MsveMsve-bf16 +// CHECK: resolver_else14: +// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 20971520 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 20971520 +// CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]] +// CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] +// CHECK: resolver_return15: +// CHECK-NEXT: ret ptr @fmv_inline._MrcpcMfrintts +// CHECK: resolver_else16: +// CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 8650752 +// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 8650752 +// CHECK-NEXT: [[TMP39:%.*]] = and i1 true, [[TMP38]] +// CHECK-NEXT: br i1 [[TMP39]], label [[RESOLVER_RETURN17:%.*]], label [[RESOLVER_ELSE18:%.*]] +// CHECK: resolver_return17: +// CHECK-NEXT: ret ptr @fmv_inline._MdpbMrcpc2 +// CHECK: resolver_else18: +// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 1572864 +// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 1572864 +// CHECK-NEXT: [[TMP43:%.*]] = and i1 true, [[TMP42]] +// CHECK-NEXT: br i1 [[TMP43]], label [[RESOLVER_RETURN19:%.*]], label [[RESOLVER_ELSE20:%.*]] +// CHECK: resolver_return19: +// CHECK-NEXT: ret ptr @fmv_inline._Mdpb2Mjscvt +// CHECK: resolver_else20: +// CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 35184372088832 +// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 35184372088832 +// CHECK-NEXT: [[TMP47:%.*]] = and i1 true, [[TMP46]] +// CHECK-NEXT: br i1 [[TMP47]], label [[RESOLVER_RETURN21:%.*]], label [[RESOLVER_ELSE22:%.*]] +// CHECK: resolver_return21: +// CHECK-NEXT: ret ptr @fmv_inline._Mmemtag3 +// CHECK: resolver_else22: +// CHECK-NEXT: ret ptr @fmv_inline +// 
CHECK-LABEL: @fmv_e.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: ret ptr @fmv_e._Mls64 +// CHECK-LABEL: @fmv_d.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70368744177664 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70368744177664 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @fmv_d._Msb +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @fmv_d +// CHECK-LABEL: @fmv_c.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 281474976710656 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 281474976710656 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @fmv_c._Mssbs +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @fmv_c +// CHECK-LABEL: @recur( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @reca() +// CHECK-NEXT: ret void +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: call void @recur() +// CHECK-NEXT: [[CALL:%.*]] = call i32 @goo() +// CHECK-NEXT: ret i32 [[CALL]] +// CHECK-LABEL: @hoo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FP1:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[FP2:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: call void @f(ptr noundef @fmv.ifunc) +// CHECK-NEXT: store ptr @fmv.ifunc, ptr [[FP1]], align 8 +// CHECK-NEXT: store ptr @fmv.ifunc, ptr [[FP2]], align 8 +// 
CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP1]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP0]]() +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP2]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call i32 [[TMP1]]() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: ret i32 [[ADD]] +// CHECK-LABEL: @fmv_inline._Msha1MpmullMf64mm( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @fmv_inline._Mfp16MfcmaMsmeMfp16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @fmv_inline._Msha3Mi8mmMf32mm( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 12 +// CHECK-LABEL: @fmv_inline._MditMsve-ebf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 8 +// CHECK-LABEL: @fmv_inline._MdpbMrcpc2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 6 +// CHECK-LABEL: @fmv_inline._Mdpb2Mjscvt( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 7 +// CHECK-LABEL: @fmv_inline._MrcpcMfrintts( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @fmv_inline._MsveMsve-bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 4 +// CHECK-LABEL: @fmv_inline._Msve2-aesMsve2-sha3( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 5 +// CHECK-LABEL: @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 9 +// CHECK-LABEL: @fmv_inline._Msve2-sm4Mmemtag2( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 10 +// CHECK-LABEL: @fmv_inline._Mmemtag3( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 11 +// CHECK-LABEL: @fmv_inline( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @fmv_d._Msb( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: define internal i32 @fmv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-NOFMV-LABEL: @fmv( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 0 +// CHECK-NOFMV-LABEL: @fmv_one( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 0 +// CHECK-NOFMV-LABEL: @fmv_two( +// CHECK-NOFMV-NEXT: entry: +// 
CHECK-NOFMV-NEXT: ret i32 0 +// CHECK-NOFMV-LABEL: @foo( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @fmv() +// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @fmv_one() +// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @fmv_two() +// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NOFMV-NEXT: ret i32 [[ADD3]] +// CHECK-NOFMV-LABEL: @fmv_e( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 20 +// CHECK-NOFMV-LABEL: @fmv_default( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 111 +// CHECK-NOFMV-LABEL: @fmv_c( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret void +// CHECK-NOFMV-LABEL: @goo( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @fmv_inline() +// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @fmv_e() +// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @fmv_d() +// CHECK-NOFMV-NEXT: call void @fmv_c() +// CHECK-NOFMV-NEXT: [[CALL3:%.*]] = call i32 @fmv_default() +// CHECK-NOFMV-NEXT: ret i32 [[CALL3]] +// CHECK-NOFMV-LABEL: define internal i32 @fmv_d( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: ret i32 1 +// CHECK-NOFMV-LABEL: @recur( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: call void @reca() +// CHECK-NOFMV-NEXT: ret void +// CHECK-NOFMV-LABEL: @main( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NOFMV-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NOFMV-NEXT: call void @recur() +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @goo() +// CHECK-NOFMV-NEXT: ret i32 [[CALL]] +// CHECK-NOFMV-LABEL: @hoo( +// CHECK-NOFMV-NEXT: entry: +// CHECK-NOFMV-NEXT: [[FP1:%.*]] = alloca ptr, align 8 +// CHECK-NOFMV-NEXT: [[FP2:%.*]] = alloca ptr, align 8 +// CHECK-NOFMV-NEXT: call void @f(ptr noundef @fmv) +// CHECK-NOFMV-NEXT: store ptr @fmv, ptr [[FP1]], align 8 +// CHECK-NOFMV-NEXT: store ptr @fmv, ptr [[FP2]], align 8 +// 
CHECK-NOFMV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP1]], align 8 +// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 [[TMP0]]() +// CHECK-NOFMV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP2]], align 8 +// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 [[TMP1]]() +// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NOFMV-NEXT: ret i32 [[ADD]] + +// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp-armv8,+fp16fml,+fullfp16,+ls64,+neon,+rand" } +// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+altnzcv,+bf16,+flagm,+fullfp16,+ls64,+sme,+sme-i16i64" } +// CHECK: attributes #2 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+lse,+neon,+sha2" } +// CHECK: attributes #3 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+fullfp16,+ls64,+neon" } +// CHECK: attributes #4 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fp16fml,+fullfp16,+ls64,+neon" } +// CHECK: attributes #5 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon" } +// CHECK: attributes #6 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fullfp16,+ls64" } +// CHECK: attributes #7 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fullfp16,+ls64" } +// CHECK: attributes #8 = { noinline nounwind optnone 
"frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fullfp16,+ls64,+sme,+sme2" } +// CHECK: attributes #9 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64" } +// CHECK: attributes #10 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+fullfp16,+ls64" } +// CHECK: attributes #11 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64" } +// CHECK: attributes #12 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fp-armv8,+fullfp16,+ls64,+neon,+sve" } +// CHECK: attributes #13 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fp-armv8,+fullfp16,+ls64,+neon,+sme" } +// CHECK: attributes #14 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fp-armv8,+fullfp16,+i8mm,+ls64,+neon,+sha2,+sha3,+sve" } +// CHECK: attributes #15 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fp-armv8,+fullfp16,+ls64,+neon,+sve" } +// CHECK: attributes #16 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+fullfp16,+ls64,+rcpc" } +// CHECK: attributes #17 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+fp-armv8,+fullfp16,+jsconv,+ls64,+neon" } +// CHECK: attributes #18 = { noinline nounwind optnone "frame-pointer"="none" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint,+fullfp16,+ls64,+rcpc" } +// CHECK: attributes #19 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+ls64,+neon,+sve" } +// CHECK: attributes #20 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" } +// CHECK: attributes #21 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" } +// CHECK: attributes #22 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" } +// CHECK: attributes #23 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte" } +// CHECK: attributes #24 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" } + +// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } +// CHECK-NOFMV: attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } diff --git a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp @@ -0,0 +1,195 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: 
--check-globals --include-generated-funcs +// RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +int __attribute__((target_clones("ls64_v+fp16", "default"))) foo_ovl(int) { return 1; } +int __attribute__((target_clones("ls64_accdata+ls64"))) foo_ovl(void) { return 2; } + +int bar() { + return foo_ovl(1) + foo_ovl(); +} + +template <typename T, typename U> struct MyClass { + int __attribute__((target_clones("frintts", "ssbs+sme-f64f64"))) foo_tml() { return 1; } +}; + +template <typename T> struct MyClass<int, T> { + int __attribute__((target_clones("frintts", "ssbs+sme-f64f64"))) foo_tml() { return 2; } +}; + +template <typename T> struct MyClass<float, T> { + int foo_tml() { return 3; } +}; + +template <> struct MyClass<double, float> { + int __attribute__((target_clones("default"))) foo_tml() { return 4; } +}; + +void run_foo_tml() { + MyClass<short, short> Mc1; + Mc1.foo_tml(); + MyClass<int, short> Mc2; + Mc2.foo_tml(); + MyClass<float, short> Mc3; + Mc3.foo_tml(); + MyClass<double, float> Mc4; + Mc4.foo_tml(); +} + + +// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } +// CHECK: @_Z7foo_ovli.ifunc = weak_odr ifunc i32 (i32), ptr @_Z7foo_ovli.resolver +// CHECK: @_Z7foo_ovlv.ifunc = weak_odr ifunc i32 (), ptr @_Z7foo_ovlv.resolver +// CHECK: @_ZN7MyClassIssE7foo_tmlEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN7MyClassIssE7foo_tmlEv.resolver +// CHECK: @_ZN7MyClassIisE7foo_tmlEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN7MyClassIisE7foo_tmlEv.resolver + +// CHECK-LABEL: @_Z7foo_ovli._Mfp16Mls64_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_Z7foo_ovli( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_Z7foo_ovli.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr
@__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4503599627436032 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4503599627436032 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z7foo_ovli._Mfp16Mls64_v +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @_Z7foo_ovli +// CHECK-LABEL: @_Z7foo_ovlv._Mls64Mls64_accdata( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_Z7foo_ovlv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_Z7foo_ovlv.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11258999068426240 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11258999068426240 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z7foo_ovlv._Mls64Mls64_accdata +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @_Z7foo_ovlv +// CHECK-LABEL: @_Z3barv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7foo_ovli.ifunc(i32 noundef 1) +// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z7foo_ovlv.ifunc() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: ret i32 [[ADD]] +// CHECK-LABEL: @_Z11run_foo_tmlv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[MC1:%.*]] = alloca [[STRUCT_MYCLASS:%.*]], align 1 +// CHECK-NEXT: [[MC2:%.*]] = alloca [[STRUCT_MYCLASS_0:%.*]], align 1 +// CHECK-NEXT: [[MC3:%.*]] = alloca [[STRUCT_MYCLASS_1:%.*]], align 1 +// CHECK-NEXT: [[MC4:%.*]] = alloca [[STRUCT_MYCLASS_2:%.*]], align 1 +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN7MyClassIssE7foo_tmlEv.ifunc(ptr 
noundef nonnull align 1 dereferenceable(1) [[MC1]]) +// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN7MyClassIisE7foo_tmlEv.ifunc(ptr noundef nonnull align 1 dereferenceable(1) [[MC2]]) +// CHECK-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN7MyClassIfsE7foo_tmlEv(ptr noundef nonnull align 1 dereferenceable(1) [[MC3]]) +// CHECK-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN7MyClassIdfE7foo_tmlEv(ptr noundef nonnull align 1 dereferenceable(1) [[MC4]]) +// CHECK-NEXT: ret void +// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Mfrintts +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv +// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624 +// CHECK-NEXT: 
[[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Mfrintts +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv +// CHECK-LABEL: @_ZN7MyClassIfsE7foo_tmlEv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @_ZN7MyClassIdfE7foo_tmlEv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 4 +// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Mfrintts( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Mfrintts( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 2 + +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" } +// CHECK: attributes #1 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #2 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ls64" } +// CHECK: attributes #3 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint" } +// CHECK: attributes #4 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+sme,+sme-f64f64" } diff --git a/clang/test/CodeGenCXX/attr-target-version.cpp b/clang/test/CodeGenCXX/attr-target-version.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/attr-target-version.cpp @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs +// RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +int __attribute__((target_version("sme-f64f64+bf16"))) foo(int) { return 1; } +int __attribute__((target_version("default"))) foo(int) { return 2; } +int __attribute__((target_version("sm4+ebf16"))) foo(void) { return 3; } +int __attribute__((target_version("default"))) foo(void) { return 4; } + +struct MyClass { + int __attribute__((target_version("dotprod"))) goo(int); + int __attribute__((target_version("crc"))) goo(int); + int __attribute__((target_version("default"))) goo(int); +}; + +int __attribute__((target_version("default"))) MyClass::goo(int) { return 1; } +int __attribute__((target_version("crc"))) MyClass::goo(int) { return 2; } +int __attribute__((target_version("dotprod"))) MyClass::goo(int) { return 3; } + +int bar() { + MyClass m; + return m.goo(1) + foo(1) + foo(); +} + + +// CHECK: @__aarch64_cpu_features = external dso_local global { i64 } +// CHECK: @_ZN7MyClass3gooEi.ifunc = weak_odr ifunc i32 (ptr, i32), ptr @_ZN7MyClass3gooEi.resolver +// CHECK: @_Z3fooi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z3fooi.resolver +// CHECK: @_Z3foov.ifunc = weak_odr ifunc i32 (), ptr @_Z3foov.resolver + +// CHECK-LABEL: @_Z3fooi._Msme-f64f64Mbf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_Z3fooi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// 
CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_Z3foov._Msm4Mebf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 4 +// CHECK-LABEL: @_ZN7MyClass3gooEi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 1 +// CHECK-LABEL: @_ZN7MyClass3gooEi._Mcrc( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 2 +// CHECK-LABEL: @_ZN7MyClass3gooEi._Mdotprod( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret i32 3 +// CHECK-LABEL: @_Z3barv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[M:%.*]] = alloca [[STRUCT_MYCLASS:%.*]], align 1 +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN7MyClass3gooEi.ifunc(ptr noundef nonnull align 1 dereferenceable(1) [[M]], i32 noundef 1) +// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3fooi.ifunc(i32 noundef 1) +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call noundef i32 @_Z3foov.ifunc() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: ret i32 [[ADD3]] +// CHECK-LABEL: @_ZN7MyClass3gooEi.resolver( 
+// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1024 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1024 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mcrc +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mdotprod +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi +// CHECK-LABEL: @_Z3fooi.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36028797153181696 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36028797153181696 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z3fooi._Msme-f64f64Mbf16 +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @_Z3fooi +// CHECK-LABEL: @_Z3foov.resolver( +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 268435488 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435488 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, 
[[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z3foov._Msm4Mebf16 +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @_Z3foov + +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme-f64f64" } +// CHECK: attributes #1 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #2 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+neon,+sm4" } +// CHECK: attributes #3 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc" } +// CHECK: attributes #4 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon" } diff --git a/clang/test/Driver/aarch64-features.c b/clang/test/Driver/aarch64-features.c --- a/clang/test/Driver/aarch64-features.c +++ b/clang/test/Driver/aarch64-features.c @@ -6,6 +6,22 @@ // The AArch64 PCS states that chars should be unsigned. // CHECK: fno-signed-char +// Check Function Multi Versioning option and rtlib dependency. 
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \ +// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV %s + +// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt -mno-fmv \ +// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s + +// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \ +// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s + +// RUN: %clang -target arm64-unknown-linux -rtlib=libgcc \ +// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s + +// CHECK-FMV-OFF: "-target-feature" "-fmv" +// CHECK-FMV-NOT: "-target-feature" "-fmv" + // Check for AArch64 out-of-line atomics default settings. // RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \ // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -183,6 +183,7 @@ // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local) // CHECK-NEXT: Target (SubjectMatchRule_function) // CHECK-NEXT: TargetClones (SubjectMatchRule_function) +// CHECK-NEXT: TargetVersion (SubjectMatchRule_function) // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member) // CHECK-NEXT: TrivialABI (SubjectMatchRule_record) // CHECK-NEXT: Uninitialized (SubjectMatchRule_variable_is_local) diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -150,8 +150,13 @@ // RUN: %clang -target aarch64 -mtune=cyclone -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MTUNE-CYCLONE %s // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve -x c -E -dM %s -o - | 
FileCheck --check-prefix=CHECK-SVE %s +// CHECK-SVE: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1 +// CHECK-SVE: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1 // CHECK-SVE: __ARM_FEATURE_SVE 1 // CHECK-SVE: __ARM_FEATURE_SVE_VECTOR_OPERATORS 2 +// CHECK-SVE: __ARM_NEON 1 +// CHECK-SVE: __ARM_NEON_FP 0xE +// CHECK-SVE: __ARM_NEON_SVE_BRIDGE 1 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve+bf16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-BF16 %s // CHECK-SVE-BF16: __ARM_FEATURE_BF16_SCALAR_ARITHMETIC 1 @@ -190,7 +195,18 @@ // RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2 %s // RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2 %s +// CHECK-SVE2: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1 +// CHECK-SVE2: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1 +// CHECK-SVE2: __ARM_FEATURE_SVE 1 // CHECK-SVE2: __ARM_FEATURE_SVE2 1 +// CHECK-SVE2: __ARM_NEON 1 +// CHECK-SVE2: __ARM_NEON_FP 0xE +// CHECK-SVE2: __ARM_NEON_SVE_BRIDGE 1 + +// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+nosimd -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NONEON %s +// CHECK-NONEON-NOT: __ARM_FEATURE_SVE 1 +// CHECK-NONEON-NOT: __ARM_NEON 1 + // RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2-aes -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2AES %s // CHECK-SVE2AES: __ARM_FEATURE_SVE2_AES 1 // RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2-sha3 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2SHA3 %s @@ -202,6 +218,8 @@ // RUN: %clang -target aarch64-none-linux-gnu -march=armv8.2a+dotprod -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DOTPROD %s // CHECK-DOTPROD: __ARM_FEATURE_DOTPROD 1 +// CHECK-DOTPROD: __ARM_NEON 1 +// CHECK-DOTPROD: __ARM_NEON_FP 0xE // On ARMv8.2-A and above, +fp16fml implies +fp16. // On ARMv8.4-A and above, +fp16 implies +fp16fml. 
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -112,6 +112,7 @@ // AARCH64-NEXT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 // AARCH64_CXX-NEXT: #define __GLIBCXX_BITSIZE_INT_N_0 128 // AARCH64_CXX-NEXT: #define __GLIBCXX_TYPE_INT_N_0 __int128 +// AARCH64-NEXT: #define __HAVE_FUNCTION_MULTI_VERSIONING 1 // AARCH64-NEXT: #define __INT16_C_SUFFIX__ // AARCH64-NEXT: #define __INT16_FMTd__ "hd" // AARCH64-NEXT: #define __INT16_FMTi__ "hi" diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s + +void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3"))) no_def(void); + +// expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} +void __attribute__((target_clones("default+sha3"))) warn1(void); +// expected-warning@+1 {{version list contains entries that don't impact code generation}} +void __attribute__((target_clones("ssbs+ls64"))) warn2(void); + +// expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void); + +int redecl(void); +int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; } + +int __attribute__((target_clones("jscvt+fcma", "rcpc", "default"))) redecl2(void); +int __attribute__((target_clones("jscvt+fcma", "rcpc"))) redecl2(void) { return 1; } + +int __attribute__((target_clones("sve+dotprod"))) redecl3(void); +int redecl3(void); + +int __attribute__((target_clones("rng", "fp16fml+fp", "default"))) 
redecl4(void); +// expected-error@+3 {{'target_clones' attribute does not match previous declaration}} +// expected-note@-2 {{previous declaration is here}} +// expected-warning@+1 {{version list contains entries that don't impact code generation}} +int __attribute__((target_clones("dgh+memtag+rpres+ls64_v", "ebf16+dpb+sha1", "default"))) redecl4(void) { return 1; } + +int __attribute__((target_version("flagm2"))) redef2(void) { return 1; } +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("flagm2", "default"))) redef2(void) { return 1; } + +int __attribute__((target_clones("f32mm", "f64mm", "sha1+fp"))) redef3(void) { return 1; } +// expected-error@+2 {{'target_clones' attribute does not match previous declaration}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("f32mm", "sha1+fp", "f64mm"))) redef3(void) { return 1; } + +int __attribute__((target_clones("rdm+lse+rdm", "lse+rdm"))) dup1(void) { return 1; } +// expected-warning@+1 {{version list contains duplicate entries}} +int __attribute__((target_clones("rdm+lse+rdm", "rdm+lse+rdm"))) dup2(void) { return 2; } +// expected-warning@+1 {{version list contains duplicate entries}} +int __attribute__((target_clones("rcpc2+sve2-pmull128", "rcpc2+sve2-pmull128"))) dup3(void) { return 3; } +// expected-warning@+1 {{version list contains duplicate entries}} +void __attribute__((target_clones("sha3", "default", "default"))) dup4(void); +// expected-warning@+2 {{version list contains duplicate entries}} +// expected-warning@+1 {{version list contains duplicate entries}} +int __attribute__((target_clones("fp", "fp", "crc+dotprod", "dotprod+crc"))) dup5(void) { return 5; } + +// expected-warning@+1 {{version list contains duplicate entries}} +int __attribute__((target_clones("fp16+memtag", "memtag+fp16"))) dup6(void) { return 6; } +int 
__attribute__((target_clones("simd+ssbs2", "simd+dpb2"))) dup7(void) { return 7; } + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(""))) empty_target_1(void); +// expected-warning@+3 {{unsupported 'default' in the 'target_clones' attribute string;}} +// expected-warning@+2 {{unsupported 'default' in the 'target_clones' attribute string;}} +// expected-warning@+1 {{version list contains entries that don't impact code generation}} +void __attribute__((target_clones("default+default"))) empty_target_2(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("+sve2"))) +empty_target_3(void); +// expected-warning@+1 {{unsupported 'bs' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("sb+bs"))) +empty_target_4(void); + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default", ""))) +empty_target_5(void); + +// expected-warning@+1 {{version list contains duplicate entries}} +void __attribute__((target_clones("sve2-bitperm", "sve2-bitperm"))) +dupe_normal(void); + +void __attribute__((target_clones("default"), target_clones("memtag3+bti"))) dupe_normal2(void); + +int mv_after_use(void); +int useage(void) { + return mv_after_use(); +} +// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} +int __attribute__((target_clones("sve2-sha3+ssbs2", "sm4"))) mv_after_use(void) { return 1; } +// expected-error@+1 {{'main' cannot be a multiversioned function}} +int __attribute__((target_clones("sve-i8mm"))) main() { return 1; } diff --git a/clang/test/Sema/attr-target-version.c b/clang/test/Sema/attr-target-version.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/attr-target-version.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s + +int 
__attribute__((target_version("crc"))) dup(void) { return 3; } +int __attribute__((target_version("default"))) dup(void) { return 1; } +//expected-error@+2 {{redefinition of 'dup'}} +//expected-note@-2 {{previous definition is here}} +int __attribute__((target_version("default"))) dup(void) { return 2; } + +int __attribute__((target_version("default"))) dup1(void) { return 1; } +//expected-error@+2 {{redefinition of 'dup1'}} +//expected-note@-2 {{previous definition is here}} +int dup1(void) { return 2; } + +int __attribute__((target_version("aes"))) foo(void) { return 1; } +//expected-note@+1 {{previous definition is here}} +int __attribute__((target_version("default"))) foo(void) { return 2; } + +//expected-note@+1 {{previous declaration is here}} +int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; } + +//expected-error@+1 {{multiversioning attributes cannot be combined}} +int __attribute__((target("dotprod"))) foo(void) { return -1; } + +//expected-error@+1 {{redefinition of 'foo'}} +int foo(void) { return 2; } + +//expected-note@+1 {{previous declaration is here}} +void __attribute__((target_version("bti+flagm2"))) one(void) {} +//expected-error@+1 {{multiversioned function redeclarations require identical target attributes}} +void __attribute__((target_version("flagm2+bti"))) one(void) {} + +void __attribute__((target_version("ssbs+sha1"))) two(void) {} +void __attribute__((target_version("ssbs+fp16fml"))) two(void) {} + +//expected-error@+1 {{'main' cannot be a multiversioned function}} +int __attribute__((target_version("lse"))) main(void) { return 1; } + +//expected-note@+1 {{previous definition is here}} +int hoo(void) { return 1; } +//expected-note@-1 {{previous definition is here}} +//expected-warning@+2 {{attribute declaration must precede definition}} +//expected-error@+1 {{redefinition of 'hoo'}} +int __attribute__((target_version("dit"))) hoo(void) { return 2; } + +//expected-warning@+1 {{unsupported '' in the 'target_version' 
attribute string; 'target_version' attribute ignored}} +int __attribute__((target_version(""))) unsup1(void) { return 1; } +//expected-warning@+1 {{unsupported 'crc32' in the 'target_version' attribute string; 'target_version' attribute ignored}} +void __attribute__((target_version("crc32"))) unsup2(void) {} + +void __attribute__((target_version("default+fp16"))) koo(void) {} +void __attribute__((target_version("default+default+default"))) loo(void) {} +void __attribute__((target_version("rdm+rng+crc"))) redef(void) {} +//expected-error@+2 {{redefinition of 'redef'}} +//expected-note@-2 {{previous definition is here}} +void __attribute__((target_version("rdm+rng+crc"))) redef(void) {} + +int __attribute__((target_version("sm4"))) def(void); +void __attribute__((target_version("dit"))) nodef(void); +void __attribute__((target_version("ls64"))) nodef(void); +void __attribute__((target_version("aes"))) ovl(void); +void __attribute__((target_version("default"))) ovl(void); +int bar() { + // expected-error@+2 {{reference to overloaded function could not be resolved; did you mean to call it?}} + // expected-note@-3 {{possible target for call}} + ovl++; + // expected-error@+1 {{no matching function for call to 'nodef'}} + nodef(); + return def(); +} +// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} +int __attribute__((target_version("sha1"))) def(void) { return 1; } + +int __attribute__((target_version("sve"))) prot(); +// expected-error@-1 {{multiversioned function must have a prototype}} +// expected-note@+1 {{function multiversioning caused by this declaration}} +int __attribute__((target_version("fcma"))) prot(); + +int __attribute__((target_version("pmull"))) rtype(int); +// expected-error@+1 {{multiversioned function declaration has a different return type}} +float __attribute__((target_version("rdm"))) rtype(int); + +int __attribute__((target_version("sha2"))) combine(void) { return 1; } +// expected-error@+1 
{{multiversioned function declaration has a different calling convention}} +int __attribute__((aarch64_vector_pcs, target_version("sha3"))) combine(void) { return 2; } diff --git a/clang/test/SemaCXX/attr-target-clones-aarch64.cpp b/clang/test/SemaCXX/attr-target-clones-aarch64.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaCXX/attr-target-clones-aarch64.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14 + +void lambda() { + // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}} + auto x = []() __attribute__((target_clones("default"))){}; + x(); + // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}} + auto y = []() __attribute__((target_clones("fp16+lse", "rdm"))){}; + y(); +} diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaCXX/attr-target-version.cpp @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14 +void __attribute__((target_version("default"))) wrong_tv(void); +//expected-warning@+1 {{unsupported 'vmull' in the 'target_version' attribute string; 'target_version' attribute ignored}} +void __attribute__((target_version("vmull"))) wrong_tv(void); + +void __attribute__((target_version("dotprod"))) no_def(void); +void __attribute__((target_version("rdm+fp"))) no_def(void); + +// expected-error@+1 {{no matching function for call to 'no_def'}} +void foo(void) { no_def(); } + +constexpr int __attribute__((target_version("sve2"))) diff_const(void) { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}} +int __attribute__((target_version("sve2-bitperm"))) diff_const(void); + +int __attribute__((target_version("fp"))) diff_const1(void) { 
return 1; } +//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}} +constexpr int __attribute__((target_version("sve2-aes"))) diff_const1(void); + +static int __attribute__((target_version("sve2-sha3"))) diff_link(void) { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different linkage}} +int __attribute__((target_version("dpb"))) diff_link(void); + +int __attribute__((target_version("memtag"))) diff_link1(void) { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different linkage}} +static int __attribute__((target_version("bti"))) diff_link1(void); + +int __attribute__((target_version("flagm2"))) diff_link2(void) { return 1; } +extern int __attribute__((target_version("flagm"))) diff_link2(void); + +namespace { +static int __attribute__((target_version("memtag3"))) diff_link2(void) { return 2; } +int __attribute__((target_version("sve2-bitperm"))) diff_link2(void) { return 1; } +} // namespace + +inline int __attribute__((target_version("sme"))) diff_inline(void) { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different inline specification}} +int __attribute__((target_version("fp16"))) diff_inline(void) { return 2; } + +inline int __attribute__((target_version("sme"))) diff_inline1(void) { return 1; } +int __attribute__((target_version("default"))) diff_inline1(void) { return 2; } + +int __attribute__((target_version("fcma"))) diff_type1(void) { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different return type}} +double __attribute__((target_version("rcpc"))) diff_type1(void); + +auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; } +//expected-error@+1 {{multiversioned function declaration has a different return type}} +auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; } + +int __attribute__((target_version("fp16fml"))) 
diff_type3(void) noexcept(false) { return 1; } +//expected-error@+2 {{exception specification in declaration does not match previous declaration}} +//expected-note@-2 {{previous declaration is here}} +int __attribute__((target_version("sve2-sha3"))) diff_type3(void) noexcept(true) { return 2; } + +template int __attribute__((target_version("default"))) temp(T) { return 1; } + +template int __attribute__((target_version("simd"))) temp1(T) { return 1; } +// expected-error@+1 {{attribute 'target_version' multiversioned functions do not yet support function templates}} +template int __attribute__((target_version("sha3"))) temp1(T) { return 2; } + +extern "C" { +int __attribute__((target_version("aes"))) extc(void) { return 1; } +} +//expected-error@+1 {{multiversioned function declaration has a different language linkage}} +int __attribute__((target_version("lse"))) extc(void) { return 1; } + +auto __attribute__((target_version("default"))) ret1(void) { return 1; } +auto __attribute__((target_version("dpb"))) ret2(void) { return 1; } +auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; } + +class Cls { + __attribute__((target_version("rng"))) Cls(); + __attribute__((target_version("sve-i8mm"))) ~Cls(); + + Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default; + Cls &__attribute__((target_version("ssbs"))) operator=(Cls &&) = delete; + + virtual void __attribute__((target_version("default"))) vfunc(); + virtual void __attribute__((target_version("sm4"))) vfunc1(); +}; + +__attribute__((target_version("sha3"))) void Decl(); +namespace Nms { +using ::Decl; +// expected-error@+3 {{declaration conflicts with target of using declaration already in scope}} +// expected-note@-4 {{target of using declaration}} +// expected-note@-3 {{using declaration}} +__attribute__((target_version("jscvt"))) void Decl(); +} // namespace Nms + +class Out { + int __attribute__((target_version("bti"))) func(void); + int 
__attribute__((target_version("ssbs2"))) func(void); +}; +int __attribute__((target_version("bti"))) Out::func(void) { return 1; } +int __attribute__((target_version("ssbs2"))) Out::func(void) { return 2; } +// expected-error@+3 {{out-of-line definition of 'func' does not match any declaration in 'Out'}} +// expected-note@-3 {{member declaration nearly matches}} +// expected-note@-3 {{member declaration nearly matches}} +int __attribute__((target_version("rng"))) Out::func(void) { return 3; } diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -9,7 +9,7 @@ // This file is based on LLVM's lib/Support/Host.cpp. // It implements the operating system Host concept and builtin // __cpu_model for the compiler_rt library for x86 and -// __aarch64_have_lse_atomics for AArch64. +// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64. // //===----------------------------------------------------------------------===// @@ -837,6 +837,76 @@ return 0; } #elif defined(__aarch64__) +// CPUFeatures must correspond to the same AArch64 features in +// AArch64TargetParser.h +enum CPUFeatures { + FEAT_RNG, + FEAT_FLAGM, + FEAT_FLAGM2, + FEAT_FP16FML, + FEAT_DOTPROD, + FEAT_SM4, + FEAT_RDM, + FEAT_LSE, + FEAT_FP, + FEAT_SIMD, + FEAT_CRC, + FEAT_SHA1, + FEAT_SHA2, + FEAT_SHA3, + FEAT_AES, + FEAT_PMULL, + FEAT_FP16, + FEAT_DIT, + FEAT_DPB, + FEAT_DPB2, + FEAT_JSCVT, + FEAT_FCMA, + FEAT_RCPC, + FEAT_RCPC2, + FEAT_FRINTTS, + FEAT_DGH, + FEAT_I8MM, + FEAT_BF16, + FEAT_EBF16, + FEAT_RPRES, + FEAT_SVE, + FEAT_SVE_BF16, + FEAT_SVE_EBF16, + FEAT_SVE_I8MM, + FEAT_SVE_F32MM, + FEAT_SVE_F64MM, + FEAT_SVE2, + FEAT_SVE_AES, + FEAT_SVE_PMULL128, + FEAT_SVE_BITPERM, + FEAT_SVE_SHA3, + FEAT_SVE_SM4, + FEAT_SME, + FEAT_MEMTAG, + FEAT_MEMTAG2, + FEAT_MEMTAG3, + FEAT_SB, + FEAT_PREDRES, + FEAT_SSBS, + FEAT_SSBS2, + FEAT_BTI, + FEAT_LS64, + FEAT_LS64_V, + FEAT_LS64_ACCDATA, 
+ FEAT_WFXT, + FEAT_SME_F64, + FEAT_SME_I64, + FEAT_SME2, + FEAT_MAX +}; +// Architecture features used +// in Function Multi Versioning +struct { + unsigned long long features; + // As the feature set grows, new fields could be added +} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon)); + // LSE support detection for out-of-line atomics // using HWCAP and Auxiliary vector _Bool __aarch64_have_lse_atomics @@ -844,12 +914,181 @@ #if defined(__has_include) #if __has_include() #include +#if __has_include() +#include + #ifndef AT_HWCAP #define AT_HWCAP 16 #endif + +#ifndef HWCAP_CPUID +#define HWCAP_CPUID (1 << 11) +#endif +#ifndef HWCAP_FP +#define HWCAP_FP (1 << 0) +#endif +#ifndef HWCAP_ASIMD +#define HWCAP_ASIMD (1 << 1) +#endif +#ifndef HWCAP_AES +#define HWCAP_AES (1 << 3) +#endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1 << 4) +#endif +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1 << 5) +#endif +#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1 << 6) +#endif +// init_cpu_features_resolver tests HWCAP_CRC32, so it needs a fallback too +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif #ifndef HWCAP_ATOMICS #define HWCAP_ATOMICS (1 << 8) #endif +#ifndef HWCAP_FPHP +#define HWCAP_FPHP (1 << 9) +#endif +#ifndef HWCAP_ASIMDHP +#define HWCAP_ASIMDHP (1 << 10) +#endif +#ifndef HWCAP_ASIMDRDM +#define HWCAP_ASIMDRDM (1 << 12) +#endif +#ifndef HWCAP_JSCVT +#define HWCAP_JSCVT (1 << 13) +#endif +#ifndef HWCAP_FCMA +#define HWCAP_FCMA (1 << 14) +#endif +#ifndef HWCAP_LRCPC +#define HWCAP_LRCPC (1 << 15) +#endif +#ifndef HWCAP_DCPOP +#define HWCAP_DCPOP (1 << 16) +#endif +#ifndef HWCAP_SHA3 +#define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SM3 +#define HWCAP_SM3 (1 << 18) +#endif +#ifndef HWCAP_SM4 +#define HWCAP_SM4 (1 << 19) +#endif +#ifndef HWCAP_ASIMDDP +#define HWCAP_ASIMDDP (1 << 20) +#endif +#ifndef HWCAP_SHA512 +#define HWCAP_SHA512 (1 << 21) +#endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif +#ifndef HWCAP_ASIMDFHM +#define HWCAP_ASIMDFHM (1 << 23) +#endif +#ifndef HWCAP_DIT +#define HWCAP_DIT (1 << 24) +#endif +#ifndef HWCAP_ILRCPC +#define HWCAP_ILRCPC (1 << 26)
+#endif +#ifndef HWCAP_FLAGM +#define HWCAP_FLAGM (1 << 27) +#endif +#ifndef HWCAP_SSBS +#define HWCAP_SSBS (1 << 28) +#endif +#ifndef HWCAP_SB +#define HWCAP_SB (1 << 29) +#endif + +#ifndef HWCAP2_DCPODP +#define HWCAP2_DCPODP (1 << 0) +#endif +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 (1 << 1) +#endif +#ifndef HWCAP2_SVEAES +#define HWCAP2_SVEAES (1 << 2) +#endif +#ifndef HWCAP2_SVEPMULL +#define HWCAP2_SVEPMULL (1 << 3) +#endif +#ifndef HWCAP2_SVEBITPERM +#define HWCAP2_SVEBITPERM (1 << 4) +#endif +#ifndef HWCAP2_SVESHA3 +#define HWCAP2_SVESHA3 (1 << 5) +#endif +#ifndef HWCAP2_SVESM4 +#define HWCAP2_SVESM4 (1 << 6) +#endif +#ifndef HWCAP2_FLAGM2 +#define HWCAP2_FLAGM2 (1 << 7) +#endif +#ifndef HWCAP2_FRINT +#define HWCAP2_FRINT (1 << 8) +#endif +#ifndef HWCAP2_SVEI8MM +#define HWCAP2_SVEI8MM (1 << 9) +#endif +#ifndef HWCAP2_SVEF32MM +#define HWCAP2_SVEF32MM (1 << 10) +#endif +#ifndef HWCAP2_SVEF64MM +#define HWCAP2_SVEF64MM (1 << 11) +#endif +#ifndef HWCAP2_SVEBF16 +#define HWCAP2_SVEBF16 (1 << 12) +#endif +#ifndef HWCAP2_I8MM +#define HWCAP2_I8MM (1 << 13) +#endif +#ifndef HWCAP2_BF16 +#define HWCAP2_BF16 (1 << 14) +#endif +#ifndef HWCAP2_DGH +#define HWCAP2_DGH (1 << 15) +#endif +#ifndef HWCAP2_RNG +#define HWCAP2_RNG (1 << 16) +#endif +#ifndef HWCAP2_BTI +#define HWCAP2_BTI (1 << 17) +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif +#ifndef HWCAP2_RPRES +#define HWCAP2_RPRES (1 << 21) +#endif +#ifndef HWCAP2_MTE3 +#define HWCAP2_MTE3 (1 << 22) +#endif +#ifndef HWCAP2_SME +#define HWCAP2_SME (1 << 23) +#endif +#ifndef HWCAP2_SME_I16I64 +#define HWCAP2_SME_I16I64 (1 << 24) +#endif +#ifndef HWCAP2_SME_F64F64 +#define HWCAP2_SME_F64F64 (1 << 25) +#endif +#ifndef HWCAP2_WFXT +#define HWCAP2_WFXT (1UL << 31) +#endif +#ifndef HWCAP2_EBF16 +#define HWCAP2_EBF16 (1UL << 32) +#endif +#ifndef HWCAP2_SVE_EBF16 +#define HWCAP2_SVE_EBF16 (1UL << 33) +#endif + #if defined(__ANDROID__) #include #include @@ -857,6 +1092,13 @@ #include #include #endif + +// 
Detect Exynos 9810 CPU +#define IF_EXYNOS9810 \ + char arch[PROP_VALUE_MAX]; \ + if (__system_property_get("ro.arch", arch) > 0 && \ + strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) + static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) { #if defined(__FreeBSD__) unsigned long hwcap; @@ -875,25 +1117,233 @@ _Bool result = (hwcap & HWCAP_ATOMICS) != 0; #if defined(__ANDROID__) if (result) { - char arch[PROP_VALUE_MAX]; - if (__system_property_get("ro.arch", arch) > 0 && - strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) { - // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; - // only the former support LSE atomics. However, the kernel in the - // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly - // reported the feature as being supported. - // - // The kernel appears to have been corrected to mark it unsupported as of - // the Android 9.0 release on those devices, and this issue has not been - // observed anywhere else. Thus, this workaround may be removed if - // compiler-rt ever drops support for Android 8.0. - result = false; - } + // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; + // only the former support LSE atomics. However, the kernel in the + // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly + // reported the feature as being supported. + // + // The kernel appears to have been corrected to mark it unsupported as of + // the Android 9.0 release on those devices, and this issue has not been + // observed anywhere else. Thus, this workaround may be removed if + // compiler-rt ever drops support for Android 8.0. 
+    IF_EXYNOS9810 result = false;
   }
 #endif // defined(__ANDROID__)
   __aarch64_have_lse_atomics = result;
 #endif // defined(__FreeBSD__)
 }
+
+// Populate __aarch64_cpu_features.features from the two auxiliary-vector
+// words passed in (hwcap, hwcap2) and, when HWCAP_CPUID is set, from the ID
+// registers read with MRS. Feature bits are only ever OR-ed in here; nothing
+// is cleared.
+//
+// ID register fields are tested with >= wherever a larger field value denotes
+// a superset of the smaller one, so that newer hardware keeps reporting the
+// older feature as well.
+void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) {
+// Macro arguments and results are parenthesized so the helpers expand
+// predictably inside larger expressions.
+#define setCPUFeature(F) __aarch64_cpu_features.features |= (1ULL << (F))
+#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
+#define extractBits(val, start, number)                                        \
+  (((val) & (((1ULL << (number)) - 1ULL) << (start))) >> (start))
+  if (hwcap & HWCAP_CRC32)
+    setCPUFeature(FEAT_CRC);
+  if (hwcap & HWCAP_PMULL)
+    setCPUFeature(FEAT_PMULL);
+  if (hwcap & HWCAP_FLAGM)
+    setCPUFeature(FEAT_FLAGM);
+  if (hwcap2 & HWCAP2_FLAGM2) {
+    setCPUFeature(FEAT_FLAGM);
+    setCPUFeature(FEAT_FLAGM2);
+  }
+  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+    setCPUFeature(FEAT_SM4);
+  if (hwcap & HWCAP_ASIMDDP)
+    setCPUFeature(FEAT_DOTPROD);
+  if (hwcap & HWCAP_ASIMDFHM)
+    setCPUFeature(FEAT_FP16FML);
+  if (hwcap & HWCAP_FPHP) {
+    setCPUFeature(FEAT_FP16);
+    setCPUFeature(FEAT_FP);
+  }
+  if (hwcap & HWCAP_DIT)
+    setCPUFeature(FEAT_DIT);
+  if (hwcap & HWCAP_ASIMDRDM)
+    setCPUFeature(FEAT_RDM);
+  if (hwcap & HWCAP_ILRCPC)
+    setCPUFeature(FEAT_RCPC2);
+  if (hwcap & HWCAP_AES)
+    setCPUFeature(FEAT_AES);
+  if (hwcap & HWCAP_SHA1)
+    setCPUFeature(FEAT_SHA1);
+  if (hwcap & HWCAP_SHA2)
+    setCPUFeature(FEAT_SHA2);
+  if (hwcap & HWCAP_JSCVT)
+    setCPUFeature(FEAT_JSCVT);
+  if (hwcap & HWCAP_FCMA)
+    setCPUFeature(FEAT_FCMA);
+  if (hwcap & HWCAP_SB)
+    setCPUFeature(FEAT_SB);
+  if (hwcap & HWCAP_SSBS)
+    setCPUFeature(FEAT_SSBS2);
+  if (hwcap2 & HWCAP2_MTE) {
+    setCPUFeature(FEAT_MEMTAG);
+    setCPUFeature(FEAT_MEMTAG2);
+  }
+  if (hwcap2 & HWCAP2_MTE3) {
+    setCPUFeature(FEAT_MEMTAG);
+    setCPUFeature(FEAT_MEMTAG2);
+    setCPUFeature(FEAT_MEMTAG3);
+  }
+  if (hwcap2 & HWCAP2_SVEAES)
+    setCPUFeature(FEAT_SVE_AES);
+  if (hwcap2 & HWCAP2_SVEPMULL) {
+    setCPUFeature(FEAT_SVE_AES);
+    setCPUFeature(FEAT_SVE_PMULL128);
+  }
+  if (hwcap2 & HWCAP2_SVEBITPERM)
+    setCPUFeature(FEAT_SVE_BITPERM);
+  if (hwcap2 & HWCAP2_SVESHA3)
+    setCPUFeature(FEAT_SVE_SHA3);
+  if (hwcap2 & HWCAP2_SVESM4)
+    setCPUFeature(FEAT_SVE_SM4);
+  if (hwcap2 & HWCAP2_DCPODP)
+    setCPUFeature(FEAT_DPB2);
+  if (hwcap & HWCAP_ATOMICS)
+    setCPUFeature(FEAT_LSE);
+  if (hwcap2 & HWCAP2_RNG)
+    setCPUFeature(FEAT_RNG);
+  if (hwcap2 & HWCAP2_I8MM)
+    setCPUFeature(FEAT_I8MM);
+  if (hwcap2 & HWCAP2_EBF16)
+    setCPUFeature(FEAT_EBF16);
+  if (hwcap2 & HWCAP2_SVE_EBF16)
+    setCPUFeature(FEAT_SVE_EBF16);
+  if (hwcap2 & HWCAP2_DGH)
+    setCPUFeature(FEAT_DGH);
+  if (hwcap2 & HWCAP2_FRINT)
+    setCPUFeature(FEAT_FRINTTS);
+  if (hwcap2 & HWCAP2_SVEI8MM)
+    setCPUFeature(FEAT_SVE_I8MM);
+  if (hwcap2 & HWCAP2_SVEF32MM)
+    setCPUFeature(FEAT_SVE_F32MM);
+  if (hwcap2 & HWCAP2_SVEF64MM)
+    setCPUFeature(FEAT_SVE_F64MM);
+  if (hwcap2 & HWCAP2_BTI)
+    setCPUFeature(FEAT_BTI);
+  if (hwcap2 & HWCAP2_RPRES)
+    setCPUFeature(FEAT_RPRES);
+  if (hwcap2 & HWCAP2_WFXT)
+    setCPUFeature(FEAT_WFXT);
+  if (hwcap2 & HWCAP2_SME)
+    setCPUFeature(FEAT_SME);
+  if (hwcap2 & HWCAP2_SME_I16I64)
+    setCPUFeature(FEAT_SME_I64);
+  if (hwcap2 & HWCAP2_SME_F64F64)
+    setCPUFeature(FEAT_SME_F64);
+  if (hwcap & HWCAP_CPUID) {
+    unsigned long ftr;
+    getCPUFeature(ID_AA64PFR1_EL1, ftr);
+    // ID_AA64PFR1_EL1.MTE >= 0b0001
+    if (extractBits(ftr, 8, 4) >= 0x1)
+      setCPUFeature(FEAT_MEMTAG);
+    // ID_AA64PFR1_EL1.SSBS >= 0b0001 (0b0010 adds the MSR/MRS access on top)
+    if (extractBits(ftr, 4, 4) >= 0x1)
+      setCPUFeature(FEAT_SSBS);
+    // ID_AA64PFR1_EL1.SME >= 0b0010
+    if (extractBits(ftr, 24, 4) >= 0x2)
+      setCPUFeature(FEAT_SME2);
+    getCPUFeature(ID_AA64PFR0_EL1, ftr);
+    // ID_AA64PFR0_EL1.FP != 0b1111
+    if (extractBits(ftr, 16, 4) != 0xF) {
+      setCPUFeature(FEAT_FP);
+      // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
+      setCPUFeature(FEAT_SIMD);
+    }
+    // ID_AA64PFR0_EL1.SVE != 0b0000
+    if (extractBits(ftr, 32, 4) != 0x0) {
+      // ID_AA64ZFR0_EL1 is read through its generic encoding because the
+      // symbolic register name is only accepted when SVE is enabled.
+      getCPUFeature(S3_0_C0_C4_4, ftr);
+      // SVE itself is present for every SVEver value once
+      // ID_AA64PFR0_EL1.SVE is non-zero; SVE2 hardware must still match
+      // function versions that only ask for "sve".
+      setCPUFeature(FEAT_SVE);
+      // ID_AA64ZFR0_EL1.SVEver >= 0b0001
+      if (extractBits(ftr, 0, 4) >= 0x1)
+        setCPUFeature(FEAT_SVE2);
+      // ID_AA64ZFR0_EL1.BF16 != 0b0000
+      if (extractBits(ftr, 20, 4) != 0x0)
+        setCPUFeature(FEAT_SVE_BF16);
+    }
+    getCPUFeature(ID_AA64ISAR0_EL1, ftr);
+    // ID_AA64ISAR0_EL1.SHA3 != 0b0000
+    if (extractBits(ftr, 32, 4) != 0x0)
+      setCPUFeature(FEAT_SHA3);
+    getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+    // ID_AA64ISAR1_EL1.DPB >= 0b0001
+    if (extractBits(ftr, 0, 4) >= 0x1)
+      setCPUFeature(FEAT_DPB);
+    // ID_AA64ISAR1_EL1.LRCPC != 0b0000
+    if (extractBits(ftr, 20, 4) != 0x0)
+      setCPUFeature(FEAT_RCPC);
+    // ID_AA64ISAR1_EL1.SPECRES >= 0b0001 (0b0010 is the SPECRES2 superset)
+    if (extractBits(ftr, 40, 4) >= 0x1)
+      setCPUFeature(FEAT_PREDRES);
+    // ID_AA64ISAR1_EL1.BF16 != 0b0000
+    if (extractBits(ftr, 44, 4) != 0x0)
+      setCPUFeature(FEAT_BF16);
+    // ID_AA64ISAR1_EL1.LS64 >= 0b0001
+    if (extractBits(ftr, 60, 4) >= 0x1)
+      setCPUFeature(FEAT_LS64);
+    // ID_AA64ISAR1_EL1.LS64 >= 0b0010
+    if (extractBits(ftr, 60, 4) >= 0x2)
+      setCPUFeature(FEAT_LS64_V);
+    // ID_AA64ISAR1_EL1.LS64 >= 0b0011
+    if (extractBits(ftr, 60, 4) >= 0x3)
+      setCPUFeature(FEAT_LS64_ACCDATA);
+  } else {
+    // Set some features in case of no CPUID support
+    if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
+      setCPUFeature(FEAT_FP);
+      // FP and AdvSIMD fields have the same value
+      setCPUFeature(FEAT_SIMD);
+    }
+    if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
+      setCPUFeature(FEAT_DPB);
+    if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
+      setCPUFeature(FEAT_RCPC);
+    if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
+      setCPUFeature(FEAT_BF16);
+    if (hwcap2 & HWCAP2_SVEBF16)
+      setCPUFeature(FEAT_SVE_BF16);
+    // Without the ID registers, HWCAP_SVE is the only source for FEAT_SVE;
+    // SVE2 is still reported only together with SVE.
+    if (hwcap & HWCAP_SVE) {
+      setCPUFeature(FEAT_SVE);
+      if (hwcap2 & HWCAP2_SVE2)
+        setCPUFeature(FEAT_SVE2);
+    }
+    if (hwcap & HWCAP_SHA3)
+      setCPUFeature(FEAT_SHA3);
+  }
+}
+
+void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
+  unsigned long hwcap;
+  unsigned long hwcap2;
+  // CPU features already initialized.
+ if (__aarch64_cpu_features.features) + return; + setCPUFeature(FEAT_MAX); +#if defined(__FreeBSD__) + int res = 0; + res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); + res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2); + if (res) + return; +#else +#if defined(__ANDROID__) + // Don't set any CPU features, + // detection could be wrong on Exynos 9810. + IF_EXYNOS9810 return; +#endif // defined(__ANDROID__) + hwcap = getauxval(AT_HWCAP); + hwcap2 = getauxval(AT_HWCAP2); +#endif // defined(__FreeBSD__) + init_cpu_features_resolver(hwcap, hwcap2); +#undef extractBits +#undef getCPUFeature +#undef setCPUFeature +#undef IF_EXYNOS9810 +} #endif // defined(__has_include) #endif // __has_include() +#endif // __has_include() #endif // defined(__aarch64__) diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGETPARSER_AARCH64TARGETPARSER_H #define LLVM_TARGETPARSER_AARCH64TARGETPARSER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include @@ -23,6 +24,67 @@ class Triple; namespace AArch64 { +enum CPUFeatures { + FEAT_RNG, + FEAT_FLAGM, + FEAT_FLAGM2, + FEAT_FP16FML, + FEAT_DOTPROD, + FEAT_SM4, + FEAT_RDM, + FEAT_LSE, + FEAT_FP, + FEAT_SIMD, + FEAT_CRC, + FEAT_SHA1, + FEAT_SHA2, + FEAT_SHA3, + FEAT_AES, + FEAT_PMULL, + FEAT_FP16, + FEAT_DIT, + FEAT_DPB, + FEAT_DPB2, + FEAT_JSCVT, + FEAT_FCMA, + FEAT_RCPC, + FEAT_RCPC2, + FEAT_FRINTTS, + FEAT_DGH, + FEAT_I8MM, + FEAT_BF16, + FEAT_EBF16, + FEAT_RPRES, + FEAT_SVE, + FEAT_SVE_BF16, + FEAT_SVE_EBF16, + FEAT_SVE_I8MM, + FEAT_SVE_F32MM, + FEAT_SVE_F64MM, + FEAT_SVE2, + FEAT_SVE_AES, + FEAT_SVE_PMULL128, + FEAT_SVE_BITPERM, + FEAT_SVE_SHA3, + FEAT_SVE_SM4, + FEAT_SME, + FEAT_MEMTAG, + FEAT_MEMTAG2, + FEAT_MEMTAG3, + FEAT_SB, + FEAT_PREDRES, + FEAT_SSBS, + FEAT_SSBS2, + FEAT_BTI, + 
FEAT_LS64, + FEAT_LS64_V, + FEAT_LS64_ACCDATA, + FEAT_WFXT, + FEAT_SME_F64, + FEAT_SME_I64, + FEAT_SME2, + FEAT_MAX +}; // Arch extension modifiers for CPUs. These are labelled with their Arm ARM // feature name (though the canonical reference for those is AArch64.td) @@ -117,7 +179,8 @@ }; const ExtName AArch64ARCHExtNames[] = { -#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \ +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ {NAME, ID, FEATURE, NEGFEATURE}, #include "AArch64TargetParser.def" }; @@ -175,6 +238,7 @@ // Information by Name uint64_t getDefaultExtensions(StringRef CPU, ArchKind AK); +void getFeatureOption(StringRef Name, std::string &Feature); ArchKind getCPUArchKind(StringRef CPU); ArchKind getSubArchArchKind(StringRef SubArch); @@ -186,6 +250,7 @@ void fillValidCPUArchList(SmallVectorImpl &Values); bool isX18ReservedByDefault(const Triple &TT); +uint64_t getCpuSupportsMask(ArrayRef FeatureStrs); } // namespace AArch64 } // namespace llvm diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.def b/llvm/include/llvm/TargetParser/AArch64TargetParser.def --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.def +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.def @@ -101,65 +101,152 @@ #undef AARCH64_ARCH #ifndef AARCH64_ARCH_EXT_NAME -#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE,\ + FMV_ID, DEP_FEATURES, FMV_PRIORITY) #endif // FIXME: This would be nicer were it tablegen -AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, {}, {}) -AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, {}, {}) -AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") -AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") -AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") -AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto") -AARCH64_ARCH_EXT_NAME("sm4", 
AArch64::AEK_SM4, "+sm4", "-sm4") -AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") -AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") -AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") -AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod", "-dotprod") -AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") -AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") -AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") -AARCH64_ARCH_EXT_NAME("rasv2", AArch64::AEK_RASv2, "+rasv2", "-rasv2") -AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") -AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") -AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") -AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") -AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") -AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") -AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1") -AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16") -AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") -AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") -AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") -AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") -AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") -AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") -AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16") -AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm") 
-AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm") -AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm") -AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") -AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64") -AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe") -AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth") -AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm") -AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", "-sme") -AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64") -AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64") -AARCH64_ARCH_EXT_NAME("sme-f16f16", AArch64::AEK_SMEF16F16, "+sme-f16f16", "-sme-f16f16") -AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2") -AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1") -AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc") -AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops") -AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon") -AARCH64_ARCH_EXT_NAME("predres2", AArch64::AEK_SPECRES2, "+specres2", "-specres2") -AARCH64_ARCH_EXT_NAME("cssc", AArch64::AEK_CSSC, "+cssc", "-cssc") -AARCH64_ARCH_EXT_NAME("rcpc3", AArch64::AEK_RCPC3, "+rcpc3", "-rcpc3") -AARCH64_ARCH_EXT_NAME("the", AArch64::AEK_THE, "+the", "-the") -AARCH64_ARCH_EXT_NAME("d128", AArch64::AEK_D128, "+d128", "-d128") -AARCH64_ARCH_EXT_NAME("lse128", AArch64::AEK_LSE128, "+lse128", "-lse128") +AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, {}, {}, MAX, "", 0) +// "none" feature has the maximum allowed function multi versioning priority +AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, {}, {}, MAX, "", 1000) +AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc", CRC, "+crc", 110) +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse", LSE, "+lse", 
80) +AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm", RDM, + "+rdm,+fp-armv8,+neon", 70) +AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto", MAX, + "", 0) +AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4", SM4, + "+sm4,+fp-armv8,+neon", 60) +AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3", SHA3, + "+sha3,+sha2,+fp-armv8,+neon", 140) +AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2", SHA2, + "+sha2,+fp-armv8,+neon", 130) +AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes", AES, + "+fp-armv8,+neon", 150) +AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod", "-dotprod", + DOTPROD, "+dotprod,+fp-armv8,+neon", 50) +AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8", FP, + "+fp-armv8,+neon", 90) +AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon", SIMD, + "+fp-armv8,+neon", 100) +AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16", FP16, + "+fullfp16,+fp-armv8,+neon", 170) +AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml", + FP16FML, "+fp16fml,+fullfp16,+fp-armv8,+neon", 40) +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe", MAX, "", + 0) +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("rasv2", AArch64::AEK_RASv2, "+rasv2", "-rasv2", MAX, "", + 0) +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve", SVE, + "+sve,+fullfp16,+fp-armv8,+neon", 310) +AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2", SVE2, + "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370) +AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", + "-sve2-aes", SVE_AES, + "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380) +AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", + "-sve2-sm4", SVE_SM4, + "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420) +AARCH64_ARCH_EXT_NAME("sve2-sha3", 
AArch64::AEK_SVE2SHA3, "+sve2-sha3", + "-sve2-sha3", SVE_SHA3, + "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410) +AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", + "-sve2-bitperm", SVE_BITPERM, + "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400) +AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1", MAX, + "", 0) +AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16", MAX, + "", 0) +AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc", RCPC, + "+rcpc", 230) +AARCH64_ARCH_EXT_NAME("rcpc2", AArch64::AEK_NONE, {}, {}, RCPC2, "+rcpc", 240) +AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand", RNG, "+rand", + 10) +AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte", MEMTAG, "", + 440) +AARCH64_ARCH_EXT_NAME("memtag2", AArch64::AEK_NONE, {}, {}, MEMTAG2, "+mte", + 450) +AARCH64_ARCH_EXT_NAME("memtag3", AArch64::AEK_NONE, {}, {}, MEMTAG3, "+mte", + 460) +AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs", SSBS, "", + 490) +AARCH64_ARCH_EXT_NAME("ssbs2", AArch64::AEK_NONE, {}, {}, SSBS2, "+ssbs", 500) +AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb", SB, "+sb", 470) +AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres", + PREDRES, "+predres", 480) +AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16", BF16, + "+bf16", 280) +AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm", I8MM, + "+i8mm", 270) +AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm", + SVE_F32MM, "+sve,+f32mm,+fullfp16,+fp-armv8,+neon", 350) +AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm", + SVE_F64MM, "+sve,+f64mm,+fullfp16,+fp-armv8,+neon", 360) +AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64", LS64, "", + 520) +AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", 
"-brbe", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", MAX, "", + 0) +AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm", FLAGM, + "+flagm", 20) +AARCH64_ARCH_EXT_NAME("flagm2", AArch64::AEK_NONE, {}, {}, FLAGM2, + "+flagm,+altnzcv", 30) +AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", "-sme", SME, + "+sme,+bf16", 430) +AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", + "-sme-f64f64", SME_F64, "+sme,+sme-f64f64,+bf16", 560) +AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", + "-sme-i16i64", SME_I64, "+sme,+sme-i16i64,+bf16", 570) +AARCH64_ARCH_EXT_NAME("sme-f16f16", AArch64::AEK_SMEF16F16, "+sme-f16f16", + "-sme-f16f16", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2", SME2, + "+sme2,+sme,+bf16", 580) +AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1", MAX, + "", 0) +AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", + MAX, "", 0) +AARCH64_ARCH_EXT_NAME("predres2", AArch64::AEK_SPECRES2, "+specres2", + "-specres2", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("cssc", AArch64::AEK_CSSC, "+cssc", "-cssc", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("rcpc3", AArch64::AEK_RCPC3, "+rcpc3", "-rcpc3", MAX, "", + 0) +AARCH64_ARCH_EXT_NAME("the", AArch64::AEK_THE, "+the", "-the", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("d128", AArch64::AEK_D128, "+d128", "-d128", MAX, "", 0) +AARCH64_ARCH_EXT_NAME("lse128", AArch64::AEK_LSE128, "+lse128", "-lse128", MAX, + "", 0) +AARCH64_ARCH_EXT_NAME("sha1", AArch64::AEK_NONE, {}, {}, SHA1, + "+fp-armv8,+neon", 120) +AARCH64_ARCH_EXT_NAME("pmull", AArch64::AEK_NONE, {}, {}, PMULL, + "+aes,+fp-armv8,+neon", 160) +AARCH64_ARCH_EXT_NAME("dit", AArch64::AEK_NONE, {}, {}, DIT, "+dit", 180) +AARCH64_ARCH_EXT_NAME("dpb", 
AArch64::AEK_NONE, {}, {}, DPB, "+ccpp", 190) +AARCH64_ARCH_EXT_NAME("dpb2", AArch64::AEK_NONE, {}, {}, DPB2, "+ccpp,+ccdp", + 200) +AARCH64_ARCH_EXT_NAME("jscvt", AArch64::AEK_NONE, {}, {}, JSCVT, + "+fp-armv8,+neon,+jsconv", 210) +AARCH64_ARCH_EXT_NAME("fcma", AArch64::AEK_NONE, {}, {}, FCMA, + "+fp-armv8,+neon,+complxnum", 220) +AARCH64_ARCH_EXT_NAME("frintts", AArch64::AEK_NONE, {}, {}, FRINTTS, "+fptoint", + 250) +AARCH64_ARCH_EXT_NAME("dgh", AArch64::AEK_NONE, {}, {}, DGH, "", 260) +AARCH64_ARCH_EXT_NAME("ebf16", AArch64::AEK_NONE, {}, {}, EBF16, "+bf16", 290) +AARCH64_ARCH_EXT_NAME("rpres", AArch64::AEK_NONE, {}, {}, RPRES, "", 300) +AARCH64_ARCH_EXT_NAME("sve-bf16", AArch64::AEK_NONE, {}, {}, SVE_BF16, + "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320) +AARCH64_ARCH_EXT_NAME("sve-ebf16", AArch64::AEK_NONE, {}, {}, SVE_EBF16, + "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330) +AARCH64_ARCH_EXT_NAME("sve-i8mm", AArch64::AEK_NONE, {}, {}, SVE_I8MM, + "+sve,+i8mm,+fullfp16,+fp-armv8,+neon", 340) +AARCH64_ARCH_EXT_NAME("sve2-pmull128", AArch64::AEK_NONE, {}, {}, SVE_PMULL128, + "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 390) +AARCH64_ARCH_EXT_NAME("bti", AArch64::AEK_NONE, {}, {}, BTI, "+bti", 510) +AARCH64_ARCH_EXT_NAME("ls64_v", AArch64::AEK_NONE, {}, {}, LS64_V, "", 530) +AARCH64_ARCH_EXT_NAME("ls64_accdata", AArch64::AEK_NONE, {}, {}, LS64_ACCDATA, + "+ls64", 540) +AARCH64_ARCH_EXT_NAME("wfxt", AArch64::AEK_NONE, {}, {}, WFXT, "+wfxt", 550) #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -77,6 +77,9 @@ def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", "Enable out of line atomics to support LSE instructions">; +def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true", + "Enable Function Multi Versioning support.">; + def FeatureRDM : 
SubtargetFeature<"rdm", "HasRDM", "true", "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions (FEAT_RDM)">; diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -38,6 +38,15 @@ .Default(AArch64::AEK_INVALID); } +void AArch64::getFeatureOption(StringRef Name, std::string &Feature) { + Feature = llvm::StringSwitch(Name.substr(1)) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ + .Case(NAME, FEATURE) +#include "../../include/llvm/TargetParser/AArch64TargetParser.def" + .Default(Name.str()); +} + AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) { if (CPU == "generic") return ArchKind::ARMV8A; @@ -55,12 +64,27 @@ return ArchKind::INVALID; } +uint64_t AArch64::getCpuSupportsMask(ArrayRef FeatureStrs) { + uint64_t FeaturesMask = 0; + for (const StringRef &FeatureStr : FeatureStrs) { + unsigned Feature = StringSwitch(FeatureStr) +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ + .Case(NAME, llvm::AArch64::FEAT_##FMV_ID) +#include "../../include/llvm/TargetParser/AArch64TargetParser.def" + ; + FeaturesMask |= (1ULL << Feature); + } + return FeaturesMask; +} + bool AArch64::getExtensionFeatures(uint64_t Extensions, std::vector &Features) { if (Extensions == AArch64::AEK_INVALID) return false; -#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \ +#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \ + DEP_FEATURES, FMV_PRIORITY) \ if (Extensions & ID) { \ const char *feature = FEATURE; \ /* INVALID and NONE have no feature name. */ \