diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1840,7 +1840,8 @@ None, Target, CPUSpecific, - CPUDispatch + CPUDispatch, + TargetClones }; /// Represents a function declaration or definition. @@ -2459,6 +2460,10 @@ /// the target functionality. bool isTargetMultiVersion() const; + /// True if this function is a multiversioned dispatch function as a part of + /// the target-clones functionality. + bool isTargetClonesMultiVersion() const; + /// \brief Get the associated-constraints of this function declaration. /// Currently, this will either be a vector of size 1 containing the /// trailing-requires-clause or an empty vector. diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2676,6 +2676,40 @@ }]; } +def TargetClones : InheritableAttr { + let Spellings = [GCC<"target_clones">]; + let Args = [VariadicStringArgument<"featuresStrs">]; + let Documentation = [TargetClonesDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let AdditionalMembers = [{ + StringRef getFeatureStr(unsigned Index) const { + return *(featuresStrs_begin() + Index); + } + // 'default' is always moved to the end, so it isn't considered + // when mangling the index. + unsigned getMangledIndex(unsigned Index) const { + if (getFeatureStr(Index) == "default") + return std::count_if(featuresStrs_begin(), featuresStrs_end(), + [](StringRef S) { return S != "default"; }); + + return std::count_if(featuresStrs_begin(), featuresStrs_begin() + Index, + [](StringRef S) { return S != "default"; }); + } + + // True if this is the first of this version to appear in the config string. + // This is used to make sure we don't try to emit this function multiple + // times. 
+ bool isFirstOfVersion(unsigned Index) const { + StringRef FeatureStr(getFeatureStr(Index)); + return 0 == std::count_if( + featuresStrs_begin(), featuresStrs_begin() + Index, + [FeatureStr](StringRef S) { return S == FeatureStr; }); + } + }]; +} + +def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>; + def MinVectorWidth : InheritableAttr { let Spellings = [Clang<"min_vector_width">]; let Args = [UnsignedArgument<"VectorWidth">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2233,6 +2233,40 @@ }]; } +def TargetClonesDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the ``target_clones("OPTIONS")`` attribute. This attribute may be +attached to a function declaration and causes function multiversioning, where +multiple versions of the function will be emitted with different code +generation options. Additionally, these versions will be resolved at runtime +based on the priority of their attribute options. All ``target_clones`` functions +are considered multiversioned functions. + +All multiversioned functions must contain a ``default`` (fallback) +implementation, otherwise usages of the function are considered invalid. +Additionally, a function may not become multiversioned after its first use. + +The options to ``target_clones`` can either be a target-specific architecture +(specified as ``arch=CPU``), or one of a list of subtarget features. + +Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2", +"avx", "xop" and largely correspond to the machine specific options handled by +the front end. + +The versions can either be listed as a comma-separated sequence of string +literals or as a single string literal containing a comma-separated list of +versions. For compatibility with GCC, the two formats can be mixed. 
For +example, the following will emit 4 versions of the function: + + .. code-block:: c++ + + __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default"))) + void foo() {} + +}]; +} + def MinVectorWidthDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1271,9 +1271,14 @@ // Warning for the GlobalISel options. def GlobalISel : DiagGroup<"global-isel">; +// A warning group for the GNU extension to allow mixed specifier types for +// target-clones multiversioning. +def TargetClonesMixedSpecifiers : DiagGroup<"target-clones-mixed-specifiers">; + // A warning group specifically for warnings related to function // multiversioning. -def FunctionMultiVersioning : DiagGroup<"function-multiversion">; +def FunctionMultiVersioning + : DiagGroup<"function-multiversion", [TargetClonesMixedSpecifiers]>; def NoDeref : DiagGroup<"noderef">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2981,7 +2981,8 @@ "invalid or misplaced branch protection specification '%0'">; def warn_unsupported_target_attribute : Warning<"%select{unsupported|duplicate|unknown}0%select{| architecture|" - " tune CPU}1 '%2' in the 'target' attribute string; 'target' " + " tune CPU}1 '%2' in the '%select{target|target_clones}3' " + "attribute string; '%select{target|target_clones}3' " "attribute ignored">, InGroup; def err_attribute_unsupported @@ -9864,6 +9865,8 @@ def warn_duplicate_attribute : Warning< "attribute %0 is already applied with different arguments">, InGroup; +def err_disallowed_duplicate_attribute : Error< + "attribute %0 cannot appear more than once on a declaration">; def 
warn_sync_fetch_and_nand_semantics_change : Warning< "the semantics of this intrinsic changed with GCC " @@ -11248,9 +11251,11 @@ "multiversioned function redeclarations require identical target attributes">; def err_multiversion_noproto : Error< "multiversioned function must have a prototype">; -def err_multiversion_disallowed_other_attr : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined" - " with attribute %1">; +def err_multiversion_disallowed_other_attr + : Error<"attribute " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + "multiversioning cannot be combined" + " with attribute %1">; def err_multiversion_mismatched_attrs : Error<"attributes on multiversioned functions must all match, attribute " "%0 %select{is missing|has different arguments}1">; @@ -11258,11 +11263,14 @@ "multiversioned function declaration has a different %select{calling convention" "|return type|constexpr specification|inline specification|linkage|" "language linkage}0">; -def err_multiversion_doesnt_support : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioned functions do not " - "yet support %select{function templates|virtual functions|" - "deduced return types|constructors|destructors|deleted functions|" - "defaulted functions|constexpr functions|consteval function}1">; +def err_multiversion_doesnt_support + : Error<"attribute " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + "multiversioned functions do not " + "yet support %select{function templates|virtual functions|" + "deduced return types|constructors|destructors|deleted functions|" + "defaulted functions|constexpr functions|consteval " + "function|lambdas}1">; def err_multiversion_not_allowed_on_main : Error< "'main' cannot be a multiversioned function">; def err_multiversion_not_supported : Error< @@ -11279,6 +11287,19 @@ def warn_dispatch_body_ignored : Warning< "body of cpu_dispatch function will be ignored">, 
InGroup; +def err_target_clone_must_have_default + : Error<"'target_clones' multiversioning requires a default target">; +def err_target_clone_doesnt_match + : Error<"'target_clones' attribute does not match previous declaration">; +def warn_target_clone_mixed_values + : ExtWarn< + "mixing 'target_clones' specifier mechanisms is permitted for GCC " + "compatibility; use a comma separated sequence of string literals, " + "or a string literal containing a comma-separated list of versions">, + InGroup; +def warn_target_clone_duplicate_options + : Warning<"version list contains duplicate entries">, + InGroup; // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4350,6 +4350,10 @@ llvm::Error isValidSectionSpecifier(StringRef Str); bool checkSectionName(SourceLocation LiteralLoc, StringRef Str); bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); + bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, + const StringLiteral *Literal, + bool &HasDefault, bool &HasCommas, + SmallVectorImpl &Strings); bool checkMSInheritanceAttrOnDefinition( CXXRecordDecl *RD, SourceRange Range, bool BestCase, MSInheritanceModel SemanticSpelling); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11763,6 +11763,15 @@ Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (const auto *TC = FD->getAttr()) { + std::vector Features; + StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); + if (VersionStr.startswith("arch=")) + TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); + else if (VersionStr != 
"default") + Features.push_back((StringRef{"+"} + VersionStr).str()); + + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else { FeatureMap = Target->getTargetOpts().FeatureMap; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3271,6 +3271,8 @@ return MultiVersionKind::CPUDispatch; if (hasAttr()) return MultiVersionKind::CPUSpecific; + if (hasAttr()) + return MultiVersionKind::TargetClones; return MultiVersionKind::None; } @@ -3286,6 +3288,10 @@ return isMultiVersion() && hasAttr(); } +bool FunctionDecl::isTargetClonesMultiVersion() const { + return isMultiVersion() && hasAttr(); +} + void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1500,6 +1500,7 @@ void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void emitCPUDispatchDefinition(GlobalDecl GD); + void EmitTargetClonesResolver(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1264,6 +1264,20 @@ (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } +static void AppendTargetClonesMangling(const CodeGenModule &CGM, + const TargetClonesAttr *Attr, + unsigned VersionIndex, + raw_ostream &Out) { + Out << '.'; + StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; + + Out << '.' 
<< Attr->getMangledIndex(VersionIndex); +} + static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -1317,6 +1331,10 @@ case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr(), Out); break; + case MultiVersionKind::TargetClones: + AppendTargetClonesMangling(CGM, FD->getAttr(), + GD.getMultiVersionIndex(), Out); + break; case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } @@ -1981,8 +1999,9 @@ FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr() : nullptr; const auto *SD = FD ? FD->getAttr() : nullptr; + const auto *TC = FD ? FD->getAttr() : nullptr; bool AddedAttr = false; - if (TD || SD) { + if (TD || SD || TC) { llvm::StringMap FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); @@ -3224,6 +3243,12 @@ for (unsigned I = 0; I < Spec->cpus_size(); ++I) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); // Requires multiple emits. 
+ } else if (FD->isTargetClonesMultiVersion()) { + auto *Clone = FD->getAttr(); + for (unsigned I = 0; I < Clone->featuresStrs_size(); ++I) + if (Clone->isFirstOfVersion(I)) + EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); + EmitTargetClonesResolver(GD); } else EmitGlobalFunctionDefinition(GD, GV); } @@ -3305,6 +3330,63 @@ return llvm::GlobalValue::WeakODRLinkage; } +void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) { + const auto *FD = cast(GD.getDecl()); + assert(FD && "Not a FunctionDecl?"); + const auto *TC = FD->getAttr(); + assert(TC && "Not a target_clones Function?"); + + QualType CanonTy = Context.getCanonicalType(FD->getType()); + llvm::Type *DeclTy = getTypes().ConvertType(CanonTy); + + if (const auto *CXXFD = dyn_cast(FD)) { + const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); + DeclTy = getTypes().GetFunctionType(FInfo); + } + + llvm::Function *ResolverFunc; + if (getTarget().supportsIFunc()) { + auto *IFunc = cast( + GetOrCreateMultiVersionResolver(GD, DeclTy, FD)); + ResolverFunc = cast(IFunc->getResolver()); + } else + ResolverFunc = + cast(GetOrCreateMultiVersionResolver(GD, DeclTy, FD)); + + SmallVector Options; + for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size(); + ++VersionIndex) { + if (!TC->isFirstOfVersion(VersionIndex)) + continue; + StringRef Version = TC->getFeatureStr(VersionIndex); + StringRef MangledName = + getMangledName(GD.getWithMultiVersionIndex(VersionIndex)); + llvm::Constant *Func = GetGlobalValue(MangledName); + assert(Func && + "Should have already been created before calling resolver emit"); + + StringRef Architecture; + llvm::SmallVector Feature; + + if (Version.startswith("arch=")) + Architecture = Version.drop_front(sizeof("arch=") - 1); + else if (Version != "default") + Feature.push_back(Version); + + Options.emplace_back(cast(Func), Architecture, Feature); + } + + const TargetInfo &TI = getTarget(); + std::stable_sort( + Options.begin(), 
Options.end(), + [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); + }); + CodeGenFunction CGF(*this); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); +} + void CodeGenModule::emitMultiVersionFunctions() { std::vector MVFuncsToEmit; MultiVersionFuncs.swap(MVFuncsToEmit); @@ -3509,8 +3591,25 @@ // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). - if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) + if (FD->isTargetMultiVersion()) MultiVersionFuncs.push_back(GD); + else if (FD->isTargetClonesMultiVersion()) { + // In target_clones multiversioning, make sure we emit this if used. + auto DDI = + DeferredDecls.find(getMangledName(GD.getWithMultiVersionIndex(0))); + if (DDI != DeferredDecls.end()) { + addDeferredDeclToEmit(GD); + DeferredDecls.erase(DDI); + } else { + // Emit the symbol of the 1st variant, so that the deferred decls know we + // need it, otherwise the only global value will be the resolver/ifunc, + // which end up getting broken if we search for them with 'GetGlobalValue'. 
+ GetOrCreateLLVMFunction( + getMangledName(GD.getWithMultiVersionIndex(0)), DeclTy, FD, + /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + } + } if (getTarget().supportsIFunc()) { llvm::Type *ResolverType = llvm::FunctionType::get( diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10269,13 +10269,9 @@ const FunctionDecl *FD, const FunctionDecl *CausedFD, MultiVersionKind MVType) { - bool IsCPUSpecificCPUDispatchMVType = - MVType == MultiVersionKind::CPUDispatch || - MVType == MultiVersionKind::CPUSpecific; - const auto Diagnose = [FD, CausedFD, IsCPUSpecificCPUDispatchMVType]( - Sema &S, const Attr *A) { + const auto Diagnose = [FD, CausedFD, MVType](Sema &S, const Attr *A) { S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr) - << IsCPUSpecificCPUDispatchMVType << A; + << static_cast(MVType) << A; if (CausedFD) S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here); return true; @@ -10293,6 +10289,10 @@ if (MVType != MultiVersionKind::Target) return Diagnose(S, A); break; + case attr::TargetClones: + if (MVType != MultiVersionKind::TargetClones) + return Diagnose(S, A); + break; default: if (!AttrCompatibleWithMultiVersion(A->getKind(), MVType)) return Diagnose(S, A); @@ -10319,6 +10319,7 @@ DefaultedFuncs = 6, ConstexprFuncs = 7, ConstevalFuncs = 8, + Lambda = 9, }; enum Different { CallingConv = 0, @@ -10446,7 +10447,7 @@ S.PDiag(diag::note_multiversioning_caused_here)), PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType), + << static_cast(MVType)), PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_diff)), /*TemplatesSupported=*/false, @@ -10575,21 +10576,30 @@ return false; } +static bool MultiVersionTypesCompatible(MultiVersionKind Old, + MultiVersionKind New) { + if (Old == New || 
Old == MultiVersionKind::None || + New == MultiVersionKind::None) + return true; + + return (Old == MultiVersionKind::CPUDispatch && + New == MultiVersionKind::CPUSpecific) || + (Old == MultiVersionKind::CPUSpecific && + New == MultiVersionKind::CPUDispatch); +} + /// Check the validity of a new function declaration being added to an existing /// multiversioned declaration collection. static bool CheckMultiVersionAdditionalDecl( Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, MultiVersionKind NewMVType, const TargetAttr *NewTA, const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, - LookupResult &Previous) { + const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl, + bool &MergeTypeWithPrevious, LookupResult &Previous) { MultiVersionKind OldMVType = OldFD->getMultiVersionKind(); // Disallow mixing of multiversioning types. - if ((OldMVType == MultiVersionKind::Target && - NewMVType != MultiVersionKind::Target) || - (NewMVType == MultiVersionKind::Target && - OldMVType != MultiVersionKind::Target)) { + if (!MultiVersionTypesCompatible(OldMVType, NewMVType)) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); S.Diag(OldFD->getLocation(), diag::note_previous_declaration); NewFD->setInvalidDecl(); @@ -10614,7 +10624,12 @@ if (S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules)) continue; - if (NewMVType == MultiVersionKind::Target) { + switch (NewMVType) { + case MultiVersionKind::None: + assert(OldMVType == MultiVersionKind::TargetClones && + "Only target_clones can be omitted in subsequent declarations"); + break; + case MultiVersionKind::Target: { const auto *CurTA = CurFD->getAttr(); if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) { NewFD->setIsMultiVersion(); @@ -10630,7 +10645,30 @@ NewFD->setInvalidDecl(); return true; } - } else { + break; + } + case MultiVersionKind::TargetClones: { + const auto *CurClones = CurFD->getAttr(); 
+ Redeclaration = true; + OldDecl = CurFD; + MergeTypeWithPrevious = true; + NewFD->setIsMultiVersion(); + + if (CurClones && NewClones && + (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() || + !std::equal(CurClones->featuresStrs_begin(), + CurClones->featuresStrs_end(), + NewClones->featuresStrs_begin()))) { + S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match); + S.Diag(CurFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + + return false; + } + case MultiVersionKind::CPUSpecific: + case MultiVersionKind::CPUDispatch: { const auto *CurCPUSpec = CurFD->getAttr(); const auto *CurCPUDisp = CurFD->getAttr(); // Handle CPUDispatch/CPUSpecific versions. @@ -10685,8 +10723,8 @@ } } } - // If the two decls aren't the same MVType, there is no possible error - // condition. + break; + } } } @@ -10722,7 +10760,6 @@ return false; } - /// Check the validity of a mulitversion function declaration. /// Also sets the multiversion'ness' of the function itself. /// @@ -10736,23 +10773,14 @@ const auto *NewTA = NewFD->getAttr(); const auto *NewCPUDisp = NewFD->getAttr(); const auto *NewCPUSpec = NewFD->getAttr(); - - // Mixing Multiversioning types is prohibited. - if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) || - (NewCPUDisp && NewCPUSpec)) { - S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); - NewFD->setInvalidDecl(); - return true; - } - - MultiVersionKind MVType = NewFD->getMultiVersionKind(); + const auto *NewClones = NewFD->getAttr(); + MultiVersionKind MVType = NewFD->getMultiVersionKind(); // Main isn't allowed to become a multiversion function, however it IS // permitted to have 'main' be marked with the 'target' optimization hint. 
if (NewFD->isMain()) { - if ((MVType == MultiVersionKind::Target && NewTA->isDefaultVersion()) || - MVType == MultiVersionKind::CPUDispatch || - MVType == MultiVersionKind::CPUSpecific) { + if (MVType != MultiVersionKind::None && + !(MVType == MultiVersionKind::Target && !NewTA->isDefaultVersion())) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main); NewFD->setInvalidDecl(); return true; @@ -10775,13 +10803,35 @@ if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::None) return false; - if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None) { + // Multiversioned redeclarations aren't allowed to omit the attribute, except + // for target_clones. + if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None && + OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (OldFD->getMultiVersionKind() != MultiVersionKind::Target); NewFD->setInvalidDecl(); return true; } + if (!OldFD->isMultiVersion()) { + switch (MVType) { + case MultiVersionKind::Target: + return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, + Redeclaration, OldDecl, + MergeTypeWithPrevious, Previous); + case MultiVersionKind::TargetClones: + if (OldFD->isUsed(false)) { + NewFD->setInvalidDecl(); + return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used); + } + OldFD->setIsMultiVersion(); + break; + case MultiVersionKind::CPUDispatch: + case MultiVersionKind::CPUSpecific: + case MultiVersionKind::None: + break; + } + } // Handle the target potentially causes multiversioning case. if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::Target) return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, @@ -10792,8 +10842,8 @@ // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. 
return CheckMultiVersionAdditionalDecl( - S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration, - OldDecl, MergeTypeWithPrevious, Previous); + S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewClones, + Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); } /// Perform semantic checking of a new function declaration. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -1965,6 +1965,28 @@ } static void handleCPUSpecificAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // Ensure we don't combine these with themselves, since that causes some + // confusing behavior. + if (AL.getParsedKind() == ParsedAttr::AT_CPUDispatch) { + if (checkAttrMutualExclusion(S, D, AL)) + return; + + if (const auto *Other = D->getAttr()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + } else if (AL.getParsedKind() == ParsedAttr::AT_CPUSpecific) { + if (checkAttrMutualExclusion(S, D, AL)) + return; + + if (const auto *Other = D->getAttr()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + } + FunctionDecl *FD = cast(D); if (const auto *MD = dyn_cast(D)) { @@ -3211,40 +3233,41 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { enum FirstParam { Unsupported, Duplicate, Unknown }; enum SecondParam { None, Architecture, Tune }; + enum ThirdParam { Target, TargetClones }; if (AttrStr.contains("fpmath=")) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "fpmath="; + << Unsupported << None << "fpmath=" << Target; // Diagnose use of tune if target doesn't support it. 
if (!Context.getTargetInfo().supportsTargetAttributeTune() && AttrStr.contains("tune=")) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "tune="; + << Unsupported << None << "tune=" << Target; ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr); if (!ParsedAttrs.Architecture.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unknown << Architecture << ParsedAttrs.Architecture; + << Unknown << Architecture << ParsedAttrs.Architecture << Target; if (!ParsedAttrs.Tune.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unknown << Tune << ParsedAttrs.Tune; + << Unknown << Tune << ParsedAttrs.Tune << Target; if (ParsedAttrs.DuplicateArchitecture) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "arch="; + << Duplicate << None << "arch=" << Target; if (ParsedAttrs.DuplicateTune) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "tune="; + << Duplicate << None << "tune=" << Target; for (const auto &Feature : ParsedAttrs.Features) { auto CurFeature = StringRef(Feature).drop_front(); // remove + or -. 
if (!Context.getTargetInfo().isValidFeatureName(CurFeature)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature; + << Unsupported << None << CurFeature << Target; } TargetInfo::BranchProtectionInfo BPI; @@ -3254,7 +3277,7 @@ ParsedAttrs.BranchProtection, BPI, Error)) { if (Error.empty()) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "branch-protection"; + << Unsupported << None << "branch-protection" << Target; else return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec) << Error; @@ -3264,6 +3287,14 @@ } static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // Ensure we don't combine these with themselves, since that causes some + // confusing behavior. + if (const auto *Other = D->getAttr()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + StringRef Str; SourceLocation LiteralLoc; if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) || @@ -3274,6 +3305,107 @@ D->addAttr(NewAttr); } +bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, + const StringLiteral *Literal, + bool &HasDefault, bool &HasCommas, + SmallVectorImpl &Strings) { + enum FirstParam { Unsupported, Duplicate, Unknown }; + enum SecondParam { None, Architecture, Tune }; + enum ThirdParam { Target, TargetClones }; + HasCommas = HasCommas || Str.contains(','); + // Warn on empty at the beginning of a string. 
+  if (Str.size() == 0)
+    return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+           << Unsupported << None << "" << TargetClones;
+
+  std::pair<StringRef, StringRef> Parts = {{}, Str};
+  while (!Parts.second.empty()) {
+    Parts = Parts.second.split(',');
+    StringRef Cur = Parts.first.trim();
+    SourceLocation CurLoc = Literal->getLocationOfByte(
+        Cur.data() - Literal->getString().data(), getSourceManager(),
+        getLangOpts(), Context.getTargetInfo());
+
+    bool DefaultIsDupe = false;
+    if (Cur.empty())
+      return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+             << Unsupported << None << "" << TargetClones;
+
+    if (Cur.startswith("arch=")) {
+      if (!Context.getTargetInfo().isValidCPUName(
+              Cur.drop_front(sizeof("arch=") - 1)))
+        return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+               << Unsupported << Architecture
+               << Cur.drop_front(sizeof("arch=") - 1) << TargetClones;
+    } else if (Cur == "default") {
+      DefaultIsDupe = HasDefault;
+      HasDefault = true;
+    } else if (!Context.getTargetInfo().isValidFeatureName(Cur))
+      return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+             << Unsupported << None << Cur << TargetClones;
+
+    if (llvm::find(Strings, Cur) != Strings.end() || DefaultIsDupe)
+      Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+    // Note: Add even if there are duplicates, since it changes name mangling.
+    Strings.push_back(Cur);
+  }
+
+  if (Str.rtrim().endswith(","))
+    return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+           << Unsupported << None << "" << TargetClones;
+  return false;
+}
+
+static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // Ensure we don't combine these with themselves, since that causes some
+  // confusing behavior.
+  if (const auto *Other = D->getAttr<TargetClonesAttr>()) {
+    S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+    S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+    return;
+  }
+  if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL))
+    return;
+
+  SmallVector<StringRef, 2> Strings;
+  bool HasCommas = false, HasDefault = false;
+
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef CurStr;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) ||
+        S.checkTargetClonesAttrString(
+            LiteralLoc, CurStr,
+            cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()),
+            HasDefault, HasCommas, Strings))
+      return;
+  }
+
+  if (HasCommas && AL.getNumArgs() > 1)
+    S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
+
+  if (!HasDefault) {
+    S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
+    return;
+  }
+
+  // FIXME: We could probably figure out how to get this to work for lambdas
+  // someday.
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+    if (MD->getParent()->isLambda()) {
+      S.Diag(D->getLocation(), diag::err_multiversion_doesnt_support)
+          << static_cast<unsigned>(MultiVersionKind::TargetClones)
+          << /*Lambda*/ 9;
+      return;
+    }
+  }
+
+  cast<FunctionDecl>(D)->setIsMultiVersion();
+  TargetClonesAttr *NewAttr = ::new (S.Context)
+      TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size());
+  D->addAttr(NewAttr);
+}
+
 static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   Expr *E = AL.getArgAsExpr(0);
   uint32_t VecWidth;
@@ -8216,6 +8348,9 @@
   case ParsedAttr::AT_Target:
     handleTargetAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_TargetClones:
+    handleTargetClonesAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_MinVectorWidth:
     handleMinVectorWidthAttr(S, D, AL);
     break;
diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=LINUX,CHECK
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
+
+// LINUX: @foo.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo.resolver
+// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), void ()* ()* @foo_dupes.resolver
+// LINUX: @unused.ifunc = weak_odr ifunc void (), void ()* ()* @unused.resolver
+// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline.resolver
+// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline2.resolver
+
+int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
+// LINUX: define {{.*}}i32 @foo.sse4.2.0()
+// LINUX: define {{.*}}i32 @foo.default.1()
+// LINUX: define i32 ()* @foo.resolver()
+// LINUX: ret i32 ()* @foo.sse4.2.0
+// LINUX: ret i32 ()* @foo.default.1
+
+// WINDOWS: define dso_local i32 @foo.sse4.2.0()
+// WINDOWS: define dso_local i32 @foo.default.1()
+// WINDOWS: define dso_local i32 @foo()
+// WINDOWS: musttail call i32 @foo.sse4.2.0
+// WINDOWS: musttail call i32 @foo.default.1
+
+__attribute__((target_clones("default,default ,sse4.2"))) void foo_dupes(void) {}
+// LINUX: define {{.*}}void @foo_dupes.default.1()
+// LINUX: define {{.*}}void @foo_dupes.sse4.2.0()
+// LINUX: define void ()* @foo_dupes.resolver()
+// LINUX: ret void ()* @foo_dupes.sse4.2.0
+// LINUX: ret void ()* @foo_dupes.default.1
+
+// WINDOWS: define dso_local void @foo_dupes.default.1()
+// WINDOWS: define dso_local void @foo_dupes.sse4.2.0()
+// WINDOWS: define dso_local void @foo_dupes()
+// WINDOWS: musttail call void @foo_dupes.sse4.2.0
+// WINDOWS: musttail call void @foo_dupes.default.1
+
+void bar2() {
+  // LINUX: define {{.*}}void @bar2()
+  // WINDOWS: define dso_local void @bar2()
+  foo_dupes();
+  // LINUX: call void @foo_dupes.ifunc()
+  // WINDOWS: call void @foo_dupes()
+}
+
+int bar() {
+  // LINUX: define {{.*}}i32 @bar() #[[DEF:[0-9]+]]
+  // WINDOWS: define dso_local i32 @bar() #[[DEF:[0-9]+]]
+  return foo();
+  // LINUX: call i32 @foo.ifunc()
+  // WINDOWS: call i32 @foo()
+}
+
+void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
+// LINUX: define {{.*}}void @unused.default.1()
+// LINUX: define {{.*}}void @unused.arch_ivybridge.0()
+// LINUX: define void ()* @unused.resolver()
+// LINUX: ret void ()* @unused.arch_ivybridge.0
+// LINUX: ret void ()* @unused.default.1
+
+// WINDOWS: define dso_local void @unused.default.1()
+// WINDOWS: define dso_local void @unused.arch_ivybridge.0()
+// WINDOWS: define dso_local void @unused()
+// WINDOWS: musttail call void @unused.arch_ivybridge.0
+// WINDOWS: musttail call void @unused.default.1
+
+
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline(void) { return 0; }
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline2(void);
+
+int bar3() {
+  // LINUX: define {{.*}}i32 @bar3()
+  // WINDOWS: define dso_local i32 @bar3()
+  return foo_inline() + foo_inline2();
+  // LINUX: call i32 @foo_inline.ifunc()
+  // LINUX: call i32 @foo_inline2.ifunc()
+  // WINDOWS: call i32 @foo_inline()
+  // WINDOWS: call i32 @foo_inline2()
+}
+
+// Deferred emission of foo_inline, which got delayed because it is inline.
+// LINUX: define i32 ()* @foo_inline.resolver()
+// LINUX: ret i32 ()* @foo_inline.arch_sandybridge.0
+// LINUX: ret i32 ()* @foo_inline.sse4.2.1
+// LINUX: ret i32 ()* @foo_inline.default.2
+
+// WINDOWS: define dso_local i32 @foo_inline()
+// WINDOWS: musttail call i32 @foo_inline.arch_sandybridge.0
+// WINDOWS: musttail call i32 @foo_inline.sse4.2.1
+// WINDOWS: musttail call i32 @foo_inline.default.2
+
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline2(void){ return 0; }
+// LINUX: define linkonce i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
+// LINUX: define i32 ()* @foo_inline2.resolver()
+// LINUX: ret i32 ()* @foo_inline2.arch_sandybridge.0
+// LINUX: ret i32 ()* @foo_inline2.sse4.2.1
+// LINUX: ret i32 ()* @foo_inline2.default.2
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
+// WINDOWS: define dso_local i32 @foo_inline2()
+// WINDOWS: musttail call i32 @foo_inline2.arch_sandybridge.0
+// WINDOWS: musttail call i32 @foo_inline2.sse4.2.1
+// WINDOWS: musttail call i32 @foo_inline2.default.2
+
+// LINUX: define linkonce i32 @foo_inline.arch_sandybridge.0() #[[SB]]
+// LINUX: define linkonce i32 @foo_inline.default.2() #[[DEF]]
+// LINUX: define linkonce i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_sandybridge.0() #[[SB]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.default.2() #[[DEF]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]
+
+
+// LINUX: define linkonce i32 @foo_inline2.default.2() #[[DEF]]
+// LINUX: define linkonce i32 @foo_inline2.sse4.2.1() #[[SSE42]]
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.default.2() #[[DEF]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.sse4.2.1() #[[SSE42]]
+
+// CHECK: attributes #[[SSE42]] =
+// CHECK-SAME: "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: attributes #[[DEF]] =
+// Don't bother checking features, we verified it is the same as a normal function.
+// CHECK: attributes #[[SB]] =
+// CHECK-SAME: "target-features"="+avx,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
diff --git a/clang/test/CodeGenCXX/attr-target-clones.cpp b/clang/test/CodeGenCXX/attr-target-clones.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/attr-target-clones.cpp
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
+
+// Overloaded ifuncs
+// LINUX: @_Z10overloadedi.ifunc = weak_odr ifunc i32 (i32), i32 (i32)* ()* @_Z10overloadedi.resolver
+// LINUX: @_Z10overloadedPKc.ifunc = weak_odr ifunc i32 (i8*), i32 (i8*)* ()* @_Z10overloadedPKc.resolver
+// struct 'C' ifuncs, note the 'float, U' one doesn't get one.
+// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C*), i32 (%struct.C*)* ()* @_ZN1CIssE3fooEv.resolver
+// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C.0*), i32 (%struct.C.0*)* ()* @_ZN1CIisE3fooEv.resolver
+// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C.2*), i32 (%struct.C.2*)* ()* @_ZN1CIdfE3fooEv.resolver
+
+int __attribute__((target_clones("sse4.2", "default"))) overloaded(int) { return 1; }
+// LINUX: define {{.*}}i32 @_Z10overloadedi.sse4.2.0(i32{{.+}})
+// LINUX: define {{.*}}i32 @_Z10overloadedi.default.1(i32{{.+}})
+// LINUX: define i32 (i32)* @_Z10overloadedi.resolver
+// LINUX: ret i32 (i32)* @_Z10overloadedi.sse4.2.0
+// LINUX: ret i32 (i32)* @_Z10overloadedi.default.1
+
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z.sse4.2.0"(i32{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z.default.1"(i32{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z"(i32{{.+}})
+// WINDOWS: call i32 @"?overloaded@@YAHH@Z.sse4.2.0"
+// WINDOWS: call i32 @"?overloaded@@YAHH@Z.default.1"
+
+int __attribute__((target_clones("arch=ivybridge", "default"))) overloaded(const char *) { return 2; }
+// LINUX: define {{.*}}i32 @_Z10overloadedPKc.arch_ivybridge.0(i8*{{.+}})
+// LINUX: define {{.*}}i32 @_Z10overloadedPKc.default.1(i8*{{.+}})
+// LINUX: define i32 (i8*)* @_Z10overloadedPKc.resolver
+// LINUX: ret i32 (i8*)* @_Z10overloadedPKc.arch_ivybridge.0
+// LINUX: ret i32 (i8*)* @_Z10overloadedPKc.default.1
+
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z.arch_ivybridge.0"(i8*{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z.default.1"(i8*{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z"(i8*{{.+}})
+// WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z.arch_ivybridge.0"
+// WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z.default.1"
+//
+void use_overloaded() {
+  overloaded(1);
+  // LINUX: call i32 @_Z10overloadedi.ifunc
+  // WINDOWS: call i32 @"?overloaded@@YAHH@Z"
+  overloaded(nullptr);
+  // LINUX: call i32 @_Z10overloadedPKc.ifunc
+  // WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z"
+}
+
+template <typename T, typename U>
+struct C {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 1;}
+};
+template <typename U>
+struct C<int, U> {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 2;}
+};
+template <typename U>
+struct C<float, U> {
+int foo(){ return 2;}
+};
+template<>
+struct C<double, float> {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 3;}
+};
+
+void uses_specialized() {
+  C<short, short> c;
+  c.foo();
+  // LINUX: call i32 @_ZN1CIssE3fooEv.ifunc(%struct.C
+  // WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ"(%struct.C
+  C<int, short> c2;
+  c2.foo();
+  // LINUX: call i32 @_ZN1CIisE3fooEv.ifunc(%struct.C
+  // WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ"(%struct.C
+  C<float, short> c3;
+  c3.foo();
+  // Note this is not an ifunc/mv
+  // LINUX: call i32 @_ZN1CIfsE3fooEv(%struct.C
+  // WINDOWS: call i32 @"?foo@?$C@MF@@QEAAHXZ"(%struct.C
+  C<double, float> c4;
+  c4.foo();
+  // LINUX: call i32 @_ZN1CIdfE3fooEv.ifunc(%struct.C
+  // WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ"(%struct.C
+}
+
+// LINUX: define {{.*}}i32 @_ZN1CIssE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define i32 (%struct.C*)* @_ZN1CIssE3fooEv.resolver
+// LINUX: ret i32 (%struct.C*)* @_ZN1CIssE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C*)* @_ZN1CIssE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ.default.1"
+
+// LINUX: define {{.*}}i32 @_ZN1CIisE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.resolver
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ.default.1"
+
+// LINUX: define i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.resolver
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ.default.1"
+
+// LINUX: define {{.*}}i32 @_ZN1CIdfE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIdfE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ.default.1"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIssE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ.default.1"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIisE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ.default.1"(%struct.C{{.+}})
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -172,6 +172,7 @@
 // CHECK-NEXT: SwiftObjCMembers (SubjectMatchRule_objc_interface)
 // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local)
 // CHECK-NEXT: Target (SubjectMatchRule_function)
+// CHECK-NEXT: TargetClones (SubjectMatchRule_function)
 // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member)
 // CHECK-NEXT: TrivialABI (SubjectMatchRule_record)
 // CHECK-NEXT: Uninitialized (SubjectMatchRule_variable_is_local)
diff --git a/clang/test/Sema/attr-cpuspecific.c b/clang/test/Sema/attr-cpuspecific.c
--- a/clang/test/Sema/attr-cpuspecific.c
+++ b/clang/test/Sema/attr-cpuspecific.c
@@ -88,7 +88,8 @@
 // expected-note@-2 {{previous declaration is here}}
 void __attribute__((cpu_specific(sandybridge))) addtl_attrs2(void);
 
-// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-error@+2 {{'cpu_dispatch' and 'cpu_specific' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
 void __attribute((cpu_specific(sandybridge), cpu_dispatch(atom, sandybridge)))
 combine_attrs(void);
 
diff --git a/clang/test/Sema/attr-target-clones.c b/clang/test/Sema/attr-target-clones.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/attr-target-clones.c
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s
+
+// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones("sse4.2", "arch=sandybridge")))
+no_default(void);
+
+// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target("sse4.2"), target_clones("arch=sandybridge")))
+ignored_attr(void);
+// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_clones("arch=sandybridge,default"), target("sse4.2")))
+ignored_attr2(void);
+
+int redecl(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl3(void);
+int redecl3(void);
+
+int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(void);
+// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default")))
+redecl4(void) { return 1; }
+
+int __attribute__((target("sse4.2"))) redef2(void) { return 1; }
+// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "default"))) redef2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef3'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef4'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+
+// Duplicates are allowed, however they alter name mangling.
+// expected-warning@+2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}}
+// expected-warning@+1 2 {{version list contains duplicate entries}}
+int __attribute__((target_clones("arch=atom,arch=atom", "arch=atom,default")))
+dupes(void) { return 1; }
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("")))
+empty_target_1(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones(",default")))
+empty_target_2(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,")))
+empty_target_3(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default, ,avx2")))
+empty_target_4(void);
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,avx2", "")))
+empty_target_5(void);
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("default", "default")))
+dupe_default(void);
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("avx2,avx2,default")))
+dupe_normal(void);
+
+// expected-error@+2 {{attribute 'target_clones' cannot appear more than once on a declaration}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_clones("avx2,default"), target_clones("arch=atom,default")))
+dupe_normal2(void);
+
+int mv_after_use(void);
+int useage() {
+  return mv_after_use();
+}
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }
+
diff --git a/clang/test/SemaCXX/attr-target-clones.cpp b/clang/test/SemaCXX/attr-target-clones.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/attr-target-clones.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14
+
+// expected-error@+2 {{attribute 'target_clones' multiversioned functions do not yet support function templates}}
+template <typename T>
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 1;}
+
+void uses_lambda() {
+  // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}}
+  auto x = []()__attribute__((target_clones("sse4.2", "arch=ivybridge", "default"))) {};
+  x();
+}