Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -2225,6 +2225,9 @@ /// True if this function is a multiversioned processor specific function as a /// part of the cpu_specific/cpu_dispatch functionality. bool isCPUSpecificMultiVersion() const; + /// True if this function is a multiversioned function specified with the + /// attribute target_clones. + bool isTargetClonesMultiVersion() const; void setPreviousDeclaration(FunctionDecl * PrevDecl); Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -2022,6 +2022,31 @@ }]; } +def TargetClones : InheritableAttr { + let Spellings = [GCC<"target_clones">]; + let Args = [VariadicStringArgument<"featuresStrs">]; + let Documentation = [TargetClonesDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let AdditionalMembers = [{ + mutable unsigned ActiveArgIndex = 0; + void AdvanceActiveArgIndex() const { + ++ActiveArgIndex; + while(ActiveArgIndex < featuresStrs_size()) { + if (std::find(featuresStrs_begin(), + featuresStrs_begin() + ActiveArgIndex, + *(featuresStrs_begin() + ActiveArgIndex)) + == (featuresStrs_begin() + ActiveArgIndex)) + return; + ++ActiveArgIndex; + } + } + + StringRef getCurFeatureStr() const { + return *(featuresStrs_begin() + ActiveArgIndex); + } + }]; +} + def MinVectorWidth : InheritableAttr { let Spellings = [Clang<"min_vector_width">]; let Args = [UnsignedArgument<"VectorWidth">]; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -1594,6 +1594,40 @@ }]; } +def TargetClonesDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the ``target_clones("OPTIONS")`` attribute. 
This attribute may be +attached to a function declaration and causes function multiversioning, where +multiple versions of the function will be emitted with different code +generation options. Additionally, these versions will be resolved at runtime +based on the priority of their attribute options. All ``target_clones`` functions +are considered multiversioned functions. + +All multiversioned functions must contain a ``default`` (fallback) +implementation, otherwise usages of the function are considered invalid. +Additionally, a function may not become multiversioned after its first use. + +The options to ``target_clones`` can either be a target-specific architecture +(specified as ``arch=CPU``), or one of a list of subtarget features. + +Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2", +"avx", "xop" and largely correspond to the machine specific options handled by +the front end. + + +Note that unlike the ``target`` syntax, every option listed creates a new +version, disregarding whether it is split on a comma inside or outside a string. +The following will emit 4 versions of the function. + + .. 
code-block:: c++ + + __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default"))) + void foo() {} + + }]; +} + def MinVectorWidthDocs : Documentation { let Category = DocCatFunction; let Content = [{ Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -2502,7 +2502,8 @@ "%0 attribute requires OpenCL version %1%select{| or above}2">; def warn_unsupported_target_attribute : Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in" - " the 'target' attribute string; 'target' attribute ignored">, + " the '%select{target|target_clones}3' attribute string; " + "'%select{target|target_clones}3' attribute ignored">, InGroup; def err_attribute_unsupported : Error<"%0 attribute is not supported for this target">; @@ -9425,8 +9426,8 @@ def err_multiversion_noproto : Error< "multiversioned function must have a prototype">; def err_multiversion_no_other_attrs : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined" - " with other attributes">; + "attribute '%select{target|cpu_specific|cpu_dispatch|target_clones}0'" + " multiversioning cannot be combined with other attributes">; def err_multiversion_diff : Error< "multiversioned function declaration has a different %select{calling convention" "|return type|constexpr specification|inline specification|storage class|" @@ -9453,6 +9454,11 @@ "body of cpu_dispatch function will be ignored">, InGroup; +def err_target_clone_must_have_default : Error < + "'target_clones' multiversioning requires a default target">; +def err_target_clone_doesnt_match : Error < + "'target_clones' attribute does not match previous declaration">; + // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< "cannot deduce return type of 'operator<=>' because type '%0' was not 
found; " Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -3363,6 +3363,9 @@ SourceLocation *ArgLocation = nullptr); bool checkSectionName(SourceLocation LiteralLoc, StringRef Str); bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); + bool checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str, + bool &HasDefault, + SmallVectorImpl &Strings); bool checkMSInheritanceAttrOnDefinition( CXXRecordDecl *RD, SourceRange Range, bool BestCase, MSInheritanceAttr::Spelling SemanticSpelling); Index: lib/AST/Decl.cpp =================================================================== --- lib/AST/Decl.cpp +++ lib/AST/Decl.cpp @@ -2928,6 +2928,9 @@ bool FunctionDecl::isCPUSpecificMultiVersion() const { return isMultiVersion() && hasAttr(); } +bool FunctionDecl::isTargetClonesMultiVersion() const { + return isMultiVersion() && hasAttr(); +} void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -4313,6 +4313,27 @@ ArrayRef Options); static uint32_t GetX86CpuSupportsMask(ArrayRef FeatureStrs); + struct TargetClonesMultiVersionResolverOption { + llvm::Function *Function; + StringRef FeatureStr; + unsigned Priority; + TargetClonesMultiVersionResolverOption(const TargetInfo &TargInfo, + llvm::Function *F, StringRef Feature) + : Function(F), FeatureStr(Feature), Priority(0u) { + if (FeatureStr.startswith("arch=")) + Priority = TargInfo.multiVersionSortPriority( + FeatureStr.drop_front(sizeof("arch=") - 1)); + else if (FeatureStr != "default") + Priority = TargInfo.multiVersionSortPriority(FeatureStr); + } + bool operator>(const TargetClonesMultiVersionResolverOption &Other) const { + return Priority > Other.Priority; + } + }; + void 
EmitTargetClonesMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef Options); + private: QualType getVarArgType(const Expr *Arg); @@ -4332,6 +4353,8 @@ llvm::Value *EmitX86CpuInit(); llvm::Value * FormResolverCondition(const TargetMultiVersionResolverOption &RO); + llvm::Value * + FormResolverCondition(const TargetClonesMultiVersionResolverOption &RO); }; inline DominatingLLVMValue::saved_type Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -2382,6 +2382,17 @@ return TrueCondition; } +llvm::Value *CodeGenFunction::FormResolverCondition( + const TargetClonesMultiVersionResolverOption &RO) { + if (RO.FeatureStr.startswith("arch=")) + return EmitX86CpuIs(RO.FeatureStr.drop_front(sizeof("arch=") - 1)); + + if (RO.FeatureStr == "default") + return nullptr; + + return EmitX86CpuSupports(RO.FeatureStr); +} + void CodeGenFunction::EmitTargetMultiVersionResolver( llvm::Function *Resolver, ArrayRef Options) { @@ -2456,6 +2467,36 @@ Builder.ClearInsertionPoint(); } +void CodeGenFunction::EmitTargetClonesMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef Options) { + assert((getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86 || + getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) && + "Only implemented for x86 targets"); + llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + Builder.SetInsertPoint(CurBlock); + EmitX86CpuInit(); + + for (const TargetClonesMultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + llvm::Value *TrueCondition = FormResolverCondition(RO); + + if (TrueCondition) { + llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver); + llvm::IRBuilder<> RetBuilder(RetBlock); + RetBuilder.CreateRet(RO.Function); + CurBlock = createBasicBlock("ro_else", Resolver); + Builder.CreateCondBr(TrueCondition, 
RetBlock, CurBlock); + } else { + // Emit the default version and end emission. + Builder.CreateRet(RO.Function); + return; + } + } +} + llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) { if (CGDebugInfo *DI = getDebugInfo()) return DI->SourceLocToDebugLoc(Location); Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1314,6 +1314,7 @@ void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void emitCPUDispatchDefinition(GlobalDecl GD); + void EmitTargetClonesResolver(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -917,6 +917,19 @@ } } +static void AppendTargetClonesMangling(const CodeGenModule &CGM, + const TargetClonesAttr *Attr, + raw_ostream &Out) { + Out << '.'; + StringRef FeatureStr = Attr->getCurFeatureStr(); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; + + Out << '.' << Attr->ActiveArgIndex; +} + static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -950,6 +963,8 @@ if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) AppendCPUSpecificCPUDispatchMangling( CGM, FD->getAttr(), Out); + else if (FD->isTargetClonesMultiVersion()) + AppendTargetClonesMangling(CGM, FD->getAttr(), Out); else AppendTargetMangling(CGM, FD->getAttr(), Out); } @@ -1013,12 +1028,19 @@ // Since CPUSpecific can require multiple emits per decl, store the manglings // separately. 
if (FD && - (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { + (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion() || + FD->isTargetClonesMultiVersion())) { const auto *SD = FD->getAttr(); + const auto *TC = FD->getAttr(); - std::pair SpecCanonicalGD{ - CanonicalGD, - SD ? SD->ActiveArgIndex : std::numeric_limits::max()}; + unsigned VersionID = std::numeric_limits::max(); + + if (SD) + VersionID = SD->ActiveArgIndex; + else if (TC) + VersionID = TC->ActiveArgIndex; + + std::pair SpecCanonicalGD{CanonicalGD, VersionID}; auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); if (FoundName != CPUSpecificMangledDeclNames.end()) @@ -1376,9 +1398,10 @@ const auto *FD = dyn_cast_or_null(D); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr() : nullptr; - const auto *SD = FD ? FD->getAttr() : nullptr; bool AddedAttr = false; - if (TD || SD) { + + if (FD && (TD || FD->hasAttr() || + FD->hasAttr())) { llvm::StringMap FeatureMap; getFunctionFeatureMap(FeatureMap, FD); @@ -2111,6 +2134,9 @@ if (Global->hasAttr()) return emitCPUDispatchDefinition(GD); + if (Global->hasAttr() || Global->hasAttr()) + return EmitGlobalFunctionDefinition(GD, nullptr); + // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { @@ -2526,6 +2552,51 @@ CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options); } +void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) { + const auto *FD = cast(GD.getDecl()); + assert(FD && "Not a FunctionDecl?"); + const auto *ClonesAttr = FD->getAttr(); + assert(ClonesAttr && "Not a target_clones Function?"); + llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + + // Force emission of the IFunc. 
+ GetOrCreateMultiVersionIFunc(GD, DeclTy, FD); + + StringRef MangledName = + getMangledNameImpl(*this, GD, FD, /*OmitMVMangling*/ true); + std::string ResolverName = (MangledName + ".resolver").str(); + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + auto *ResolverFunc = cast( + GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, + /*ForVTable=*/false)); + + SmallVector + Options; + const TargetInfo &Target = getTarget(); + + ClonesAttr->ActiveArgIndex = 0; + while (ClonesAttr->ActiveArgIndex < ClonesAttr->featuresStrs_size()) { + std::string MangledName = getMangledName(GD); + llvm::Constant *Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + + Options.emplace_back(Target, cast(Func), + ClonesAttr->getCurFeatureStr()); + ClonesAttr->AdvanceActiveArgIndex(); + } + ClonesAttr->ActiveArgIndex = 0; + + std::stable_sort( + Options.begin(), Options.end(), + std::greater()); + CodeGenFunction CGF(*this); + CGF.EmitTargetClonesMultiVersionResolver(ResolverFunc, Options); +} + /// If an ifunc for the specified mangled name is not in the module, create and /// return an llvm IFunc Function with the specified type. llvm::Constant * @@ -2540,7 +2611,8 @@ // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). 
- if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) + if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion() && + !FD->isTargetClonesMultiVersion()) MultiVersionFuncs.push_back(GD); std::string ResolverName = MangledName + ".resolver"; @@ -3877,6 +3949,16 @@ ++Spec->ActiveArgIndex; EmitGlobalFunctionDefinition(GD, nullptr); } + } else if (D->isTargetClonesMultiVersion()) { + auto *Clone = D->getAttr(); + // If there is another specific version we need to emit, do so here. Then, + // the last thing we do is emit the resolver. + Clone->AdvanceActiveArgIndex(); + if (Clone->ActiveArgIndex < Clone->featuresStrs_size()) + EmitGlobalFunctionDefinition(GD, nullptr); + else + EmitTargetClonesResolver(GD); + Clone->ActiveArgIndex = 0; } } @@ -5264,6 +5346,15 @@ FeaturesTmp); std::vector Features(FeaturesTmp.begin(), FeaturesTmp.end()); Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); + } else if (const auto *Clones = FD->getAttr()) { + StringRef Cur = Clones->getCurFeatureStr(); + std::vector Features; + + if (Cur.startswith("arch=")) + TargetCPU = Cur.substr(sizeof("arch=") - 1); + else if (Cur != "default") + Features.push_back((StringRef{"+"} + Cur).str()); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -9327,7 +9327,7 @@ } namespace MultiVersioning { -enum Type { None, Target, CPUSpecific, CPUDispatch}; +enum Type { None, Target, CPUSpecific, CPUDispatch, TargetClones }; } // MultiVersionType static MultiVersioning::Type @@ -9338,6 +9338,8 @@ return MultiVersioning::CPUDispatch; if (FD->hasAttr()) return MultiVersioning::CPUSpecific; + if (FD->hasAttr()) + return MultiVersioning::TargetClones; return MultiVersioning::None; } /// Check the 
target attribute of the function for MultiVersion @@ -9399,9 +9401,7 @@ Linkage = 5 }; - bool IsCPUSpecificCPUDispatchMVType = - MVType == MultiVersioning::CPUDispatch || - MVType == MultiVersioning::CPUSpecific; + unsigned MVTypeForDiag = MVType - 1; if (OldFD && !OldFD->getType()->getAs()) { S.Diag(OldFD->getLocation(), diag::err_multiversion_noproto); @@ -9424,48 +9424,48 @@ if (CausesMV && OldFD && std::distance(OldFD->attr_begin(), OldFD->attr_end()) != 1) { S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; + << MVTypeForDiag; S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); return true; } - if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) != 1) + if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) > 1) return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; + << MVTypeForDiag; if (NewFD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << FuncTemplates; + << MVTypeForDiag << FuncTemplates; if (const auto *NewCXXFD = dyn_cast(NewFD)) { if (NewCXXFD->isVirtual()) return S.Diag(NewCXXFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << VirtFuncs; + << MVTypeForDiag << VirtFuncs; if (const auto *NewCXXCtor = dyn_cast(NewFD)) return S.Diag(NewCXXCtor->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << Constructors; + << MVTypeForDiag << Constructors; if (const auto *NewCXXDtor = dyn_cast(NewFD)) return S.Diag(NewCXXDtor->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << Destructors; + << MVTypeForDiag << Destructors; } if (NewFD->isDeleted()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DeletedFuncs; + << 
MVTypeForDiag << DeletedFuncs; if (NewFD->isDefaulted()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DefaultedFuncs; + << MVTypeForDiag << DefaultedFuncs; if (NewFD->isConstexpr() && (MVType == MultiVersioning::CPUDispatch || MVType == MultiVersioning::CPUSpecific)) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << ConstexprFuncs; + << MVTypeForDiag << ConstexprFuncs; QualType NewQType = S.getASTContext().getCanonicalType(NewFD->getType()); const auto *NewType = cast(NewQType); @@ -9473,7 +9473,7 @@ if (NewReturnType->isUndeducedType()) return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType << DeducedReturn; + << MVTypeForDiag << DeducedReturn; // Only allow transition to MultiVersion if it hasn't been used. if (OldFD && CausesMV && OldFD->isUsed(false)) @@ -9628,15 +9628,18 @@ Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, MultiVersioning::Type NewMVType, const TargetAttr *NewTA, const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, - LookupResult &Previous) { + const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl, + bool &MergeTypeWithPrevious, LookupResult &Previous) { MultiVersioning::Type OldMVType = getMultiVersionType(OldFD); + // Disallow mixing of multiversioning types. 
- if ((OldMVType == MultiVersioning::Target && - NewMVType != MultiVersioning::Target) || - (NewMVType == MultiVersioning::Target && - OldMVType != MultiVersioning::Target)) { + if (OldMVType != MultiVersioning::None && + NewMVType != MultiVersioning::None && OldMVType != NewMVType && + !(OldMVType == MultiVersioning::CPUDispatch && + NewMVType == MultiVersioning::CPUSpecific) && + !(OldMVType == MultiVersioning::CPUSpecific && + NewMVType == MultiVersioning::CPUDispatch)) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); S.Diag(OldFD->getLocation(), diag::note_previous_declaration); NewFD->setInvalidDecl(); @@ -9678,6 +9681,25 @@ NewFD->setInvalidDecl(); return true; } + } else if (NewMVType == MultiVersioning::TargetClones) { + const auto *CurClones = CurFD->getAttr(); + Redeclaration = true; + OldDecl = CurFD; + MergeTypeWithPrevious = true; + NewFD->setIsMultiVersion(); + + if (CurClones && NewClones && + (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() || + !std::equal(CurClones->featuresStrs_begin(), + CurClones->featuresStrs_end(), + NewClones->featuresStrs_begin()))) { + S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match); + S.Diag(CurFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + + return false; } else { const auto *CurCPUSpec = CurFD->getAttr(); const auto *CurCPUDisp = CurFD->getAttr(); @@ -9760,7 +9782,6 @@ return false; } - /// Check the validity of a mulitversion function declaration. /// Also sets the multiversion'ness' of the function itself. /// @@ -9774,10 +9795,12 @@ const auto *NewTA = NewFD->getAttr(); const auto *NewCPUDisp = NewFD->getAttr(); const auto *NewCPUSpec = NewFD->getAttr(); + const auto *NewTargetClones = NewFD->getAttr(); // Mixing Multiversioning types is prohibited. 
- if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) || - (NewCPUDisp && NewCPUSpec)) { + if ((static_cast(NewTA) + static_cast(NewCPUDisp) + + static_cast(NewCPUSpec) + static_cast(NewTargetClones)) > + 1) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); NewFD->setInvalidDecl(); return true; @@ -9788,9 +9811,8 @@ // Main isn't allowed to become a multiversion function, however it IS // permitted to have 'main' be marked with the 'target' optimization hint. if (NewFD->isMain()) { - if ((MVType == MultiVersioning::Target && NewTA->isDefaultVersion()) || - MVType == MultiVersioning::CPUDispatch || - MVType == MultiVersioning::CPUSpecific) { + if (MVType != MultiVersioning::None && + (MVType != MultiVersioning::Target || NewTA->isDefaultVersion())) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main); NewFD->setInvalidDecl(); return true; @@ -9814,7 +9836,10 @@ if (!OldFD->isMultiVersion() && MVType == MultiVersioning::None) return false; - if (OldFD->isMultiVersion() && MVType == MultiVersioning::None) { + // MultiVersioned redeclarations aren't allowed to omit the attribute except + // for target_clones. + if (OldFD->isMultiVersion() && MVType == MultiVersioning::None && + getMultiVersionType(OldFD) != MultiVersioning::TargetClones) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (getMultiVersionType(OldFD) != MultiVersioning::Target); NewFD->setInvalidDecl(); @@ -9826,8 +9851,18 @@ return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); - // Previous declarations lack CPUDispatch/CPUSpecific. 
- if (!OldFD->isMultiVersion()) { + + if (!OldFD->isMultiVersion() && MVType == MultiVersioning::TargetClones) { + if (OldFD->isUsed(false)) { + NewFD->setInvalidDecl(); + return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used); + } + OldFD->setIsMultiVersion(); + } + + // Check if previous declarations lack CPUDispatch/CPUSpecific. + if (!OldFD->isMultiVersion() && (MVType == MultiVersioning::CPUDispatch || + MVType == MultiVersioning::CPUSpecific)) { S.Diag(OldFD->getLocation(), diag::err_multiversion_required_in_redecl) << 1; S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); @@ -9839,8 +9874,8 @@ // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. return CheckMultiVersionAdditionalDecl( - S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration, - OldDecl, MergeTypeWithPrevious, Previous); + S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewTargetClones, + Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); } /// Perform semantic checking of a new function declaration. 
Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -2963,33 +2963,39 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { enum FirstParam { Unsupported, Duplicate }; enum SecondParam { None, Architecture }; + enum FourthParam { Target, TargetClones }; for (auto Str : {"tune=", "fpmath="}) if (AttrStr.find(Str) != StringRef::npos) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Str; + << Unsupported << None << Str << Target; TargetAttr::ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr); if (!ParsedAttrs.Architecture.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << Architecture << ParsedAttrs.Architecture; + << Unsupported << Architecture << ParsedAttrs.Architecture << Target; if (ParsedAttrs.DuplicateArchitecture) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "arch="; + << Duplicate << None << "arch=" << Target; for (const auto &Feature : ParsedAttrs.Features) { auto CurFeature = StringRef(Feature).drop_front(); // remove + or -. 
if (!Context.getTargetInfo().isValidFeatureName(CurFeature)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature; + << Unsupported << None << CurFeature << Target; } return false; } static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (checkAttrMutualExclusion(S, D, AL) || + checkAttrMutualExclusion(S, D, AL) || + checkAttrMutualExclusion(S, D, AL)) + return; + StringRef Str; SourceLocation LiteralLoc; if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) || @@ -3002,6 +3008,78 @@ D->addAttr(NewAttr); } +bool Sema::checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str, + bool &HasDefault, + SmallVectorImpl &Strings) { + enum FirstParam { Unsupported, Duplicate }; + enum SecondParam { None, Architecture }; + enum FourthParam { Target, TargetClones }; + + // Warn on empty at the beginning of a string. + if (Str.size() == 0 || Str[0] == ',') + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + + while (Str.size() != 0) { + // Remove the comma we found last time through. 
+ if (Str[0] == ',') + Str = Str.substr(1); + + StringRef Cur{Str.data(), std::min(Str.find(','), Str.size())}; + Str = Str.substr(Cur.size()); + Cur = Cur.trim(); + + if (Cur.startswith("arch=")) { + if (!Context.getTargetInfo().isValidCPUName( + Cur.drop_front(sizeof("arch=") - 1))) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << Architecture + << Cur.drop_front(sizeof("arch=") - 1) << TargetClones; + } else if (Cur == "default") { + HasDefault = true; + continue; + } else if (!Context.getTargetInfo().isValidFeatureName(Cur)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Cur << TargetClones; + + Strings.push_back(Cur); + } + + return false; +} + +static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (checkAttrMutualExclusion(S, D, AL) || + checkAttrMutualExclusion(S, D, AL) || + checkAttrMutualExclusion(S, D, AL)) + return; + + SmallVector Strings; + bool HasDefault = false; + + for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { + StringRef CurStr; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) || + S.checkTargetClonesAttr(LiteralLoc, CurStr, HasDefault, Strings)) + return; + } + + if (!HasDefault) { + S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default); + return; + } + + Strings.push_back("default"); + + FunctionDecl *FD = cast(D); + FD->setIsMultiVersion(true); + unsigned Index = AL.getAttributeSpellingListIndex(); + TargetClonesAttr *NewAttr = ::new (S.Context) TargetClonesAttr( + AL.getRange(), S.Context, Strings.data(), Strings.size(), Index); + D->addAttr(NewAttr); +} + static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) { Expr *E = AL.getArgAsExpr(0); uint32_t VecWidth; @@ -6308,6 +6386,9 @@ case ParsedAttr::AT_Target: handleTargetAttr(S, D, AL); break; + case ParsedAttr::AT_TargetClones: + handleTargetClonesAttr(S, D, AL); + break; case 
ParsedAttr::AT_MinVectorWidth: handleMinVectorWidthAttr(S, D, AL); break; Index: test/CodeGen/attr-cpuspecific.c =================================================================== --- test/CodeGen/attr-cpuspecific.c +++ test/CodeGen/attr-cpuspecific.c @@ -11,8 +11,10 @@ void SingleVersion(void){} // CHECK: define void @SingleVersion.S() #[[S:[0-9]+]] -__attribute__((cpu_specific(ivybridge))) -void NotCalled(void){} +__attribute__((cpu_specific(ivybridge))) inline void InlineSingleVersion(void) {} +// CHECK: define available_externally void @InlineSingleVersion.S() #[[S:[0-9]+]] + +__attribute__((cpu_specific(ivybridge))) void NotCalled(void) {} // CHECK: define void @NotCalled.S() #[[S]] // Done before any of the implementations. Index: test/CodeGen/attr-target-clones.c =================================================================== --- /dev/null +++ test/CodeGen/attr-target-clones.c @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver +// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver +// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver +// CHECK: @unused.ifunc = ifunc void (), void ()* ()* @unused.resolver + +int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; } +// CHECK: define i32 @foo.sse4.2.0() +// CHECK: define i32 @foo.default.1() +// CHECK: define i32 ()* @foo.resolver() +// CHECK: ret i32 ()* @foo.sse4.2.0 +// CHECK: ret i32 ()* @foo.default.1 + +int bar() { + // CHECK: define i32 @bar() + return foo(); + // CHECK: call i32 @foo.ifunc() +} + +inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2"))) +foo_inline(void) { return 0; } +// CHECK: define available_externally i32 @foo_inline.arch_sandybridge.0() #[[SB:[0-9]+]] +// CHECK: define available_externally i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]] +// CHECK: define available_externally i32 
@foo_inline.default.2() #[[DEF:[0-9]+]] +// CHECK: define i32 ()* @foo_inline.resolver() +// CHECK: ret i32 ()* @foo_inline.arch_sandybridge.0 +// CHECK: ret i32 ()* @foo_inline.sse4.2.1 +// CHECK: ret i32 ()* @foo_inline.default.2 + +int bar2() { + // CHECK: define i32 @bar2() + return foo_inline(); + // CHECK: call i32 @foo_inline.ifunc() +} + +inline __attribute__((target_clones("default,default ,sse4.2"))) void foo_decls(void) {} +// CHECK: define available_externally void @foo_decls.sse4.2.0() +// CHECK: define available_externally void @foo_decls.default.1() +// CHECK: define void ()* @foo_decls.resolver() +// CHECK: ret void ()* @foo_decls.sse4.2.0 +// CHECK: ret void ()* @foo_decls.default.1 + +void bar3() { + // CHECK: define void @bar3() + foo_decls(); + // CHECK: call void @foo_decls.ifunc() +} + +void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {} +// CHECK: define void @unused.arch_ivybridge.0() +// CHECK: define void @unused.default.1() +// CHECK: define void ()* @unused.resolver() +// CHECK: ret void ()* @unused.arch_ivybridge.0 +// CHECK: ret void ()* @unused.default.1 + +// CHECK: attributes #[[SSE42]] = +// CHECK-SAME: "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK: attributes #[[DEF]] = +// CHECK-SAME: "target-features"="+mmx,+sse,+sse2,+x87" +// CHECK: attributes #[[SB]] = +// CHECK-SAME: "target-features"="+aes,+avx,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" Index: test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- test/Misc/pragma-attribute-supported-attributes-list.test +++ test/Misc/pragma-attribute-supported-attributes-list.test @@ -2,7 +2,7 @@ // The number of supported attributes should never go down! 
-// CHECK: #pragma clang attribute supports 128 attributes: +// CHECK: #pragma clang attribute supports 129 attributes: // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) @@ -120,6 +120,7 @@ // CHECK-NEXT: SwiftIndirectResult (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local) // CHECK-NEXT: Target (SubjectMatchRule_function) +// CHECK-NEXT: TargetClones (SubjectMatchRule_function) // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member) // CHECK-NEXT: TrivialABI (SubjectMatchRule_record) // CHECK-NEXT: VecReturn (SubjectMatchRule_record) Index: test/Sema/attr-target-clones.c =================================================================== --- /dev/null +++ test/Sema/attr-target-clones.c @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s + +// expected-error@+1 {{'target_clones' multiversioning requires a default target}} +void __attribute__((target_clones("sse4.2", "arch=sandybridge"))) +no_default(void); + +// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target("sse4.2"), target_clones("arch=sandybridge"))) +ignored_attr(void); +// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target_clones("arch=sandybridge,default"), target("sse4.2"))) +ignored_attr2(void); + +int redecl(void); +int __attribute__((target_clones("sse4.2", "default"))) redecl(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl2(void); +int __attribute__((target_clones("sse4.2", "default"))) redecl2(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl3(void); +int redecl3(void); + 
+int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(void); +// expected-error@+3 {{'target_clones' attribute does not match previous declaration}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default"))) +redecl4(void) { return 1; } + +int __attribute__((target("sse4.2"))) redef2(void) { return 1; } +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("sse4.2", "default"))) redef2(void) { return 1; } + +int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; } +// expected-error@+2 {{redefinition of 'redef3'}} +// expected-note@-2 {{previous definition is here}} +int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; } + +int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; } +// expected-error@+2 {{redefinition of 'redef4'}} +// expected-note@-2 {{previous definition is here}} +int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; } + +// No error here... duplicates are allowed because they alter name mangling. 
+int __attribute__((target_clones("arch=atom,arch=atom", "arch=atom,default"))) +dupes(void) { return 1; } + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(""))) +empty_target_1(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(",default"))) +empty_target_2(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,"))) +empty_target_3(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default, ,avx2"))) +empty_target_4(void); + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,avx2", ""))) +empty_target_5(void); + +int mv_after_use(void); +int useage() { + return mv_after_use(); +} +// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} +int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }