Index: include/clang/AST/GlobalDecl.h =================================================================== --- include/clang/AST/GlobalDecl.h +++ include/clang/AST/GlobalDecl.h @@ -34,6 +34,7 @@ /// a VarDecl, a FunctionDecl or a BlockDecl. class GlobalDecl { llvm::PointerIntPair Value; + unsigned MultiVersionIndex = 0; void Init(const Decl *D) { assert(!isa(D) && "Use other ctor with ctor decls!"); @@ -45,7 +46,10 @@ public: GlobalDecl() = default; GlobalDecl(const VarDecl *D) { Init(D);} - GlobalDecl(const FunctionDecl *D) { Init(D); } + GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0) + : MultiVersionIndex(MVIndex) { + Init(D); + } GlobalDecl(const BlockDecl *D) { Init(D); } GlobalDecl(const CapturedDecl *D) { Init(D); } GlobalDecl(const ObjCMethodDecl *D) { Init(D); } @@ -57,6 +61,7 @@ GlobalDecl CanonGD; CanonGD.Value.setPointer(Value.getPointer()->getCanonicalDecl()); CanonGD.Value.setInt(Value.getInt()); + CanonGD.MultiVersionIndex = MultiVersionIndex; return CanonGD; } @@ -73,8 +78,17 @@ return static_cast(Value.getInt()); } + unsigned getMultiVersionIndex() const { + assert(isa(getDecl()) && + !isa(getDecl()) && + !isa(getDecl()) && + "Decl is not a plain FunctionDecl!"); + return MultiVersionIndex; + } + friend bool operator==(const GlobalDecl &LHS, const GlobalDecl &RHS) { - return LHS.Value == RHS.Value; + return LHS.Value == RHS.Value && + LHS.MultiVersionIndex == RHS.MultiVersionIndex; } void *getAsOpaquePtr() const { return Value.getOpaqueValue(); } @@ -90,6 +104,16 @@ Result.Value.setPointer(D); return Result; } + + GlobalDecl getWithMultiVersionIndex(unsigned Index) { + assert(isa(getDecl()) && + !isa(getDecl()) && + !isa(getDecl()) && + "Decl is not a plain FunctionDecl!"); + GlobalDecl Result(*this); + Result.MultiVersionIndex = Index; + return Result; + } }; } // namespace clang Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -863,10 +863,8 @@ let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; let AdditionalMembers = [{ - unsigned ActiveArgIndex = 0; - - IdentifierInfo *getCurCPUName() const { - return *(cpus_begin() + ActiveArgIndex); + IdentifierInfo *getCPUName(unsigned Index) const { + return *(cpus_begin() + Index); } }]; } Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1311,6 +1311,8 @@ void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -877,10 +877,11 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, + unsigned CPUIndex, raw_ostream &Out) { // cpu_specific gets the current name, dispatch gets the resolver. if (Attr) - Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); + Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName()); else Out << ".resolver"; } @@ -948,8 +949,9 @@ if (const auto *FD = dyn_cast(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) - AppendCPUSpecificCPUDispatchMangling( - CGM, FD->getAttr(), Out); + AppendCPUSpecificCPUDispatchMangling(CGM, + FD->getAttr(), + GD.getMultiVersionIndex(), Out); else AppendTargetMangling(CGM, FD->getAttr(), Out); } @@ -1009,26 +1011,6 @@ } } - const auto *FD = dyn_cast(GD.getDecl()); - // Since CPUSpecific can require multiple emits per decl, store the manglings - // separately. - if (FD && - (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { - const auto *SD = FD->getAttr(); - - std::pair SpecCanonicalGD{ - CanonicalGD, - SD ? SD->ActiveArgIndex : std::numeric_limits::max()}; - - auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); - if (FoundName != CPUSpecificMangledDeclNames.end()) - return FoundName->second; - - auto Result = CPUSpecificManglings.insert( - std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD)); - return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first(); - } - auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -2400,6 +2382,19 @@ return CodeGenOpts.OptimizationLevel > 0; } +void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, + llvm::GlobalValue *GV) { + const auto *FD = cast(GD.getDecl()); + + if (FD->isCPUSpecificMultiVersion()) { + auto *Spec = FD->getAttr(); + for (unsigned I = 0; I < Spec->cpus_size(); ++I) + EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); + // Requires multiple emits. + } else + EmitGlobalFunctionDefinition(GD, GV); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); @@ -2407,7 +2402,7 @@ Context.getSourceManager(), "Generating code for declaration"); - if (isa(D)) { + if (const auto *FD = dyn_cast(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. if (!shouldEmitFunction(GD)) @@ -2420,6 +2415,8 @@ ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType())); else if (const auto *DD = dyn_cast(Method)) ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType())); + else if (FD->isMultiVersion()) + EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); @@ -2429,6 +2426,8 @@ return; } + if (FD->isMultiVersion()) + return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } @@ -2500,13 +2499,21 @@ SmallVector Options; const TargetInfo &Target = getTarget(); + unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); - llvm::Constant *Func = GetOrCreateLLVMFunction( - MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, - /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + llvm::Constant *Func = GetGlobalValue(MangledName); + + if (!Func) { + + Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, GD.getWithMultiVersionIndex(Index), + /*ForVTable=*/false, /*DontDefer=*/false, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + } + llvm::SmallVector Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), @@ -2517,6 +2524,7 @@ }), Features.end()); Options.emplace_back(cast(Func), CodeGenFunction::GetX86CpuSupportsMask(Features)); + ++Index; } llvm::sort( @@ -3870,15 +3878,6 @@ AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr()) AddGlobalAnnotations(D, Fn); - - if (D->isCPUSpecificMultiVersion()) { - auto *Spec = D->getAttr(); - // If there is another specific version we need to emit, do so here. - if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) { - ++Spec->ActiveArgIndex; - EmitGlobalFunctionDefinition(GD, nullptr); - } - } } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { @@ -5259,10 +5258,11 @@ ParsedAttr.Features); } else if (const auto *SD = FD->getAttr()) { llvm::SmallVector FeaturesTmp; - Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(), + /*Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(), FeaturesTmp); std::vector Features(FeaturesTmp.begin(), FeaturesTmp.end()); Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); + */// TODO! Re-enable if we can figure out what the current version is. } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); Index: test/CodeGen/attr-cpuspecific.c =================================================================== --- test/CodeGen/attr-cpuspecific.c +++ test/CodeGen/attr-cpuspecific.c @@ -27,7 +27,8 @@ __attribute__((cpu_specific(ivybridge))) void TwoVersions(void){} -// CHECK: define void @TwoVersions.S() #[[S]] +// CHECK: define void @TwoVersions.S() +// TODO: #[[S]] __attribute__((cpu_specific(knl))) void TwoVersions(void){} @@ -36,13 +37,15 @@ __attribute__((cpu_specific(ivybridge, knl))) void TwoVersionsSameAttr(void){} // CHECK: define void @TwoVersionsSameAttr.S() #[[S]] -// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]] +// CHECK: define void @TwoVersionsSameAttr.Z() +// TODO: #[[K]] __attribute__((cpu_specific(atom, ivybridge, knl))) void ThreeVersionsSameAttr(void){} // CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] // CHECK: define void @ThreeVersionsSameAttr.S() #[[S]] -// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]] +// CHECK: define void @ThreeVersionsSameAttr.Z() +// TODO #[[K]] void usages() { SingleVersion(); @@ -96,6 +99,12 @@ // CHECK: ret void ()* @HasGeneric.A // CHECK-NOT: call void @llvm.trap -// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" +__attribute__((cpu_specific(atom, ivybridge))) void HasGeneric(void) {} +// CHECK: define void @HasGeneric.O() +// TODO: #[[O]] +// CHECK: define void @HasGeneric.S() +// TODO: #[[S]] + +// XCHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// XCHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// XCHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"