Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -1877,46 +1877,7 @@ TargetDecl->hasAttr())); FuncAttrs.addAttribute("disable-tail-calls", llvm::toStringRef(DisableTailCalls)); - - // Add target-cpu and target-features attributes to functions. If - // we have a decl for the function and it has a target attribute then - // parse that and add it to the feature set. - StringRef TargetCPU = getTarget().getTargetOpts().CPU; - std::vector Features; - const FunctionDecl *FD = dyn_cast_or_null(TargetDecl); - if (FD && FD->hasAttr()) { - llvm::StringMap FeatureMap; - getFunctionFeatureMap(FeatureMap, FD); - - // Produce the canonical string for this set of features. - for (llvm::StringMap::const_iterator it = FeatureMap.begin(), - ie = FeatureMap.end(); - it != ie; ++it) - Features.push_back((it->second ? "+" : "-") + it->first().str()); - - // Now add the target-cpu and target-features to the function. - // While we populated the feature map above, we still need to - // get and parse the target attribute so we can get the cpu for - // the function. - const auto *TD = FD->getAttr(); - TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.Architecture != "" && - getTarget().isValidCPUName(ParsedAttr.Architecture)) - TargetCPU = ParsedAttr.Architecture; - } else { - // Otherwise just add the existing target cpu and target features to the - // function. 
- Features = getTarget().getTargetOpts().Features; - } - - if (TargetCPU != "") - FuncAttrs.addAttribute("target-cpu", TargetCPU); - if (!Features.empty()) { - std::sort(Features.begin(), Features.end()); - FuncAttrs.addAttribute( - "target-features", - llvm::join(Features, ",")); - } + GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs); } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1273,6 +1273,8 @@ ForDefinition_t IsForDefinition = NotForDefinition); + bool GetCPUAndFeaturesAttributes(const Decl *D, + llvm::AttrBuilder &AttrBuilder); void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); /// Set function attributes for a function declaration. Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -1248,6 +1248,52 @@ GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } +bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D, + llvm::AttrBuilder &Attrs) { + // Add target-cpu and target-features attributes to functions. If + // we have a decl for the function and it has a target attribute then + // parse that and add it to the feature set. + StringRef TargetCPU = getTarget().getTargetOpts().CPU; + std::vector Features; + const auto *FD = dyn_cast_or_null(D); + FD = FD ? FD->getMostRecentDecl() : FD; + const auto *TD = FD ? FD->getAttr() : nullptr; + bool AddedAttr = false; + if (TD) { + llvm::StringMap FeatureMap; + getFunctionFeatureMap(FeatureMap, FD); + + // Produce the canonical string for this set of features. + for (const llvm::StringMap::value_type &Entry : FeatureMap) + Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str()); + + // Now add the target-cpu and target-features to the function. 
+ // While we populated the feature map above, we still need to + // get and parse the target attribute so we can get the cpu for + // the function. + TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + if (ParsedAttr.Architecture != "" && + getTarget().isValidCPUName(ParsedAttr.Architecture)) + TargetCPU = ParsedAttr.Architecture; + } else { + // Otherwise just add the existing target cpu and target features to the + // function. + Features = getTarget().getTargetOpts().Features; + } + + if (TargetCPU != "") { + Attrs.addAttribute("target-cpu", TargetCPU); + AddedAttr = true; + } + if (!Features.empty()) { + std::sort(Features.begin(), Features.end()); + Attrs.addAttribute("target-features", llvm::join(Features, ",")); + AddedAttr = true; + } + + return AddedAttr; +} + void CodeGenModule::setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO) { SetCommonAttributes(D, GO); @@ -1266,6 +1312,16 @@ if (auto *SA = D->getAttr()) if (!D->getAttr()) F->addFnAttr("implicit-section-name", SA->getName()); + + llvm::AttrBuilder Attrs; + if (GetCPUAndFeaturesAttributes(D, Attrs)) { + // We know that GetCPUAndFeaturesAttributes will always have the + // newest set, since it has the newest possible FunctionDecl, so the + // new ones should replace the old. + F->removeFnAttr("target-cpu"); + F->removeFnAttr("target-features"); + F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); + } } if (const SectionAttr *SA = D->getAttr()) Index: test/CodeGen/attr-target-x86.c =================================================================== --- test/CodeGen/attr-target-x86.c +++ test/CodeGen/attr-target-x86.c @@ -21,6 +21,17 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; } +int use_before_def(void); +int useage(void){ + return use_before_def(); +} + +// Adding the attribute to a definition does update it in IR. 
+int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
+  return 5;
+}
+
+
 // Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
 // CHECK: baz{{.*}} #0
 // CHECK: foo{{.*}} #1
@@ -36,6 +47,7 @@
 // CHECK: qax{{.*}} #5
 // CHECK: qq{{.*}} #6
 // CHECK: lake{{.*}} #7
+// CHECK: use_before_def{{.*}} #7
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"