Index: clang/include/clang/AST/Attr.h =================================================================== --- clang/include/clang/AST/Attr.h +++ clang/include/clang/AST/Attr.h @@ -334,11 +334,17 @@ struct ParsedTargetAttr { std::vector Features; StringRef Architecture; + StringRef Tune; StringRef BranchProtection; bool DuplicateArchitecture = false; + bool DuplicateTune = false; bool operator ==(const ParsedTargetAttr &Other) const { return DuplicateArchitecture == Other.DuplicateArchitecture && - Architecture == Other.Architecture && Features == Other.Features; + DuplicateTune == Other.DuplicateTune && + Architecture == Other.Architecture && + Tune == Other.Tune && + BranchProtection == Other.BranchProtection && + Features == Other.Features; } }; Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -2346,11 +2346,10 @@ // accepting it weirdly. Feature = Feature.trim(); - // We don't support cpu tuning this way currently. // TODO: Support the fpmath option. It will require checking // overall feature validity for the function with the rest of the // attributes on the function. 
- if (Feature.startswith("fpmath=") || Feature.startswith("tune=")) + if (Feature.startswith("fpmath=")) continue; if (Feature.startswith("branch-protection=")) { @@ -2364,6 +2363,11 @@ Ret.DuplicateArchitecture = true; else Ret.Architecture = Feature.split("=").second.trim(); + } else if (Feature.startswith("tune=")) { + if (!Ret.Tune.empty()) + Ret.DuplicateTune = true; + else + Ret.Tune = Feature.split("=").second.trim(); } else if (Feature.startswith("no-")) Ret.Features.push_back("-" + Feature.split("-").second.str()); else Index: clang/include/clang/Basic/AttrDocs.td =================================================================== --- clang/include/clang/Basic/AttrDocs.td +++ clang/include/clang/Basic/AttrDocs.td @@ -1898,6 +1898,9 @@ of the feature, as well as ``arch="CPU"`` which will change the default "CPU" for the function. +For X86, the attribute also allows ``tune="CPU"`` to optimize the generated +code for the given CPU without changing the available instructions. + For AArch64, the attribute also allows the "branch-protection=" option, where the permissible arguments and their effect on code generation are the same as for the command-line option ``-mbranch-protection``. 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2829,8 +2829,9 @@ def err_invalid_branch_protection_spec : Error< "invalid or misplaced branch protection specification '%0'">; def warn_unsupported_target_attribute - : Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in" - " the 'target' attribute string; 'target' attribute ignored">, + : Warning<"%select{unsupported|duplicate}0%select{| architecture| tune}1 " + "'%2' in the 'target' attribute string; 'target' attribute " + "ignored">, InGroup; def err_attribute_unsupported : Error<"%0 attribute is not supported for this target">; Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1148,6 +1148,11 @@ return true; } + /// Determine whether this TargetInfo supports tune in target attribute. + virtual bool supportsTargetAttributeTune() const { + return false; + } + /// Use the specified ABI. /// /// \return False on error (invalid ABI name). Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -296,6 +296,10 @@ return ""; } + bool supportsTargetAttributeTune() const override { + return true; + } + bool isValidCPUName(StringRef Name) const override { bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; return llvm::X86::parseArchX86(Name, Only64Bit) != llvm::X86::CK_None; Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1770,9 +1770,14 @@ // the function. 
if (TD) { ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.Architecture != "" && - getTarget().isValidCPUName(ParsedAttr.Architecture)) + if (!ParsedAttr.Architecture.empty() && + getTarget().isValidCPUName(ParsedAttr.Architecture)) { TargetCPU = ParsedAttr.Architecture; + TuneCPU = ""; // Clear the tune CPU. + } + if (!ParsedAttr.Tune.empty() && + getTarget().isValidCPUName(ParsedAttr.Tune)) + TuneCPU = ParsedAttr.Tune; } } else { // Otherwise just add the existing target cpu and target features to the @@ -1780,11 +1785,11 @@ Features = getTarget().getTargetOpts().Features; } - if (TargetCPU != "") { + if (!TargetCPU.empty()) { Attrs.addAttribute("target-cpu", TargetCPU); AddedAttr = true; } - if (TuneCPU != "") { + if (!TuneCPU.empty()) { Attrs.addAttribute("tune-cpu", TuneCPU); AddedAttr = true; } @@ -1826,6 +1831,7 @@ // new ones should replace the old. F->removeFnAttr("target-cpu"); F->removeFnAttr("target-features"); + F->removeFnAttr("tune-cpu"); F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); } } Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -3063,11 +3063,16 @@ // handled later in the process, once we know how many exist. bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { enum FirstParam { Unsupported, Duplicate }; - enum SecondParam { None, Architecture }; - for (auto Str : {"tune=", "fpmath="}) - if (AttrStr.find(Str) != StringRef::npos) - return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << Str; + enum SecondParam { None, Architecture, Tune }; + if (AttrStr.find("fpmath=") != StringRef::npos) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "fpmath="; + + // Diagnose use of tune if target doesn't support it. 
+ if (!Context.getTargetInfo().supportsTargetAttributeTune() && + AttrStr.find("tune=") != StringRef::npos) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "tune="; ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr); @@ -3076,9 +3081,17 @@ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << Architecture << ParsedAttrs.Architecture; + if (!ParsedAttrs.Tune.empty() && + !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << Tune << ParsedAttrs.Tune; + if (ParsedAttrs.DuplicateArchitecture) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Duplicate << None << "arch="; + if (ParsedAttrs.DuplicateTune) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Duplicate << None << "tune="; for (const auto &Feature : ParsedAttrs.Features) { auto CurFeature = StringRef(Feature).drop_front(); // remove + or -. Index: clang/test/CodeGen/attr-target-x86.c =================================================================== --- clang/test/CodeGen/attr-target-x86.c +++ clang/test/CodeGen/attr-target-x86.c @@ -1,10 +1,9 @@ -// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 -emit-llvm %s -o - | FileCheck %s int baz(int a) { return 4; } int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; } -int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; } int __attribute__((target("fpmath=387"))) koala(int a) { return 4; } int __attribute__((target("no-sse2"))) echidna(int a) { return 4; } @@ -31,12 +30,11 @@ return 5; } +int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; } // Check that we emit the additional subtarget and cpu features for foo and not for baz or bar. 
// CHECK: baz{{.*}} #0 // CHECK: foo{{.*}} #1 -// We ignore the tune attribute so walrus should be identical to baz and bar. -// CHECK: walrus{{.*}} #0 // We're currently ignoring the fpmath attribute so koala should be identical to baz and bar. // CHECK: koala{{.*}} #0 // CHECK: echidna{{.*}} #2 @@ -48,11 +46,16 @@ // CHECK: qq{{.*}} #6 // CHECK: lake{{.*}} #7 // CHECK: use_before_def{{.*}} #7 -// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" +// CHECK: walrus{{.*}} #8 +// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop" -// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" +// CHECK-NOT: tune-cpu +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop" 
"tune-cpu"="i686" +// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +// CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" +// CHECK-NOT: tune-cpu +// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge" Index: clang/test/Sema/attr-target.c =================================================================== --- clang/test/Sema/attr-target.c +++ clang/test/Sema/attr-target.c @@ -1,9 +1,12 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s + +#ifdef __x86_64__ int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo() { return 4; } //expected-error@+1 {{'target' attribute takes one argument}} int __attribute__((target())) bar() { return 4; } -//expected-warning@+1 {{unsupported 'tune=' in the 'target' attribute string; 'target' attribute ignored}} +// no warning, tune is supported for x86 int __attribute__((target("tune=sandybridge"))) baz() { return 4; } //expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}} int __attribute__((target("fpmath=387"))) walrus() { return 4; } @@ -19,4 +22,13 @@ int 
__attribute__((target("arch=ivybridge,arch=haswell"))) oak_tree() { return 4; } //expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}} int __attribute__((target("branch-protection=none"))) birch_tree() { return 5; } +//expected-warning@+1 {{unsupported tune 'hiss' in the 'target' attribute string; 'target' attribute ignored}} +int __attribute__((target("tune=hiss,tune=woof"))) apple_tree() { return 4; } + +#else + +// tune is not supported by other targets. +//expected-warning@+1 {{unsupported 'tune=' in the 'target' attribute string; 'target' attribute ignored}} +int __attribute__((target("tune=hiss"))) baz() { return 4; } +#endif