diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1944,6 +1944,8 @@ FuncAttrs.addAttribute(llvm::Attribute::NoReturn); if (TargetDecl->hasAttr()) FuncAttrs.addAttribute(llvm::Attribute::Cold); + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute(llvm::Attribute::Hot); if (TargetDecl->hasAttr()) FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr()) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1744,7 +1744,8 @@ B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } - + if (D->hasAttr()) + B.addAttribute(llvm::Attribute::Hot); if (D->hasAttr()) B.addAttribute(llvm::Attribute::MinSize); } diff --git a/clang/test/CodeGen/attributes.c b/clang/test/CodeGen/attributes.c --- a/clang/test/CodeGen/attributes.c +++ b/clang/test/CodeGen/attributes.c @@ -63,6 +63,13 @@ // CHECK: call void @t71() [[COLDSITE:#[0-9]+]] // CHECK: declare void @t71() [[COLDDECL:#[0-9]+]] +// CHECK: define void @t82() [[HOTDEF:#[0-9]+]] { +void t81(void) __attribute__((hot)); +void t82() __attribute__((hot)); +void t82() { t81(); } +// CHECK: call void @t81() [[HOTSITE:#[0-9]+]] +// CHECK: declare void @t81() [[HOTDECL:#[0-9]+]] + // CHECK: define void @t10() [[NUW]] section "xSECT" { void t10(void) __attribute__((section("xSECT"))); void t10(void) {} @@ -72,6 +79,9 @@ // CHECK: define i32 @t19() [[NUW]] { extern int t19(void) __attribute__((weak_import)); int t19(void) { +// RUN: %clang_cc1 -emit-llvm -fcf-protection=branch -triple i386-linux-gnu -o %t %s +// RUN: %clang_cc1 -emit-llvm -fcf-protection=branch -triple i386-linux-gnu -o %t %s +// RUN: %clang_cc1 -emit-llvm -fcf-protection=branch -triple i386-linux-gnu -o %t %s return 10; } @@ -111,6 +121,9 @@ // CHECK: attributes [[NR]] = { noinline noreturn 
nounwind{{.*}} } // CHECK: attributes [[COLDDEF]] = { cold {{.*}}} // CHECK: attributes [[COLDDECL]] = { cold {{.*}}} +// CHECK: attributes [[HOTDEF]] = { hot {{.*}}} +// CHECK: attributes [[HOTDECL]] = { hot {{.*}}} // CHECK: attributes [[NOCF_CHECK_FUNC]] = { nocf_check {{.*}}} // CHECK: attributes [[COLDSITE]] = { cold {{.*}}} +// CHECK: attributes [[HOTSITE]] = { hot {{.*}}} // CHECK: attributes [[NOCF_CHECK_CALL]] = { nocf_check } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1496,6 +1496,15 @@ can prove that the function does not execute any convergent operations. Similarly, the optimizer may remove ``convergent`` on calls/invokes when it can prove that the call/invoke cannot call a convergent function. +``hot`` + This attribute indicates that this function is a hot spot of the program + execution. The function will be optimized more aggressively and will be + placed into a special subsection of the text section to improve locality. + + When profile feedback is enabled, this attribute takes precedence over + the profile information. By marking a function ``hot``, users can work + around the cases where the training input does not have good coverage + of all the hot functions. ``inaccessiblememonly`` This attribute indicates that the function may only access memory that is not accessible by the module being compiled. 
This is a weaker form diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -654,6 +654,7 @@ ATTR_KIND_BYREF = 69, ATTR_KIND_MUSTPROGRESS = 70, ATTR_KIND_NO_CALLBACK = 71, + ATTR_KIND_HOT = 72, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -63,6 +63,9 @@ /// Can only be moved to control-equivalent blocks. def Convergent : EnumAttr<"convergent">; +/// Marks function as being in a hot path and frequently called. +def Hot: EnumAttr<"hot">; + /// Pointer is known to be dereferenceable. def Dereferenceable : IntAttr<"dereferenceable">; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1340,6 +1340,7 @@ case lltok::kw_argmemonly: B.addAttribute(Attribute::ArgMemOnly); break; case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; + case lltok::kw_hot: B.addAttribute(Attribute::Hot); break; case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; case lltok::kw_inaccessiblememonly: B.addAttribute(Attribute::InaccessibleMemOnly); break; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1539,6 +1539,8 @@ return Attribute::ByRef; case bitc::ATTR_KIND_MUSTPROGRESS: return Attribute::MustProgress; + case bitc::ATTR_KIND_HOT: + return Attribute::Hot; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp 
@@ -626,6 +626,8 @@ return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::Hot: + return bitc::ATTR_KIND_HOT; case Attribute::InaccessibleMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; case Attribute::InaccessibleMemOrArgMemOnly: diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -472,9 +472,17 @@ PSI = &getAnalysis().getPSI(); OptSize = F.hasOptSize(); if (ProfileGuidedSectionPrefix) { - if (PSI->isFunctionHotInCallGraph(&F, *BFI)) + // The hot attribute overrides profile count based hotness while profile + // count based hotness overrides the cold attribute. + // This is a conservative behavior. + if (F.hasFnAttribute(Attribute::Hot) || + PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix("hot"); - else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) + // If PSI shows this function is not hot, we will place the function + // into the unlikely section if (1) PSI shows this is a cold function, or + // (2) the function has the cold attribute. 
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI) || + F.hasFnAttribute(Attribute::Cold)) F.setSectionPrefix("unlikely"); else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && PSI->isFunctionHotnessUnknown(F)) diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -449,6 +449,8 @@ return "zeroext"; if (hasAttribute(Attribute::Cold)) return "cold"; + if (hasAttribute(Attribute::Hot)) + return "hot"; if (hasAttribute(Attribute::ImmArg)) return "immarg"; if (hasAttribute(Attribute::NoUndef)) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1626,6 +1626,7 @@ case Attribute::Builtin: case Attribute::NoBuiltin: case Attribute::Cold: + case Attribute::Hot: case Attribute::OptForFuzzing: case Attribute::OptimizeNone: case Attribute::JumpTable: diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1921,6 +1921,17 @@ << "\n"); } for (auto &F : ColdFunctions) { + // Only set when there is no Attribute::Hot set by the user. For Hot + // attribute, user's annotation has the precedence over the profile. 
+ if (F->hasFnAttribute(Attribute::Hot)) { + auto &Ctx = M.getContext(); + std::string Msg = std::string("Function ") + F->getName().str() + + std::string(" is annotated as a hot function but" + " the profile is cold"); + Ctx.diagnose( + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); + continue; + } F->addFnAttr(Attribute::Cold); LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -410,6 +410,18 @@ ret void } +; CHECK: define void @f70() #43 +define void @f70() cold +{ + ret void +} + +; CHECK: define void @f71() #44 +define void @f71() hot +{ + ret void +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } @@ -453,4 +465,6 @@ ; CHECK: attributes #40 = { null_pointer_is_valid } ; CHECK: attributes #41 = { mustprogress } ; CHECK: attributes #42 = { nocallback } +; CHECK: attributes #43 = { cold } +; CHECK: attributes #44 = { hot } ; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin } diff --git a/llvm/test/CodeGen/X86/hot-unlikely-section-prefix.ll b/llvm/test/CodeGen/X86/hot-unlikely-section-prefix.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/hot-unlikely-section-prefix.ll @@ -0,0 +1,101 @@ +; Test hot or unlikely section postfix based on profile and user annotation. 
+; RUN: llc < %s | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: inlinehint norecurse nounwind readnone uwtable +define dso_local i32 @hot1() #0 !prof !31 { +entry: + ret i32 1 +} +; CHECK: .section .text.hot.,"ax",@progbits +; CHECK: .globl hot1 + +; Function Attrs: cold norecurse nounwind readnone uwtable +define dso_local i32 @cold1() #1 !prof !32 { +entry: + ret i32 1 +} +; CHECK: .section .text.unlikely.,"ax",@progbits +; CHECK: .globl cold1 + +; Function Attrs: cold inlinehint noinline norecurse nounwind optsize readnone uwtable +define dso_local i32 @hot2() #2 !prof !31 { +entry: + ret i32 1 +} +; CHECK: .section .text.hot.,"ax",@progbits +; CHECK: .globl hot2 + +define dso_local i32 @normal() { +entry: + ret i32 1 +} +; CHECK: text +; CHECK: .globl normal + +; Function Attrs: hot noinline norecurse nounwind readnone uwtable +define dso_local i32 @hot3() #3 !prof !32 { +entry: + ret i32 1 +} +; CHECK: .section .text.hot.,"ax",@progbits +; CHECK: .globl hot3 + +; Function Attrs: cold noinline norecurse nounwind optsize readnone uwtable +define dso_local i32 @cold2() #4 { +entry: + ret i32 1 +} +; CHECK: .section .text.unlikely.,"ax",@progbits +; CHECK: .globl cold2 + +; Function Attrs: hot noinline norecurse nounwind readnone uwtable +define dso_local i32 @hot4() #3 { +entry: + ret i32 1 +} +; CHECK: .section .text.hot.,"ax",@progbits +; CHECK: .globl hot4 + +attributes #0 = { inlinehint norecurse nounwind readnone uwtable } +attributes #1 = { cold norecurse nounwind readnone uwtable } +attributes #2 = { cold inlinehint noinline norecurse nounwind optsize readnone uwtable } +attributes #3 = { hot noinline norecurse nounwind readnone uwtable } +attributes #4 = { cold noinline norecurse nounwind optsize readnone uwtable } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!30} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", 
!"InstrProf"} +!4 = !{!"TotalCount", i64 402020} +!5 = !{!"MaxCount", i64 200000} +!6 = !{!"MaxInternalCount", i64 2000} +!7 = !{!"MaxFunctionCount", i64 200000} +!8 = !{!"NumCounts", i64 7} +!9 = !{!"NumFunctions", i64 5} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} +!14 = !{i32 10000, i64 200000, i32 1} +!15 = !{i32 100000, i64 200000, i32 1} +!16 = !{i32 200000, i64 200000, i32 1} +!17 = !{i32 300000, i64 200000, i32 1} +!18 = !{i32 400000, i64 200000, i32 1} +!19 = !{i32 500000, i64 100000, i32 3} +!20 = !{i32 600000, i64 100000, i32 3} +!21 = !{i32 700000, i64 100000, i32 3} +!22 = !{i32 800000, i64 100000, i32 3} +!23 = !{i32 900000, i64 100000, i32 3} +!24 = !{i32 950000, i64 100000, i32 3} +!25 = !{i32 990000, i64 100000, i32 3} +!26 = !{i32 999000, i64 2000, i32 4} +!27 = !{i32 999900, i64 2000, i32 4} +!28 = !{i32 999990, i64 10, i32 6} +!29 = !{i32 999999, i64 10, i32 6} +!30 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 53c5fdd59a5cf7fbb4dcb7a7e84c9c4a40d32a84)"} +!31 = !{!"function_entry_count", i64 100000} +!32 = !{!"function_entry_count", i64 10} diff --git a/llvm/test/MC/AsmParser/function_hot_attr.ll b/llvm/test/MC/AsmParser/function_hot_attr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AsmParser/function_hot_attr.ll @@ -0,0 +1,13 @@ +; Test hot function attribute +; RUN: llc < %s | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: hot noinline norecurse nounwind readnone uwtable +define dso_local i32 @hot4() #0 { +entry: + ret i32 1 +} +; CHECK: .section .text.hot.,"ax",@progbits +; CHECK: .globl hot4 + +attributes #0 = { hot noinline norecurse nounwind readnone uwtable }