Index: llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h +++ llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h @@ -92,12 +92,12 @@ bool hasHugeWorkingSetSize(); /// \brief Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); - /// Returns true if \p F has hot function entry or hot call edge. - bool isFunctionHotInCallGraph(const Function *F); + /// Returns true if \p F contains hot code. + bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F has cold function entry. bool isFunctionEntryCold(const Function *F); - /// Returns true if \p F has cold function entry or cold call edge. - bool isFunctionColdInCallGraph(const Function *F); + /// Returns true if \p F contains only cold code. + bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F is a hot function. bool isHotCount(uint64_t C); /// \brief Returns true if count \p C is considered cold. Index: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp +++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp @@ -115,42 +115,62 @@ return FunctionCount && isHotCount(FunctionCount.getValue()); } -/// Returns true if the function's entry or total call edge count is hot. +/// Returns true if the function contains hot code. This can include a hot +/// function entry count, hot basic block, or (in the case of Sample PGO) +/// hot total call edge count. /// If it returns false, it either means it is not hot or it is unknown -/// whether it is hot or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) { +/// (for example, no profile data is available). 
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (isHotCount(FunctionCount.getValue())) return true; - uint64_t TotalCallCount = 0; + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCount(TotalCallCount)) + return true; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isHotCount(TotalCallCount); + if (isHotBB(&BB, &BFI)) + return true; + return false; } -/// Returns true if the function's entry and total call edge count is cold. +/// Returns true if the function only contains cold code. This means that +/// the function entry and blocks are all cold, and (in the case of Sample PGO) +/// the total call edge count is cold. /// If it returns false, it either means it is not cold or it is unknown -/// whether it is cold or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) { +/// (for example, no profile data is available). 
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (!isColdCount(FunctionCount.getValue())) return false; - - uint64_t TotalCallCount = 0; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (!isColdCount(TotalCallCount)) + return false; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isColdCount(TotalCallCount); + if (!isColdBB(&BB, &BFI)) + return false; + return true; } /// Returns true if the function's entry is a cold. If it returns false, it Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp @@ -352,8 +352,6 @@ // Clear per function information. 
InsertedInsts.clear(); PromotedInsts.clear(); - BFI.reset(); - BPI.reset(); ModifiedDT = false; if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { @@ -365,14 +363,16 @@ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); OptSize = F.optForSize(); ProfileSummaryInfo *PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { - if (PSI->isFunctionHotInCallGraph(&F)) + if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); - else if (PSI->isFunctionColdInCallGraph(&F)) + else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) F.setSectionPrefix(".unlikely"); } @@ -652,13 +652,6 @@ if (SameIncomingValueBBs.count(Pred)) return true; - if (!BFI) { - Function &F = *BB->getParent(); - LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); - BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); - } - BlockFrequency PredFreq = BFI->getBlockFreq(Pred); BlockFrequency BBFreq = BFI->getBlockFreq(BB); Index: llvm/trunk/test/Transforms/CodeGenPrepare/section.ll =================================================================== --- llvm/trunk/test/Transforms/CodeGenPrepare/section.ll +++ llvm/trunk/test/Transforms/CodeGenPrepare/section.ll @@ -4,33 +4,59 @@ ; This tests that hot/cold functions get correct section prefix assigned -; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] ; The entry is hot -define void @hot_func() !prof !15 { +define void @hot_func1() !prof !15 { ret void } -; For instrumentation based PGO, we should only look at entry counts, +; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; Entry is cold but inner block is hot +define void @hot_func2(i32 %n) !prof !16 { +entry: + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 0, i32* %i, align 4 + br label 
%for.cond + +for.cond: + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end, !prof !19 + +for.body: + %2 = load i32, i32* %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: + ret void +} + +; For instrumentation based PGO, we should only look at block counts, ; not call site VP metadata (which can exist on value profiled memcpy, ; or possibly left behind after static analysis based devirtualization). ; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] define void @cold_func1() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !17 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !17 ret void } -; CHECK: cold_func2{{.*}}!section_prefix +; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func2() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !18 - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !18 + call void @hot_func1(), !prof !18 ret void } ; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func3() !prof !16 { - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !18 ret void } @@ -55,3 +81,4 @@ !16 = !{!"function_entry_count", i64 1} !17 = !{!"branch_weights", i32 80} !18 = !{!"branch_weights", i32 1} +!19 = !{!"branch_weights", i32 1000, i32 1}