Index: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp +++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp @@ -75,11 +75,14 @@ return None; assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && "We can only get profile count for call/invoke instruction."); - // Check if there is a profile metadata on the instruction. If it is present, - // determine hotness solely based on that. - uint64_t TotalCount; - if (Inst->extractProfTotalWeight(TotalCount)) - return TotalCount; + if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) { + // In sample PGO mode, check if there is a profile metadata on the + // instruction. If it is present, determine hotness solely based on that, + // since the sampled entry count may not be accurate. + uint64_t TotalCount; + if (Inst->extractProfTotalWeight(TotalCount)) + return TotalCount; + } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); return None; Index: llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll =================================================================== --- llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 +; CHECK-NEXT: +; CHECK-NEXT: + +; CHECK: +; COMBINED_NEXT: + + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This function has a high profile count, so the entry block is hot. 
+define void @hot_function(i1 %a, i1 %a2) !prof !20 { +entry: + call void @hot1() + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here + call void @cold() + call void @hot2() + call void @hot4(), !prof !15 + call void @none1() + br label %exit +Hot: ; 999/1000 goes here + call void @hot2() + call void @hot3() + br i1 %a2, label %None1, label %None2, !prof !42 +None1: ; half goes here + call void @none1() + call void @none2() + br label %exit +None2: ; half goes here + call void @none3() + br label %exit +exit: + ret void +} + +declare void @hot1() #1 +declare void @hot2() #1 +declare void @hot3() #1 +declare void @hot4() #1 +declare void @cold() #1 +declare void @none1() #1 +declare void @none2() #1 +declare void @none3() #1 + + +!41 = !{!"branch_weights", i32 1, i32 1000} +!42 = !{!"branch_weights", i32 1, i32 1} + + + +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 110, i64 123} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"branch_weights", i32 100} Index: llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll =================================================================== --- llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll +++ llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -codegenprepare -S | FileCheck %s + +target triple = "x86_64-pc-linux-gnu" + +; This tests that hot/cold functions get correct section prefix assigned + +; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; The entry is hot 
+define void @hot_func() !prof !15 { + ret void +} + +; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] +; The sum of the 2 callsites is hot +define void @hot_call_func() !prof !16 { + call void @hot_func(), !prof !17 + call void @hot_func(), !prof !17 + ret void +} + +; CHECK-NOT: normal_func{{.*}}!section_prefix +; The sum of all callsites is neither hot nor cold +define void @normal_func() !prof !16 { + call void @hot_func(), !prof !17 + call void @hot_func(), !prof !18 + call void @hot_func(), !prof !18 + ret void +} + +; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +; The entry and the callsite are both cold +define void @cold_func() !prof !16 { + call void @hot_func(), !prof !18 + ret void +} + +; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"} +; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"branch_weights", i32 80} +!18 = !{!"branch_weights", i32 1} Index: llvm/trunk/test/Transforms/CodeGenPrepare/section.ll =================================================================== --- llvm/trunk/test/Transforms/CodeGenPrepare/section.ll +++ llvm/trunk/test/Transforms/CodeGenPrepare/section.ll @@ -10,26 +10,26 @@ ret void } -; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] -; The sum of 2 callsites are hot -define void @hot_call_func() !prof !16 { +; For instrumentation based PGO, we should only look at 
entry counts, +; not call site VP metadata (which can exist on value profiled memcpy, +; or possibly left behind after static analysis based devirtualization). +; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +define void @cold_func1() !prof !16 { call void @hot_func(), !prof !17 call void @hot_func(), !prof !17 ret void } -; CHECK-NOT: normal_func{{.*}}!section_prefix -; The sum of all callsites are neither hot or cold -define void @normal_func() !prof !16 { +; CHECK: cold_func2{{.*}}!section_prefix +define void @cold_func2() !prof !16 { call void @hot_func(), !prof !17 call void @hot_func(), !prof !18 call void @hot_func(), !prof !18 ret void } -; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] -; The entry and the callsite are both cold -define void @cold_func() !prof !16 { +; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]] +define void @cold_func3() !prof !16 { call void @hot_func(), !prof !18 ret void } Index: llvm/trunk/test/Transforms/Inline/prof-update.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/prof-update.ll +++ llvm/trunk/test/Transforms/Inline/prof-update.ll @@ -6,21 +6,21 @@ @func = global void ()* null ; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]] -define void @callee(i32 %n) !prof !1 { +define void @callee(i32 %n) !prof !15 { %cond = icmp sle i32 %n, 10 br i1 %cond, label %cond_true, label %cond_false cond_true: ; ext1 is optimized away, thus not updated. ; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] - call void @ext1(), !prof !2 + call void @ext1(), !prof !16 ret void cond_false: ; ext is cloned and updated. 
; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] - call void @ext(), !prof !2 + call void @ext(), !prof !16 %f = load void ()*, void ()** @func ; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] - call void %f(), !prof !4 + call void %f(), !prof !18 ret void } @@ -28,16 +28,29 @@ define void @caller() { ; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] ; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] - call void @callee(i32 15), !prof !3 + call void @callee(i32 15), !prof !17 ret void } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 2000} -!1 = !{!"function_entry_count", i64 1000} -!2 = !{!"branch_weights", i64 2000} -!3 = !{!"branch_weights", i64 400} -!4 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!17 = !{!"branch_weights", i64 400} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} attributes #0 = { alwaysinline } ; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} ; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} Index: llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp =================================================================== --- llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -162,6 +162,12 @@ EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI)); 
EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI)); + + // Test that adding an MD_prof metadata with a hot count on CS2 does not + // change its hotness as it has no effect in instrumented profiling. + MDBuilder MDB(M->getContext()); + CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400})); + EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI)); } TEST_F(ProfileSummaryInfoTest, SampleProf) {