Index: lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- lib/Analysis/ProfileSummaryInfo.cpp +++ lib/Analysis/ProfileSummaryInfo.cpp @@ -39,6 +39,12 @@ cl::desc("A count is cold if it is below the minimum count" " to reach this percentile of total counts.")); +static cl::opt AccurateSampleProfile( + "accurate-sample-profile", cl::Hidden, cl::init(false), + cl::desc("If the sample profile is accurate, we will mark all un-sampled " + "callsite as cold. Otherwise, treat un-sampled callsites as if " + "we have no profile.")); + // Find the minimum count to reach a desired percentile of counts. static uint64_t getMinCountForPercentile(SummaryEntryVector &DS, uint64_t Percentile) { @@ -78,10 +84,12 @@ if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the // instruction. If it is present, determine hotness solely based on that, - // since the sampled entry count may not be accurate. + // since the sampled entry count may not be accurate. If there is no + // profile annotation on the instruction, return None. uint64_t TotalCount; if (Inst->extractProfTotalWeight(TotalCount)) return TotalCount; + return None; } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); @@ -199,7 +207,15 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); - return C && isColdCount(*C); + if (C) + return isColdCount(*C); + + // In SamplePGO, if the caller has been sampled, and there is no profile + // annotated on the callsite, we consider the callsite as cold. + // If there is no profile for the caller, and we know the profile is + // accurate, we consider the callsite as cold. 
+ return (hasSampleProfile() && + (CS.getCaller()->getEntryCount() || AccurateSampleProfile)); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll =================================================================== --- /dev/null +++ test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll @@ -0,0 +1,31 @@ +; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @hot1() #1 { + ret void +} +define void @hot2() #1 { + ret void +} +define void @hot3() #1 { + ret void +} +define void @cold1() #1 { + ret void +} +define void @cold2() #1 { + ret void +} +define void @cold3() #1 { + ret void +} +define void @none1() #1 { + ret void +} +define void @none2() #1 { + ret void +} +define void @none3() #1 { + ret void +} Index: test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll +++ test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll @@ -1,7 +1,7 @@ ; Test to check the callgraph in summary when there is PGO ; RUN: opt -module-summary %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s -; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o +; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED @@ -16,24 +16,26 @@ ; "hot3" ; CHECK-NEXT: -; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: +; op4=none1 op6=hot1 op8=cold1 op10=none2 
op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123 +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: @@ -63,24 +69,19 @@ ; This function have high profile count, so entry block is hot. define void @hot_function(i1 %a, i1 %a2) !prof !20 { entry: - call void @hot1() - br i1 %a, label %Cold, label %Hot, !prof !41 -Cold: ; 1/1000 goes here - call void @cold() - call void @hot2() - call void @hot4(), !prof !15 - call void @none1() - br label %exit -Hot: ; 999/1000 goes here - call void @hot2() - call void @hot3() - br i1 %a2, label %None1, label %None2, !prof !42 -None1: ; half goes here call void @none1() + call void @hot1(), !prof !15 + call void @cold1(), !prof !16 + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here call void @none2() + call void @hot2(), !prof !15 + call void @cold2(), !prof !16 br label %exit -None2: ; half goes here +Hot: ; 999/1000 goes here call void @none3() + call void @hot3(), !prof !15 + call void @cold3(), !prof !16 br label %exit exit: ret void @@ -89,17 +90,14 @@ declare void @hot1() #1 declare void @hot2() #1 declare void @hot3() #1 -declare void @hot4() #1 -declare void @cold() #1 +declare void @cold1() #1 +declare void @cold2() #1 +declare void @cold3() #1 declare void @none1() #1 declare void @none2() #1 declare void @none3() #1 - !41 = !{!"branch_weights", i32 1, i32 1000} -!42 = !{!"branch_weights", i32 1, i32 1} - - !llvm.module.flags = !{!1} !20 = !{!"function_entry_count", i64 110, i64 123} @@ -119,3 +117,4 @@ !13 = !{i32 999000, i64 100, i32 1} !14 = !{i32 999999, i64 1, i32 2} !15 = !{!"branch_weights", i32 100} +!16 = !{!"branch_weights", i32 1} Index: unittests/Analysis/ProfileSummaryInfoTest.cpp =================================================================== --- unittests/Analysis/ProfileSummaryInfoTest.cpp +++ unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -196,14 +196,18 @@ CallSite CS1(BB1->getFirstNonPHI()); auto *CI2 = BB2->getFirstNonPHI(); + // Manually 
attach branch weights metadata to the call instruction. + SmallVector Weights; + Weights.push_back(1000); + MDBuilder MDB(M->getContext()); + CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); CallSite CS2(CI2); - EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI)); - EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI)); + EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI)); + EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI)); // Test that CS2 is considered hot when it gets an MD_prof metadata with // weights that exceed the hot count threshold. - MDBuilder MDB(M->getContext()); CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400})); EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI)); }