Index: lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- lib/Analysis/ProfileSummaryInfo.cpp +++ lib/Analysis/ProfileSummaryInfo.cpp @@ -39,11 +39,6 @@ cl::desc("A count is cold if it is below the minimum count" " to reach this percentile of total counts.")); -static cl::opt ProfileSampleAccurate( - "profile-sample-accurate", cl::Hidden, cl::init(false), - cl::desc("If the sample profile is accurate, we will mark all un-sampled " - "callsite as cold. Otherwise, treat un-sampled callsites as if " - "we have no profile.")); static cl::opt ProfileSummaryHugeWorkingSetSizeThreshold( "profile-summary-huge-working-set-size-threshold", cl::Hidden, cl::init(15000), cl::ZeroOrMore, @@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) { auto Count = BFI->getBlockProfileCount(BB); - if (Count) - return isColdCount(*Count); - if (!hasSampleProfile()) - return false; - - const Function *F = BB->getParent(); - return ProfileSampleAccurate || - (F && F->hasFnAttribute("profile-sample-accurate")); + return Count && isColdCount(*Count); } bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, @@ -285,11 +273,7 @@ // In SamplePGO, if the caller has been sampled, and there is no profile // annotated on the callsite, we consider the callsite as cold. - // If there is no profile for the caller, and we know the profile is - // accurate, we consider the callsite as cold. 
- return (hasSampleProfile() && - (CS.getCaller()->hasProfileData() || ProfileSampleAccurate || - CS.getCaller()->hasFnAttribute("profile-sample-accurate"))); + return hasSampleProfile() && CS.getCaller()->hasProfileData(); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -123,6 +123,13 @@ cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. ")); +static cl::opt ProfileSampleAccurate( + "profile-sample-accurate", cl::Hidden, cl::init(false), + cl::desc("If the sample profile is accurate, we will mark all un-sampled " + "callsite and function as cold. Otherwise, treat un-sampled " + "callsites and functions as if they are newly added so we have " + "no profile.")); + namespace { using BlockWeightMap = DenseMap; @@ -1604,10 +1611,18 @@ } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - // Initialize the entry count to -1, which will be treated conservatively - // by getEntryCount as the same as unknown (None). If we have samples this - // will be overwritten in emitAnnotations. - F.setEntryCount(ProfileCount(-1, Function::PCT_Real)); + // By default the entry count is initialized to -1, which will be treated + // conservatively by getEntryCount as the same as unknown (None). This is + // to avoid newly added code being treated as cold. If we have samples + // this will be overwritten in emitAnnotations. + // If ProfileSampleAccurate is true or F has the profile-sample-accurate + // attribute, initialize the entry count to 0 so unsampled callsites or + // functions will be treated as cold. + uint64_t initialEntryCount = + (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) + ? 
0 + : -1; + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = Index: test/Transforms/CodeGenPrepare/section-samplepgo.ll =================================================================== --- test/Transforms/CodeGenPrepare/section-samplepgo.ll +++ test/Transforms/CodeGenPrepare/section-samplepgo.ll @@ -1,19 +1,16 @@ ; RUN: opt < %s -codegenprepare -S | FileCheck %s -; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE target triple = "x86_64-pc-linux-gnu" ; This tests that hot/cold functions get correct section prefix assigned ; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] -; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] ; The entry is hot define void @hot_func() !prof !15 { ret void } ; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] -; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] ; The sum of 2 callsites are hot define void @hot_call_func() !prof !16 { call void @hot_func(), !prof !17 @@ -22,7 +19,6 @@ } ; CHECK-NOT: normal_func{{.*}}!section_prefix -; ACCURATE-NOT: normal_func{{.*}}!section_prefix ; The sum of all callsites are neither hot or cold define void @normal_func() !prof !16 { call void @hot_func(), !prof !17 @@ -32,36 +28,12 @@ } ; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] -; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] ; The entry and the callsite are both cold define void @cold_func() !prof !16 { call void @hot_func(), !prof !18 ret void } - -; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix -; The function not appearing in profile is neither hot nor cold -; -; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] -; The function not appearing in profile is cold when -profile-sample-accurate -; is on -define void @foo_not_in_profile() !prof !19 { - call void @hot_func() - ret void -} - -; CHECK: bar_not_in_profile{{.*}}!section_prefix 
![[COLD_ID:[0-9]+]] -; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] -; The function not appearing in profile is cold when the func has -; profile-sample-accurate attribute -define void @bar_not_in_profile() #0 !prof !19 { - call void @hot_func() - ret void -} - -attributes #0 = { "profile-sample-accurate" } - ; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"} ; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} !llvm.module.flags = !{!1} @@ -83,4 +55,3 @@ !16 = !{!"function_entry_count", i64 1} !17 = !{!"branch_weights", i32 80} !18 = !{!"branch_weights", i32 1} -!19 = !{!"function_entry_count", i64 -1} Index: test/Transforms/Inline/inline-cold-callsite-samplepgo.ll =================================================================== --- test/Transforms/Inline/inline-cold-callsite-samplepgo.ll +++ test/Transforms/Inline/inline-cold-callsite-samplepgo.ll @@ -1,47 +0,0 @@ -; For SamplePGO, if -profile-sample-accurate is specified, cold callsite -; heuristics should be honored if the caller has no profile. 
- -; RUN: opt < %s -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s - -define i32 @callee(i32 %x) { - %x1 = add i32 %x, 1 - %x2 = add i32 %x1, 1 - %x3 = add i32 %x2, 1 - call void @extern() - call void @extern() - ret i32 %x3 -} - -define i32 @caller(i32 %y1) { -; CHECK-LABEL: @caller -; CHECK-NOT: call i32 @callee - %y2 = call i32 @callee(i32 %y1) - ret i32 %y2 -} - -define i32 @caller_accurate(i32 %y1) #0 { -; CHECK-LABEL: @caller_accurate -; CHECK: call i32 @callee - %y2 = call i32 @callee(i32 %y1) - ret i32 %y2 -} - -declare void @extern() - -attributes #0 = { "profile-sample-accurate" } - -!llvm.module.flags = !{!1} -!1 = !{i32 1, !"ProfileSummary", !2} -!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} -!3 = !{!"ProfileFormat", !"SampleProfile"} -!4 = !{!"TotalCount", i64 10000} -!5 = !{!"MaxCount", i64 1000} -!6 = !{!"MaxInternalCount", i64 1} -!7 = !{!"MaxFunctionCount", i64 1000} -!8 = !{!"NumCounts", i64 3} -!9 = !{!"NumFunctions", i64 3} -!10 = !{!"DetailedSummary", !11} -!11 = !{!12, !13, !14} -!12 = !{i32 10000, i64 100, i32 1} -!13 = !{i32 999000, i64 100, i32 1} -!14 = !{i32 999999, i64 1, i32 2} Index: test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll =================================================================== --- test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll +++ test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll @@ -0,0 +1,36 @@ +; For SamplePGO, if -profile-sample-accurate is specified, cold callsite +; heuristics should be honored if the caller has no profile. 
+ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE + +define i32 @callee(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @caller(i32 %y1) { +; CHECK-LABEL: @caller +; CHECK-NOT: call i32 @callee +; ACCURATE-LABEL: @caller +; ACCURATE: call i32 @callee + %y2 = call i32 @callee(i32 %y1) + ret i32 %y2 +} + +define i32 @caller_accurate(i32 %y1) #0 { +; CHECK-LABEL: @caller_accurate +; CHECK: call i32 @callee +; ACCURATE-LABEL: @caller_accurate +; ACCURATE: call i32 @callee + %y2 = call i32 @callee(i32 %y1) + ret i32 %y2 +} + +declare void @extern() + +attributes #0 = { "profile-sample-accurate" } Index: test/Transforms/SampleProfile/section-accurate-samplepgo.ll =================================================================== --- test/Transforms/SampleProfile/section-accurate-samplepgo.ll +++ test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE + +target triple = "x86_64-pc-linux-gnu" + +; This test checks that a function without a profile gets the unlikely section prefix if -profile-sample-accurate is specified or the function has the profile-sample-accurate attribute. + +declare void @hot_func() + +; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix +; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +; A function that does not appear in the profile is cold when +; -profile-sample-accurate is on. 
+define void @foo_not_in_profile() { + call void @hot_func() + ret void +} + +; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +; A function that does not appear in the profile is cold when it has the +; profile-sample-accurate attribute. +define void @bar_not_in_profile() #0 { + call void @hot_func() + ret void +} + +attributes #0 = { "profile-sample-accurate" } + +; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2}