Share the -gen-cs-nested-profile option between llvm-profdata, llvm-profgen
and the ProfileData library by defining it once in SampleProf.cpp, and skip
recounting callsite samples in the sample profile summary when that option
and -generate-merged-base-profiles are both enabled.

diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -22,6 +22,9 @@
 
 using namespace llvm;
 
+extern cl::opt<bool> GenerateMergedBaseProfiles;
+extern cl::opt<bool> GenCSNestedProfile;
+
 cl::opt<bool> UseContextLessSummary(
     "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
     cl::desc("Merge context profiles before calculating thresholds."));
@@ -115,9 +118,13 @@
     uint64_t Count = I.second.getSamples();
     addCount(Count);
   }
-  for (const auto &I : FS.getCallsiteSamples())
-    for (const auto &CS : I.second)
-      addRecord(CS.second, true);
+
+  // Do not recount callee samples if they are already merged into their base
+  // profiles. This can happen during the generation of CS nested profile.
+  if (!GenCSNestedProfile || !GenerateMergedBaseProfiles)
+    for (const auto &I : FS.getCallsiteSamples())
+      for (const auto &CS : I.second)
+        addRecord(CS.second, true);
 }
 
 // The argument to this method is a vector of cutoff percentages and the return
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,6 +41,10 @@
     "generate extra base profile for function with all its context "
     "profiles merged into it."));
 
+cl::opt<bool> GenCSNestedProfile(
+    "gen-cs-nested-profile", cl::Hidden, cl::init(false),
+    cl::desc("Generate nested function profiles for CSSPGO"));
+
 namespace llvm {
 namespace sampleprof {
 SampleProfileFormat FunctionSamples::Format;
diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
--- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
+++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
@@ -8,6 +8,9 @@
 RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE
 RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
+RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
+RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY
 
 
 ; CHECK:main:1968679:12
@@ -153,3 +156,28 @@
 
 
 ; PREINLINE: ProfileSummarySection {{.*}} Flags: {context-nested}
+
+
+
+;SUMMARY: Total functions: 4
+;SUMMARY-NEXT: Maximum function count: 32
+;SUMMARY-NEXT: Maximum block count: 362830
+;SUMMARY-NEXT: Total number of blocks: 16
+;SUMMARY-NEXT: Total count: 772562
+;SUMMARY-NEXT: Detailed summary:
+;SUMMARY-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts.
+;SUMMARY-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts.
+;SUMMARY-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts.
+;SUMMARY-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts.
+;SUMMARY-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts.
+;SUMMARY-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts.
+;SUMMARY-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts.
+;SUMMARY-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts.
+;SUMMARY-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts.
+;SUMMARY-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts.
+;SUMMARY-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts.
+;SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts.
+;SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts.
+;SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts.
+;SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts.
+;SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts.
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -961,13 +961,12 @@
       "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
       cl::desc("User specified cold threshold for instr profile which will "
                "override the cold threshold got from profile summary. "));
-  cl::opt<bool> GenCSNestedProfile(
-      "gen-cs-nested-profile", cl::Hidden, cl::init(false),
-      cl::desc("Generate nested function profiles for CSSPGO"));
   cl::opt<std::string> DebugInfoFilename(
       "debug-info", cl::init(""),
       cl::desc("Use the provided debug info to correlate the raw profile."));
 
+  extern llvm::cl::opt<bool> GenCSNestedProfile;
+
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
   WeightedFileVector WeightedInputs;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -88,10 +88,7 @@
     llvm::cl::Optional);
 
 extern cl::opt<int> ProfileSummaryCutoffHot;
-
-static cl::opt<bool> GenCSNestedProfile(
-    "gen-cs-nested-profile", cl::Hidden, cl::init(false),
-    cl::desc("Generate nested function profiles for CSSPGO"));
+extern cl::opt<bool> GenCSNestedProfile;
 
 using namespace llvm;
 using namespace sampleprof;