diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -413,6 +413,8 @@ ContextNone = 0x0, ContextWasInlined = 0x1, // Leaf of context was inlined in previous build ContextShouldBeInlined = 0x2, // Leaf of context should be inlined + ContextDuplicatedIntoBase = + 0x4, // Leaf of context is duplicated into the base profile }; // Represents a context frame with function name and line location diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -110,6 +110,11 @@ NumFunctions++; if (FS.getHeadSamples() > MaxFunctionCount) MaxFunctionCount = FS.getHeadSamples(); + } else { + // Do not recount callee samples if they are already merged into their base + // profiles. This can happen to CS nested profile. + if (FS.getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase)) + return; } for (const auto &I : FS.getBodySamples()) { uint64_t Count = I.second.getSamples(); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -531,8 +531,14 @@ // thus done optionally. It is seen that duplicating context profiles into // base profiles improves the code quality for thinlto build by allowing a // profile in the prelink phase for to-be-fully-inlined functions. - if (!NodeProfile || GenerateMergedBaseProfiles) + if (!NodeProfile) { ProfileMap[ChildProfile->getContext()].merge(*ChildProfile); + } else if (GenerateMergedBaseProfiles) { + ProfileMap[ChildProfile->getContext()].merge(*ChildProfile); + auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc); + SamplesMap[ChildProfile->getName().str()].getContext().setAttribute( + ContextDuplicatedIntoBase); + } // Contexts coming with a `ContextShouldBeInlined` attribute indicate this // is a preinliner-computed profile. diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -6,8 +6,12 @@ RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 -RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT +RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY ; CHECK:main:1968679:12 @@ -60,8 +64,8 @@ ; RECOUNT-NEXT: 3: 287884 ; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 ; RECOUNT-NEXT: 15: 23 -; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 6 +; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: 3.1: _Z5funcBi:500973 ; RECOUNT-NEXT: 0: 19 ; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20 @@ -74,8 +78,8 @@ ; RECOUNT-NEXT: 10: 23324 ; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 ; RECOUNT-NEXT: 15: 11 -; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 6 +; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT:_Z8funcLeafi:1968152:31 ; RECOUNT-NEXT: 0: 21 ; RECOUNT-NEXT: 1: 21 @@ -95,7 +99,7 @@ ; RECOUNT-NEXT: 3: 287884 ; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 ; RECOUNT-NEXT: 15: 23 -; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: !Attributes: 2 ; RECOUNT-NEXT:_Z5funcBi:501213:32 ; RECOUNT-NEXT: 0: 32 @@ -109,7 +113,7 @@ ; RECOUNT-NEXT: 10: 23324 ; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 ; RECOUNT-NEXT: 15: 11 -; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 6 ; PROBE:main:1968679:12 ; PROBE-NEXT: 2: 24 @@ -153,3 +157,27 @@ ; PREINLINE: ProfileSummarySection {{.*}} Flags: {context-nested} + + +; SUMMARY: Total functions: 4 +; SUMMARY-NEXT: Maximum function count: 32 +; SUMMARY-NEXT: Maximum block count: 362830 +; SUMMARY-NEXT: Total number of blocks: 16 +; SUMMARY-NEXT: Total count: 772562 +; SUMMARY-NEXT: Detailed summary: +; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts. +; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts. +; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts. +; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts. +; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts. +; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts. +; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts. +; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts. +; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts. +; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts. +; SUMMARY-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts. +; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts. +; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts. +; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts. +; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts. +; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts. diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -65,4 +65,4 @@ ; CHECK-PREINL-NEST-NEXT: 65526: 14 ; CHECK-PREINL-NEST-NEXT: 3.1: bar:84 ; CHECK-PREINL-NEST-NEXT: 1: 14 -; CHECK-PREINL-NEST-NEXT: !Attributes: 3 +; CHECK-PREINL-NEST-NEXT: !Attributes: 7