diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -342,6 +342,15 @@ : sampleprof_error::success; } + /// Decrease the number of samples for this record by \p S. Return the amout + /// of samples actually decreased. + uint64_t removeSamples(uint64_t S) { + if (S > NumSamples) + S = NumSamples; + NumSamples -= S; + return S; + } + /// Add called function \p F with samples \p S. /// Optionally scale sample count \p S by \p Weight. /// @@ -357,6 +366,18 @@ : sampleprof_error::success; } + /// Remove called function from the call target map. Return the target sample + /// count of the called function. + uint64_t removeCalledTarget(StringRef F) { + uint64_t Count = 0; + auto I = CallTargets.find(F); + if (I != CallTargets.end()) { + Count = I->second; + CallTargets.erase(I); + } + return Count; + } + /// Return true if this sample record contains function calls. bool hasCalls() const { return !CallTargets.empty(); } @@ -704,6 +725,13 @@ : sampleprof_error::success; } + void removeTotalSamples(uint64_t Num) { + if (TotalSamples < Num) + TotalSamples = 0; + else + TotalSamples -= Num; + } + void setTotalSamples(uint64_t Num) { TotalSamples = Num; } sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { @@ -728,6 +756,22 @@ FName, Num, Weight); } + // Remove a call target and decrease the body sample correspondingly. Return + // the number of body samples actually decreased. + uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, + uint32_t Discriminator, + StringRef FName) { + uint64_t Count = 0; + auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (I != BodySamples.end()) { + Count = I->second.removeCalledTarget(FName); + Count = I->second.removeSamples(Count); + if (!I->second.getSamples()) + BodySamples.erase(I); + } + return Count; + } + sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, uint64_t Weight = 1) { SampleRecord S; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -518,6 +518,12 @@ auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc); SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile); NodeProfile->addTotalSamples(ChildProfile->getTotalSamples()); + // Remove the corresponding body sample for the callsite and update the + // total weight. + auto Count = NodeProfile->removeCalledTargetAndBodySample( + ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator, + OrigChildContext.getName()); + NodeProfile->removeTotalSamples(Count); } // Separate child profile to be a standalone profile, if the current parent diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -10,17 +10,16 @@ RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY -RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY -RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST +RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST -; CHECK:main:1968679:12 +; CHECK:main:1968621:12 ; CHECK-NEXT: 2: 24 -; CHECK-NEXT: 3: 28 _Z5funcAi:18 -; CHECK-NEXT: 3.1: 28 _Z5funcBi:30 -; CHECK-NEXT: 3: _Z5funcAi:1467398 +; CHECK-NEXT: 3: 17 _Z5funcAi:7 +; CHECK-NEXT: 3.1: 10 _Z5funcBi:11 +; CHECK-NEXT: 3: _Z5funcAi:1467388 ; CHECK-NEXT: 0: 10 -; CHECK-NEXT: 1: 10 _Z8funcLeafi:11 ; CHECK-NEXT: 3: 24 ; CHECK-NEXT: 1: _Z8funcLeafi:1467299 ; CHECK-NEXT: 0: 6 @@ -30,9 +29,8 @@ ; CHECK-NEXT: 15: 23 ; CHECK-NEXT: !Attributes: 2 ; CHECK-NEXT: !Attributes: 2 -; CHECK-NEXT: 3.1: _Z5funcBi:500973 +; CHECK-NEXT: 3.1: _Z5funcBi:500954 ; CHECK-NEXT: 0: 19 -; CHECK-NEXT: 1: 19 _Z8funcLeafi:20 ; CHECK-NEXT: 3: 12 ; CHECK-NEXT: 1: _Z8funcLeafi:500853 ; CHECK-NEXT: 0: 15 @@ -49,14 +47,12 @@ ; CHECK-NEXT: 1: 13 - -; RECOUNT:main:1968679:12 +; RECOUNT:main:1968621:12 ; RECOUNT-NEXT: 2: 24 -; RECOUNT-NEXT: 3: 28 _Z5funcAi:18 -; RECOUNT-NEXT: 3.1: 28 _Z5funcBi:30 -; RECOUNT-NEXT: 3: _Z5funcAi:1467398 +; RECOUNT-NEXT: 3: 17 _Z5funcAi:7 +; RECOUNT-NEXT: 3.1: 10 _Z5funcBi:11 +; RECOUNT-NEXT: 3: _Z5funcAi:1467388 ; RECOUNT-NEXT: 0: 10 -; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 ; RECOUNT-NEXT: 3: 24 ; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 ; RECOUNT-NEXT: 0: 6 @@ -66,9 +62,8 @@ ; RECOUNT-NEXT: 15: 23 ; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: !Attributes: 6 -; RECOUNT-NEXT: 3.1: _Z5funcBi:500973 +; RECOUNT-NEXT: 3.1: _Z5funcBi:500954 ; RECOUNT-NEXT: 0: 19 -; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20 ; RECOUNT-NEXT: 3: 12 ; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 ; RECOUNT-NEXT: 0: 15 @@ -89,9 +84,8 @@ ; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 ; RECOUNT-NEXT: 15: 34 ; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT:_Z5funcAi:1467398:11 +; RECOUNT-NEXT:_Z5funcAi:1467388:11 ; RECOUNT-NEXT: 0: 10 -; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 ; RECOUNT-NEXT: 3: 24 ; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 ; RECOUNT-NEXT: 0: 6 @@ -101,9 +95,9 @@ ; RECOUNT-NEXT: 15: 23 ; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT:_Z5funcBi:501213:32 +; RECOUNT-NEXT:_Z5funcBi:501194:32 ; RECOUNT-NEXT: 0: 32 -; RECOUNT-NEXT: 1: 32 _Z8funcLeafi:20 +; RECOUNT-NEXT: 1: 13 ; RECOUNT-NEXT: 3: 12 ; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 ; RECOUNT-NEXT: 0: 15 @@ -115,13 +109,12 @@ ; RECOUNT-NEXT: 15: 11 ; RECOUNT-NEXT: !Attributes: 6 -; PROBE:main:1968679:12 +; PROBE:main:1968621:12 ; PROBE-NEXT: 2: 24 -; PROBE-NEXT: 3: 28 _Z5funcAi:18 -; PROBE-NEXT: 3.1: 28 _Z5funcBi:30 -; PROBE-NEXT: 3: _Z5funcAi:1467398 +; PROBE-NEXT: 3: 17 _Z5funcAi:7 +; PROBE-NEXT: 3.1: 10 _Z5funcBi:11 +; PROBE-NEXT: 3: _Z5funcAi:1467388 ; PROBE-NEXT: 0: 10 -; PROBE-NEXT: 1: 10 _Z8funcLeafi:11 ; PROBE-NEXT: 3: 24 ; PROBE-NEXT: 1: _Z8funcLeafi:1467299 ; PROBE-NEXT: 0: 6 @@ -133,9 +126,8 @@ ; PROBE-NEXT: !Attributes: 2 ; PROBE-NEXT: !CFGChecksum: 844530426352218 ; PROBE-NEXT: !Attributes: 2 -; PROBE-NEXT: 3.1: _Z5funcBi:500973 +; PROBE-NEXT: 3.1: _Z5funcBi:500954 ; PROBE-NEXT: 0: 19 -; PROBE-NEXT: 1: 19 _Z8funcLeafi:20 ; PROBE-NEXT: 3: 12 ; PROBE-NEXT: 1: _Z8funcLeafi:500853 ; PROBE-NEXT: 0: 15 @@ -181,3 +173,27 @@ ; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts. ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts. ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts. + + +; SUMMARY-NEST: Total functions: 4 +; SUMMARY-NEST-NEXT: Maximum function count: 32 +; SUMMARY-NEST-NEXT: Maximum block count: 362830 +; SUMMARY-NEST-NEXT: Total number of blocks: 15 +; SUMMARY-NEST-NEXT: Total count: 772504 +; SUMMARY-NEST-NEXT: Detailed summary: +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts. +; SUMMARY-NEST-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts. +; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts. +; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts. +; SUMMARY-NEST-NEXT: 10 blocks with count >= 21 account for 99.99 percentage of the total counts. +; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.999 percentage of the total counts. +; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.9999 percentage of the total counts. diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -70,10 +70,9 @@ ; CHECK-TRIM-NEXT: 1: 14 ; CHECK-TRIM-NEXT: !Attributes: 3 -; CHECK-PREINL-NEST: foo:393:0 +; CHECK-PREINL-NEST: foo:379:0 ; CHECK-PREINL-NEST-NEXT: 2.1: 14 ; CHECK-PREINL-NEST-NEXT: 3: 15 -; CHECK-PREINL-NEST-NEXT: 3.1: 14 bar:14 ; CHECK-PREINL-NEST-NEXT: 3.2: 1 ; CHECK-PREINL-NEST-NEXT: 65526: 14 ; CHECK-PREINL-NEST-NEXT: 3.1: bar:84