Summary of this patch (review notes; text before the first "diff --git" header
is skipped by git-apply and patch):

* SampleProf.h: adds getCallTargetSum(), which totals the counts stored in
  CallTargets, and updateCallsiteSamples(), which raises a body sample count
  to the call target sum whenever that sum is larger.
* ProfileGenerator.{h,cpp}: adds ProfileGeneratorBase::updateCallsiteSamples()
  and updateFunctionSamples(). The UpdateTotalSamples check moves out of
  updateTotalSamples() and into updateFunctionSamples(), so call-site samples
  are always updated and total samples only when the flag is set. The three
  updateTotalSamples() call sites shown below now call updateFunctionSamples().
* Tests: the call-site line counts in inline-noprobe2.test and
  noinline-noprobe.test are raised to the call target sums, and
  update-samples.test is new.

NOTE(review): this copy was recovered from a whitespace-collapsed paste.
Indentation and the spacing inside the CHECK lines are reconstructed, so
apply with whitespace differences ignored. The context line
"std::unordered_set ProfiledFunctions;" appears to have lost its template
argument list; restore it from the target tree before applying.

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -387,6 +387,13 @@
     return SortCallTargets(CallTargets);
   }
 
+  uint64_t getCallTargetSum() const {
+    uint64_t Sum = 0;
+    for (const auto &I : CallTargets)
+      Sum += I.second;
+    return Sum;
+  }
+
   /// Sort call targets in descending order of call frequency.
   static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
     SortedCallTargetSet SortedTargets;
@@ -779,6 +786,19 @@
     return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
   }
 
+  // Accumulate all call target samples to update the body samples.
+  void updateCallsiteSamples() {
+    for (auto &I : BodySamples) {
+      uint64_t TargetSamples = I.second.getCallTargetSum();
+      // It's possible that the body sample count can be greater than the call
+      // target sum. E.g, if some call targets are external targets, they won't
+      // be considered valid call targets, but the body sample count which is
+      // from lbr ranges can actually include them.
+      if (TargetSamples > I.second.getSamples())
+        I.second.addSamples(TargetSamples - I.second.getSamples());
+    }
+  }
+
   // Accumulate all body samples to set total samples.
   void updateTotalSamples() {
     setTotalSamples(0);
diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
--- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
@@ -59,7 +59,7 @@
 ;CHECK: 6.1: 17
 ;CHECK: 6.3: 17
 ;CHECK: 7: 0
-;CHECK: 8: 0 quick_sort:1
+;CHECK: 8: 1 quick_sort:1
 ;CHECK: 9: 0
 ;CHECK: 11: 0
 ;CHECK: 14: 0
@@ -97,7 +97,7 @@
 ;CHECK: quick_sort:903:25
 ;CHECK: 1: 24
 ;CHECK: 2: 12 partition_pivot_last:7 partition_pivot_first:5
-;CHECK: 3: 11 quick_sort:12
+;CHECK: 3: 12 quick_sort:12
 ;CHECK: 4: 12 quick_sort:12
 ;CHECK: 6: 24
 ;CHECK: 65507: 12
diff --git a/llvm/test/tools/llvm-profgen/noinline-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-noprobe.test
--- a/llvm/test/tools/llvm-profgen/noinline-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-noprobe.test
@@ -11,7 +11,7 @@
 ;CHECK: 0: 0
 ;CHECK: 1: 0
 ;CHECK: 2: 19
-;CHECK: 3: 19 bar:21
+;CHECK: 3: 21 bar:21
 ;CHECK: 4: 0
 ;CHECK: 5: 0
 ;CHECK: bar:926:21
diff --git a/llvm/test/tools/llvm-profgen/update-samples.test b/llvm/test/tools/llvm-profgen/update-samples.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/update-samples.test
@@ -0,0 +1,45 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t1
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CALLSITE
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t2 --update-total-samples=1
+; RUN: FileCheck %s --input-file %t2 --check-prefix=TOTAL
+
+
+;CALLSITE: foo:1241:0
+;CALLSITE: 0: 0
+;CALLSITE: 1: 0
+;CALLSITE: 2: 19
+;CALLSITE: 3: 21 bar:21
+;CALLSITE: 4: 0
+;CALLSITE: 5: 0
+
+;TOTAL: foo:40:0
+;TOTAL: 0: 0
+;TOTAL: 1: 0
+;TOTAL: 2: 19
+;TOTAL: 3: 21 bar:21
+;TOTAL: 4: 0
+;TOTAL: 5: 0
+
+
+; original code:
+; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -100,8 +100,13 @@
   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
                                            const SampleContextFrame &LeafLoc,
                                            uint64_t Count);
+
+  void updateFunctionSamples();
+
   void updateTotalSamples();
 
+  void updateCallsiteSamples();
+
   StringRef getCalleeNameForOffset(uint64_t TargetOffset);
 
   void computeSummaryAndThreshold();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -381,15 +381,26 @@
 }
 
 void ProfileGeneratorBase::updateTotalSamples() {
-  if (!UpdateTotalSamples)
-    return;
-
   for (auto &Item : ProfileMap) {
     FunctionSamples &FunctionProfile = Item.second;
     FunctionProfile.updateTotalSamples();
   }
 }
 
+void ProfileGeneratorBase::updateCallsiteSamples() {
+  for (auto &Item : ProfileMap) {
+    FunctionSamples &FunctionProfile = Item.second;
+    FunctionProfile.updateCallsiteSamples();
+  }
+}
+
+void ProfileGeneratorBase::updateFunctionSamples() {
+  updateCallsiteSamples();
+
+  if (UpdateTotalSamples)
+    updateTotalSamples();
+}
+
 void ProfileGeneratorBase::collectProfiledFunctions() {
   std::unordered_set ProfiledFunctions;
   if (SampleCounters) {
@@ -491,7 +502,7 @@
   // Fill in boundary sample counts as well as call site samples for calls
   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
 
-  updateTotalSamples();
+  updateFunctionSamples();
 }
 
 void ProfileGenerator::generateProbeBasedProfile() {
@@ -505,7 +516,7 @@
   // Fill in boundary sample counts as well as call site samples for calls
   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
 
-  updateTotalSamples();
+  updateFunctionSamples();
 }
 
 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
@@ -785,7 +796,7 @@
   // body sample.
   populateInferredFunctionSamples();
 
-  updateTotalSamples();
+  updateFunctionSamples();
 }
 
 void CSProfileGenerator::populateBodySamplesForFunction(