diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MathExtras.h"
 #include
+#include
 #include
 #include
 #include
@@ -366,6 +367,16 @@
     return SortCallTargets(CallTargets);
   }
 
+  /// Return the sum of the sample counts of every entry in CallTargets.
+  /// Returns 0 when there are no call targets.
+  uint64_t getCallTargetSamples() const {
+    uint64_t Samples = 0;
+    for (const auto &I : CallTargets) {
+      Samples += I.second;
+    }
+    return Samples;
+  }
+
   /// Sort call targets in descending order of call frequency.
   static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
     SortedCallTargetSet SortedTargets;
@@ -735,6 +746,18 @@
     return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
   }
 
+  // Accumulate all call target samples to update the body samples. For each
+  // body sample record, if the sum of its call target samples exceeds its own
+  // sample count, the difference is passed to addSamples(); records whose
+  // count is already at least that sum are left untouched.
+  void updateCallsiteSamples() {
+    for (auto &I : BodySamples) {
+      uint64_t TargetSamples = I.second.getCallTargetSamples();
+      if (TargetSamples > I.second.getSamples())
+        I.second.addSamples(TargetSamples - I.second.getSamples());
+    }
+  }
+
   // Accumulate all body samples to set total samples.
   void updateTotalSamples() {
     setTotalSamples(0);
diff --git a/llvm/test/tools/llvm-profgen/update-samples.test b/llvm/test/tools/llvm-profgen/update-samples.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/update-samples.test
@@ -0,0 +1,58 @@
+; Check that --update-callsite-samples raises a callsite's body sample count to
+; the sum of its call target samples, and that --update-total-samples then
+; recomputes the function total from the updated body samples.
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t1 --update-callsite-samples=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=NOUPDATE
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t2 --update-callsite-samples=1
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CALLSITE
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t3 --update-callsite-samples=1 --update-total-samples=1
+; RUN: FileCheck %s --input-file %t3 --check-prefix=TOTAL
+
+;NOUPDATE: foo:1241:0
+;NOUPDATE:  0: 0
+;NOUPDATE:  1: 0
+;NOUPDATE:  2: 19
+;NOUPDATE:  3: 19 bar:21
+;NOUPDATE:  4: 0
+;NOUPDATE:  5: 0
+
+
+;CALLSITE: foo:1241:0
+;CALLSITE:  0: 0
+;CALLSITE:  1: 0
+;CALLSITE:  2: 19
+;CALLSITE:  3: 21 bar:21
+;CALLSITE:  4: 0
+;CALLSITE:  5: 0
+
+;TOTAL: foo:40:0
+;TOTAL:  0: 0
+;TOTAL:  1: 0
+;TOTAL:  2: 19
+;TOTAL:  3: 21 bar:21
+;TOTAL:  4: 0
+;TOTAL:  5: 0
+
+
+; original code:
+; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -93,8 +93,17 @@
   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
                                            const SampleContextFrame &LeafLoc,
                                            uint64_t Count);
+
+  // Run the callsite-sample and total-sample updates that are enabled by
+  // their command-line options.
+  void updateSamples();
+
   void updateTotalSamples();
 
+  // Call FunctionSamples::updateCallsiteSamples() on every profile in
+  // ProfileMap.
+  void updateCallsiteSamples();
+
   StringRef getCalleeNameForOffset(uint64_t TargetOffset);
 
   void computeSummaryAndThreshold();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -88,6 +88,12 @@
         "Update total samples by accumulating all its body samples."),
     llvm::cl::Optional);
 
+static cl::opt<bool> UpdateCallsiteSamples(
+    "update-callsite-samples", llvm::cl::init(false),
+    llvm::cl::desc(
+        "Update callsite body samples by summing up all call target samples."),
+    llvm::cl::Optional);
+
 extern cl::opt ProfileSummaryCutoffHot;
 
 static cl::opt GenCSNestedProfile(
@@ -361,15 +367,31 @@
 }
 
 void ProfileGeneratorBase::updateTotalSamples() {
-  if (!UpdateTotalSamples)
-    return;
-
   for (auto &Item : ProfileMap) {
     FunctionSamples &FunctionProfile = Item.second;
     FunctionProfile.updateTotalSamples();
   }
 }
 
+// Apply the callsite body sample update to every function profile in
+// ProfileMap.
+void ProfileGeneratorBase::updateCallsiteSamples() {
+  for (auto &Item : ProfileMap) {
+    FunctionSamples &FunctionProfile = Item.second;
+    FunctionProfile.updateCallsiteSamples();
+  }
+}
+
+// Run the optional sample updates. Callsite samples are updated before total
+// samples, so totals are recomputed from the updated body samples.
+void ProfileGeneratorBase::updateSamples() {
+  if (UpdateCallsiteSamples)
+    updateCallsiteSamples();
+
+  if (UpdateTotalSamples)
+    updateTotalSamples();
+}
+
 void ProfileGeneratorBase::collectProfiledFunctions() {
   std::unordered_set ProfiledFunctions;
   // Go through all the stacks, ranges and branches in sample counters, use the
@@ -456,7 +478,7 @@
   // Fill in boundary sample counts as well as call site samples for calls
   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
 
-  updateTotalSamples();
+  updateSamples();
 }
 
 void ProfileGenerator::generateProbeBasedProfile() {
@@ -471,7 +493,7 @@
   // Fill in boundary sample counts as well as call site samples for calls
   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
 
-  updateTotalSamples();
+  updateSamples();
 }
 
 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
@@ -728,7 +750,7 @@
   // body sample.
   populateInferredFunctionSamples();
 
-  updateTotalSamples();
+  updateSamples();
 }
 
 void CSProfileGenerator::populateBodySamplesForFunction(