diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -13,6 +13,7 @@ #ifndef LLVM_IR_PROFILESUMMARY_H #define LLVM_IR_PROFILESUMMARY_H +#include "llvm/Support/CommandLine.h" #include #include #include diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp --- a/llvm/lib/IR/ProfileSummary.cpp +++ b/llvm/lib/IR/ProfileSummary.cpp @@ -22,6 +22,10 @@ using namespace llvm; +cl::opt UseContextLessSummary( + "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::desc("Merge context profiles before calculating thresholds.")); + // Return an MDTuple with two elements. The first element is a string Key and // the second is a uint64_t Value. static Metadata *getKeyValMD(LLVMContext &Context, const char *Key, diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -44,6 +44,8 @@ using namespace llvm; using namespace sampleprof; +extern cl::opt UseContextLessSummary; + /// Dump the function profile for \p FName. /// /// \param FName Name of the function to print. @@ -1609,8 +1611,17 @@ // For text and GCC file formats, we compute the summary after reading the // profile. Binary format has the profile summary in its header. void SampleProfileReader::computeSummary() { + StringMap ContextLessProfiles; + const StringMap *ProfilesToUse = &Profiles; + if (UseContextLessSummary || + (ProfileIsCS && !UseContextLessSummary.getNumOccurrences())) { + for (const auto &I : Profiles) { + ContextLessProfiles[I.second.getName()].merge(I.second); + } + ProfilesToUse = &ContextLessProfiles; + } SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - for (const auto &I : Profiles) { + for (const auto &I : *ProfilesToUse) { const FunctionSamples &Profile = I.second; Builder.addRecord(Profile); } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -41,6 +41,8 @@ using namespace llvm; using namespace sampleprof; +extern cl::opt UseContextLessSummary; + std::error_code SampleProfileWriter::writeFuncProfiles( const StringMap &ProfileMap) { // Sort the ProfileMap by total samples. @@ -751,6 +753,15 @@ void SampleProfileWriter::computeSummary( const StringMap &ProfileMap) { + StringMap ContextLessProfiles; + const StringMap *ProfilesToUse = &ProfileMap; + if (UseContextLessSummary || (FunctionSamples::ProfileIsCS && + !UseContextLessSummary.getNumOccurrences())) { + for (const auto &I : ProfileMap) { + ContextLessProfiles[I.second.getName()].merge(I.second); + } + ProfilesToUse = &ContextLessProfiles; + } SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); for (const auto &I : ProfileMap) { const FunctionSamples &Profile = I.second; diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -30,7 +30,6 @@ ; INLINE-NEW-LIMIT1-NOT: remark -; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 ; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 ; INLINE-NEW-LIMIT2-NOT: remark diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll copy from llvm/test/Transforms/SampleProfile/csspgo-inline.ll copy to llvm/test/Transforms/SampleProfile/csspgo-summary.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll @@ -1,38 +1,11 @@ -; Test for CSSPGO's new early inliner using priority queue - -; Note that we need new pass manager to enable top-down processing for sample profile loader -; Test we inlined the following in top-down order with old inliner -; main:3 @ _Z5funcAi -; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi -; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; -; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW -; -; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; -; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; -; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob. -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1 -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2 - - -; INLINE-BASE: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 -; INLINE-BASE: remark: merged.cpp:27:11: _Z8funcLeafi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10 -; INLINE-BASE: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 - -; INLINE-NEW: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 -; INLINE-NEW-NOT: remark - -; INLINE-NEW-LIMIT1-NOT: remark - -; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 -; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 -; INLINE-NEW-LIMIT2-NOT: remark +; Test for CSSPGO's profile summary computation with and without pre-merging context profiles + +; RUN: opt < %s -passes=sample-profile,print-profile-summary -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-summary-cutoff-hot=999900 -profile-sample-accurate -profile-summary-contextless=0 -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=SUMMARY-UNMERGED +; RUN: opt < %s -passes=sample-profile,print-profile-summary -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-summary-cutoff-hot=999900 -profile-sample-accurate -profile-summary-contextless=1 -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=SUMMARY-MERGED + +; SUMMARY-UNMERGED: main :hot entry +; SUMMARY-MERGED-NOT: main :hot entry + @factor = dso_local global i32 3, align 4, !dbg !0