Index: include/llvm/ProfileData/ProfileCommon.h
===================================================================
--- include/llvm/ProfileData/ProfileCommon.h
+++ include/llvm/ProfileData/ProfileCommon.h
@@ -36,6 +36,7 @@
 class Metadata;
 class MDTuple;
 class MDNode;
+class Module;
 
 inline const char *getHotSectionPrefix() { return ".hot"; }
 inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }
@@ -68,6 +69,11 @@
   // appears in the profile. The map is kept sorted in the descending order of
   // counts.
   std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies;
+  // Compute profile summary for a module.
+  static ProfileSummary *computeProfileSummary(Module *M);
+  // Cache of last seen module and its profile summary.
+  static std::pair<Module *, std::unique_ptr<ProfileSummary>> CachedSummary;
+
 protected:
   SummaryEntryVector DetailedSummary;
   std::vector<uint32_t> DetailedSummaryCutoffs;
@@ -86,14 +92,12 @@
       : PSK(K), DetailedSummary(DetailedSummary), TotalCount(TotalCount),
         MaxCount(MaxCount), MaxFunctionCount(MaxFunctionCount),
         NumCounts(NumCounts), NumFunctions(NumFunctions) {}
-  ~ProfileSummary() = default;
   inline void addCount(uint64_t Count);
   /// \brief Return metadata specific to the profile format.
   /// Derived classes implement this method to return a vector of Metadata.
   virtual std::vector<Metadata *> getFormatSpecificMD(LLVMContext &Context) = 0;
   /// \brief Return detailed summary as metadata.
   Metadata *getDetailedSummaryMD(LLVMContext &Context);
-
 public:
   static const int Scale = 1000000;
   Kind getKind() const { return PSK; }
@@ -112,6 +116,12 @@
   static ProfileSummary *getFromMD(Metadata *MD);
   uint32_t getNumFunctions() { return NumFunctions; }
   uint64_t getMaxFunctionCount() { return MaxFunctionCount; }
+  /// \brief Get profile summary associated with module \p M
+  /// Returns nullptr if \p M is null or no summary can be built for it. The
+  /// returned pointer is owned by the single-entry cache (CachedSummary).
+  static inline ProfileSummary *getProfileSummary(Module *M);
+  /// Virtual: CachedSummary destroys derived summaries through this base type.
+  virtual ~ProfileSummary() = default;
 };
 
 class InstrProfSummary final : public ProfileSummary {
@@ -181,5 +191,24 @@
   return DetailedSummary;
 }
 
+ProfileSummary *ProfileSummary::getProfileSummary(Module *M) {
+  if (!M)
+    return nullptr;
+  // Computing profile summary for a module involves parsing a fairly large
+  // metadata and could be expensive. We use a simple cache of the last seen
+  // module and its profile summary.
+  if (CachedSummary.first != M) {
+    auto *Summary = computeProfileSummary(M);
+    // Do not cache if the summary is empty. This is because a later pass
+    // (sample profile loader, for example) could attach the summary metadata on
+    // the module.
+    if (!Summary)
+      return nullptr;
+    CachedSummary.first = M;
+    CachedSummary.second.reset(Summary);
+  }
+  return CachedSummary.second.get();
+}
+
 } // end namespace llvm
 #endif
Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -591,10 +592,11 @@
   // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
uint64_t FunctionCount = 0, MaxFunctionCount = 0; bool HasPGOCounts = false; - if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) { + ProfileSummary *PS = ProfileSummary::getProfileSummary(Callee.getParent()); + if (Callee.getEntryCount() && PS) { HasPGOCounts = true; FunctionCount = Callee.getEntryCount().getValue(); - MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); + MaxFunctionCount = PS->getMaxFunctionCount(); } // Listen to the inlinehint attribute or profile based hotness information Index: lib/ProfileData/ProfileSummary.cpp =================================================================== --- lib/ProfileData/ProfileSummary.cpp +++ lib/ProfileData/ProfileSummary.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/ProfileCommon.h" @@ -32,6 +33,9 @@ 900000, 950000, 990000, 999000, 999900, 999990, 999999}); const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; +std::pair> + ProfileSummary::CachedSummary; + void InstrProfSummary::addRecord(const InstrProfRecord &R) { addEntryCount(R.Counts[0]); for (size_t I = 1, E = R.Counts.size(); I < E; ++I) @@ -362,3 +366,10 @@ else return nullptr; } + +ProfileSummary *ProfileSummary::computeProfileSummary(Module *M) { + Metadata *MD = M->getProfileSummary(); + if (!MD) + return nullptr; + return getFromMD(MD); +} Index: test/Transforms/Inline/inline-cold-callee.ll =================================================================== --- test/Transforms/Inline/inline-cold-callee.ll +++ test/Transforms/Inline/inline-cold-callee.ll @@ -5,7 +5,7 @@ ; A callee with identical body does gets inlined because cost fits within the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add 
i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 100} -!2 = !{!"function_entry_count", i64 1} - +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 100} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxBlockCount", i64 1000} +!6 = !{!"MaxInternalBlockCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumBlocks", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12} +!12 = !{i32 10000, i64 0, i32 0} Index: test/Transforms/Inline/inline-hot-callee.ll =================================================================== --- test/Transforms/Inline/inline-hot-callee.ll +++ test/Transforms/Inline/inline-hot-callee.ll @@ -5,7 +5,7 @@ ; A cold callee with identical body does not get inlined because cost exceeds the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !20 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !21 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !21 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ ret i32 %y3 } 
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 10}
-!1 = !{!"function_entry_count", i64 10}
-!2 = !{!"function_entry_count", i64 1}
-
+!llvm.module.flags = !{!1}
+!20 = !{!"function_entry_count", i64 10}
+!21 = !{!"function_entry_count", i64 1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxBlockCount", i64 10}
+!6 = !{!"MaxInternalBlockCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 10}
+!8 = !{!"NumBlocks", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12}
+!12 = !{i32 10000, i64 0, i32 0}
Index: unittests/ProfileData/CMakeLists.txt
===================================================================
--- unittests/ProfileData/CMakeLists.txt
+++ unittests/ProfileData/CMakeLists.txt
@@ -7,5 +7,6 @@
 add_llvm_unittest(ProfileDataTests
   CoverageMappingTest.cpp
   InstrProfTest.cpp
+  ProfileSummaryTest.cpp
   SampleProfTest.cpp
   )
Index: unittests/ProfileData/ProfileSummaryTest.cpp
===================================================================
--- /dev/null
+++ unittests/ProfileData/ProfileSummaryTest.cpp
@@ -0,0 +1,112 @@
+//===- unittest/ProfileData/ProfileSummaryTest.cpp --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace sampleprof;
+
+struct ProfileSummaryTest : ::testing::Test {
+  InstrProfSummary IPS;
+  SampleProfileSummary SPS;
+
+  ProfileSummaryTest()
+      : IPS({100000, 900000, 999999}), SPS({100000, 900000, 999999}) {
+    InstrProfRecord Record1("func1", 0x1234, {97531, 5, 99999});
+    InstrProfRecord Record2("func2", 0x1234, {57341, 10000, 10, 1});
+    IPS.addRecord(Record1);
+    IPS.addRecord(Record2);
+
+    IPS.computeDetailedSummary();
+
+    FunctionSamples FooSamples;
+    FooSamples.addTotalSamples(7711);
+    FooSamples.addHeadSamples(610);
+    FooSamples.addBodySamples(1, 0, 610);
+    FooSamples.addBodySamples(2, 0, 600);
+    FooSamples.addBodySamples(4, 0, 60000);
+    FooSamples.addBodySamples(8, 0, 60351);
+    FooSamples.addBodySamples(10, 0, 605);
+
+    FunctionSamples BarSamples;
+    BarSamples.addTotalSamples(20301);
+    BarSamples.addHeadSamples(1437);
+    BarSamples.addBodySamples(1, 0, 1437);
+
+    SPS.addRecord(FooSamples);
+    SPS.addRecord(BarSamples);
+
+    SPS.computeDetailedSummary();
+  }
+
+  /// Returns true when \p PS1 and \p PS2 agree on kind, function statistics,
+  /// detailed summary entries and kind-specific counters. A null summary never
+  /// compares equal.
+  static bool compareSummary(ProfileSummary *PS1, ProfileSummary *PS2) {
+    if (!PS1 || !PS2)
+      return false;
+    if (PS1->getKind() != PS2->getKind())
+      return false;
+    if (PS1->getNumFunctions() != PS2->getNumFunctions())
+      return false;
+    if (PS1->getMaxFunctionCount() != PS2->getMaxFunctionCount())
+      return false;
+    std::vector<ProfileSummaryEntry> DS1 = PS1->getDetailedSummary();
+    std::vector<ProfileSummaryEntry> DS2 = PS2->getDetailedSummary();
+    auto CompareSummaryEntry = [](ProfileSummaryEntry &E1,
+                                  ProfileSummaryEntry &E2) {
+      return E1.Cutoff == E2.Cutoff && E1.MinCount == E2.MinCount &&
+             E1.NumCounts == E2.NumCounts;
+    };
+    // std::equal with a single end iterator assumes the second range is at
+    // least as long as the first, so compare the sizes up front.
+    if (DS1.size() != DS2.size() ||
+        !std::equal(DS1.begin(), DS1.end(), DS2.begin(), CompareSummaryEntry))
+      return false;
+    if (isa<InstrProfSummary>(PS1)) {
+      InstrProfSummary *IPS1 = cast<InstrProfSummary>(PS1);
+      InstrProfSummary *IPS2 = cast<InstrProfSummary>(PS2);
+      if (IPS1->getNumBlocks() != IPS2->getNumBlocks())
+        return false;
+      if (IPS1->getTotalCount() != IPS2->getTotalCount())
+        return false;
+      if (IPS1->getMaxBlockCount() != IPS2->getMaxBlockCount())
+        return false;
+      if (IPS1->getMaxInternalBlockCount() != IPS2->getMaxInternalBlockCount())
+        return false;
+    } else {
+      SampleProfileSummary *SPS1 = cast<SampleProfileSummary>(PS1);
+      SampleProfileSummary *SPS2 = cast<SampleProfileSummary>(PS2);
+      if (SPS1->getNumLinesWithSamples() != SPS2->getNumLinesWithSamples())
+        return false;
+      if (SPS1->getTotalSamples() != SPS2->getTotalSamples())
+        return false;
+      if (SPS1->getMaxSamplesPerLine() != SPS2->getMaxSamplesPerLine())
+        return false;
+    }
+    return true;
+  }
+};
+
+TEST_F(ProfileSummaryTest, summary_from_module) {
+  Module M1("M1", getGlobalContext());
+  EXPECT_FALSE(ProfileSummary::getProfileSummary(&M1));
+  M1.setProfileSummary(IPS.getMD(getGlobalContext()));
+  EXPECT_TRUE(compareSummary(&IPS, ProfileSummary::getProfileSummary(&M1)));
+
+  Module M2("M2", getGlobalContext());
+  EXPECT_FALSE(ProfileSummary::getProfileSummary(&M2));
+  M2.setProfileSummary(SPS.getMD(getGlobalContext()));
+  EXPECT_TRUE(compareSummary(&SPS, ProfileSummary::getProfileSummary(&M2)));
+  EXPECT_TRUE(compareSummary(&IPS, ProfileSummary::getProfileSummary(&M1)));
+}