Index: llvm/trunk/include/llvm/ProfileData/ProfileCommon.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/ProfileCommon.h +++ llvm/trunk/include/llvm/ProfileData/ProfileCommon.h @@ -22,6 +22,8 @@ #include #include "llvm/Support/Casting.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" namespace llvm { class Function; @@ -36,6 +38,7 @@ class Metadata; class MDTuple; class MDNode; +class Module; inline const char *getHotSectionPrefix() { return ".hot"; } inline const char *getUnlikelySectionPrefix() { return ".unlikely"; } @@ -68,6 +71,14 @@ // appears in the profile. The map is kept sorted in the descending order of // counts. std::map> CountFrequencies; + // Compute profile summary for a module. + static ProfileSummary *computeProfileSummary(Module *M); + // Cache of last seen module and its profile summary. + static ManagedStatic>> + CachedSummary; + // Mutex to access summary cache + static ManagedStatic> CacheMutex; + protected: SummaryEntryVector DetailedSummary; std::vector DetailedSummaryCutoffs; @@ -86,14 +97,12 @@ : PSK(K), DetailedSummary(DetailedSummary), TotalCount(TotalCount), MaxCount(MaxCount), MaxFunctionCount(MaxFunctionCount), NumCounts(NumCounts), NumFunctions(NumFunctions) {} - ~ProfileSummary() = default; inline void addCount(uint64_t Count); /// \brief Return metadata specific to the profile format. /// Derived classes implement this method to return a vector of Metadata. virtual std::vector getFormatSpecificMD(LLVMContext &Context) = 0; /// \brief Return detailed summary as metadata. Metadata *getDetailedSummaryMD(LLVMContext &Context); - public: static const int Scale = 1000000; Kind getKind() const { return PSK; } @@ -112,6 +121,10 @@ static ProfileSummary *getFromMD(Metadata *MD); uint32_t getNumFunctions() { return NumFunctions; } uint64_t getMaxFunctionCount() { return MaxFunctionCount; } + /// \brief Get profile summary associated with module \p M + static inline ProfileSummary *getProfileSummary(Module *M); + virtual ~ProfileSummary() = default; + virtual bool operator==(ProfileSummary &Other); }; class InstrProfSummary final : public ProfileSummary { @@ -141,6 +154,7 @@ uint64_t getTotalCount() { return TotalCount; } uint64_t getMaxBlockCount() { return MaxCount; } uint64_t getMaxInternalBlockCount() { return MaxInternalBlockCount; } + bool operator==(ProfileSummary &Other) override; }; class SampleProfileSummary final : public ProfileSummary { @@ -181,5 +195,24 @@ return DetailedSummary; } +ProfileSummary *ProfileSummary::getProfileSummary(Module *M) { + if (!M) + return nullptr; + sys::SmartScopedLock Lock(*CacheMutex); + // Computing profile summary for a module involves parsing a fairly large + // metadata and could be expensive. We use a simple cache of the last seen + // module and its profile summary. + if (CachedSummary->first != M) { + auto *Summary = computeProfileSummary(M); + // Do not cache if the summary is empty. This is because a later pass + // (sample profile loader, for example) could attach the summary metadata on + // the module. + if (!Summary) + return nullptr; + CachedSummary->first = M; + CachedSummary->second.reset(Summary); + } + return CachedSummary->second.get(); +} } // end namespace llvm #endif Index: llvm/trunk/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp +++ llvm/trunk/lib/Analysis/InlineCost.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -626,10 +627,11 @@ // a well-tuned heuristic based on *callsite* hotness and not callee hotness. uint64_t FunctionCount = 0, MaxFunctionCount = 0; bool HasPGOCounts = false; - if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) { + ProfileSummary *PS = ProfileSummary::getProfileSummary(Callee.getParent()); + if (Callee.getEntryCount() && PS) { HasPGOCounts = true; FunctionCount = Callee.getEntryCount().getValue(); - MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); + MaxFunctionCount = PS->getMaxFunctionCount(); } // Listen to the inlinehint attribute or profile based hotness information Index: llvm/trunk/lib/ProfileData/ProfileSummary.cpp =================================================================== --- llvm/trunk/lib/ProfileData/ProfileSummary.cpp +++ llvm/trunk/lib/ProfileData/ProfileSummary.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/ProfileCommon.h" @@ -32,6 +33,10 @@ 900000, 950000, 990000, 999000, 999900, 999990, 999999}); const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; +ManagedStatic>> + ProfileSummary::CachedSummary; +ManagedStatic> ProfileSummary::CacheMutex; + void InstrProfSummary::addRecord(const InstrProfRecord &R) { addEntryCount(R.Counts[0]); for (size_t I = 1, E = R.Counts.size(); I < E; ++I) @@ -82,6 +87,39 @@ } } +bool ProfileSummary::operator==(ProfileSummary &Other) { + if (getKind() != Other.getKind()) + return false; + if (TotalCount != Other.TotalCount) + return false; + if (MaxCount != Other.MaxCount) + return false; + if (MaxFunctionCount != Other.MaxFunctionCount) + return false; + if (NumFunctions != Other.NumFunctions) + return false; + if (NumCounts != Other.NumCounts) + return false; + std::vector DS1 = getDetailedSummary(); + std::vector DS2 = Other.getDetailedSummary(); + auto CompareSummaryEntry = [](ProfileSummaryEntry &E1, + ProfileSummaryEntry &E2) { + return E1.Cutoff == E2.Cutoff && E1.MinCount == E2.MinCount && + E1.NumCounts == E2.NumCounts; + }; + if (!std::equal(DS1.begin(), DS1.end(), DS2.begin(), CompareSummaryEntry)) + return false; + return true; +} + +bool InstrProfSummary::operator==(ProfileSummary &Other) { + InstrProfSummary *OtherIPS = dyn_cast(&Other); + if (!OtherIPS) + return false; + return MaxInternalBlockCount == OtherIPS->MaxInternalBlockCount && + ProfileSummary::operator==(Other); +} + // Returns true if the function is a hot function. bool ProfileSummary::isFunctionHot(const Function *F) { // FIXME: update when summary data is stored in module's metadata. @@ -362,3 +400,9 @@ else return nullptr; } + +ProfileSummary *ProfileSummary::computeProfileSummary(Module *M) { + if (Metadata *MD = M->getProfileSummary()) + return getFromMD(MD); + return nullptr; +} Index: llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll +++ llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll @@ -5,7 +5,7 @@ ; A callee with identical body does gets inlined because cost fits within the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 100} -!2 = !{!"function_entry_count", i64 1} - +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 100} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxBlockCount", i64 1000} +!6 = !{!"MaxInternalBlockCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumBlocks", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12} +!12 = !{i32 10000, i64 0, i32 0} Index: llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll +++ llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll @@ -5,7 +5,7 @@ ; A cold callee with identical body does not get inlined because cost exceeds the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !20 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !21 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !21 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 10} -!1 = !{!"function_entry_count", i64 10} -!2 = !{!"function_entry_count", i64 1} - +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 10} +!21 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxBlockCount", i64 10} +!6 = !{!"MaxInternalBlockCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 10} +!8 = !{!"NumBlocks", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12} +!12 = !{i32 10000, i64 0, i32 0} Index: llvm/trunk/unittests/ProfileData/CMakeLists.txt =================================================================== --- llvm/trunk/unittests/ProfileData/CMakeLists.txt +++ llvm/trunk/unittests/ProfileData/CMakeLists.txt @@ -7,5 +7,6 @@ add_llvm_unittest(ProfileDataTests CoverageMappingTest.cpp InstrProfTest.cpp + ProfileSummaryTest.cpp SampleProfTest.cpp ) Index: llvm/trunk/unittests/ProfileData/ProfileSummaryTest.cpp =================================================================== --- llvm/trunk/unittests/ProfileData/ProfileSummaryTest.cpp +++ llvm/trunk/unittests/ProfileData/ProfileSummaryTest.cpp @@ -0,0 +1,66 @@ +//===- unittest/ProfileData/ProfileSummaryTest.cpp --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Module.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/SampleProf.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace sampleprof; + +struct ProfileSummaryTest : ::testing::Test { + InstrProfSummary IPS; + SampleProfileSummary SPS; + + ProfileSummaryTest() + : IPS({100000, 900000, 999999}), SPS({100000, 900000, 999999}) {} + void SetUp() { + InstrProfRecord Record1("func1", 0x1234, {97531, 5, 99999}); + InstrProfRecord Record2("func2", 0x1234, {57341, 10000, 10, 1}); + IPS.addRecord(Record1); + IPS.addRecord(Record2); + + IPS.computeDetailedSummary(); + + FunctionSamples FooSamples; + FooSamples.addTotalSamples(7711); + FooSamples.addHeadSamples(610); + FooSamples.addBodySamples(1, 0, 610); + FooSamples.addBodySamples(2, 0, 600); + FooSamples.addBodySamples(4, 0, 60000); + FooSamples.addBodySamples(8, 0, 60351); + FooSamples.addBodySamples(10, 0, 605); + + FunctionSamples BarSamples; + BarSamples.addTotalSamples(20301); + BarSamples.addHeadSamples(1437); + BarSamples.addBodySamples(1, 0, 1437); + + SPS.addRecord(FooSamples); + SPS.addRecord(BarSamples); + + SPS.computeDetailedSummary(); + } + +}; + +TEST_F(ProfileSummaryTest, summary_from_module) { + LLVMContext Context; + Module M1("M1", Context); + EXPECT_FALSE(ProfileSummary::getProfileSummary(&M1)); + M1.setProfileSummary(IPS.getMD(Context)); + EXPECT_TRUE(IPS == *ProfileSummary::getProfileSummary(&M1)); + + Module M2("M2", Context); + EXPECT_FALSE(ProfileSummary::getProfileSummary(&M2)); + M2.setProfileSummary(SPS.getMD(Context)); + EXPECT_TRUE(SPS == *ProfileSummary::getProfileSummary(&M2)); +}