diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -46,6 +46,7 @@ class GVMaterializer; class LLVMContext; class MemoryBuffer; +class ModuleSummaryIndex; class Pass; class RandomNumberGenerator; template class SmallPtrSetImpl; @@ -882,6 +883,10 @@ /// Take ownership of the given memory buffer. void setOwnedMemoryBuffer(std::unique_ptr MB); + + /// Set the partial sample profile ratio in the profile summary module flag, + /// if applicable. + void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index); }; /// Given "llvm.used" or "llvm.compiler.used" as a global name, collect diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -59,7 +59,8 @@ bool Partial = false; /// This approximately represents the ratio of the number of profile counters /// of the program being built to the number of profile counters in the - /// partial sample profile. When 'Partial' is false, it is undefined. + /// partial sample profile. When 'Partial' is false, it is undefined. This is + /// currently only available under thin LTO mode. double PartialProfileRatio = 0; /// Return detailed summary as metadata. 
Metadata *getDetailedSummaryMD(LLVMContext &Context); diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -70,6 +70,23 @@ "partial-profile", cl::Hidden, cl::init(false), cl::desc("Specify the current profile is used as a partial profile.")); +cl::opt ScalePartialSampleProfileWorkingSetSize( + "scale-partial-sample-profile-working-set-size", cl::Hidden, + cl::init(false), + cl::desc( + "If true, scale the working set size of the partial sample profile " + "by the partial profile ratio to reflect the size of the program " + "being compiled.")); + +static cl::opt PartialSampleProfileWorkingSetSizeScaleFactor( + "partial-sample-profile-working-set-size-scale-factor", cl::Hidden, + cl::init(0.008), + cl::desc("The scale factor used to scale the working set size of the " + "partial sample profile along with the partial profile ratio. " + "This includes the factor of the profile counter per block " + "and the factor to scale the working set size to use the same " + "shared thresholds as PGO.")); + // Find the summary entry for a desired percentile of counts. 
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile) { @@ -280,10 +297,23 @@ ColdCountThreshold = ProfileSummaryColdCount; assert(ColdCountThreshold <= HotCountThreshold && "Cold count threshold cannot exceed hot count threshold!"); - HasHugeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; - HasLargeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { + HasHugeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } else { + // Scale the working set size of the partial sample profile to reflect the + // size of the program being compiled. + double PartialProfileRatio = Summary->getPartialProfileRatio(); + uint64_t ScaledHotEntryNumCounts = + static_cast(HotEntry.NumCounts * PartialProfileRatio * + PartialSampleProfileWorkingSetSizeScaleFactor); + HasHugeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } } Optional diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" @@ -673,3 +674,23 @@ } return GV; } + +void Module::setPartialSampleProfileRatio(const ModuleSummaryIndex &Index) { + if (auto *SummaryMD = getProfileSummary(/*IsCS*/ false)) { + std::unique_ptr ProfileSummary( + ProfileSummary::getFromMD(SummaryMD)); + if (ProfileSummary) { + if (ProfileSummary->getKind() != ProfileSummary::PSK_Sample 
|| + !ProfileSummary->isPartialProfile()) + return; + uint64_t BlockCount = Index.getBlockCount(); + uint32_t NumCounts = ProfileSummary->getNumCounts(); + if (!NumCounts) + return; + double Ratio = (double)BlockCount / NumCounts; + ProfileSummary->setPartialProfileRatio(Ratio); + setProfileSummary(ProfileSummary->getMD(getContext()), + ProfileSummary::PSK_Sample); + } + } +} diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -541,6 +541,10 @@ return DiagFileOrErr.takeError(); auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + // Set the partial sample profile ratio in the profile summary module flag of + // the module, if applicable. + Mod.setPartialSampleProfileRatio(CombinedIndex); + if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1232,6 +1232,12 @@ // have loaded all the required metadata! UpgradeDebugInfo(*SrcModule); + // Set the partial sample profile ratio in the profile summary module flag + // of the imported source module, if applicable, so that the profile summary + // module flag will match with that of the destination module when it's + // imported. + SrcModule->setPartialSampleProfileRatio(Index); + // Link in the specified functions. 
if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations, &GlobalsToImport)) diff --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp --- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -23,6 +23,8 @@ #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" +extern llvm::cl::opt ScalePartialSampleProfileWorkingSetSize; + namespace llvm { namespace { @@ -42,7 +44,12 @@ BPI.reset(new BranchProbabilityInfo(F, *LI)); return BlockFrequencyInfo(F, *BPI, *LI); } - std::unique_ptr makeLLVMModule(const char *ProfKind = nullptr) { + std::unique_ptr makeLLVMModule(const char *ProfKind = nullptr, + uint64_t NumCounts = 3, + uint64_t IsPartialProfile = 0, + double PartialProfileRatio = 0.0, + uint64_t HotNumCounts = 3, + uint64_t ColdNumCounts = 10) { const char *ModuleString = "define i32 @g(i32 %x) !prof !21 {{\n" " ret i32 0\n" @@ -83,27 +90,32 @@ "!22 = !{{!\"function_entry_count\", i64 100}\n" "!23 = !{{!\"branch_weights\", i32 64, i32 4}\n" "{0}"; - const char *SummaryString = "!llvm.module.flags = !{{!1}" - "!1 = !{{i32 1, !\"ProfileSummary\", !2}" - "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10}" - "!3 = !{{!\"ProfileFormat\", !\"{0}\"}" - "!4 = !{{!\"TotalCount\", i64 10000}" - "!5 = !{{!\"MaxCount\", i64 10}" - "!6 = !{{!\"MaxInternalCount\", i64 1}" - "!7 = !{{!\"MaxFunctionCount\", i64 1000}" - "!8 = !{{!\"NumCounts\", i64 3}" - "!9 = !{{!\"NumFunctions\", i64 3}" - "!10 = !{{!\"DetailedSummary\", !11}" - "!11 = !{{!12, !13, !14}" - "!12 = !{{i32 10000, i64 1000, i32 1}" - "!13 = !{{i32 999000, i64 300, i32 3}" - "!14 = !{{i32 999999, i64 5, i32 10}"; + const char *SummaryString = + "!llvm.module.flags = !{{!1}\n" + "!1 = !{{i32 1, !\"ProfileSummary\", !2}\n" + "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}\n" + "!3 = !{{!\"ProfileFormat\", !\"{0}\"}\n" + "!4 = !{{!\"TotalCount\", i64 10000}\n" + "!5 = !{{!\"MaxCount\", 
i64 10}\n" + "!6 = !{{!\"MaxInternalCount\", i64 1}\n" + "!7 = !{{!\"MaxFunctionCount\", i64 1000}\n" + "!8 = !{{!\"NumCounts\", i64 {1}}\n" + "!9 = !{{!\"NumFunctions\", i64 3}\n" + "!10 = !{{!\"IsPartialProfile\", i64 {2}}\n" + "!11 = !{{!\"PartialProfileRatio\", double {3}}\n" + "!12 = !{{!\"DetailedSummary\", !13}\n" + "!13 = !{{!14, !15, !16}\n" + "!14 = !{{i32 10000, i64 1000, i32 1}\n" + "!15 = !{{i32 990000, i64 300, i32 {4}}\n" + "!16 = !{{i32 999999, i64 5, i32 {5}}\n"; SMDiagnostic Err; - if (ProfKind) - return parseAssemblyString( - formatv(ModuleString, formatv(SummaryString, ProfKind).str()).str(), - Err, C); - else + if (ProfKind) { + auto Summary = + formatv(SummaryString, ProfKind, NumCounts, IsPartialProfile, + PartialProfileRatio, HotNumCounts, ColdNumCounts) + .str(); + return parseAssemblyString(formatv(ModuleString, Summary).str(), Err, C); + } else return parseAssemblyString(formatv(ModuleString, "").str(), Err, C); } }; @@ -280,6 +292,7 @@ ProfileSummaryInfo PSI = buildPSI(M.get()); EXPECT_TRUE(PSI.hasProfileSummary()); EXPECT_TRUE(PSI.hasSampleProfile()); + EXPECT_FALSE(PSI.hasPartialSampleProfile()); BasicBlock &BB0 = F->getEntryBlock(); BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); @@ -373,5 +386,47 @@ EXPECT_FALSE(PSI.isFunctionColdInCallGraphNthPercentile(990000, F, BFI)); } +TEST_F(ProfileSummaryInfoTest, PartialSampleProfWorkingSetSize) { + ScalePartialSampleProfileWorkingSetSize.setValue(true); + + // With PartialProfileRatio unset (zero.) 
+ auto M1 = makeLLVMModule("SampleProfile", /*NumCounts*/ 3, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.0, + /*HotNumCounts*/ 3, /*ColdNumCounts*/ 10); + ProfileSummaryInfo PSI1 = buildPSI(M1.get()); + EXPECT_TRUE(PSI1.hasProfileSummary()); + EXPECT_TRUE(PSI1.hasSampleProfile()); + EXPECT_TRUE(PSI1.hasPartialSampleProfile()); + EXPECT_FALSE(PSI1.hasHugeWorkingSetSize()); + EXPECT_FALSE(PSI1.hasLargeWorkingSetSize()); + + // With PartialProfileRatio set (non-zero) and a small working set size. + auto M2 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.00000012, + /*HotNumCounts*/ 3102082, + /*ColdNumCounts*/ 18306149); + ProfileSummaryInfo PSI2 = buildPSI(M2.get()); + EXPECT_TRUE(PSI2.hasProfileSummary()); + EXPECT_TRUE(PSI2.hasSampleProfile()); + EXPECT_TRUE(PSI2.hasPartialSampleProfile()); + EXPECT_FALSE(PSI2.hasHugeWorkingSetSize()); + EXPECT_FALSE(PSI2.hasLargeWorkingSetSize()); + + // With PartialProfileRatio set (non-zero) and a large working set size. 
+ auto M3 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.9, + /*HotNumCounts*/ 3102082, + /*ColdNumCounts*/ 18306149); + ProfileSummaryInfo PSI3 = buildPSI(M3.get()); + EXPECT_TRUE(PSI3.hasProfileSummary()); + EXPECT_TRUE(PSI3.hasSampleProfile()); + EXPECT_TRUE(PSI3.hasPartialSampleProfile()); + EXPECT_TRUE(PSI3.hasHugeWorkingSetSize()); + EXPECT_TRUE(PSI3.hasLargeWorkingSetSize()); +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp --- a/llvm/unittests/IR/ModuleTest.cpp +++ b/llvm/unittests/IR/ModuleTest.cpp @@ -9,6 +9,7 @@ #include "llvm/IR/Module.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Pass.h" #include "llvm/Support/RandomNumberGenerator.h" #include "gtest/gtest.h" @@ -121,4 +122,40 @@ delete PS; } +TEST(ModuleTest, setPartialSampleProfileRatio) { + const char *IRString = R"IR( + !llvm.module.flags = !{!0} + + !0 = !{i32 1, !"ProfileSummary", !1} + !1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11} + !2 = !{!"ProfileFormat", !"SampleProfile"} + !3 = !{!"TotalCount", i64 10000} + !4 = !{!"MaxCount", i64 10} + !5 = !{!"MaxInternalCount", i64 1} + !6 = !{!"MaxFunctionCount", i64 1000} + !7 = !{!"NumCounts", i64 200} + !8 = !{!"NumFunctions", i64 3} + !9 = !{!"IsPartialProfile", i64 1} + !10 = !{!"PartialProfileRatio", double 0.0} + !11 = !{!"DetailedSummary", !12} + !12 = !{!13, !14, !15} + !13 = !{i32 10000, i64 1000, i32 1} + !14 = !{i32 990000, i64 300, i32 10} + !15 = !{i32 999999, i64 5, i32 100} + )IR"; + + SMDiagnostic Err; + LLVMContext Context; + std::unique_ptr M = parseAssemblyString(IRString, Err, Context); + ModuleSummaryIndex Index(/*HaveGVs*/ false); + const unsigned BlockCount = 100; + const unsigned NumCounts = 200; + Index.setBlockCount(BlockCount); + M->setPartialSampleProfileRatio(Index); + double Ratio = 
(double)BlockCount / NumCounts; + std::unique_ptr ProfileSummary( + ProfileSummary::getFromMD(M->getProfileSummary(/*IsCS*/ false))); + EXPECT_EQ(Ratio, ProfileSummary->getPartialProfileRatio()); +} + } // end namespace