Index: include/llvm/IR/Function.h =================================================================== --- include/llvm/IR/Function.h +++ include/llvm/IR/Function.h @@ -18,6 +18,7 @@ #ifndef LLVM_IR_FUNCTION_H #define LLVM_IR_FUNCTION_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" @@ -207,8 +208,10 @@ /// \brief Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on - /// pgo data. - void setEntryCount(uint64_t Count); + /// pgo data. \p Imports points to a set of GUIDs that needs to be imported + /// by the function. + void setEntryCount(uint64_t Count, + const DenseSet *Imports = nullptr); /// \brief Get the entry count for this function. /// @@ -216,6 +219,9 @@ /// pgo data. Optional getEntryCount() const; + /// Returns the set of GUIDs that needs to be imported to the function. + DenseSet getImportGUIDs() const; + /// Set the section prefix for this function. void setSectionPrefix(StringRef Prefix); Index: include/llvm/IR/MDBuilder.h =================================================================== --- include/llvm/IR/MDBuilder.h +++ include/llvm/IR/MDBuilder.h @@ -15,7 +15,9 @@ #ifndef LLVM_IR_MDBUILDER_H #define LLVM_IR_MDBUILDER_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include @@ -64,7 +66,8 @@ MDNode *createUnpredictable(); /// Return metadata containing the entry count for a function. - MDNode *createFunctionEntryCount(uint64_t Count); + MDNode *createFunctionEntryCount(uint64_t Count, + const DenseSet *Imports); /// Return metadata containing the section prefix for a function. 
MDNode *createFunctionSectionPrefix(StringRef Prefix); Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -15,8 +15,11 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_ #define LLVM_PROFILEDATA_SAMPLEPROF_H_ +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" @@ -300,6 +303,20 @@ return Result; } + /// Recursively traverses all children. If the corresponding function is + /// missing from module \p M or has no subprogram there, and its total sample + /// count is strictly greater than \p Threshold, add its GUID to \p S. + void importAllFunctions(DenseSet &S, const Module *M, + uint64_t Threshold) const { + if (TotalSamples <= Threshold) + return; + Function *F = M->getFunction(Name); + if (!F || !F->getSubprogram()) + S.insert(Function::getGUID(Name)); + for (const auto &CS : CallsiteSamples) + CS.second.importAllFunctions(S, M, Threshold); + } + /// Set the name of the function. 
void setName(StringRef FunctionName) { Name = FunctionName; } Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -183,6 +183,9 @@ } } + for (auto &I : F.getImportGUIDs()) + CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot); + bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || Index: lib/IR/Function.cpp =================================================================== --- lib/IR/Function.cpp +++ lib/IR/Function.cpp @@ -1279,9 +1279,10 @@ setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } -void Function::setEntryCount(uint64_t Count) { +void Function::setEntryCount(uint64_t Count, + const DenseSet *S) { MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count)); + setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S)); } Optional Function::getEntryCount() const { @@ -1298,6 +1299,19 @@ return None; } +DenseSet Function::getImportGUIDs() const { + DenseSet R; + MDNode *MD = getMetadata(LLVMContext::MD_prof); + if (MD && MD->getOperand(0)) + if (MDString *MDS = dyn_cast(MD->getOperand(0))) + if (MDS->getString().equals("function_entry_count")) + for (unsigned i = 2; i < MD->getNumOperands(); i++) + R.insert(mdconst::extract(MD->getOperand(i)) + ->getValue() + .getZExtValue()); + return R; +} + void Function::setSectionPrefix(StringRef Prefix) { MDBuilder MDB(getContext()); setMetadata(LLVMContext::MD_section_prefix, Index: lib/IR/MDBuilder.cpp =================================================================== --- lib/IR/MDBuilder.cpp +++ lib/IR/MDBuilder.cpp @@ -56,11 +56,16 @@ return MDNode::get(Context, None); } -MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) { +MDNode *MDBuilder::createFunctionEntryCount( + uint64_t Count, const 
DenseSet *Imports) { Type *Int64Ty = Type::getInt64Ty(Context); - return MDNode::get(Context, - {createString("function_entry_count"), - createConstant(ConstantInt::get(Int64Ty, Count))}); + SmallVector Ops; + Ops.push_back(createString("function_entry_count")); + Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count))); + if (Imports) + for (auto ID : *Imports) + Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID))); + return MDNode::get(Context, Ops); } MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) { Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -1650,8 +1650,8 @@ for (const auto &Pair : MDs) { if (Pair.first == LLVMContext::MD_prof) { MDNode *MD = Pair.second; - Assert(MD->getNumOperands() == 2, - "!prof annotations should have exactly 2 operands", MD); + Assert(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); // Check first operand. Assert(MD->getOperand(0) != nullptr, "first operand should not be null", Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -163,7 +163,8 @@ ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineHotFunctions(Function &F); + bool inlineHotFunctions(Function &F, + DenseSet &ImportGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -605,7 +606,8 @@ /// \param F function to perform iterative inlining. /// /// \returns True if there is any inline happened. 
-bool SampleProfileLoader::inlineHotFunctions(Function &F) { +bool SampleProfileLoader::inlineHotFunctions( + Function &F, DenseSet &ImportGUIDs) { DenseSet PromotedInsns; bool Changed = false; LLVMContext &Ctx = F.getContext(); @@ -654,8 +656,12 @@ continue; } } - if (!CalledFunction || !CalledFunction->getSubprogram()) + if (!CalledFunction || !CalledFunction->getSubprogram()) { + findCalleeFunctionSamples(*I)->importAllFunctions( + ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * SampleProfileHotThreshold / 100); continue; + } DebugLoc DLoc = I->getDebugLoc(); uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); if (InlineFunction(CallSite(DI), IFI)) { @@ -1040,10 +1046,6 @@ bool Changed = true; unsigned I = 0; - // Add an entry count to the function using the samples gathered - // at the function entry. - F.setEntryCount(Samples->getHeadSamples() + 1); - // If BB weight is larger than its corresponding loop's header BB weight, // use the BB weight to replace the loop header BB weight. for (auto &BI : F) { @@ -1272,12 +1274,17 @@ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= inlineHotFunctions(F); + DenseSet ImportGUIDs; + Changed |= inlineHotFunctions(F, ImportGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); if (Changed) { + // Add an entry count to the function using the samples gathered + // at the function entry. + F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs); + // Compute dominance and loop info needed for propagation. 
computeDominanceAndLoopInfo(F); Index: test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-LABEL: ; CHECK-LABEL: ; COMBINED: