Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -175,7 +175,7 @@ VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] - // VST_COMBINED_FNENTRY: [funcsumoffset, funcguid] + // VST_COMBINED_FNENTRY: [valueid, funcsumoffset, funcguid] VST_CODE_COMBINED_FNENTRY = 4 }; @@ -187,8 +187,21 @@ // The function summary section uses different codes in the per-module // and combined index cases. enum FunctionSummarySymtabCodes { - FS_CODE_PERMODULE_ENTRY = 1, // FS_ENTRY: [valueid, linkage, instcount] - FS_CODE_COMBINED_ENTRY = 2, // FS_ENTRY: [modid, linkage, instcount] + // PERMODULE_NOCALLS: [valueid, linkage, instcount] + FS_PERMODULE_NOCALLS = 1, + // PERMODULE_CALLS: [valueid, linkage, instcount, + // n x (valueid, callsitecount)] + FS_PERMODULE_CALLS = 2, + // PERMODULE_CALLS_PROFILE: [valueid, linkage, instcount, + // n x (valueid, callsitecount, profilecount)] + FS_PERMODULE_CALLS_PROFILE = 3, + // COMBINED_NOCALLS: [modid, linkage, instcount] + FS_COMBINED_NOCALLS = 4, + // COMBINED_CALLS: [modid, linkage, instcount, n x (valueid, callsitecount)] + FS_COMBINED_CALLS = 5, + // COMBINED_CALLS_PROFILE: [modid, linkage, instcount, + // n x (valueid, callsitecount, profilecount)] + FS_COMBINED_CALLS_PROFILE = 6, }; enum MetadataCodes { Index: include/llvm/IR/FunctionInfo.h =================================================================== --- include/llvm/IR/FunctionInfo.h +++ include/llvm/IR/FunctionInfo.h @@ -25,12 +25,29 @@ namespace llvm { +struct CalleeInfo { + unsigned CallsiteCount; + uint64_t ProfileCount; + CalleeInfo() = default; + CalleeInfo(unsigned CallsiteCount, uint64_t ProfileCount) + : CallsiteCount(CallsiteCount), ProfileCount(ProfileCount) {} + CalleeInfo &operator+=(uint64_t RHSProfileCount) { + CallsiteCount++; + ProfileCount += RHSProfileCount; + return *this; + } +}; + /// \brief Function summary information to aid decisions and implementation of /// importing. /// /// This is a separate class from FunctionInfo to enable lazy reading of this /// function summary information from the combined index file during imporing. class FunctionSummary { +public: + /// call edge pair. + typedef std::pair EdgeTy; + private: /// \brief Path of module containing function IR, used to locate module when /// importing this function. @@ -59,9 +76,12 @@ /// during the initial compile step when the function index is first built. unsigned InstCount; + /// List of call edge pairs from this function. + std::vector CallGraphEdgeList; + public: - /// Construct a summary object from summary data expected for all - /// summary records. + /// Summary constructors. + FunctionSummary() : InstCount(0) {} FunctionSummary(unsigned NumInsts) : InstCount(NumInsts) {} /// Set the path to the module containing this function, for use in @@ -81,8 +101,22 @@ return FunctionLinkage; } + /// Set the instruction count for this function. + void setInstCount(unsigned NumInsts) { InstCount = NumInsts; } + /// Get the instruction count recorded for this function. unsigned instCount() const { return InstCount; } + + /// Record a call graph edge from this function to the function identified + /// by \p CalleeGUID, with \p CalleeInfo including the cumulative profile + /// count (across all calls from this function) or 0 if no FDO. + void addCallGraphEdge(uint64_t CalleeGUID, CalleeInfo Info) { + CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info)); + } + + /// Return the list of pairs. + std::vector &edges() { return CallGraphEdgeList; } + const std::vector &edges() const { return CallGraphEdgeList; } }; /// \brief Class to hold pointer to function summary and information required @@ -249,6 +283,48 @@ } }; +/// Helper class for reading and writing the function summary in bitcode. +/// Specifically, the call graph edges in the summary bitcode section +/// reference the callees by ValueId. However, in the function index in +/// memory we want these referenced by GUID. Rather than bloat the +/// FunctionSummary class with an additional map, we use this class +/// to temporarily hold the ValueId representation. +class FunctionSummaryIOHelper { +public: + /// call edge pair. + typedef std::pair EdgeTy; + +private: + /// The FunctionSummary object being built during bitcode reading, + /// or built during the per-module bitcode write process. + std::unique_ptr Summary; + /// List of call edge pairs for the function. + std::vector CallGraphEdgeValueIdList; + +public: + /// Save the new function summary. + void setFunctionSummary(std::unique_ptr FuncSummary) { + Summary = std::move(FuncSummary); + } + /// Get ownership of the function summary unique_ptr. + std::unique_ptr takeFunctionSummary() { + return std::move(Summary); + } + + /// Access the function summary pointer. + FunctionSummary *functionSummary() { return Summary.get(); } + + /// Record a call graph edge from this function to the function identified + /// by \p CalleeValueId, with \p CalleeInfo including the cumulative profile + /// count (across all calls from this function) or 0 if no FDO. + void addCallGraphEdge(unsigned CalleeValueId, CalleeInfo Info) { + CallGraphEdgeValueIdList.push_back(std::make_pair(CalleeValueId, Info)); + } + + /// Return the list of pairs. + std::vector &edges() { return CallGraphEdgeValueIdList; } +}; + } // End llvm namespace #endif Index: include/llvm/ProfileData/ProfileCommon.h =================================================================== --- include/llvm/ProfileData/ProfileCommon.h +++ include/llvm/ProfileData/ProfileCommon.h @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" #include #include #include @@ -87,5 +88,17 @@ return DetailedSummary; } +/// Helper to compute the profile count for a block, based on the +/// ratio of its frequency to the entry block frequency, multiplied +/// by the entry block count. +inline uint64_t getBlockProfileCount(uint64_t BlockFreq, uint64_t EntryFreq, + uint64_t EntryCount) { + APInt ScaledCount(128, EntryCount); + APInt BlockFreqAPInt(128, BlockFreq); + APInt EntryFreqAPInt(128, EntryFreq); + ScaledCount *= BlockFreqAPInt.udiv(EntryFreqAPInt); + return ScaledCount.getLimitedValue(); +} + } // end namespace llvm #endif Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -450,7 +450,7 @@ /// offset to the function summary (since in the combined index the /// VST records do not hold value IDs but rather hold the function /// summary record offset). - DenseMap> SummaryMap; + std::map SummaryMap; /// Map populated during module path string table parsing, from the /// module ID to a string reference owned by the index's module @@ -5437,6 +5437,19 @@ SmallVector Record; + // Map to save ValueId to GUID association that was recorded in the + // ValueSymbolTable. It is used after the VST is parsed to convert + // call graph edges read from the function summary from referencing + // callees by their ValueId to using the GUID instead, which is how + // they are recorded in the function index being built. + DenseMap ValueIdToCallGraphGUIDMap; + + // Map to keep track of which helper object was associated with which + // function summary, used when we transfer ownership of summary to + // the index, because we need this information for later creation + // of call graph edges in the index. + DenseMap HelperToFuncSummaryMap; + // Read all the records for this value table. SmallString<128> ValueName; while (1) { @@ -5446,8 +5459,33 @@ case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); - case BitstreamEntry::EndBlock: + case BitstreamEntry::EndBlock: { + // We now have saved the ValueId to GUID mapping for all functions in the + // VST. For the non-lazy function summary parsing case, we can convert the + // call graph edges from using the Callee ValueId to instead using the + // GUID and record them in the index. In the lazy summary parsing combined + // function case, we can perform this mapping as the summaries are parsed. + if (foundFuncSummary() && !IsLazy) { + for (auto &SMI : SummaryMap) { + // Walk over the call edges in this entry and add them to the + // function summary. + for (auto &EI : SMI.second.edges()) { + auto CGI = ValueIdToCallGraphGUIDMap.find(EI.first); + // For the per-module case all edges should have a corresponding + // declaration in the VST, so we should always find an entry. + // Similarly, when writing the combined index bitcode we filtered + // out any edges to functions that weren't in the index, and all + // combined VST entries were added to the map. + assert(CGI != ValueIdToCallGraphGUIDMap.end()); + // Add the edge to the summary now, using the GUID. + auto FSI = HelperToFuncSummaryMap.find(&SMI.second); + assert(FSI != HelperToFuncSummaryMap.end()); + FSI->second->addCallGraphEdge(CGI->second, EI.second); + } + } + } return std::error_code(); + } case BitstreamEntry::Record: // The interesting case. break; @@ -5458,6 +5496,14 @@ switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). break; + case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] + if (convertToString(Record, 1, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + ValueIdToCallGraphGUIDMap[ValueID] = Function::getGUID(ValueName); + ValueName.clear(); + break; + } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] if (convertToString(Record, 2, ValueName)) @@ -5468,37 +5514,39 @@ // Gracefully handle bitcode without a function summary section, // which will simply not populate the index. if (foundFuncSummary()) { - DenseMap>::iterator SMI = - SummaryMap.find(ValueID); + auto SMI = SummaryMap.find(ValueID); assert(SMI != SummaryMap.end() && "Summary info not found"); std::unique_ptr FuncInfo = llvm::make_unique(FuncOffset); - FuncInfo->setFunctionSummary(std::move(SMI->second)); + FuncInfo->setFunctionSummary(SMI->second.takeFunctionSummary()); + HelperToFuncSummaryMap[&SMI->second] = FuncInfo->functionSummary(); assert(!SourceFileName.empty()); std::string FunctionGlobalId = Function::getGlobalIdentifier( ValueName, FuncInfo->functionSummary()->getFunctionLinkage(), SourceFileName); TheIndex->addFunctionInfo(FunctionGlobalId, std::move(FuncInfo)); + ValueIdToCallGraphGUIDMap[ValueID] = + Function::getGUID(FunctionGlobalId); } ValueName.clear(); break; } case bitc::VST_CODE_COMBINED_FNENTRY: { - // VST_CODE_COMBINED_FNENTRY: [offset, funcguid] - uint64_t FuncSummaryOffset = Record[0]; - uint64_t FuncGUID = Record[1]; + // VST_CODE_COMBINED_FNENTRY: [valueid, offset, funcguid] + unsigned ValueID = Record[0]; + uint64_t FuncSummaryOffset = Record[1]; + uint64_t FuncGUID = Record[2]; std::unique_ptr FuncInfo = llvm::make_unique(FuncSummaryOffset); if (foundFuncSummary() && !IsLazy) { - DenseMap>::iterator SMI = - SummaryMap.find(FuncSummaryOffset); + auto SMI = SummaryMap.find(FuncSummaryOffset); assert(SMI != SummaryMap.end() && "Summary info not found"); - FuncInfo->setFunctionSummary(std::move(SMI->second)); + FuncInfo->setFunctionSummary(SMI->second.takeFunctionSummary()); + HelperToFuncSummaryMap[&SMI->second] = FuncInfo->functionSummary(); } TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo)); - - ValueName.clear(); + ValueIdToCallGraphGUIDMap[ValueID] = FuncGUID; break; } } @@ -5624,11 +5672,18 @@ // information used for ThinLTO renaming and importing. Record.clear(); uint64_t CurRecordBit = Stream.GetCurrentBitNo(); - switch (Stream.readRecord(Entry.ID, Record)) { + auto BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { default: // Default behavior: ignore. break; - // FS_PERMODULE_ENTRY: [valueid, linkage, instcount] - case bitc::FS_CODE_PERMODULE_ENTRY: { + // FS_PERMODULE_NOCALLS: [valueid, linkage, instcount] + // FS_PERMODULE_CALLS: [valueid, linkage, instcount, + // n x (valueid, callsitecount)] + // FS_PERMODULE_CALLS_PROFILE: [valueid, linkage, instcount, + // n x (valueid, callsitecount, profilecount)] + case bitc::FS_PERMODULE_NOCALLS: + case bitc::FS_PERMODULE_CALLS: + case bitc::FS_PERMODULE_CALLS_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawLinkage = Record[1]; unsigned InstCount = Record[2]; @@ -5642,10 +5697,31 @@ // ownership. FS->setModulePath( TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); - SummaryMap[ValueID] = std::move(FS); + FunctionSummaryIOHelper &BitcodeSummary = SummaryMap[ValueID]; + BitcodeSummary.setFunctionSummary(std::move(FS)); + bool HasProfile = (BitCode == bitc::FS_PERMODULE_CALLS_PROFILE); + assert(BitCode != bitc::FS_PERMODULE_NOCALLS || Record.size() < 4); + // For now save the call graph edges using the recorded ValueId, + // on the summary helper object. After reading the VST this will + // be transferred into the function summary in the index, using the + // callee GUID instead. + for (unsigned I = 3, E = Record.size(); I != E; ++I) { + unsigned CalleeValueId = Record[I]; + unsigned CallsiteCount = Record[++I]; + uint64_t ProfileCount = HasProfile ? Record[++I] : 0; + BitcodeSummary.addCallGraphEdge( + CalleeValueId, CalleeInfo(CallsiteCount, ProfileCount)); + } + break; } - // FS_COMBINED_ENTRY: [modid, linkage, instcount] - case bitc::FS_CODE_COMBINED_ENTRY: { + // FS_COMBINED_NOCALLS: [modid, linkage, instcount] + // FS_COMBINED_CALLS: [modid, linkage, instcount, + // n x (valueid, callsitecount)] + // FS_COMBINED_CALLS_PROFILE: [modid, linkage, instcount, + // n x (valueid, callsitecount, profilecount)] + case bitc::FS_COMBINED_NOCALLS: + case bitc::FS_COMBINED_CALLS: + case bitc::FS_COMBINED_CALLS_PROFILE: { uint64_t ModuleId = Record[0]; uint64_t RawLinkage = Record[1]; unsigned InstCount = Record[2]; @@ -5653,7 +5729,24 @@ llvm::make_unique(InstCount); FS->setFunctionLinkage(getDecodedLinkage(RawLinkage)); FS->setModulePath(ModuleIdMap[ModuleId]); - SummaryMap[CurRecordBit] = std::move(FS); + FunctionSummaryIOHelper &BitcodeSummary = SummaryMap[CurRecordBit]; + BitcodeSummary.setFunctionSummary(std::move(FS)); + bool HasProfile = (BitCode == bitc::FS_COMBINED_CALLS_PROFILE); + assert(BitCode != bitc::FS_COMBINED_NOCALLS || Record.size() < 4); + // For now save the call graph edges using the recorded ValueId, + // on the summary helper object. After reading the VST this will + // be transferred into the function summary in the index, using the + // callee GUID instead. + static int CallGraphEdgeStartIndex = 3; + for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; + ++I) { + unsigned CalleeValueId = Record[I]; + unsigned CallsiteCount = Record[++I]; + uint64_t ProfileCount = HasProfile ? Record[++I] : 0; + BitcodeSummary.addCallGraphEdge( + CalleeValueId, CalleeInfo(CallsiteCount, ProfileCount)); + } + break; } } } @@ -5771,7 +5864,9 @@ // importing is added so that it can be tested. SmallVector Record; switch (Stream.readRecord(Entry.ID, Record)) { - case bitc::FS_CODE_COMBINED_ENTRY: + case bitc::FS_COMBINED_NOCALLS: + case bitc::FS_COMBINED_CALLS: + case bitc::FS_COMBINED_CALLS_PROFILE: default: return error("Invalid record"); } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -11,24 +11,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Bitcode/ReaderWriter.h" #include "ValueEnumerator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -38,6 +44,71 @@ #include using namespace llvm; +namespace { +/// Helper class for writing function summary section. Used to hold information +/// built while writing the function to bitcode, then later accessed when +/// writing the function summary and the ValueSymbolTable. +class FunctionWriteInfo { +private: + // Pair holding the bitcode index of the corresponding function block + // (written to the VST) and the helper object holding other summary + // information. + typedef std::pair> ValueTy; + std::map FunctionMap; + bool EmitFunctionSummary; + +public: + FunctionWriteInfo(bool EmitFunctionSummary) + : EmitFunctionSummary(EmitFunctionSummary) {} + + /// Create an entry in the map for the given function, which is + /// written to the bitcode index \p FuncOffset. + void recordFunction(const Function &F, uint64_t FuncOffset) { + std::unique_ptr BitcodeSummaryInfo; + // If writing function summary sections, create the helper object and + // start populating the function summary. + if (EmitFunctionSummary) { + BitcodeSummaryInfo = llvm::make_unique(); + std::unique_ptr FuncSummary = + llvm::make_unique(); + FuncSummary->setFunctionLinkage(F.getLinkage()); + BitcodeSummaryInfo->setFunctionSummary(std::move(FuncSummary)); + } + FunctionMap[&F] = std::make_pair(FuncOffset, std::move(BitcodeSummaryInfo)); + } + + /// Save the instruction count to be written to the summary section for \p F. + void setInstCount(const Function &F, unsigned NumInsts) { + assert(EmitFunctionSummary); + FunctionMap[&F].second->functionSummary()->setInstCount(NumInsts); + } + + /// Save the call graph edges to be written to the summary section for \p F. + /// The edges are pairs of , where ProfileCount + /// is 0 when there is no PGO. + void addCallGraphEdges(const Function &F, + DenseMap &CallGraphEdges) { + assert(EmitFunctionSummary); + for (auto &EI : CallGraphEdges) + FunctionMap[&F].second->addCallGraphEdge(EI.first, EI.second); + } + + /// Return the bitcode index where the function block for \p F was written. + uint64_t getBitcodeIndex(const Function &F) { + auto FMI = FunctionMap.find(&F); + assert(FMI != FunctionMap.end()); + return FMI->second.first; + } + + /// Return the summary helper object for writing the summary section for \p F. + FunctionSummaryIOHelper &getBitcodeSummary(const Function &F) { + auto FMI = FunctionMap.find(&F); + assert(FMI != FunctionMap.end()); + return *FMI->second.second; + } +}; +} + /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { @@ -2241,15 +2312,15 @@ } /// Emit names for globals/functions etc. The VSTOffsetPlaceholder, -/// BitcodeStartBit and FunctionIndex are only passed for the module-level +/// BitcodeStartBit and FunctionInfo are only passed for the module-level /// VST, where we are including a function bitcode index and need to /// backpatch the VST forward declaration record. -static void WriteValueSymbolTable( - const ValueSymbolTable &VST, const ValueEnumerator &VE, - BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0, - uint64_t BitcodeStartBit = 0, - DenseMap> *FunctionIndex = - nullptr) { +static void WriteValueSymbolTable(const ValueSymbolTable &VST, + const ValueEnumerator &VE, + BitstreamWriter &Stream, + uint64_t VSTOffsetPlaceholder = 0, + uint64_t BitcodeStartBit = 0, + FunctionWriteInfo *FunctionInfo = nullptr) { if (VST.empty()) { // WriteValueSymbolTableForwardDecl should have returned early as // well. Ensure this handling remains in sync by asserting that @@ -2338,14 +2409,13 @@ // Must be the module-level VST, where we pass in the Index and // have a VSTOffsetPlaceholder. The function-level VST should not // contain any Function symbols. - assert(FunctionIndex); + assert(FunctionInfo); assert(VSTOffsetPlaceholder > 0); // Save the word offset of the function (from the start of the // actual bitcode written to the stream). - assert(FunctionIndex->count(F) == 1); uint64_t BitcodeIndex = - (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit; + FunctionInfo->getBitcodeIndex(*F) - BitcodeStartBit; assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); NameVals.push_back(BitcodeIndex / 32); @@ -2375,12 +2445,15 @@ /// Emit function names and summary offsets for the combined index /// used by ThinLTO. -static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index, - BitstreamWriter &Stream) { +static void +WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index, + BitstreamWriter &Stream, + std::map &GUIDToValueIdMap) { Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv); @@ -2389,13 +2462,15 @@ for (const auto &FII : Index) { for (const auto &FI : FII.second) { - NameVals.push_back(FI->bitcodeIndex()); + // VST_CODE_COMBINED_FNENTRY: [valueid, funcsumoffset, funcguid] + unsigned AbbrevToUse = FnEntryAbbrev; uint64_t FuncGUID = FII.first; + const auto &VMI = GUIDToValueIdMap.find(FuncGUID); + assert(VMI != GUIDToValueIdMap.end()); - // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid] - unsigned AbbrevToUse = FnEntryAbbrev; - + NameVals.push_back(VMI->second); + NameVals.push_back(FI->bitcodeIndex()); NameVals.push_back(FuncGUID); // Emit the finished record. @@ -2440,33 +2515,23 @@ Stream.ExitBlock(); } -/// \brief Save information for the given function into the function index. -/// -/// At a minimum this saves the bitcode index of the function record that -/// was just written. However, if we are emitting function summary information, -/// for example for ThinLTO, then a \a FunctionSummary object is created -/// to hold the provided summary information. -static void SaveFunctionInfo( - const Function &F, - DenseMap> &FunctionIndex, - unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) { - std::unique_ptr FuncSummary; - if (EmitFunctionSummary) { - FuncSummary = llvm::make_unique(NumInsts); - FuncSummary->setFunctionLinkage(F.getLinkage()); - } - FunctionIndex[&F] = - llvm::make_unique(BitcodeIndex, std::move(FuncSummary)); -} - /// Emit a function body to the module stream. -static void WriteFunction( - const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream, - DenseMap> &FunctionIndex, - bool EmitFunctionSummary) { +static void WriteFunction(const Function &F, const Module *M, + ValueEnumerator &VE, BitstreamWriter &Stream, + FunctionWriteInfo &FunctionInfo, + bool EmitFunctionSummary) { // Save the bitcode index of the start of this function block for recording // in the VST. - uint64_t BitcodeIndex = Stream.GetCurrentBitNo(); + FunctionInfo.recordFunction(F, Stream.GetCurrentBitNo()); + + bool HasProfileData = F.getEntryCount().hasValue(); + std::unique_ptr BFI; + if (EmitFunctionSummary && HasProfileData) { + Function &Func = const_cast(F); + LoopInfo LI{DominatorTree(Func)}; + BranchProbabilityInfo BPI{Func, LI}; + BFI = llvm::make_unique(Func, BPI, LI); + } Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); @@ -2494,6 +2559,9 @@ DILocation *LastDL = nullptr; unsigned NumInsts = 0; + // Map from callee ValueId to profile count. Used to accumulate profile + // counts for all static calls to a given callee. + DenseMap CallGraphEdges; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2507,6 +2575,21 @@ if (!I->getType()->isVoidTy()) ++InstID; + if (EmitFunctionSummary && isa(I)) { + auto CalledFunction = cast(I)->getCalledFunction(); + if (CalledFunction && CalledFunction->hasName() && + !CalledFunction->isIntrinsic()) { + uint64_t ScaledCount = 0; + if (HasProfileData) + ScaledCount = getBlockProfileCount( + BFI->getBlockFreq(&(*BB)).getFrequency(), BFI->getEntryFreq(), + F.getEntryCount().getValue()); + unsigned CalleeId = VE.getValueID( + M->getValueSymbolTable().lookup(CalledFunction->getName())); + CallGraphEdges[CalleeId] += ScaledCount; + } + } + // If the instruction has metadata, write a metadata attachment later. NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); @@ -2531,6 +2614,11 @@ LastDL = DL; } + if (EmitFunctionSummary) { + FunctionInfo.setInstCount(F, NumInsts); + FunctionInfo.addCallGraphEdges(F, CallGraphEdges); + } + // Emit names for all the instructions etc. WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); @@ -2540,9 +2628,6 @@ WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); - - SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex, - EmitFunctionSummary); } // Emit blockinfo, which defines the standard abbreviations etc. @@ -2776,35 +2861,79 @@ // Helper to emit a single function summary record. static void WritePerModuleFunctionSummaryRecord( - SmallVector &NameVals, FunctionSummary *FS, unsigned ValueID, - unsigned FSAbbrev, BitstreamWriter &Stream) { + SmallVector &NameVals, + FunctionSummaryIOHelper &BitcodeSummary, unsigned ValueID, + unsigned FSNoCallsAbbrev, unsigned FSCallsAbbrev, + unsigned FSCallsProfileAbbrev, BitstreamWriter &Stream, const Function &F) { + FunctionSummary *FS = BitcodeSummary.functionSummary(); assert(FS); NameVals.push_back(ValueID); NameVals.push_back(getEncodedLinkage(FS->getFunctionLinkage())); NameVals.push_back(FS->instCount()); + bool HasProfileData = F.getEntryCount().hasValue(); + for (auto &ECI : BitcodeSummary.edges()) { + NameVals.push_back(ECI.first); + assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); + NameVals.push_back(ECI.second.CallsiteCount); + if (HasProfileData) + NameVals.push_back(ECI.second.ProfileCount); + } + + unsigned FSAbbrev = + BitcodeSummary.edges().empty() + ? FSNoCallsAbbrev + : (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); + unsigned Code = BitcodeSummary.edges().empty() + ? bitc::FS_PERMODULE_NOCALLS + : (HasProfileData ? bitc::FS_PERMODULE_CALLS_PROFILE + : bitc::FS_PERMODULE_CALLS); + // Emit the finished record. - Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev); + Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } /// Emit the per-module function summary section alongside the rest of /// the module's bitcode. -static void WritePerModuleFunctionSummary( - DenseMap> &FunctionIndex, - const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { +static void WritePerModuleFunctionSummary(FunctionWriteInfo &FunctionInfo, + const Module *M, + const ValueEnumerator &VE, + BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); - // Abbrev for FS_CODE_PERMODULE_ENTRY. + // Abbrev for FS_PERMODULE_NOCALLS. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_NOCALLS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount - unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + unsigned FSNoCallsAbbrev = Stream.EmitAbbrev(Abbv); - SmallVector NameVals; - // Iterate over the list of functions instead of the FunctionIndex map to + // Abbrev for FS_PERMODULE_CALLS. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLS)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + Abbv->Add( + BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueid/callsitecount pairs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); + + // Abbrev for FS_PERMODULE_CALLS_PROFILE. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLS_PROFILE)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + Abbv->Add(BitCodeAbbrevOp( + BitCodeAbbrevOp::Array)); // valueid/callsitecount/profilecount triples + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + // Iterate over the list of functions instead of the FunctionInfo map to // ensure the ordering is stable. for (const Function &F : *M) { if (F.isDeclaration()) @@ -2814,12 +2943,10 @@ if (!F.hasName()) continue; - assert(FunctionIndex.count(&F) == 1); - WritePerModuleFunctionSummaryRecord( - NameVals, FunctionIndex[&F]->functionSummary(), - VE.getValueID(M->getValueSymbolTable().lookup(F.getName())), FSAbbrev, - Stream); + NameVals, FunctionInfo.getBitcodeSummary(F), + VE.getValueID(M->getValueSymbolTable().lookup(F.getName())), + FSNoCallsAbbrev, FSCallsAbbrev, FSCallsProfileAbbrev, Stream, F); } for (const GlobalAlias &A : M->aliases()) { @@ -2829,11 +2956,10 @@ if (!F || F->isDeclaration()) continue; - assert(FunctionIndex.count(F) == 1); WritePerModuleFunctionSummaryRecord( - NameVals, FunctionIndex[F]->functionSummary(), - VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev, - Stream); + NameVals, FunctionInfo.getBitcodeSummary(*F), + VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), + FSNoCallsAbbrev, FSCallsAbbrev, FSCallsProfileAbbrev, Stream, *F); } Stream.ExitBlock(); @@ -2841,19 +2967,43 @@ /// Emit the combined function summary section into the combined index /// file. -static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I, - BitstreamWriter &Stream) { +static void +WriteCombinedFunctionSummary(const FunctionInfoIndex &I, + BitstreamWriter &Stream, + std::map &GUIDToValueIdMap) { Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); - // Abbrev for FS_CODE_COMBINED_ENTRY. + // Abbrev for FS_COMBINED_NOCALLS. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_NOCALLS)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount - unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSNoCallsAbbrev = Stream.EmitAbbrev(Abbv); - SmallVector NameVals; + // Abbrev for FS_COMBINED_CALLS. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLS)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + Abbv->Add( + BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueid/callsitecount pairs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); + + // Abbrev for FS_COMBINED_CALLS_PROFILE. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLS_PROFILE)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + Abbv->Add(BitCodeAbbrevOp( + BitCodeAbbrevOp::Array)); // valueid/callsitecount/profilecount triples + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; for (const auto &FII : I) { for (auto &FI : FII.second) { FunctionSummary *FS = FI->functionSummary(); @@ -2863,13 +3013,43 @@ NameVals.push_back(getEncodedLinkage(FS->getFunctionLinkage())); NameVals.push_back(FS->instCount()); + bool HasProfileData = false; + for (auto &EI : FS->edges()) { + HasProfileData |= EI.second.ProfileCount != 0; + if (HasProfileData) + break; + } + + bool HasCalls = false; + for (auto &EI : FS->edges()) { + const auto &VMI = GUIDToValueIdMap.find(EI.first); + // If this GUID doesn't have an entry, it doesn't have a function + // summary and we don't need to record any calls to it. + if (VMI == GUIDToValueIdMap.end()) + continue; + HasCalls = true; + NameVals.push_back(VMI->second); + assert(EI.second.CallsiteCount > 0 && "Expected at least one callsite"); + NameVals.push_back(EI.second.CallsiteCount); + if (HasProfileData) + NameVals.push_back(EI.second.ProfileCount); + } + // Record the starting offset of this summary entry for use // in the VST entry. Add the current code size since the // reader will invoke readRecord after the abbrev id read. FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); + unsigned FSAbbrev = + HasCalls ? (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev) + : FSNoCallsAbbrev; + unsigned Code = HasCalls + ? (HasProfileData ? bitc::FS_COMBINED_CALLS_PROFILE + : bitc::FS_COMBINED_CALLS) + : bitc::FS_COMBINED_NOCALLS; + // Emit the finished record. - Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev); + Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } } @@ -2949,18 +3129,18 @@ WriteOperandBundleTags(M, Stream); // Emit function bodies. - DenseMap> FunctionIndex; + FunctionWriteInfo FunctionInfo(EmitFunctionSummary); for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) if (!F->isDeclaration()) - WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary); + WriteFunction(*F, M, VE, Stream, FunctionInfo, EmitFunctionSummary); // Need to write after the above call to WriteFunction which populates // the summary information in the index. if (EmitFunctionSummary) - WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream); + WritePerModuleFunctionSummary(FunctionInfo, M, VE, Stream); WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, - VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); + VSTOffsetPlaceholder, BitcodeStartBit, &FunctionInfo); Stream.ExitBlock(); } @@ -3105,12 +3285,21 @@ // Write the module paths in the combined index. WriteModStrings(Index, Stream); + // Assign unique value ids to all functions in the index for use + // in writing out the call graph edges. Save the mapping from GUID + // to the new global value id to use when writing those edges, which + // are currently saved in the index in terms of GUID. + std::map GUIDToValueIdMap; + unsigned GlobalValueId = 0; + for (auto &II : Index) + GUIDToValueIdMap[II.first] = ++GlobalValueId; + // Write the function summary combined index records. - WriteCombinedFunctionSummary(Index, Stream); + WriteCombinedFunctionSummary(Index, Stream, GUIDToValueIdMap); // Need a special VST writer for the combined index (we don't have a // real VST and real values when this is invoked). - WriteCombinedValueSymbolTable(Index, Stream); + WriteCombinedValueSymbolTable(Index, Stream, GUIDToValueIdMap); Stream.ExitBlock(); Index: lib/Bitcode/Writer/LLVMBuild.txt =================================================================== --- lib/Bitcode/Writer/LLVMBuild.txt +++ lib/Bitcode/Writer/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = BitWriter parent = Bitcode -required_libraries = Core Support +required_libraries = Analysis Core Support Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- /dev/null +++ test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.ll @@ -0,0 +1,11 @@ +; ModuleID = 'thinlto-function-summary-callgraph2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @func() #0 !prof !2 { +entry: + ret void +} + +!2 = !{!"function_entry_count", i64 1} Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph.ll =================================================================== --- /dev/null +++ test/Bitcode/Inputs/thinlto-function-summary-callgraph.ll @@ -0,0 +1,10 @@ +; ModuleID = 'thinlto-function-summary-callgraph2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @func() #0 { +entry: + ret void +} + Index: test/Bitcode/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- /dev/null +++ test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -0,0 +1,43 @@ +; RUN: llvm-as -function-summary %s -o %t.o +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s +; RUN: llvm-as -function-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o +; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o +; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED + +; CHECK: +; CHECK-NEXT: +; CHECK-NEXT: + +; COMBINED: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @main() #0 !prof !2 { +entry: + call void (...) @func() + ret i32 0 +} + +declare void @func(...) #1 + +!2 = !{!"function_entry_count", i64 1} Index: test/Bitcode/thinlto-function-summary-callgraph.ll =================================================================== --- /dev/null +++ test/Bitcode/thinlto-function-summary-callgraph.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as -function-summary %s -o %t.o +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s +; RUN: llvm-as -function-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o +; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o +; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED + +; CHECK: +; CHECK-NEXT: +; CHECK-NEXT: + +; COMBINED: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + call void (...) @func() + ret i32 0 +} + +declare void @func(...) #1 Index: test/Bitcode/thinlto-function-summary.ll =================================================================== --- test/Bitcode/thinlto-function-summary.ll +++ test/Bitcode/thinlto-function-summary.ll @@ -5,9 +5,9 @@ ; same in the ValueSumbolTable, to ensure the ordering is stable. ; Also check the linkage field on the summary entries. ; BC: record string = 'foo' Index: test/Bitcode/thinlto-summary-linkage-types.ll =================================================================== --- test/Bitcode/thinlto-summary-linkage-types.ll +++ test/Bitcode/thinlto-summary-linkage-types.ll @@ -5,57 +5,57 @@ ; RUN: llvm-bcanalyzer -dump %t2.thinlto.bc | FileCheck %s --check-prefix=COMBINED define private void @private() -; CHECK: