diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -256,6 +256,11 @@ // strings in strtab. // [n * name] FS_CFI_FUNCTION_DECLS = 18, + // Per-module summary that also adds relative block frequency to callee info. + // PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs, + // numrefs x valueid, + // n x (valueid, relblockfreq)] + FS_PERMODULE_RELBF = 19, }; enum MetadataCodes { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -25,6 +25,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -54,14 +55,28 @@ Hot = 3, Critical = 4 }; - HotnessType Hotness = HotnessType::Unknown; - CalleeInfo() = default; - explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {} + // The size of the bit-field might need to be adjusted if more values are + // added to HotnessType enum. + HotnessType Hotness : 3; + uint32_t RelBlockFreq : 29; + static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1; + + CalleeInfo() : Hotness(HotnessType::Unknown), RelBlockFreq(0) {} + explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF) + : Hotness(Hotness), RelBlockFreq(RelBF) {} void updateHotness(const HotnessType OtherHotness) { Hotness = std::max(Hotness, OtherHotness); } + + // When there are multiple edges between the same (caller, callee) pair, the + // relative block frequencies are summed up. 
+ void updateRelBlockFreq(uint64_t RBF) { + uint64_t Sum = SaturatingAdd(RelBlockFreq, RBF); + Sum = std::min(Sum, uint64_t(MaxRelBlockFreq)); + RelBlockFreq = static_cast(Sum); + } }; class GlobalValueSummary; diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -273,9 +273,24 @@ // to record the call edge to the alias in that case. Eventually // an alias summary will be created to associate the alias and // aliasee. - CallGraphEdges[Index.getOrInsertValueInfo( - cast(CalledValue))] - .updateHotness(Hotness); + auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo( + cast(CalledValue))]; + ValueInfo.updateHotness(Hotness); + // Add the relative block frequency to CalleeInfo if there is no profile + // information. + if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { + auto BBFreq = BFI->getBlockFreq(CI->getParent()).getFrequency(); + // FIXME: This might need some scaling to prevent BBFreq values from + // being rounded down to 0. + auto EntryFreq = BFI->getEntryFreq(); + // Block frequencies can be directly set for a block and so we need to + // handle the case of entry frequency being 0. + if (EntryFreq) + BBFreq /= EntryFreq; + else + BBFreq = 0; + ValueInfo.updateRelBlockFreq(BBFreq); + } } else { // Skip inline assembly calls. 
if (CI && CI->isInlineAsm()) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -743,7 +743,8 @@ std::vector makeRefList(ArrayRef Record); std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, - bool HasProfile); + bool HasProfile, + bool HasRelBF); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); @@ -5047,12 +5048,15 @@ return Ret; } -std::vector ModuleSummaryIndexBitcodeReader::makeCallList( - ArrayRef Record, bool IsOldProfileFormat, bool HasProfile) { +std::vector +ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef Record, + bool IsOldProfileFormat, + bool HasProfile, bool HasRelBF) { std::vector Ret; Ret.reserve(Record.size()); for (unsigned I = 0, E = Record.size(); I != E; ++I) { CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown; + uint64_t RelBF = 0; ValueInfo Callee = getValueInfoFromValueId(Record[I]).first; if (IsOldProfileFormat) { I += 1; // Skip old callsitecount field @@ -5060,7 +5064,9 @@ I += 1; // Skip old profilecount field } else if (HasProfile) Hotness = static_cast(Record[++I]); - Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}}); + else if (HasRelBF) + RelBF = Record[++I]; + Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)}); } return Ret; } @@ -5139,7 +5145,11 @@ // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, // n x (valueid, hotness)] + // FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs, + // numrefs x valueid, + // n x (valueid, relblockfreq)] case bitc::FS_PERMODULE: + case bitc::FS_PERMODULE_RELBF: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; @@ -5165,9 +5175,10 @@ std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == 
bitc::FS_PERMODULE_PROFILE); + bool HasRelBF = (BitCode == bitc::FS_PERMODULE_RELBF); std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile); + IsOldProfileFormat, HasProfile, HasRelBF); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -5259,7 +5270,7 @@ bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile); + IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -86,6 +86,9 @@ cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); +cl::opt WriteRelBFToSummary( + "write-relbf-to-summary", cl::Hidden, cl::init(false), + cl::desc("Write relative block frequency to function summary ")); namespace { /// These are manifest constants used by the bitcode writer. They do not need to @@ -3378,11 +3381,15 @@ NameVals.push_back(getValueId(ECI.first)); if (HasProfileData) NameVals.push_back(static_cast(ECI.second.Hotness)); + else if (WriteRelBFToSummary) + NameVals.push_back(ECI.second.RelBlockFreq); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = - (HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE); + (HasProfileData ? bitc::FS_PERMODULE_PROFILE + : (WriteRelBFToSummary ? bitc::FS_PERMODULE_RELBF + : bitc::FS_PERMODULE)); // Emit the finished record. 
Stream.EmitRecord(Code, NameVals, FSAbbrev); @@ -3448,31 +3455,34 @@ ArrayRef{GVI.second, GVI.first}); } - // Abbrev for FS_PERMODULE. + // Abbrev for FS_PERMODULE_PROFILE. auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid) + // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); - unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - // Abbrev for FS_PERMODULE_PROFILE. + // Abbrev for FS_PERMODULE or FS_PERMODULE_RELBF. Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); + if (WriteRelBFToSummary) + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF)); + else + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid, hotness) + // numrefs x valueid, n x (valueid [, relblockfreq]) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); - unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); // Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS. 
Abbv = std::make_shared(); diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -0,0 +1,35 @@ +; Test to check the callgraph in summary +; RUN: opt -write-relbf-to-summary -module-summary %s -o %t.o +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s + + +; CHECK: +; CHECK: