Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -194,20 +194,22 @@ // and combined index cases. enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid, callsitecount, constArgumentsBitmask)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, callsitecount, constArgumentsBitmask, + // profilecount)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid, callsitecount, constArgumentsBitmask)] FS_COMBINED = 4, // COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, callsitecount, constArgumentsBitmask, + // profilecount)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -240,13 +240,24 @@ return *AliaseeSummary; } }; +/// Specific info about the call site, like the info about the arguments. +struct CallsiteInfo { + /// If the n-th bit is set, the n-th argument is a constant. + /// Only 16 bits, because functions with more arguments are very rare.
+ typedef uint16_t ConstArgumentsBitmaskTy; + ConstArgumentsBitmaskTy ConstArgumentsBitmask = 0; + + void setConstArgument(unsigned int ArgNumber); + // Returns the number of bits set in ConstArgumentsBitmask. + int getConstArgumentsCount() const; +}; /// \brief Function summary information to aid decisions and implementation of /// importing. class FunctionSummary : public GlobalValueSummary { public: - /// call edge pair. - typedef std::pair EdgeTy; + /// > call edge pair. + typedef std::pair> EdgeTy; private: /// Number of instructions (ignoring debug instructions, e.g.) computed @@ -272,30 +283,35 @@ /// Record a call graph edge from this function to the function identified /// by \p CalleeGUID, with \p CalleeInfo including the cumulative profile /// count (across all calls from this function) or 0 if no PGO. - void addCallGraphEdge(GlobalValue::GUID CalleeGUID, CalleeInfo Info) { - CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info)); + void addCallGraphEdge(GlobalValue::GUID CalleeGUID, CalleeInfo Info, + CallsiteInfo SiteInfo) { + CallGraphEdgeList.emplace_back(CalleeGUID, std::make_pair(Info, SiteInfo)); } /// Record a call graph edge from this function to each function GUID recorded /// in \p CallGraphEdges. - void - addCallGraphEdges(DenseMap &CallGraphEdges) { + void addCallGraphEdges( + DenseMap> + &CallGraphEdges) { for (auto &EI : CallGraphEdges) - addCallGraphEdge(EI.first, EI.second); + addCallGraphEdge(EI.first, EI.second.first, EI.second.second); } /// Record a call graph edge from this function to the function identified /// by \p CalleeV, with \p CalleeInfo including the cumulative profile /// count (across all calls from this function) or 0 if no PGO.
- void addCallGraphEdge(const Value *CalleeV, CalleeInfo Info) { - CallGraphEdgeList.push_back(std::make_pair(CalleeV, Info)); + void addCallGraphEdge(const Value *CalleeV, CalleeInfo Info, + CallsiteInfo SiteInfo) { + CallGraphEdgeList.emplace_back(CalleeV, std::make_pair(Info, SiteInfo)); } /// Record a call graph edge from this function to each function recorded /// in \p CallGraphEdges. - void addCallGraphEdges(DenseMap &CallGraphEdges) { + void + addCallGraphEdges(DenseMap> + &CallGraphEdges) { for (auto &EI : CallGraphEdges) - addCallGraphEdge(EI.first, EI.second); + addCallGraphEdge(EI.first, EI.second.first, EI.second.second); } /// Return the list of pairs. Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -73,8 +73,9 @@ unsigned NumInsts = 0; // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. - DenseMap CallGraphEdges; - DenseMap IndirectCallEdges; + DenseMap> CallGraphEdges; + DenseMap> + IndirectCallEdges; DenseSet RefEdges; ICallPromotionAnalysis ICallAnalysis; @@ -92,8 +93,19 @@ auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; auto *CalleeId = M.getValueSymbolTable().lookup(CalledFunction->getName()); - CallGraphEdges[CalleeId] += + CallGraphEdges[CalleeId].first += (ScaledCount ? ScaledCount.getValue() : 0); + + auto *Call = cast(CS.getInstruction()); + // For each const argument add bit into CallsiteInfo. + // Note that multiple calls to same function from this function + // with different set of const arguments will result in union of + // all bits. 
+ for (unsigned ArgNum = 0; ArgNum < Call->getNumArgOperands(); + ArgNum++) { + if (isa(Call->getArgOperand(ArgNum))) + CallGraphEdges[CalleeId].second.setConstArgument(ArgNum); + } } } else { // Otherwise, check for an indirect call (call to a non-const value @@ -106,8 +118,12 @@ auto CandidateProfileData = ICallAnalysis.getPromotionCandidatesForInstruction( &I, NumVals, TotalCount, NumCandidates); - for (auto &Candidate : CandidateProfileData) - IndirectCallEdges[Candidate.Value] += Candidate.Count; + for (auto &Candidate : CandidateProfileData) { + IndirectCallEdges[Candidate.Value].first += Candidate.Count; + // FIXME: CallsiteInfo should be set here, but we would end up + // setting it for every candidate which might result in + // nonsense data. It is worth investigating later. + } } } } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -6272,10 +6272,12 @@ ++I) { unsigned CalleeValueId = Record[I]; unsigned CallsiteCount = Record[++I]; + CallsiteInfo Info; + Info.ConstArgumentsBitmask = Record[++I]; uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo(CallsiteCount, ProfileCount), Info); } auto GUID = getGUIDFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -6330,10 +6332,12 @@ break; } // FS_COMBINED: [valueid, modid, flags, instcount, numrefs, - // numrefs x valueid, n x (valueid, callsitecount)] + // numrefs x valueid, n x (valueid, callsitecount, + // constArgumentsBitmask)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, callsitecount, constArgumentsBitmask, + // profilecount)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { unsigned ValueID = Record[0]; @@ -6361,10 +6365,12 @@ ++I) { unsigned CalleeValueId = Record[I]; unsigned CallsiteCount = Record[++I]; + CallsiteInfo Info; + Info.ConstArgumentsBitmask = Record[++I]; uint64_t ProfileCount = HasProfile ?
Record[++I] : 0; GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo(CallsiteCount, ProfileCount), Info); } GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; TheIndex->addGlobalValueSummary(GUID, std::move(FS)); Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3292,10 +3292,12 @@ bool HasProfileData = F.getEntryCount().hasValue(); for (auto &ECI : Calls) { NameVals.push_back(getValueId(ECI.first)); - assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); - NameVals.push_back(ECI.second.CallsiteCount); + assert(ECI.second.first.CallsiteCount > 0 && + "Expected at least one callsite"); + NameVals.push_back(ECI.second.first.CallsiteCount); + NameVals.push_back(ECI.second.second.ConstArgumentsBitmask); if (HasProfileData) - NameVals.push_back(ECI.second.ProfileCount); + NameVals.push_back(ECI.second.first.ProfileCount); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); @@ -3498,7 +3500,6 @@ for (const auto &I : *this) { GlobalValueSummary *S = I.second; assert(S); - assert(hasValueId(I.first)); unsigned ValueId = getValueId(I.first); SummaryToValueIdMap[S] = ValueId; @@ -3539,21 +3540,26 @@ bool HasProfileData = false; for (auto &EI : FS->calls()) { - HasProfileData |= EI.second.ProfileCount != 0; + HasProfileData |= EI.second.first.ProfileCount != 0; if (HasProfileData) break; } - + // Write if HasProfileData: + // n x (valueid, callsitecount, constArgumentsBitmask, profilecount) + // else: + // n x (valueid, callsitecount, constArgumentsBitmask) for (auto &EI : FS->calls()) { // If this GUID doesn't have a value id, it doesn't have a function // summary and we don't need to record any calls to it.
if (!hasValueId(EI.first.getGUID())) continue; NameVals.push_back(getValueId(EI.first.getGUID())); - assert(EI.second.CallsiteCount > 0 && "Expected at least one callsite"); - NameVals.push_back(EI.second.CallsiteCount); + assert(EI.second.first.CallsiteCount > 0 && + "Expected at least one callsite"); + NameVals.push_back(EI.second.first.CallsiteCount); + NameVals.push_back(EI.second.second.ConstArgumentsBitmask); if (HasProfileData) - NameVals.push_back(EI.second.ProfileCount); + NameVals.push_back(EI.second.first.ProfileCount); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; // Create the combined module index/summary from multiple @@ -105,3 +106,15 @@ auto &Summary = SummaryList->second[0]; return Summary.get(); } +void CallsiteInfo::setConstArgument(unsigned int ArgNumber) { + // Don't store information about arguments with index 16 or greater, because + // such functions are very uncommon. Maybe it would make sense + // to set bit number ArgNumber % 16 depending on importer heuristic.
+ if (ArgNumber >= 16) + return; + + ConstArgumentsBitmask |= 1u << ArgNumber; +} +int CallsiteInfo::getConstArgumentsCount() const { + return countPopulation(ConstArgumentsBitmask); +} Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph-const-arguments.ll =================================================================== --- /dev/null +++ test/Bitcode/Inputs/thinlto-function-summary-callgraph-const-arguments.ll @@ -0,0 +1,11 @@ +; ModuleID = 'thinlto-function-summary-callgraph-const-arguments2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function with 17 arguments +; Function Attrs: nounwind uwtable +define void @multipleArgs(i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8) #0 { +entry: + ret void +} + Index: test/Bitcode/thinlto-alias.ll =================================================================== --- test/Bitcode/thinlto-alias.ll +++ test/Bitcode/thinlto-alias.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; COMBINED-NEXT: ; Followed by the alias and aliasee ; COMBINED-NEXT: +; op8 is bitmask for const arguments op8=32781 = 1000000000001101b +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: ; COMBINED: +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: -; ModuleID = 'thinlto-function-summary-callgraph.ll' + +; ModuleID = 'thinlto-function-summary-callgraph-const-arguments.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +@p8 = external global i8 +@p32 = external global i32 ; Function Attrs: nounwind uwtable define i32 @main() #0 { entry: - call void (...) @func() + %n8 = load i8, i8* @p8 + %n = load i32, i32* @p32 + + ; Passing the 0th, 2nd, 3rd, 15th and 16th arguments as constants.
+ ; This should set bits 0, 2, 3 and 15 in ConstArgumentsBitmask and skip + ; the 16th argument (the mask has only 16 bits) + call void @multipleArgs(i32 0, i32 %n, i8 2, i8 3, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 %n8, i8 15, i8 16) ret i32 0 } -declare void @func(...) #1 +declare void @multipleArgs(i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8) #1 Index: test/Bitcode/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: -; CHECK-DAG: +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same instruction as the call: -; CHECK-DAG: +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: -; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: -; CHECK-DAG: +; CHECK-DAG: ; Variable bar initialization contains address reference to func: ; CHECK-DAG: ; CHECK: