Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -80,6 +80,7 @@ assert(Kind == VI_Value && "Not a Value type"); return TheValue.V; } + bool isGUID() const { return Kind == VI_GUID; } }; /// \brief Function and variable summary information to aid decisions and @@ -261,6 +262,14 @@ CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info)); } + /// Record a call graph edge from this function to each function GUID recorded + /// in \p CallGraphEdges. + void + addCallGraphEdges(DenseMap &CallGraphEdges) { + for (auto &EI : CallGraphEdges) + addCallGraphEdge(EI.first, EI.second); + } + /// Record a call graph edge from this function to the function identified /// by \p CalleeV, with \p CalleeInfo including the cumulative profile /// count (across all calls from this function) or 0 if no PGO. Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -73,7 +74,9 @@ // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. DenseMap CallGraphEdges; + DenseMap IndirectCallEdges; DenseSet RefEdges; + ICallPromotionAnalysis ICallAnalysis; SmallPtrSet Visited; for (const BasicBlock &BB : F) @@ -83,13 +86,26 @@ if (auto CS = ImmutableCallSite(&I)) { auto *CalledFunction = CS.getCalledFunction(); - if (CalledFunction && CalledFunction->hasName() && - !CalledFunction->isIntrinsic()) { - auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; - auto *CalleeId = - M->getValueSymbolTable().lookup(CalledFunction->getName()); - CallGraphEdges[CalleeId] += - (ScaledCount ? ScaledCount.getValue() : 0); + if (CalledFunction) { + if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) { + auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; + auto *CalleeId = + M->getValueSymbolTable().lookup(CalledFunction->getName()); + CallGraphEdges[CalleeId] += + (ScaledCount ? ScaledCount.getValue() : 0); + } + } else { + const CallInst *CI = dyn_cast(&I); + if (CS.getCalledValue() && !isa(CS.getCalledValue()) && + !(CI && CI->isInlineAsm())) { + uint32_t NumVals, NumCandidates; + uint64_t TotalCount; + auto CandidateProfileData = + ICallAnalysis.getPromotionCandidatesForInstruction( + &I, NumVals, TotalCount, NumCandidates); + for (auto &Candidate : CandidateProfileData) + IndirectCallEdges[Candidate.Value] += Candidate.Count; + } } } findRefEdges(&I, RefEdges, Visited); @@ -99,6 +115,7 @@ std::unique_ptr FuncSummary = llvm::make_unique(Flags, NumInsts); FuncSummary->addCallGraphEdges(CallGraphEdges); + FuncSummary->addCallGraphEdges(IndirectCallEdges); FuncSummary->addRefEdges(RefEdges); Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary)); } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -119,6 +119,14 @@ /// The start bit of the module block, for use in generating a module hash uint64_t BitcodeStartBit = 0; + /// Map that holds the correspondence between GUIDs in the summary index, + /// that came from indirect call profiles, and a value id generated by this + /// class to use in the VST and summary block records. + std::map GUIDToValueIdMap; + + /// Tracks the last value id recorded in the GUIDToValueMap. + unsigned GlobalValueId; + public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. @@ -132,6 +140,25 @@ // will start at the bitcode, and we need the offset of the VST // to line up. BitcodeStartBit = Stream.GetCurrentBitNo(); + + // Assign ValueIds to any callee values in the index that came from + // indirect call profiles and were recorded as a GUID not a Value* + // (which would have been assigned an ID by the ValueEnumerator). + // The starting ValueId is just after the number of values in the + // ValueEnumerator, so that they can be emitted in the VST. + GlobalValueId = VE.getValues().size(); + if (Index) + for (const auto &GUIDSummaryLists : *Index) + // Examine all summaries for this GUID. + for (auto &Summary : GUIDSummaryLists.second) + if (auto FS = dyn_cast(Summary.get())) + // For each call in the function summary, see if the call + // is to a GUID (which means it is for an indirect call, + // otherwise we would have a Value for it). If so, synthesize + // a value id. + for (auto &CallEdge : FS->calls()) + if (CallEdge.first.isGUID()) + assignValueId(CallEdge.first.getGUID()); } private: @@ -260,6 +287,22 @@ unsigned FSModRefsAbbrev); void writePerModuleGlobalValueSummary(); void writeModuleHash(size_t BlockStartPos); + + void assignValueId(GlobalValue::GUID ValGUID) { + GUIDToValueIdMap[ValGUID] = ++GlobalValueId; + } + unsigned getValueId(GlobalValue::GUID ValGUID) { + const auto &VMI = GUIDToValueIdMap.find(ValGUID); + assert(VMI != GUIDToValueIdMap.end()); + return VMI->second; + } + // Helper to get the valueId for the type of value recorded in VI. + unsigned getValueId(ValueInfo VI) { + if (VI.isGUID()) + return getValueId(VI.getGUID()); + return VE.getValueID(VI.getValue()); + } + std::map &valueIds() { return GUIDToValueIdMap; } }; /// Class to manage the bitcode writing for a combined index. @@ -2704,6 +2747,7 @@ unsigned FnEntry8BitAbbrev; unsigned FnEntry7BitAbbrev; unsigned FnEntry6BitAbbrev; + unsigned GUIDEntryAbbrev; if (IsModuleLevel && hasVSTOffsetPlaceholder()) { // 8-bit fixed-width VST_CODE_FNENTRY function strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -2731,11 +2775,19 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + + // FIXME: Change the name of this record as it is now used by + // the per-module index as well. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid + GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv); } // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. - SmallVector NameVals; + SmallVector NameVals; for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. @@ -2796,6 +2848,16 @@ Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } + // Emit any GUID valueIDs created for indirect call edges into the + // module-level VST. + if (IsModuleLevel && hasVSTOffsetPlaceholder()) + for (const auto &GI : valueIds()) { + NameVals.push_back(GI.second); + NameVals.push_back(GI.first); + Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, + GUIDEntryAbbrev); + NameVals.clear(); + } Stream.ExitBlock(); } @@ -3217,12 +3279,11 @@ std::sort(Calls.begin(), Calls.end(), [this](const FunctionSummary::EdgeTy &L, const FunctionSummary::EdgeTy &R) { - return VE.getValueID(L.first.getValue()) < - VE.getValueID(R.first.getValue()); + return getValueId(L.first) < getValueId(R.first); }); bool HasProfileData = F.getEntryCount().hasValue(); for (auto &ECI : Calls) { - NameVals.push_back(VE.getValueID(ECI.first.getValue())); + NameVals.push_back(getValueId(ECI.first)); assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); NameVals.push_back(ECI.second.CallsiteCount); if (HasProfileData) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -360,10 +360,11 @@ /// PGO instrumentation is added during the compile phase for ThinLTO, do /// not run it a second time addPGOInstrPasses(MPM); - // Indirect call promotion that promotes intra-module targets only. - MPM.add(createPGOIndirectCallPromotionLegacyPass()); } + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionLegacyPass()); + if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive Index: test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll @@ -0,0 +1,9 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common local_unnamed_addr global void ()* null, align 8 + +define void @a() { +entry: + ret void +} Index: test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll @@ -0,0 +1,32 @@ +; Do setup work for all below tests: generate bitcode and combined index +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc + +; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +; IMPORTS: Import a + +; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; PASS-REMARK: Promote indirect call to a with count 1 out of 1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = external local_unnamed_addr global void ()*, align 8 + +define i32 @main() local_unnamed_addr { +entry: + %0 = load void ()*, void ()** @foo, align 8 +; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] + tail call void %0(), !prof !1 + ret i32 0 +} + +!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1} + +; Should not have a VP annotation on new indirect call (check before and after +; branch_weights annotation). +; ICALL-PROM-NOT: !"VP" +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0} +; ICALL-PROM-NOT: !"VP"