Index: include/llvm/Analysis/IndirectCallPromotionAnalysis.h =================================================================== --- /dev/null +++ include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -0,0 +1,67 @@ +//===- IndirectCallPromotionAnalysis.h - Indirect call analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// Interface to identify indirect call promotion candidates. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H +#define LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H + +#include "llvm/ProfileData/InstrProf.h" + +namespace llvm { + +class Instruction; + +// Class for identifying profitable indirect call promotion candidates when +// the indirect-call value profile metadata is available. +class ICallPromotionAnalysis { +private: + // Allocate space to read the profile annotation. + std::unique_ptr ValueDataArray; + + // Count is the call count for the direct-call target and + // TotalCount is the call count for the indirect-call callsite. + // Return true we should promote this indirect-call target. + bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); + + // Returns the number of profitable candidates to promote for the + // current ValueDataArray and the given \p Inst. + uint32_t getProfitablePromotionCandidates(const Instruction *Inst, + uint32_t NumVals, + uint64_t TotalCount); + + // Noncopyable + ICallPromotionAnalysis(const ICallPromotionAnalysis &other) = delete; + ICallPromotionAnalysis & + operator=(const ICallPromotionAnalysis &other) = delete; + +public: + ICallPromotionAnalysis(); + + /// \brief Returns reference to array of InstrProfValueData for the given + /// instruction \p I. + /// + /// The \p NumVals, \p TotalCount and \p NumCandidates + /// are set to the number of values in the array, the total profile count + /// of the indirect call \p I, and the number of profitable candidates + /// in the given array (which is sorted in reverse order of profitability). + /// + /// The returned array space is owned by this class, and overwritten on + /// subsequent calls. + ArrayRef + getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, + uint64_t &TotalCount, + uint32_t &NumCandidates); +}; + +} // end namespace llvm + +#endif Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -80,6 +80,7 @@ assert(Kind == VI_Value && "Not a Value type"); return TheValue.V; } + bool isGUID() const { return Kind == VI_GUID; } }; /// \brief Function and variable summary information to aid decisions and @@ -261,6 +262,14 @@ CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info)); } + /// Record a call graph edge from this function to each function GUID recorded + /// in \p CallGraphEdges. + void + addCallGraphEdges(DenseMap &CallGraphEdges) { + for (auto &EI : CallGraphEdges) + addCallGraphEdge(EI.first, EI.second); + } + /// Record a call graph edge from this function to the function identified /// by \p CalleeV, with \p CalleeInfo including the cumulative profile /// count (across all calls from this function) or 0 if no PGO. Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -28,6 +28,7 @@ EHPersonalities.cpp GlobalsModRef.cpp IVUsers.cpp + IndirectCallPromotionAnalysis.cpp InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Index: lib/Analysis/IndirectCallPromotionAnalysis.cpp =================================================================== --- /dev/null +++ lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -0,0 +1,132 @@ +//===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Helper methods for identifying profitable indirect call promotion +// candidates for an instruction when the indirect-call value profile metadata +// is available. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IndirectCallPromotionAnalysis.h" +#include "IndirectCallSiteVisitor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Debug.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "pgo-icall-prom-analysis" + +// The minimum call count for the direct-call target to be considered as the +// promotion candidate. +static cl::opt + ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to the direct call target " + "for the promotion")); + +// The percent threshold for the direct-call target (this call site vs the +// total call count) for it to be considered as the promotion target. +static cl::opt + ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The percentage threshold for the promotion")); + +// Set the maximum number of targets to promote for a single indirect-call +// callsite. +static cl::opt + MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of promotions for a single indirect " + "call callsite")); + +// If the option is set to true, only call instructions will be considered for +// transformation -- invoke instructions will be ignored. +static cl::opt + ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, + cl::desc("Run indirect-call promotion for call instructions " + "only")); + +// If the option is set to true, only invoke instructions will be considered for +// transformation -- call instructions will be ignored. +static cl::opt ICPInvokeOnly("icp-invoke-only", cl::init(false), + cl::Hidden, + cl::desc("Run indirect-call promotion for " + "invoke instruction only")); + +ICallPromotionAnalysis::ICallPromotionAnalysis() { + ValueDataArray = llvm::make_unique(MaxNumPromotions); +} + +bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, + uint64_t TotalCount) { + if (Count < ICPCountThreshold) + return false; + + unsigned Percentage = (Count * 100) / TotalCount; + return (Percentage >= ICPPercentThreshold); +} + +// Indirect-call promotion heuristic. The direct targets are sorted based on +// the count. Stop at the first target that is not promoted. Returns the +// number of candidates deemed profitable. +uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( + const Instruction *Inst, uint32_t NumVals, uint64_t TotalCount) { + ArrayRef ValueDataRef(ValueDataArray.get(), NumVals); + + DEBUG(dbgs() << " \nWork on callsite " << *Inst << " Num_targets: " << NumVals + << "\n"); + + uint32_t I = 0; + for (; I < MaxNumPromotions && I < NumVals; I++) { + uint64_t Count = ValueDataRef[I].Count; + assert(Count <= TotalCount); + uint64_t Target = ValueDataRef[I].Value; + DEBUG(dbgs() << " Candidate " << I << " Count=" << Count + << " Target_func: " << Target << "\n"); + + if (ICPInvokeOnly && dyn_cast(Inst)) { + DEBUG(dbgs() << " Not promote: User options.\n"); + return I; + } + if (ICPCallOnly && dyn_cast(Inst)) { + DEBUG(dbgs() << " Not promote: User option.\n"); + return I; + } + if (!isPromotionProfitable(Count, TotalCount)) { + DEBUG(dbgs() << " Not promote: Cold target.\n"); + return I; + } + TotalCount -= Count; + } + return I; +} + +ArrayRef +ICallPromotionAnalysis::getPromotionCandidatesForInstruction( + const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, + uint32_t &NumCandidates) { + bool Res = + getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount); + if (!Res) { + NumCandidates = 0; + return ArrayRef(); + } + NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount); + return ArrayRef(ValueDataArray.get(), NumVals); +} Index: lib/Analysis/IndirectCallSiteVisitor.h =================================================================== --- /dev/null +++ lib/Analysis/IndirectCallSiteVisitor.h @@ -0,0 +1,43 @@ +//===-- IndirectCallSiteVisitor.h - indirect call-sites visitor -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements defines a visitor class and a helper function that find +// all indirect call-sites in a function. + +#include "llvm/IR/InstVisitor.h" +#include + +namespace llvm { +// Visitor class that finds all indirect call sites. +struct PGOIndirectCallSiteVisitor + : public InstVisitor { + std::vector IndirectCallInsts; + PGOIndirectCallSiteVisitor() {} + + void visitCallSite(CallSite CS) { + if (CS.getCalledFunction() || !CS.getCalledValue()) + return; + Instruction *I = CS.getInstruction(); + if (CallInst *CI = dyn_cast(I)) { + if (CI->isInlineAsm()) + return; + } + if (isa(CS.getCalledValue())) + return; + IndirectCallInsts.push_back(I); + } +}; + +// Helper function that finds all indirect call sites. +static inline std::vector findIndirectCallSites(Function &F) { + PGOIndirectCallSiteVisitor ICV; + ICV.visit(F); + return ICV.IndirectCallInsts; +} +} Index: lib/Analysis/LLVMBuild.txt =================================================================== --- lib/Analysis/LLVMBuild.txt +++ lib/Analysis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Analysis parent = Libraries -required_libraries = Core Support +required_libraries = Core Support ProfileData Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -73,7 +74,9 @@ // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. DenseMap CallGraphEdges; + DenseMap IndirectCallEdges; DenseSet RefEdges; + ICallPromotionAnalysis ICallAnalysis; SmallPtrSet Visited; for (const BasicBlock &BB : F) @@ -83,13 +86,26 @@ if (auto CS = ImmutableCallSite(&I)) { auto *CalledFunction = CS.getCalledFunction(); - if (CalledFunction && CalledFunction->hasName() && - !CalledFunction->isIntrinsic()) { - auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; - auto *CalleeId = - M->getValueSymbolTable().lookup(CalledFunction->getName()); - CallGraphEdges[CalleeId] += - (ScaledCount ? ScaledCount.getValue() : 0); + if (CalledFunction) { + if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) { + auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; + auto *CalleeId = + M->getValueSymbolTable().lookup(CalledFunction->getName()); + CallGraphEdges[CalleeId] += + (ScaledCount ? ScaledCount.getValue() : 0); + } + } else { + const CallInst *CI = dyn_cast(&I); + if (CS.getCalledValue() && !isa(CS.getCalledValue()) && + !(CI && CI->isInlineAsm())) { + uint32_t NumVals, NumCandidates; + uint64_t TotalCount; + auto CandidateProfileData = + ICallAnalysis.getPromotionCandidatesForInstruction( + &I, NumVals, TotalCount, NumCandidates); + for (auto &Candidate : CandidateProfileData) + IndirectCallEdges[Candidate.Value] += Candidate.Count; + } } } findRefEdges(&I, RefEdges, Visited); @@ -99,6 +115,7 @@ std::unique_ptr FuncSummary = llvm::make_unique(Flags, NumInsts); FuncSummary->addCallGraphEdges(CallGraphEdges); + FuncSummary->addCallGraphEdges(IndirectCallEdges); FuncSummary->addRefEdges(RefEdges); Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary)); } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -119,6 +119,14 @@ /// The start bit of the module block, for use in generating a module hash uint64_t BitcodeStartBit = 0; + /// Map that holds the correspondence between GUIDs in the summary index, + /// that came from indirect call profiles, and a value id generated by this + /// class to use in the VST and summary block records. + std::map GUIDToValueIdMap; + + /// Tracks the last value id recorded in the GUIDToValueMap. + unsigned GlobalValueId; + public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. @@ -132,6 +140,20 @@ // will start at the bitcode, and we need the offset of the VST // to line up. BitcodeStartBit = Stream.GetCurrentBitNo(); + + // Assign ValueIds to any callee values in the index that came from + // indirect call profiles and were recorded as a GUID not a Value* + // (which would have been assigned an ID by the ValueEnumerator). + // The starting ValueId is just after the number of values in the + // ValueEnumerator, so that they can be emitted in the VST. + GlobalValueId = VE.getValues().size(); + if (Index) + for (const auto &I : *Index) + for (auto &S : I.second) + if (auto FS = dyn_cast(S.get())) + for (auto &EI : FS->calls()) + if (EI.first.isGUID()) + assignValueId(EI.first.getGUID()); } private: @@ -260,6 +282,22 @@ unsigned FSModRefsAbbrev); void writePerModuleGlobalValueSummary(); void writeModuleHash(size_t BlockStartPos); + + void assignValueId(GlobalValue::GUID ValGUID) { + GUIDToValueIdMap[ValGUID] = ++GlobalValueId; + } + unsigned getValueId(GlobalValue::GUID ValGUID) { + const auto &VMI = GUIDToValueIdMap.find(ValGUID); + assert(VMI != GUIDToValueIdMap.end()); + return VMI->second; + } + // Helper to get the valueId for the type of value recorded in VI. + unsigned getValueId(ValueInfo VI) { + if (VI.isGUID()) + return getValueId(VI.getGUID()); + return VE.getValueID(VI.getValue()); + } + std::map &valueIds() { return GUIDToValueIdMap; } }; /// Class to manage the bitcode writing for a combined index. @@ -2704,6 +2742,7 @@ unsigned FnEntry8BitAbbrev; unsigned FnEntry7BitAbbrev; unsigned FnEntry6BitAbbrev; + unsigned GUIDEntryAbbrev; if (IsModuleLevel && hasVSTOffsetPlaceholder()) { // 8-bit fixed-width VST_CODE_FNENTRY function strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -2731,11 +2770,19 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + + // FIXME: Change the name of this record as it is now used by + // the per-module index as well. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid + GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv); } // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. - SmallVector NameVals; + SmallVector NameVals; for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. @@ -2796,6 +2843,16 @@ Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } + // Emit any GUID valueIDs created for indirect call edges into the + // module-level VST. + if (IsModuleLevel && hasVSTOffsetPlaceholder()) + for (const auto &GI : valueIds()) { + NameVals.push_back(GI.second); + NameVals.push_back(GI.first); + Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, + GUIDEntryAbbrev); + NameVals.clear(); + } Stream.ExitBlock(); } @@ -3217,12 +3274,11 @@ std::sort(Calls.begin(), Calls.end(), [this](const FunctionSummary::EdgeTy &L, const FunctionSummary::EdgeTy &R) { - return VE.getValueID(L.first.getValue()) < - VE.getValueID(R.first.getValue()); + return getValueId(L.first) < getValueId(R.first); }); bool HasProfileData = F.getEntryCount().hasValue(); for (auto &ECI : Calls) { - NameVals.push_back(VE.getValueID(ECI.first.getValue())); + NameVals.push_back(getValueId(ECI.first)); assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); NameVals.push_back(ECI.second.CallsiteCount); if (HasProfileData) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -360,10 +360,11 @@ /// PGO instrumentation is added during the compile phase for ThinLTO, do /// not run it a second time addPGOInstrPasses(MPM); - // Indirect call promotion that promotes intra-module targets only. - MPM.add(createPGOIndirectCallPromotionLegacyPass()); } + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionLegacyPass()); + if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive Index: lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRBuilder.h" @@ -49,28 +50,6 @@ static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion")); -// The minimum call count for the direct-call target to be considered as the -// promotion candidate. -static cl::opt - ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), - cl::desc("The minimum count to the direct call target " - "for the promotion")); - -// The percent threshold for the direct-call target (this call site vs the -// total call count) for it to be considered as the promotion target. -static cl::opt - ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The percentage threshold for the promotion")); - -// Set the maximum number of targets to promote for a single indirect-call -// callsite. -static cl::opt - MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore, - cl::desc("Max number of promotions for a single indirect " - "call callsite")); - // Set the cutoff value for the promotion. If the value is other than 0, we // stop the transformation once the total number of promotions equals the cutoff // value. @@ -91,19 +70,6 @@ static cl::opt ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode")); -// If the option is set to true, only call instructions will be considered for -// transformation -- invoke instructions will be ignored. -static cl::opt - ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, - cl::desc("Run indirect-call promotion for call instructions " - "only")); - -// If the option is set to true, only invoke instructions will be considered for -// transformation -- call instructions will be ignored. -static cl::opt ICPInvokeOnly("icp-invoke-only", cl::init(false), - cl::Hidden, - cl::desc("Run indirect-call promotion for " - "invoke instruction only")); // Dump the function level IR if the transformation happened in this // function. For debug use only. @@ -157,14 +123,6 @@ // defines. InstrProfSymtab *Symtab; - // Allocate space to read the profile annotation. - std::unique_ptr ValueDataArray; - - // Count is the call count for the direct-call target and - // TotalCount is the call count for the indirect-call callsite. - // Return true we should promote this indirect-call target. - bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); - enum TargetStatus { OK, // Should be able to promote. NotAvailableInModule, // Cannot find the target in current module. @@ -188,7 +146,7 @@ // of promotions. std::vector getPromotionCandidatesForCallSite( Instruction *Inst, const ArrayRef &ValueDataRef, - uint64_t TotalCount); + uint64_t TotalCount, uint32_t NumCandidates); // Main function that transforms Inst (either a indirect-call instruction, or // an invoke instruction , to a conditional call to F. This is like: @@ -232,21 +190,11 @@ public: ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab) : F(Func), M(Modu), Symtab(Symtab) { - ValueDataArray = llvm::make_unique(MaxNumPromotions); } bool processFunction(); }; } // end anonymous namespace -bool ICallPromotionFunc::isPromotionProfitable(uint64_t Count, - uint64_t TotalCount) { - if (Count < ICPCountThreshold) - return false; - - unsigned Percentage = (Count * 100) / TotalCount; - return (Percentage >= ICPPercentThreshold); -} - ICallPromotionFunc::TargetStatus ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&TargetFunction) { @@ -291,41 +239,30 @@ std::vector ICallPromotionFunc::getPromotionCandidatesForCallSite( Instruction *Inst, const ArrayRef &ValueDataRef, - uint64_t TotalCount) { + uint64_t TotalCount, uint32_t NumCandidates) { uint32_t NumVals = ValueDataRef.size(); std::vector Ret; DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst - << " Num_targets: " << NumVals << "\n"); + << " Num_targets: " << NumVals + << " Num_candidates: " << NumCandidates << "\n"); NumOfPGOICallsites++; if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { DEBUG(dbgs() << " Skip: User options.\n"); return Ret; } - for (uint32_t I = 0; I < MaxNumPromotions && I < NumVals; I++) { + for (uint32_t I = 0; I < NumCandidates; I++) { uint64_t Count = ValueDataRef[I].Count; assert(Count <= TotalCount); uint64_t Target = ValueDataRef[I].Value; DEBUG(dbgs() << " Candidate " << I << " Count=" << Count << " Target_func: " << Target << "\n"); - if (ICPInvokeOnly && dyn_cast(Inst)) { - DEBUG(dbgs() << " Not promote: User options.\n"); - break; - } - if (ICPCallOnly && dyn_cast(Inst)) { - DEBUG(dbgs() << " Not promote: User option.\n"); - break; - } if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); break; } - if (!isPromotionProfitable(Count, TotalCount)) { - DEBUG(dbgs() << " Not promote: Cold target.\n"); - break; - } Function *TargetFunction = nullptr; TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction); if (Status != OK) { @@ -633,18 +570,16 @@ // annotation to perform indirect-call promotion. bool ICallPromotionFunc::processFunction() { bool Changed = false; + ICallPromotionAnalysis ICallAnalysis; for (auto &I : findIndirectCallSites(F)) { - uint32_t NumVals; + uint32_t NumVals, NumCandidates; uint64_t TotalCount; - bool Res = - getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions, - ValueDataArray.get(), NumVals, TotalCount); - if (!Res) + auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( + I, NumVals, TotalCount, NumCandidates); + if (!NumCandidates) continue; - ArrayRef ValueDataArrayRef(ValueDataArray.get(), - NumVals); - auto PromotionCandidates = - getPromotionCandidatesForCallSite(I, ValueDataArrayRef, TotalCount); + auto PromotionCandidates = getPromotionCandidatesForCallSite( + I, ICallProfDataRef, TotalCount, NumCandidates); uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount); if (NumPromoted == 0) continue; @@ -656,8 +591,8 @@ if (TotalCount == 0 || NumPromoted == NumVals) continue; // Otherwise we need update with the un-promoted records back. - annotateValueSite(*M, *I, ValueDataArrayRef.slice(NumPromoted), TotalCount, - IPVK_IndirectCallTarget, MaxNumPromotions); + annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount, + IPVK_IndirectCallTarget, NumPromoted); } return Changed; } Index: test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll @@ -0,0 +1,9 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common local_unnamed_addr global void ()* null, align 8 + +define void @a() { +entry: + ret void +} Index: test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll @@ -0,0 +1,32 @@ +; Do setup work for all below tests: generate bitcode and combined index +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc + +; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +; IMPORTS: Import a + +; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; PASS-REMARK: Promote indirect call to a with count 1 out of 1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = external local_unnamed_addr global void ()*, align 8 + +define i32 @main() local_unnamed_addr { +entry: + %0 = load void ()*, void ()** @foo, align 8 +; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] + tail call void %0(), !prof !1 + ret i32 0 +} + +!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1} + +; Should not have a VP annotation on new indirect call (check before and after +; branch_weights annotation). +; ICALL-PROM-NOT: !"VP" +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0} +; ICALL-PROM-NOT: !"VP"