Index: include/llvm/IR/InstrTypes.h =================================================================== --- include/llvm/IR/InstrTypes.h +++ include/llvm/IR/InstrTypes.h @@ -866,6 +866,8 @@ /// @returns true iff the proposed cast is valid. /// @brief Determine if a cast is valid without creating one. static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy); + /// Same as the above method, but test if type SrcTy can be casted to DestTy. + static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy); /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -124,6 +124,7 @@ void initializeGCOVProfilerPass(PassRegistry&); void initializePGOInstrumentationGenPass(PassRegistry&); void initializePGOInstrumentationUsePass(PassRegistry&); +void initializePGOIndirectCallPromotionPass(PassRegistry&); void initializeInstrProfilingPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -91,6 +91,7 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenPass(); (void) llvm::createPGOInstrumentationUsePass(); + (void) llvm::createPGOIndirectCallPromotionPass(); (void) llvm::createInstrProfilingPass(); (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -83,6 +83,7 @@ ModulePass *createPGOInstrumentationGenPass(); ModulePass * createPGOInstrumentationUsePass(StringRef Filename = StringRef("")); +ModulePass *createPGOIndirectCallPromotionPass(bool InLTO = false); /// Options for the frontend instrumentation based profiling pass. struct InstrProfOptions { Index: lib/IR/Instructions.cpp =================================================================== --- lib/IR/Instructions.cpp +++ lib/IR/Instructions.cpp @@ -3009,6 +3009,11 @@ // Check for type sanity on the arguments Type *SrcTy = S->getType(); + return castIsValid(op, SrcTy, DstTy); +} + +bool +CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) { if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() || SrcTy->isAggregateType() || DstTy->isAggregateType()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -225,6 +225,7 @@ if (!PGOInstrUse.empty()) MPM.add(createPGOInstrumentationUsePass(PGOInstrUse)); } + void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. @@ -373,10 +374,13 @@ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } - if (!PerformThinLTO) + if (!PerformThinLTO) { /// PGO instrumentation is added during the compile phase for ThinLTO, do /// not run it a second time addPGOInstrPasses(MPM); + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionPass()); + } if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the @@ -581,6 +585,12 @@ // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); + // Indirect call promotion. This should promote all the targets that left by + // the earlier promotion pass that promotes intra-moduletargets. + // This two-step promotion is to save the compile time. For LTO, it should + // produce the same result as if we only do promotion here. + PM.add(createPGOIndirectCallPromotionPass(true)); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. Index: lib/Transforms/Instrumentation/CMakeLists.txt =================================================================== --- lib/Transforms/Instrumentation/CMakeLists.txt +++ lib/Transforms/Instrumentation/CMakeLists.txt @@ -4,6 +4,7 @@ DataFlowSanitizer.cpp GCOVProfiling.cpp MemorySanitizer.cpp + IndirectCallPromotion.cpp Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp Index: lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- /dev/null +++ lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -0,0 +1,564 @@ +//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the transformation that promotes indirect calls to +// conditional direct calls when the indirect-call value profile meta-data is +// available. +// +//===----------------------------------------------------------------------===// + +#include "IndirectCallSiteVisitor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "icall-promotion" + +STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); +STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); + +// Command line option to disable indirect-call promotion with the default as +// false. This is for debug purpose. +static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, + cl::desc("Disable indirect call promotion")); + +// The minimum call count for the direct-call target to be considered as the +// promotion candidate. +static cl::opt + ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to the direct call target " + "for the promotion")); + +// The percent threshold for the direct-call target (this call site vs the +// total call count) for it to be considered as the promotion target. +static cl::opt + ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The percentage threshold for the promotion")); + +// Set the maximum number of targets to promote for a single indirect-call +// callsite. +static cl::opt + MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of promotions for a single indirect " + "call callsite")); + +// Set the cutoff value for the promotion. If the value is other than 0, we +// stop the transformation once the cutoff value is reached. +// For debug use only. +static cl::opt + ICPCUTOFF("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of promotions for this compilaiton")); + +// Skip the callsite up to this number if the value is other than 0. +// For debug use only. +static cl::opt + ICPCSSKIP("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, + cl::desc("Skip Callsite up to this number for this compilaiton")); + +// Set if the pass is called in LTO optimization. The difference for LTO mode +// is the pass won't prefix the source module name to the internal linkage +// symbols. +static cl::opt ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, + cl::desc("Run indirect-call promotion in LTO " + "mode")); +// Only transform call instruction. For debug use only. +static cl::opt + ICPCALLONLY("icp-call-only", cl::init(false), cl::Hidden, + cl::desc("Run indirect-call promotion for call instructions " + "only")); + +// Only transform invoke instructions. For debug use only. +static cl::opt ICPINVOKEONLY("icp-invoke-only", cl::init(false), + cl::Hidden, + cl::desc("Run indirect-call promotion for " + "invoke instruction only")); + +// Dump the function level IR if the transformation happened in this +// function. For debug use only. +static cl::opt + ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, + cl::desc("Dump IR after transformation happens")); + +namespace { +class PGOIndirectCallPromotion : public ModulePass { +public: + static char ID; + + PGOIndirectCallPromotion(bool InLTO = false) : ModulePass(ID), InLTO(InLTO) { + initializePGOIndirectCallPromotionPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "PGOIndirectCallPromotion"; + } + +private: + bool runOnModule(Module &M) override; + + // If this pass is called in LTO. We need to special handling the PGOFuncName + // for the static variables due to LTO's internalization. + bool InLTO; +}; +} // end anonymous namespace + +char PGOIndirectCallPromotion::ID = 0; +INITIALIZE_PASS(PGOIndirectCallPromotion, "pgo-icall-prom", + "Use PGO instrumentation profile to promote indirect calls to " + "direct calls.", + false, false) + +ModulePass *llvm::createPGOIndirectCallPromotionPass(bool InLTO) { + return new PGOIndirectCallPromotion(InLTO); +} + +// The class for main data structure to promote indirect calls to conditional +// direct calls. +class ICallPromotionFunc { +private: + Function &F; + Module *M; + + // Symtab that maps value to instrumented function name. + InstrProfSymtab *Symtab; + + // Allocate space to read the profile annotation. + std::unique_ptr ValueDataArray; + + bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); + + enum TargetStatus { + OK, // Should be able to promote. + NotAvailableInModule, // Cannot find the target in current module. + BodyNotAvailableInModule, // Cannot find the function defines. + ReturnTypeMismatch, // Return type mismatch b/w target and indirectcall. + NumArgsMismatch, // Number of arguments does not match. + ArgTypeMismatch // Type mismatch in the arguments (cannot bitcast). + }; + + TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target, + Function *&F); + + // A struct that record the direct target and it's call count. + struct CandidatePromotion { + Function *TargetFunction; + uint64_t Count; + CandidatePromotion(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} + }; + + // Check if the indirectcall call site should be promoted. Return the number + // of promotions. + std::vector getCandidatePromotionsForCallSite( + Instruction *Inst, const ArrayRef &ValueDataRef, + uint64_t TotalCount); + + bool promote(Instruction *Inst, Function *F, uint64_t Count, + uint64_t TotalCount); + + uint32_t tryToPromote(Instruction *Inst, + const std::vector &Candidates, + uint64_t &TotalCount); + + const char *StatusToString(const TargetStatus S) { + switch (S) { + case OK: + return "OK to promote"; + case NotAvailableInModule: + return "Cannot find the target"; + case BodyNotAvailableInModule: + return "Cannot find the target define"; + case ReturnTypeMismatch: + return "Return type mismatch"; + case NumArgsMismatch: + return "The number of arguments mismatch"; + case ArgTypeMismatch: + return "Argument Type mismatch"; + } + llvm_unreachable("Should not reach here"); + } + + // Noncopyable + ICallPromotionFunc(const ICallPromotionFunc &other) = delete; + ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; + +public: + ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab) + : F(Func), M(Modu), Symtab(Symtab) { + ValueDataArray = llvm::make_unique(MaxNumPromotions); + } + bool processFunction(); +}; + +bool ICallPromotionFunc::isPromotionProfitable(uint64_t Count, + uint64_t TotalCount) { + if (TotalCount < ICPCountThreshold || Count < ICPCountThreshold) + return false; + if (TotalCount < Count) + Count = TotalCount; + + unsigned Percentage = (Count * 100) / TotalCount; + return (Percentage >= ICPPercentThreshold); +} + +ICallPromotionFunc::TargetStatus +ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target, + Function *&TargetFunction) { + StringRef TargetFuncName = Symtab->getFuncName(Target); + if (TargetFuncName.empty()) + return NotAvailableInModule; + Function *DirectCallee = Symtab->getFunction(Target); + if (DirectCallee == nullptr) + return BodyNotAvailableInModule; + // Check the return type. + Type *CallRetType = Inst->getType(); + if (!CallRetType->isVoidTy()) { + Type *FuncRetType = DirectCallee->getReturnType(); + if (FuncRetType != CallRetType && + !CastInst::castIsValid(Instruction::BitCast, FuncRetType, CallRetType)) + return ReturnTypeMismatch; + // Todo: Need to create a separted BB for the normal label. + if (FuncRetType != CallRetType && dyn_cast(Inst)) + return ReturnTypeMismatch; + } + + // Check if the arguments are compatible with the parameters + FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); + unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); + CallSite CS(Inst); + unsigned ArgNum = CS.arg_size(); + + if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) + return NumArgsMismatch; + + for (unsigned I = 0; I < ParamNum; ++I) { + Type *PTy = DirectCalleeType->getFunctionParamType(I); + Type *ATy = CS.getArgument(I)->getType(); + if (PTy == ATy) + continue; + if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) + return ArgTypeMismatch; + } + + DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to " + << TargetFuncName << "\n"); + TargetFunction = DirectCallee; + return OK; +} + +// Indirect-call promotion heursitic. The direct targets are sorted based on +// the count. Stop at the first target that is not promoted. +std::vector +ICallPromotionFunc::getCandidatePromotionsForCallSite( + Instruction *Inst, const ArrayRef &ValueDataRef, + uint64_t TotalCount) { + uint32_t NumPromotions = 0; + uint32_t NumVals = ValueDataRef.size(); + std::vector Ret; + + DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst + << " Num_targets: " << NumVals << "\n"); + NumOfPGOICallsites++; + if (ICPCSSKIP != 0 && NumOfPGOICallsites < ICPCSSKIP) { + DEBUG(dbgs() << " Skip: User options.\n"); + return Ret; + } + + for (uint32_t I = 0; I < MaxNumPromotions && I < NumVals; I++) { + uint64_t Count = ValueDataRef[I].Count; + uint64_t Target = ValueDataRef[I].Value; + DEBUG(dbgs() << " Candidate " << I << " Count=" << Count + << " Target_func: " << Target << "\n"); + + if (ICPINVOKEONLY && dyn_cast(Inst)) { + DEBUG(dbgs() << " Not promote: User options.\n"); + break; + } + if (ICPCALLONLY && dyn_cast(Inst)) { + DEBUG(dbgs() << " Not promote: User option.\n"); + break; + } + if (ICPCUTOFF != 0 && NumOfPGOICallPromotion >= ICPCUTOFF) { + DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); + break; + } + if (!isPromotionProfitable(Count, TotalCount)) { + DEBUG(dbgs() << " Not promote: Cold target.\n"); + break; + } + Function *TargetFunction = nullptr; + TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction); + if (Status != OK) { + StringRef TargetFuncName = Symtab->getFuncName(Target); + const char *Reason = StatusToString(Status); + DEBUG(dbgs() << " Not promote: " << Reason << "\n"); + Twine Msg = + Twine("Cannot promote indirect call to ") + + (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) + + Twine(" with count of ") + Twine(Count) + ": " + Reason; + emitOptimizationRemarkMissed(F.getContext(), "PGOIndirectCallPromotion", + F, Inst->getDebugLoc(), Msg); + break; + } + Ret.push_back(CandidatePromotion(TargetFunction, Count)); + assert(Count <= TotalCount); + TotalCount -= Count; + ++NumPromotions; + } + return Ret; +} + +static inline PHINode *getPHINode(BasicBlock &BB) { + for (Instruction &I : BB) + if (isa(I)) + return dyn_cast(&I); + return nullptr; +} + +// Fix the phi node in the basic block BB. +static void fixPHINode(BasicBlock *BB, const BasicBlock *OrigBB, + BasicBlock *IndirectCallBB, BasicBlock *DirectCallBB) { + for (auto &I : *BB) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) + continue; + int IX = PHI->getBasicBlockIndex(OrigBB); + if (IX == -1) + continue; + PHI->setIncomingBlock(IX, IndirectCallBB); + PHI->addIncoming(PHI->getIncomingValue(IX), DirectCallBB); + } +} + +// This furnction does the actual indireciton call promotion transformation: +// For an icall like: +// Ret = (*Foo)(Args); +// It transforms to: +// if (Foo == DirectCallee) +// Ret1 = DirectCallee(Args); +// else +// Ret2 = (*Foo)(Args); +// Ret = phi(Ret1, Ret2); +// It adds type casts for the args do not match the parameters and the return +// value. Branch weights meta data also updated. +// +bool ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee, + uint64_t Count, uint64_t TotalCount) { + BasicBlock *BB = Inst->getParent(); + DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + DEBUG(dbgs() << *BB << "\n"); + + assert(DirectCallee != nullptr); + CallSite CS(Inst); + Value *OrigCallee = CS.getCalledValue(); + + IRBuilder<> BBBuilder(Inst); + LLVMContext &Ctx = Inst->getContext(); + Value *BCI1 = + BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), ""); + Value *BCI2 = + BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), ""); + Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, ""); + + TerminatorInst *ThenTerm, *ElseTerm; + uint64_t ElseCount = TotalCount - Count; + uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); + uint64_t Scale = calculateCountScale(MaxCount); + MDBuilder MDB(M->getContext()); + MDNode *BranchWeights = MDB.createBranchWeights( + scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); + SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm, + BranchWeights); + + BasicBlock *DirectCallBB = ThenTerm->getParent(); + DirectCallBB->setName("if.true"); + BasicBlock *IndirectCallBB = ElseTerm->getParent(); + IndirectCallBB->setName("if.false"); + Instruction *NewInst = Inst->clone(); + BasicBlock *MergeBB = Inst->getParent(); + BasicBlock *OrigMergeBB = nullptr; + bool IsInvokeInst = false; + if (CallInst *CI = dyn_cast(NewInst)) { + CI->setCalledFunction(DirectCallee); + CI->mutateFunctionType(DirectCallee->getFunctionType()); + MergeBB->setName("if.end"); + } else { + InvokeInst *II = dyn_cast(NewInst); + assert(II); + II->setCalledFunction(DirectCallee); + OrigMergeBB = MergeBB; + MergeBB = II->getNormalDest(); + IsInvokeInst = true; + } + + NewInst->setMetadata(LLVMContext::MD_prof, 0); + CallSite NewCS(NewInst); + DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(), + NewInst); + FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); + unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); + for (unsigned I = 0; I < ParamNum; ++I) { + Type *ATy = NewCS.getArgument(I)->getType(); + Type *PTy = DirectCalleeType->getParamType(I); + if (ATy != PTy) { + BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst); + NewCS.setArgument(I, BI); + } + } + + // Create a phi to unify the return values of calls + if (!Inst->getType()->isVoidTy()) { + PHINode *CallRetPHI = nullptr; + if (IsInvokeInst) { + CallRetPHI = getPHINode(*MergeBB); + if (CallRetPHI) + CallRetPHI->removeIncomingValue(OrigMergeBB); + } + if (!CallRetPHI) { + CallRetPHI = PHINode::Create(Inst->getType(), 0); + MergeBB->getInstList().push_front(CallRetPHI); + Inst->replaceAllUsesWith(CallRetPHI); + } + + // Handle the case where the return types differ. + Type *CallRetType = Inst->getType(); + Type *FuncRetType = DirectCallee->getReturnType(); + if (FuncRetType != CallRetType) + NewInst = new BitCastInst(NewInst, CallRetType, "", + &*(std::next(BasicBlock::iterator(NewInst)))); + CallRetPHI->addIncoming(NewInst, DirectCallBB); + CallRetPHI->addIncoming(Inst, IndirectCallBB); + } + + Inst->removeFromParent(); + IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), + Inst); + if (InvokeInst *II = dyn_cast(Inst)) { + // Remove the unwanted branch instruction. + ThenTerm->removeFromParent(); + ElseTerm->removeFromParent(); + // PHI operatnd may still points to the original merge node of + // if_then_else. Fix them. + fixPHINode(II->getUnwindDest(), OrigMergeBB, IndirectCallBB, DirectCallBB); + fixPHINode(II->getNormalDest(), OrigMergeBB, IndirectCallBB, DirectCallBB); + OrigMergeBB->removeFromParent(); + } + + DEBUG(dbgs() << "\n== Basic Blocks After ==\n"); + DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); + + Twine Msg = Twine("Promote indirect call to ") + DirectCallee->getName() + + " with count " + Twine(Count) + " out of " + Twine(TotalCount); + emitOptimizationRemark(F.getContext(), "PGOIndirectCallPromotion", F, + Inst->getDebugLoc(), Msg); + return true; +} + +// Promote indirectcall to conditional direct-call for one callsite. +uint32_t ICallPromotionFunc::tryToPromote( + Instruction *Inst, const std::vector &Candidates, + uint64_t &TotalCount) { + uint32_t NumPromoted = 0; + + for (auto &C : Candidates) { + uint64_t Count = C.Count; + if (!promote(Inst, C.TargetFunction, Count, TotalCount)) + break; + assert(TotalCount >= Count); + TotalCount -= Count; + NumOfPGOICallPromotion++; + NumPromoted++; + } + return NumPromoted; +} + +// Traverse all the indirect-call callsite and get the value profile +// annotation to perform indirect-call promotion. +bool ICallPromotionFunc::processFunction() { + bool Changed = false; + for (auto &I : findIndirectCallSites(F)) { + uint32_t NumVals; + uint64_t TotalCount; + bool Res = + getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount); + if (!Res) + continue; + ArrayRef ValueDataArrayRef(ValueDataArray.get(), + NumVals); + auto CandidatePromotions = + getCandidatePromotionsForCallSite(I, ValueDataArrayRef, TotalCount); + uint32_t NumPromoted = tryToPromote(I, CandidatePromotions, TotalCount); + if (NumPromoted == 0) + continue; + + Changed = true; + // Adjust the MD.prof metadata. First delete the old one. + I->setMetadata(LLVMContext::MD_prof, 0); + // If all promoted, we don't need the MD.prof metadat. + if (TotalCount == 0 || NumPromoted == NumVals) + continue; + // Otherwise we need update with the un-promoted records back. + annotateValueSite(*M, *I, ValueDataArrayRef.slice(NumPromoted), TotalCount, + IPVK_IndirectCallTarget, MaxNumPromotions); + } + return Changed; +} + +static bool promoteIndirectCalls(Module &M, bool InLTO) { + if (DisableICP) + return false; + InstrProfSymtab Symtab; + Symtab.create(M, InLTO); + bool Changed = false; + for (auto &F : M) { + if (F.isDeclaration()) + continue; + if (F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + ICallPromotionFunc ICallPromotion(F, &M, &Symtab); + bool FuncChanged = ICallPromotion.processFunction(); + if (ICPDUMPAFTER && FuncChanged) { + DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); + DEBUG(dbgs() << "\n"); + } + Changed |= FuncChanged; + if (ICPCUTOFF != 0 && NumOfPGOICallPromotion >= ICPCUTOFF) { + DEBUG(dbgs() << " Stop: Cutoff reached.\n"); + break; + } + } + return Changed; +} + +bool PGOIndirectCallPromotion::runOnModule(Module &M) { + // Commandline option has the priority for InLTO. + InLTO |= ICPLTOMode; + return promoteIndirectCalls(M, InLTO); +} Index: lib/Transforms/Instrumentation/Instrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/Instrumentation.cpp +++ lib/Transforms/Instrumentation/Instrumentation.cpp @@ -62,6 +62,7 @@ initializeGCOVProfilerPass(Registry); initializePGOInstrumentationGenPass(Registry); initializePGOInstrumentationUsePass(Registry); + initializePGOIndirectCallPromotionPass(Registry); initializeInstrProfilingPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); Index: test/Transforms/PGOProfile/icp_covariant_return.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/icp_covariant_return.ll @@ -0,0 +1,122 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.D = type { %struct.B } +%struct.B = type { i32 (...)** } +%struct.Base = type { i8 } +%struct.Derived = type { i8 } +$_ZN1DC2Ev = comdat any +$_ZN1BC2Ev = comdat any +$_ZN1D4funcEv = comdat any +$_ZN1DD2Ev = comdat any +$_ZN1DD0Ev = comdat any +$_ZN1B4funcEv = comdat any +$_ZN1BD2Ev = comdat any +$_ZN1BD0Ev = comdat any +$_ZTV1D = comdat any +$_ZTS1D = comdat any +$_ZTS1B = comdat any +$_ZTI1B = comdat any +$_ZTI1D = comdat any +$_ZTV1B = comdat any +@_ZTV1D = linkonce_odr unnamed_addr constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1D to i8*), i8* bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*), i8* bitcast (void (%struct.D*)* @_ZN1DD2Ev to i8*), i8* bitcast (void (%struct.D*)* @_ZN1DD0Ev to i8*)], comdat, align 8 +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8* +@_ZTS1D = linkonce_odr constant [3 x i8] c"1D\00", comdat +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* +@_ZTS1B = linkonce_odr constant [3 x i8] c"1B\00", comdat +@_ZTI1B = linkonce_odr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1B, i32 0, i32 0) }, comdat +@_ZTI1D = linkonce_odr constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1D, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*) }, comdat +@_ZTV1B = linkonce_odr unnamed_addr constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast (%struct.Base* (%struct.B*)* @_ZN1B4funcEv to i8*), i8* bitcast (void (%struct.B*)* @_ZN1BD2Ev to i8*), i8* bitcast (void (%struct.B*)* @_ZN1BD0Ev to i8*)], comdat, align 8 + +define i32 @bar() { +entry: + %call = call noalias i8* @_Znwm(i64 8) + %tmp = bitcast i8* %call to %struct.D* + call void @_ZN1DC2Ev(%struct.D* %tmp) + %tmp1 = bitcast %struct.D* %tmp to %struct.B* + %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)*** + %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8 + %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0 + %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true, label %if.false, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM:if.true: +; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D* +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]]) +; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base* +; ICALL-PROM: br label %if.end + %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1), !prof !1 +; ICALL-PROM:if.false: +; ICALL-PROM: %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1) +; ICALL-PROM: br label %if.end +; ICALL-PROM:if.end: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ [[DIRCALL_RET_CAST]], %if.true ], [ %call1, %if.false ] + ret i32 0 +} + +declare noalias i8* @_Znwm(i64) + +define linkonce_odr void @_ZN1DC2Ev(%struct.D* %this) unnamed_addr comdat align 2 { +entry: + %tmp = bitcast %struct.D* %this to %struct.B* + call void @_ZN1BC2Ev(%struct.B* %tmp) + %tmp1 = bitcast %struct.D* %this to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %tmp1, align 8 + ret void +} + +define linkonce_odr void @_ZN1BC2Ev(%struct.B* %this) unnamed_addr comdat align 2 { +entry: + %tmp = bitcast %struct.B* %this to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTV1B, i64 0, i64 2) to i32 (...)**), i32 (...)*** %tmp, align 8 + ret void +} + +define linkonce_odr %struct.Derived* @_ZN1D4funcEv(%struct.D* %this) unnamed_addr comdat align 2 { +entry: + %call = call noalias i8* @_Znwm(i64 1) + %tmp = bitcast i8* %call to %struct.Derived* + ret %struct.Derived* %tmp +} + +define linkonce_odr void @_ZN1DD2Ev(%struct.D* %this) unnamed_addr comdat align 2 { +entry: + %tmp = bitcast %struct.D* %this to %struct.B* + call void @_ZN1BD2Ev(%struct.B* %tmp) + ret void +} + +define linkonce_odr void @_ZN1DD0Ev(%struct.D* %this) unnamed_addr comdat align 2 { +entry: + call void @_ZN1DD2Ev(%struct.D* %this) + %tmp = bitcast %struct.D* %this to i8* + call void @_ZdlPv(i8* %tmp) + ret void +} + +define linkonce_odr %struct.Base* @_ZN1B4funcEv(%struct.B* %this) unnamed_addr comdat align 2 { +entry: + %call = call noalias i8* @_Znwm(i64 1) + %tmp = bitcast i8* %call to %struct.Base* + ret %struct.Base* %tmp +} + +define linkonce_odr void @_ZN1BD2Ev(%struct.B* %this) unnamed_addr comdat align 2 { +entry: + ret void +} + +define linkonce_odr void @_ZN1BD0Ev(%struct.B* %this) unnamed_addr comdat align 2 { +entry: + call void @_ZN1BD2Ev(%struct.B* %this) + %tmp = bitcast %struct.B* %this to i8* + call void @_ZdlPv(i8* %tmp) + ret void +} + +declare void @_ZdlPv(i8*) + +!1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0} Index: test/Transforms/PGOProfile/icp_invoke.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/icp_invoke.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -icp-lto -pgo-icall-prom -S -icp-count-threshold=0 | FileCheck %s --check-prefix=ICP +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo1 = global void ()* null, align 8 +@foo2 = global i32 ()* null, align 8 +@_ZTIi = external constant i8* + +define internal void @_ZL4bar1v() !PGOFuncName !0 { +entry: + ret void +} + +define internal i32 @_ZL4bar2v() !PGOFuncName !1 { +entry: + ret i32 100 +} + +define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %tmp = load void ()*, void ()** @foo1, align 8 +; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* %tmp to i8* +; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*) +; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICP:[[TRUE_LABEL_IC1]]: +; ICP: invoke void @_ZL4bar1v() +; ICP: to label %try.cont unwind label %lpad +; ICP:[[FALSE_LABEL_IC1]]: + invoke void %tmp() + to label %try.cont unwind label %lpad, !prof !2 + +lpad: + %tmp1 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp2 = extractvalue { i8*, i32 } %tmp1, 0 + %tmp3 = extractvalue { i8*, i32 } %tmp1, 1 + %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %tmp3, %tmp4 + br i1 %matches, label %catch, label %eh.resume + +catch: + %tmp5 = tail call i8* @__cxa_begin_catch(i8* %tmp2) + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + %tmp6 = load i32 ()*, i32 ()** @foo2, align 8 +; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8* +; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*) +; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICP:[[TRUE_LABEL_IC2]]: +; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v() +; ICP: to label %try.cont8 unwind label %lpad1 +; ICP:[[FALSE_LABEL_IC2]]: + %call = invoke i32 %tmp6() + to label %try.cont8 unwind label %lpad1, !prof !3 + +lpad1: + %tmp7 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp8 = extractvalue { i8*, i32 } %tmp7, 0 + %tmp9 = extractvalue { i8*, i32 } %tmp7, 1 + %tmp10 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches5 = icmp eq i32 %tmp9, %tmp10 + br i1 %matches5, label %catch6, label %eh.resume + +catch6: + %tmp11 = tail call i8* @__cxa_begin_catch(i8* %tmp8) + tail call void @__cxa_end_catch() + br label %try.cont8 + +try.cont8: + %i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ] +; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ [[RESULT_IC2]], %[[TRUE_LABEL_IC2]] ], [ %call, %[[FALSE_LABEL_IC2]] ] + ret i32 %i.0 + +eh.resume: + %ehselector.slot.0 = phi i32 [ %tmp9, %lpad1 ], [ %tmp3, %lpad ] + %exn.slot.0 = phi i8* [ %tmp8, %lpad1 ], [ %tmp2, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val11 +} + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +!0 = !{!"invoke.ll:_ZL4bar1v"} +!1 = !{!"invoke.ll:_ZL4bar2v"} +!2 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1} +!3 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1} +; ICP-NOT !3 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1} +; ICP-NOT !4 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1} +; ICP: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0} Index: test/Transforms/PGOProfile/icp_mismatch_msg.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/icp_mismatch_msg.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -pgo-icall-prom -pass-remarks-missed=PGOIndirectCallPromotion -S 2>& 1 | FileCheck %s + +; CHECK: remark: :0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch +; CHECK: remark: :0:0: Cannot promote indirect call to 11517462787082255043 with count of 2345: Cannot find the target +; CHECK: remark: :0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 ()* null, align 8 +@foo2 = common global i32 ()* null, align 8 +@foo3 = common global i32 ()* null, align 8 + +define i32 @func4(i32 %i) { +entry: + ret i32 %i +} + +define void @func2() { +entry: + ret void +} + +define i32 @bar() { +entry: + %tmp = load i32 ()*, i32 ()** @foo, align 8 + %call = call i32 %tmp(), !prof !1 + %tmp2 = load i32 ()*, i32 ()** @foo2, align 8 + %call1 = call i32 %tmp2(), !prof !2 + %add = add nsw i32 %call1, %call + %tmp3 = load i32 ()*, i32 ()** @foo3, align 8 + %call2 = call i32 %tmp3(), !prof !3 + %add2 = add nsw i32 %add, %call2 + ret i32 %add2 +} + +!1 = !{!"VP", i32 0, i64 1801, i64 7651369219802541373, i64 1234, i64 -4377547752858689819, i64 567} +!2 = !{!"VP", i32 0, i64 3023, i64 -6929281286627296573, i64 2345, i64 -4377547752858689819, i64 678} +!3 = !{!"VP", i32 0, i64 7890, i64 -4377547752858689819, i64 7890} Index: test/Transforms/PGOProfile/icp_vararg.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/icp_vararg.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 (i32, ...)* null, align 8 + +define i32 @va_func(i32 %num, ...) { +entry: + ret i32 0 +} + +define i32 @bar() #1 { +entry: + %tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true, label %if.false, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM:if.true: ; preds = %entry +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) @va_func(i32 3, i32 12, i32 22, i32 4) +; ICALL-PROM: br label %if.end + %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4), !prof !1 +; ICALL-PROM:if.false: ; preds = %entry +; ICALL-PROM: %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4) +; ICALL-PROM: br label %if.end + ret i32 %call +; ICALL-PROM:if.end: ; preds = %if.false, %if.true +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ [[DIRCALL_RET]], %if.true ], [ %call, %if.false ] +; ICALL-PROM: ret i32 [[PHI_RET]] + +} + +!1 = !{!"VP", i32 0, i64 12345, i64 989055279648259519, i64 12345} +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0} Index: test/Transforms/PGOProfile/indirect_call_promotion.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=PGOIndirectCallPromotion -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; PASS-REMARK: remark: :0:0: Promote indirect call to func4 with count 1030 out of 1600 +; PASS-REMARK: remark: :0:0: Promote indirect call to func2 with count 410 out of 570 +; PASS-REMARK: remark: :0:0: Promote indirect call to func3 with count 150 out of 160 +; PASS-REMARK: remark: :0:0: Promote indirect call to func1 with count 10 out of 10 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 ()* null, align 8 + +define i32 @func1() { +entry: + ret i32 0 +} + +define i32 @func2() { +entry: + ret i32 1 +} + +define i32 @func3() { +entry: + ret i32 2 +} + +define i32 @func4() { +entry: + ret i32 3 +} + +define i32 @bar() { +entry: + %tmp = load i32 ()*, i32 ()** @foo, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true, label %if.false, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM: if.true: +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4() +; ICALL-PROM: br label %if.end + %call = call i32 %tmp(), !prof !1 +; ICALL-PROM: if.false: +; ICALL-PROM: %call = call i32 %tmp(), !prof [[NEW_VP_METADATA:![0-9]+]] + ret i32 %call +; ICALL-PROM: if.end: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ [[DIRCALL_RET]], %if.true ], [ %call, %if.false ] +; ICALL-PROM: ret i32 [[PHI_RET]] +} + +!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} + +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 570} +; ICALL-PROM: [[NEW_VP_METADATA]] = !{!"VP", i32 0, i64 570, i64 -4377547752858689819, i64 410}