Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -124,6 +124,7 @@ void initializeGCOVProfilerPass(PassRegistry&); void initializePGOInstrumentationGenPass(PassRegistry&); void initializePGOInstrumentationUsePass(PassRegistry&); +void initializePGOIndirectCallPromotionPass(PassRegistry&); void initializeInstrProfilingLegacyPassPass(PassRegistry &); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -91,6 +91,7 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenPass(); (void) llvm::createPGOInstrumentationUsePass(); + (void) llvm::createPGOIndirectCallPromotionPass(); (void) llvm::createInstrProfilingLegacyPass(); (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); Index: llvm/trunk/include/llvm/Transforms/Instrumentation.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Instrumentation.h +++ llvm/trunk/include/llvm/Transforms/Instrumentation.h @@ -83,6 +83,7 @@ ModulePass *createPGOInstrumentationGenPass(); ModulePass * createPGOInstrumentationUsePass(StringRef Filename = StringRef("")); +ModulePass *createPGOIndirectCallPromotionPass(bool InLTO = false); /// Options for the frontend instrumentation based profiling pass. 
struct InstrProfOptions { Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -371,10 +371,13 @@ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } - if (!PerformThinLTO) + if (!PerformThinLTO) { /// PGO instrumentation is added during the compile phase for ThinLTO, do /// not run it a second time addPGOInstrPasses(MPM); + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionPass()); + } if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the @@ -585,6 +588,12 @@ // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); + // Indirect call promotion. This should promote all the targets that are left + // by the earlier promotion pass that promotes intra-module targets. + // This two-step promotion is to save the compile time. For LTO, it should + // produce the same result as if we only do promotion here. + PM.add(createPGOIndirectCallPromotionPass(true)); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
Index: llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Instrumentation/CMakeLists.txt @@ -4,6 +4,7 @@ DataFlowSanitizer.cpp GCOVProfiling.cpp MemorySanitizer.cpp + IndirectCallPromotion.cpp Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp Index: llvm/trunk/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -0,0 +1,693 @@ +//===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the transformation that promotes indirect calls to +// conditional direct calls when the indirect-call value profile metadata is +// available. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndirectCallSiteVisitor.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "icall-promotion"
+
+STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
+STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+
+// Command line option to disable indirect-call promotion. Defaults to false.
+// For debug use only.
+static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
+                                cl::desc("Disable indirect call promotion"));
+
+// The minimum call count for the direct-call target to be considered as the
+// promotion candidate.
+static cl::opt<unsigned>
+    ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore,
+                      cl::init(1000),
+                      cl::desc("The minimum count to the direct call target "
+                               "for the promotion"));
+
+// The percent threshold for the direct-call target (this call site vs the
+// total call count) for it to be considered as the promotion target.
+static cl::opt<unsigned>
+    ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
+                        cl::ZeroOrMore,
+                        cl::desc("The percentage threshold for the promotion"));
+
+// Set the maximum number of targets to promote for a single indirect-call
+// callsite.
+static cl::opt<unsigned>
+    MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore,
+                     cl::desc("Max number of promotions for a single indirect "
+                              "call callsite"));
+
+// Set the cutoff value for the promotion. If the value is other than 0, we
+// stop the transformation once the total number of promotions equals the cutoff
+// value.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Max number of promotions for this compilation"));
+
+// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Skip Callsite up to this number for this compilation"));
+
+// Set if the pass is called in LTO optimization. The difference for LTO mode
+// is the pass won't prefix the source module name to the internal linkage
+// symbols.
+static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
+                                cl::desc("Run indirect-call promotion in LTO "
+                                         "mode"));
+
+// If the option is set to true, only call instructions will be considered for
+// transformation -- invoke instructions will be ignored.
+static cl::opt<bool>
+    ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
+                cl::desc("Run indirect-call promotion for call instructions "
+                         "only"));
+
+// If the option is set to true, only invoke instructions will be considered for
+// transformation -- call instructions will be ignored.
+static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
+                                   cl::Hidden,
+                                   cl::desc("Run indirect-call promotion for "
+                                            "invoke instruction only"));
+
+// Dump the function level IR if the transformation happened in this
+// function. For debug use only.
+static cl::opt<bool>
+    ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
+                 cl::desc("Dump IR after transformation happens"));
+
+namespace {
+// Legacy module pass wrapper around the indirect-call promotion
+// transformation.
+class PGOIndirectCallPromotion : public ModulePass {
+public:
+  static char ID;
+
+  explicit PGOIndirectCallPromotion(bool InLTO = false)
+      : ModulePass(ID), InLTO(InLTO) {
+    initializePGOIndirectCallPromotionPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOIndirectCallPromotion";
+  }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  // Set if this pass is called in LTO. The PGOFuncName of static variables
+  // needs special handling due to LTO's internalization.
+  bool InLTO;
+};
+} // end anonymous namespace
+
+char PGOIndirectCallPromotion::ID = 0;
+INITIALIZE_PASS(PGOIndirectCallPromotion, "pgo-icall-prom",
+                "Use PGO instrumentation profile to promote indirect calls to "
+                "direct calls.",
+                false, false)
+
+ModulePass *llvm::createPGOIndirectCallPromotionPass(bool InLTO) {
+  return new PGOIndirectCallPromotion(InLTO);
+}
+
+// The class for main data structure to promote indirect calls to conditional
+// direct calls.
+class ICallPromotionFunc {
+private:
+  Function &F;
+  Module *M;
+
+  // Symtab that maps indirect call profile values to function names and
+  // defines.
+  InstrProfSymtab *Symtab;
+
+  // Allocate space to read the profile annotation.
+  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+
+  // Count is the call count for the direct-call target and
+  // TotalCount is the call count for the indirect-call callsite.
+  // Return true if we should promote this indirect-call target.
+  bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount);
+
+  enum TargetStatus {
+    OK,                   // Should be able to promote.
+    NotAvailableInModule, // Cannot find the target in current module.
+    ReturnTypeMismatch,   // Return type mismatch b/w target and indirect-call.
+    NumArgsMismatch,      // Number of arguments does not match.
+    ArgTypeMismatch       // Type mismatch in the arguments (cannot bitcast).
+  };
+
+  // Test if we can legally promote this direct-call of Target. On OK,
+  // TargetFunction is set to the callee found in the symtab; otherwise it is
+  // left untouched.
+  TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
+                                Function *&TargetFunction);
+
+  // A struct that records the direct target and its call count.
+  struct PromotionCandidate {
+    Function *TargetFunction;
+    uint64_t Count;
+    PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+  };
+
+  // Check if the indirect-call call site should be promoted. Return the list
+  // of targets to promote, in the order they appear in ValueDataRef.
+  std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
+      Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+      uint64_t TotalCount);
+
+  // Main function that transforms Inst (either an indirect-call instruction,
+  // or an invoke instruction) to a conditional call to F. This is like:
+  //     if (Inst.CalledValue == F)
+  //        F(...);
+  //     else
+  //        Inst(...);
+  //     end
+  // TotalCount is the profile count value that the instruction executes.
+  // Count is the profile count value that F is the target function.
+  // These two values are being used to update the branch weight.
+  void promote(Instruction *Inst, Function *F, uint64_t Count,
+               uint64_t TotalCount);
+
+  // Promote a list of targets for one indirect-call callsite. Return
+  // the number of promotions.
+  uint32_t tryToPromote(Instruction *Inst,
+                        const std::vector<PromotionCandidate> &Candidates,
+                        uint64_t &TotalCount);
+
+  static const char *StatusToString(const TargetStatus S) {
+    switch (S) {
+    case OK:
+      return "OK to promote";
+    case NotAvailableInModule:
+      return "Cannot find the target";
+    case ReturnTypeMismatch:
+      return "Return type mismatch";
+    case NumArgsMismatch:
+      return "The number of arguments mismatch";
+    case ArgTypeMismatch:
+      return "Argument Type mismatch";
+    }
+    llvm_unreachable("Should not reach here");
+  }
+
+  // Noncopyable
+  ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
+  ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
+
+public:
+  ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
+      : F(Func), M(Modu), Symtab(Symtab) {
+    // Sized once from the option; the same limit is passed when the value
+    // profile records are read in processFunction().
+    ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+  }
+  bool processFunction();
+};
+
+bool ICallPromotionFunc::isPromotionProfitable(uint64_t Count,
+                                               uint64_t TotalCount) {
+  if (Count < ICPCountThreshold)
+    return false;
+
+  // A zero total can only be reached here when the count threshold is set to
+  // 0; there is nothing to promote and the division below would trap.
+  if (TotalCount == 0)
+    return false;
+
+  uint64_t Percentage = (Count * 100) / TotalCount;
+  return (Percentage >= ICPPercentThreshold);
+}
+
+ICallPromotionFunc::TargetStatus
+ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
+                                     Function *&TargetFunction) {
+  Function *DirectCallee = Symtab->getFunction(Target);
+  if (DirectCallee == nullptr)
+    return NotAvailableInModule;
+  // Check the return type. A void call site ignores the callee's result, so
+  // only non-void call sites need a compatible (bitcastable) return type.
+  Type *CallRetType = Inst->getType();
+  if (!CallRetType->isVoidTy()) {
+    Type *FuncRetType = DirectCallee->getReturnType();
+    if (FuncRetType != CallRetType &&
+        !CastInst::isBitCastable(FuncRetType, CallRetType))
+      return ReturnTypeMismatch;
+  }
+
+  // Check if the arguments are compatible with the parameters.
+  FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
+  unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
+  CallSite CS(Inst);
+  unsigned ArgNum = CS.arg_size();
+
+  // Every fixed parameter needs an argument, even for a variadic callee;
+  // otherwise the loop below would read past the call site's arguments.
+  // Extra arguments are only acceptable when the callee is variadic.
+  if (ArgNum < ParamNum ||
+      (ArgNum > ParamNum && !DirectCalleeType->isVarArg()))
+    return NumArgsMismatch;
+
+  for (unsigned I = 0; I < ParamNum; ++I) {
+    Type *PTy = DirectCalleeType->getFunctionParamType(I);
+    Type *ATy = CS.getArgument(I)->getType();
+    if (PTy == ATy)
+      continue;
+    if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
+      return ArgTypeMismatch;
+  }
+
+  DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
+               << Symtab->getFuncName(Target) << "\n");
+  TargetFunction = DirectCallee;
+  return OK;
+}
+
+// Indirect-call promotion heuristic. The direct targets are sorted based on
+// the count. Stop at the first target that is not promoted.
+std::vector<ICallPromotionFunc::PromotionCandidate>
+ICallPromotionFunc::getPromotionCandidatesForCallSite(
+    Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+    uint64_t TotalCount) {
+  uint32_t NumVals = ValueDataRef.size();
+  std::vector<PromotionCandidate> Ret;
+
+  DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
+               << " Num_targets: " << NumVals << "\n");
+  NumOfPGOICallsites++;
+  if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
+    DEBUG(dbgs() << " Skip: User options.\n");
+    return Ret;
+  }
+
+  // Walk the recorded targets in order and stop at the first one that is
+  // rejected for any reason.
+  for (uint32_t I = 0; I < MaxNumPromotions && I < NumVals; I++) {
+    uint64_t Count = ValueDataRef[I].Count;
+    assert(Count <= TotalCount);
+    uint64_t Target = ValueDataRef[I].Value;
+    DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
+                 << " Target_func: " << Target << "\n");
+
+    // -icp-invoke-only: plain calls are not candidates.
+    if (ICPInvokeOnly && isa<CallInst>(Inst)) {
+      DEBUG(dbgs() << " Not promote: User options.\n");
+      break;
+    }
+    // -icp-call-only: invokes are not candidates.
+    if (ICPCallOnly && isa<InvokeInst>(Inst)) {
+      DEBUG(dbgs() << " Not promote: User option.\n");
+      break;
+    }
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
+      break;
+    }
+    if (!isPromotionProfitable(Count, TotalCount)) {
+      DEBUG(dbgs() << " Not promote: Cold target.\n");
+      break;
+    }
+    Function *TargetFunction = nullptr;
+    TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
+    if (Status != OK) {
+      StringRef TargetFuncName = Symtab->getFuncName(Target);
+      const char *Reason = StatusToString(Status);
+      DEBUG(dbgs() << " Not promote: " << Reason << "\n");
+      // Build the message inside the call: a Twine only references its
+      // operands, so keeping it in a local would leave it pointing at
+      // temporaries that are destroyed at the end of the declaration.
+      emitOptimizationRemarkMissed(
+          F.getContext(), "PGOIndirectCallPromotion", F, Inst->getDebugLoc(),
+          Twine("Cannot promote indirect call to ") +
+              (TargetFuncName.empty() ? Twine(Target)
+                                      : Twine(TargetFuncName)) +
+              Twine(" with count of ") + Twine(Count) + ": " + Reason);
+      break;
+    }
+    Ret.push_back(PromotionCandidate(TargetFunction, Count));
+    // The next target's percentage is measured against what is left after
+    // this promotion.
+    TotalCount -= Count;
+  }
+  return Ret;
+}
+
+// Create a diamond structure for If_Then_Else.
+// Also update the profile count. Do the fix-up for the invoke instruction.
+// On return *DirectCallBB, *IndirectCallBB and *MergeBB are the "then",
+// "else" and tail blocks of the diamond; Inst itself still sits in *MergeBB.
+static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
+                             uint64_t Count, uint64_t TotalCount,
+                             BasicBlock **DirectCallBB,
+                             BasicBlock **IndirectCallBB,
+                             BasicBlock **MergeBB) {
+  CallSite CS(Inst);
+  Value *OrigCallee = CS.getCalledValue();
+
+  // Compare the called value against the candidate as raw i8* pointers.
+  IRBuilder<> BBBuilder(Inst);
+  LLVMContext &Ctx = Inst->getContext();
+  Type *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  Value *BCI1 = BBBuilder.CreateBitCast(OrigCallee, Int8PtrTy, "");
+  Value *BCI2 = BBBuilder.CreateBitCast(DirectCallee, Int8PtrTy, "");
+  Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
+
+  // The subtraction below is unsigned and would wrap around otherwise.
+  assert(TotalCount >= Count && "Target count exceeds the call site total");
+  uint64_t ElseCount = TotalCount - Count;
+  uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
+  uint64_t Scale = calculateCountScale(MaxCount);
+  MDBuilder MDB(Ctx);
+  MDNode *BranchWeights = MDB.createBranchWeights(
+      scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+  TerminatorInst *ThenTerm, *ElseTerm;
+  SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
+                                BranchWeights);
+  *DirectCallBB = ThenTerm->getParent();
+  (*DirectCallBB)->setName("if.true.direct_targ");
+  *IndirectCallBB = ElseTerm->getParent();
+  (*IndirectCallBB)->setName("if.false.orig_indirect");
+  *MergeBB = Inst->getParent();
+  (*MergeBB)->setName("if.end.icp");
+
+  // Special handling of Invoke instructions.
+  InvokeInst *II = dyn_cast<InvokeInst>(Inst);
+  if (!II)
+    return;
+
+  // We don't need branch instructions for invoke.
+  ThenTerm->eraseFromParent();
+  ElseTerm->eraseFromParent();
+
+  // Add jump from Merge BB to the NormalDest. This is needed for the newly
+  // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
+  BranchInst::Create(II->getNormalDest(), *MergeBB);
+}
+
+// Find the PHI in BB that have the CallResult as the operand.
+// Returns true if some PHI in BB already takes Inst as its incoming value
+// from Inst's parent block.
+static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
+  BasicBlock *From = Inst->getParent();
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    // PHI nodes are grouped at the top of a block; nothing to see after them.
+    if (!PHI)
+      break;
+    int IX = PHI->getBasicBlockIndex(From);
+    if (IX == -1)
+      continue;
+    if (PHI->getIncomingValue(IX) == Inst)
+      return true;
+  }
+  return false;
+}
+
+// This method fixes up PHI nodes in BB where BB is the UnwindDest of an
+// invoke instruction. In BB, there may be PHIs with incoming block being
+// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
+// instructions to its own BB, OrigBB is no longer the predecessor block of BB.
+// Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
+// so the PHI node's incoming BBs need to be fixed up accordingly.
+static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
+                                  BasicBlock *OrigBB,
+                                  BasicBlock *IndirectCallBB,
+                                  BasicBlock *DirectCallBB) {
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    // PHI nodes are grouped at the top of a block; nothing to fix after them.
+    if (!PHI)
+      break;
+    int IX = PHI->getBasicBlockIndex(OrigBB);
+    if (IX == -1)
+      continue;
+    // The same value now arrives over both new edges: add the edge from
+    // IndirectCallBB and re-point the existing OrigBB entry at DirectCallBB.
+    Value *V = PHI->getIncomingValue(IX);
+    PHI->addIncoming(V, IndirectCallBB);
+    PHI->setIncomingBlock(IX, DirectCallBB);
+  }
+}
+
+// This method fixes up PHI nodes in BB where BB is the NormalDest of an
+// invoke instruction. In BB, there may be PHIs with incoming block being
+// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
+// instructions to its own BB, a new incoming edge will be added to the original
+// NormalDstBB from the IndirectCallBB.
+// NewInst is the value produced on the direct-call path (possibly a bitcast
+// of the direct call's result); it replaces Inst for the edge from OrigBB.
+static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
+                                      BasicBlock *OrigBB,
+                                      BasicBlock *IndirectCallBB,
+                                      Instruction *NewInst) {
+  for (auto &I : *BB) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    // PHI nodes are grouped at the top of a block; nothing to fix after them.
+    if (!PHI)
+      break;
+    int IX = PHI->getBasicBlockIndex(OrigBB);
+    if (IX == -1)
+      continue;
+    Value *V = PHI->getIncomingValue(IX);
+    if (V == Inst) {
+      // The PHI consumes the call result: the original value now comes from
+      // IndirectCallBB and the direct-call result comes from OrigBB.
+      PHI->setIncomingBlock(IX, IndirectCallBB);
+      PHI->addIncoming(NewInst, OrigBB);
+      continue;
+    }
+    // Any other value simply arrives over the additional edge as well.
+    PHI->addIncoming(V, IndirectCallBB);
+  }
+}
+
+// Add a bitcast instruction to the direct-call return value if needed.
+// Returns the instruction that carries the direct call's result in the type
+// of the original call: DirectCallInst itself when no cast is required.
+static Instruction *insertCallRetCast(const Instruction *Inst,
+                                      Instruction *DirectCallInst,
+                                      Function *DirectCallee) {
+  if (Inst->getType()->isVoidTy())
+    return DirectCallInst;
+
+  Type *CallRetType = Inst->getType();
+  Type *FuncRetType = DirectCallee->getReturnType();
+  if (FuncRetType == CallRetType)
+    return DirectCallInst;
+
+  // A call's result is cast in the call's own block. An invoke terminates its
+  // block, so its result is cast in the normal destination instead.
+  BasicBlock *InsertionBB;
+  if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
+    InsertionBB = CI->getParent();
+  else
+    InsertionBB = cast<InvokeInst>(DirectCallInst)->getNormalDest();
+
+  return (new BitCastInst(DirectCallInst, CallRetType, "",
+                          InsertionBB->getTerminator()));
+}
+
+// Create a DirectCall instruction in the DirectCallBB.
+// Parameter Inst is the indirect-call (invoke) instruction.
+// DirectCallee is the decl of the direct-call (invoke) target.
+// DirectCallBB is the BB that the direct-call (invoke) instruction is inserted.
+// MergeBB is the bottom BB of the if-then-else-diamond after the
+// transformation. For invoke instruction, the edges from DirectCallBB and
+// IndirectCallBB to MergeBB are removed before this call (during
+// createIfThenElse).
+// Returns the value that represents the direct call's result in the type of
+// the original call (see insertCallRetCast).
+static Instruction *createDirectCallInst(const Instruction *Inst,
+                                         Function *DirectCallee,
+                                         BasicBlock *DirectCallBB,
+                                         BasicBlock *MergeBB) {
+  Instruction *NewInst = Inst->clone();
+  if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
+    CI->setCalledFunction(DirectCallee);
+    CI->mutateFunctionType(DirectCallee->getFunctionType());
+  } else {
+    // Must be an invoke instruction. Direct invoke's normal destination is
+    // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
+    // Also since IndirectCallBB does not have an edge to MergeBB, there is no
+    // need to insert new PHIs into MergeBB.
+    InvokeInst *II = cast<InvokeInst>(NewInst);
+    II->setCalledFunction(DirectCallee);
+    II->mutateFunctionType(DirectCallee->getFunctionType());
+    II->setNormalDest(MergeBB);
+  }
+
+  DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
+                                     NewInst);
+
+  // Clear the value profile data.
+  NewInst->setMetadata(LLVMContext::MD_prof, nullptr);
+
+  // Cast every argument whose type differs from the callee's parameter type.
+  CallSite NewCS(NewInst);
+  FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
+  unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
+  for (unsigned I = 0; I < ParamNum; ++I) {
+    Type *ATy = NewCS.getArgument(I)->getType();
+    Type *PTy = DirectCalleeType->getParamType(I);
+    if (ATy != PTy) {
+      BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
+      NewCS.setArgument(I, BI);
+    }
+  }
+
+  return insertCallRetCast(Inst, NewInst, DirectCallee);
+}
+
+// Create a PHI to unify the return values of calls.
+// Inst is the original indirect call and CallResult is the value produced on
+// the direct-call path. Does nothing for void calls, or when a PHI in the
+// join block already consumes Inst.
+static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
+                             Function *DirectCallee) {
+  if (Inst->getType()->isVoidTy())
+    return;
+
+  BasicBlock *RetValBB = CallResult->getParent();
+  if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
+    RetValBB = II->getNormalDest();
+
+  BasicBlock *PHIBB = RetValBB->getSingleSuccessor();
+  assert(PHIBB && "Expect a single successor for the direct-call result BB");
+  if (getCallRetPHINode(PHIBB, Inst))
+    return;
+
+  PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 2);
+  PHIBB->getInstList().push_front(CallRetPHI);
+  // Redirect the users before Inst becomes an operand of the PHI; doing it
+  // the other way around would rewrite the PHI's own use of Inst too.
+  Inst->replaceAllUsesWith(CallRetPHI);
+  CallRetPHI->addIncoming(Inst, Inst->getParent());
+  CallRetPHI->addIncoming(CallResult, RetValBB);
+}
+
+// This function does the actual indirect-call promotion transformation:
+// For an indirect-call like:
+//     Ret = (*Foo)(Args);
+// It transforms to:
+//     if (Foo == DirectCallee)
+//        Ret1 = DirectCallee(Args);
+//     else
+//        Ret2 = (*Foo)(Args);
+//     Ret = phi(Ret1, Ret2);
+// It adds type casts for the args do not match the parameters and the return
+// value. Branch weights metadata also updated.
+void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
+                                 uint64_t Count, uint64_t TotalCount) {
+  assert(DirectCallee != nullptr);
+  BasicBlock *BB = Inst->getParent();
+  // Just to suppress the non-debug build warning.
+  (void)BB;
+  DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+  DEBUG(dbgs() << *BB << "\n");
+
+  BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
+  createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
+                   &IndirectCallBB, &MergeBB);
+
+  Instruction *NewInst =
+      createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
+
+  // Move Inst from MergeBB to IndirectCallBB.
+  Inst->removeFromParent();
+  IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
+                                       Inst);
+
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
+    // At this point, the original indirect invoke instruction has the original
+    // UnwindDest and NormalDest. For the direct invoke instruction, the
+    // NormalDest points to MergeBB, and MergeBB jumps to the original
+    // NormalDest. MergeBB might have a new bitcast instruction for the return
+    // value. The PHIs are with the original NormalDest. Since we now have two
+    // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
+    //
+    // UnwindDest will not use the return value, so only its incoming blocks
+    // are rewritten.
+    fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
+                          DirectCallBB);
+    // In NormalDest, a PHI that consumes the call result gets NewInst for the
+    // edge from MergeBB; every other PHI gains the edge from IndirectCallBB.
+    fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
+                              IndirectCallBB, NewInst);
+  }
+
+  insertCallRetPHI(Inst, NewInst, DirectCallee);
+
+  DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
+  DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
+
+  // Build the message inside the call: a Twine only references its operands,
+  // so keeping it in a local would leave it pointing at dead temporaries.
+  emitOptimizationRemark(
+      F.getContext(), "PGOIndirectCallPromotion", F, Inst->getDebugLoc(),
+      Twine("Promote indirect call to ") + DirectCallee->getName() +
+          " with count " + Twine(Count) + " out of " + Twine(TotalCount));
+}
+
+// Promote indirect-call to conditional direct-call for one callsite.
+// TotalCount is reduced by the count of every promoted target, so on return
+// it holds the count left for the remaining indirect call.
+uint32_t ICallPromotionFunc::tryToPromote(
+    Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t &TotalCount) {
+  uint32_t NumPromoted = 0;
+
+  for (auto &C : Candidates) {
+    uint64_t Count = C.Count;
+    // Check before the counts are used to compute the branch weights.
+    assert(TotalCount >= Count);
+    promote(Inst, C.TargetFunction, Count, TotalCount);
+    TotalCount -= Count;
+    NumOfPGOICallPromotion++;
+    NumPromoted++;
+  }
+  return NumPromoted;
+}
+
+// Traverse all the indirect-call callsite and get the value profile
+// annotation to perform indirect-call promotion.
+// Returns true if at least one call site in the function was promoted.
+bool ICallPromotionFunc::processFunction() {
+  bool Changed = false;
+  for (auto &I : findIndirectCallSites(F)) {
+    uint32_t NumVals = 0;
+    uint64_t TotalCount = 0;
+    bool Res =
+        getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions,
+                                 ValueDataArray.get(), NumVals, TotalCount);
+    if (!Res)
+      continue;
+    ArrayRef<InstrProfValueData> ValueDataArrayRef(ValueDataArray.get(),
+                                                   NumVals);
+    auto PromotionCandidates =
+        getPromotionCandidatesForCallSite(I, ValueDataArrayRef, TotalCount);
+    // tryToPromote() subtracts the promoted counts from TotalCount.
+    uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
+    if (NumPromoted == 0)
+      continue;
+
+    Changed = true;
+    // Adjust the MD.prof metadata. First delete the old one.
+    I->setMetadata(LLVMContext::MD_prof, nullptr);
+    // If all promoted, we don't need the MD.prof metadata.
+    if (TotalCount == 0 || NumPromoted == NumVals)
+      continue;
+    // Otherwise we need update with the un-promoted records back.
+    annotateValueSite(*M, *I, ValueDataArrayRef.slice(NumPromoted), TotalCount,
+                      IPVK_IndirectCallTarget, MaxNumPromotions);
+  }
+  return Changed;
+}
+
+// A wrapper function that does the actual work.
+// Returns true if any function in the module was changed.
+static bool promoteIndirectCalls(Module &M, bool InLTO) {
+  if (DisableICP)
+    return false;
+  InstrProfSymtab Symtab;
+  Symtab.create(M, InLTO);
+  bool Changed = false;
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    if (F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+    ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
+    bool FuncChanged = ICallPromotion.processFunction();
+    if (ICPDUMPAFTER && FuncChanged) {
+      DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
+      DEBUG(dbgs() << "\n");
+    }
+    Changed |= FuncChanged;
+    // The cutoff is global to the compilation, so stop visiting functions
+    // once it has been reached.
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      DEBUG(dbgs() << " Stop: Cutoff reached.\n");
+      break;
+    }
+  }
+  return Changed;
+}
+
+bool PGOIndirectCallPromotion::runOnModule(Module &M) {
+  // Command-line option has the priority for InLTO.
+ InLTO |= ICPLTOMode; + return promoteIndirectCalls(M, InLTO); +} Index: llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -62,6 +62,7 @@ initializeGCOVProfilerPass(Registry); initializePGOInstrumentationGenPass(Registry); initializePGOInstrumentationUsePass(Registry); + initializePGOIndirectCallPromotionPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); Index: llvm/trunk/test/Transforms/PGOProfile/icp_covariant_call_return.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/icp_covariant_call_return.ll +++ llvm/trunk/test/Transforms/PGOProfile/icp_covariant_call_return.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.D = type { %struct.B } +%struct.B = type { i32 (...)** } +%struct.Base = type { i8 } +%struct.Derived = type { i8 } + +declare noalias i8* @_Znwm(i64) +declare void @_ZN1DC2Ev(%struct.D*); +declare %struct.Derived* @_ZN1D4funcEv(%struct.D*); + +define i32 @bar() { +entry: + %call = call noalias i8* @_Znwm(i64 8) + %tmp = bitcast i8* %call to %struct.D* + call void @_ZN1DC2Ev(%struct.D* %tmp) + %tmp1 = bitcast %struct.D* %tmp to %struct.B* + %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)*** + %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8 + %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0 + %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = 
bitcast %struct.Base* (%struct.B*)* %tmp3 to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM:if.true.direct_targ: +; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D* +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]]) +; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base* +; ICALL-PROM: br label %if.end.icp +; ICALL-PROM:if.false.orig_indirect: +; ICALL-PROM: %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1) +; ICALL-PROM: br label %if.end.icp +; ICALL-PROM:if.end.icp: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call1, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.true.direct_targ ] + %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1), !prof !1 + ret i32 0 +} + +!1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} +; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0} +; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} Index: llvm/trunk/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll +++ llvm/trunk/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll @@ -0,0 +1,110 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" +%struct.D = type { %struct.B } +%struct.B = type { i32 (...)** } +%struct.Derived = type { %struct.Base, i32 } +%struct.Base = type { i32 } + +@_ZTIi = 
external constant i8* +declare i8* @_Znwm(i64) +declare void @_ZN1DC2Ev(%struct.D*) +declare %struct.Derived* @_ZN1D4funcEv(%struct.D*) +declare void @_ZN1DD0Ev(%struct.D*) +declare void @_ZdlPv(i8*) +declare i32 @__gxx_personality_v0(...) +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() + + +define i32 @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %call = invoke i8* @_Znwm(i64 8) + to label %invoke.cont unwind label %lpad + +invoke.cont: + %tmp = bitcast i8* %call to %struct.D* + call void @_ZN1DC2Ev(%struct.D* %tmp) + %tmp1 = bitcast %struct.D* %tmp to %struct.B* + %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)*** + %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8 + %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0 + %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM:if.true.direct_targ: +; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D* +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = invoke %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]]) +; ICALL-PROM: to label %if.end.icp unwind label %lpad +; ICALL-PROM:if.false.orig_indirect: +; ICALL-PROM: %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1) +; ICALL-PROM: to label %invoke.cont1 unwind label %lpad +; ICALL-PROM:if.end.icp: +; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base* +; ICALL-PROM: br label %invoke.cont1 + %call2 = invoke %struct.Base* 
%tmp3(%struct.B* %tmp1) + to label %invoke.cont1 unwind label %lpad, !prof !1 + +invoke.cont1: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call2, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.end.icp ] +; ICALL-PROM: %isnull = icmp eq %struct.Base* [[PHI_RET]], null + %isnull = icmp eq %struct.Base* %call2, null + br i1 %isnull, label %delete.end, label %delete.notnull + +delete.notnull: + %tmp4 = bitcast %struct.Base* %call2 to i8* + call void @_ZdlPv(i8* %tmp4) + br label %delete.end + +delete.end: + %isnull3 = icmp eq %struct.B* %tmp1, null + br i1 %isnull3, label %delete.end8, label %delete.notnull4 + +delete.notnull4: + %tmp5 = bitcast %struct.B* %tmp1 to void (%struct.B*)*** + %vtable5 = load void (%struct.B*)**, void (%struct.B*)*** %tmp5, align 8 + %vfn6 = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** %vtable5, i64 2 + %tmp6 = load void (%struct.B*)*, void (%struct.B*)** %vfn6, align 8 + invoke void %tmp6(%struct.B* %tmp1) + to label %invoke.cont7 unwind label %lpad + +invoke.cont7: + br label %delete.end8 + +delete.end8: + br label %try.cont + +lpad: + %tmp7 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp8 = extractvalue { i8*, i32 } %tmp7, 0 + %tmp9 = extractvalue { i8*, i32 } %tmp7, 1 + br label %catch.dispatch + +catch.dispatch: + %tmp10 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %tmp9, %tmp10 + br i1 %matches, label %catch, label %eh.resume + +catch: + %tmp11 = call i8* @__cxa_begin_catch(i8* %tmp8) + %tmp12 = bitcast i8* %tmp11 to i32* + %tmp13 = load i32, i32* %tmp12, align 4 + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret i32 0 + +eh.resume: + %lpad.val = insertvalue { i8*, i32 } undef, i8* %tmp8, 0 + %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %tmp9, 1 + resume { i8*, i32 } %lpad.val11 +} + +!1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} +; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 
12345, i64 -3913987384944532146, i64 12345} +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0} +; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345} Index: llvm/trunk/test/Transforms/PGOProfile/icp_invoke.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/icp_invoke.ll +++ llvm/trunk/test/Transforms/PGOProfile/icp_invoke.ll @@ -0,0 +1,104 @@ +; RUN: opt < %s -icp-lto -pgo-icall-prom -S -icp-count-threshold=0 | FileCheck %s --check-prefix=ICP +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo1 = global void ()* null, align 8 +@foo2 = global i32 ()* null, align 8 +@_ZTIi = external constant i8* + +define internal void @_ZL4bar1v() !PGOFuncName !0 { +entry: + ret void +} + +define internal i32 @_ZL4bar2v() !PGOFuncName !1 { +entry: + ret i32 100 +} + +define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %tmp = load void ()*, void ()** @foo1, align 8 +; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* %tmp to i8* +; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*) +; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICP:[[TRUE_LABEL_IC1]]: +; ICP: invoke void @_ZL4bar1v() +; ICP: to label %[[DCALL_NORMAL_DEST_IC1:.*]] unwind label %lpad +; ICP:[[FALSE_LABEL_IC1]]: + invoke void %tmp() + to label %try.cont unwind label %lpad, !prof !2 + +; ICP:[[DCALL_NORMAL_DEST_IC1]]: +; ICP: br label %try.cont + +lpad: + %tmp1 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp2 = extractvalue { i8*, i32 } %tmp1, 0 + %tmp3 = extractvalue { i8*, i32 } %tmp1, 1 + %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %tmp3, %tmp4 + br i1 %matches, label %catch, label %eh.resume + 
+catch: + %tmp5 = tail call i8* @__cxa_begin_catch(i8* %tmp2) + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + %tmp6 = load i32 ()*, i32 ()** @foo2, align 8 +; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8* +; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*) +; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICP:[[TRUE_LABEL_IC2]]: +; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v() +; ICP: to label %[[DCALL_NORMAL_DEST_IC2:.*]] unwind label %lpad1 +; ICP:[[FALSE_LABEL_IC2]]: + %call = invoke i32 %tmp6() + to label %try.cont8 unwind label %lpad1, !prof !3 + +; ICP:[[DCALL_NORMAL_DEST_IC2]]: +; ICP: br label %try.cont8 +lpad1: + %tmp7 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp8 = extractvalue { i8*, i32 } %tmp7, 0 + %tmp9 = extractvalue { i8*, i32 } %tmp7, 1 + %tmp10 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches5 = icmp eq i32 %tmp9, %tmp10 + br i1 %matches5, label %catch6, label %eh.resume + +catch6: + %tmp11 = tail call i8* @__cxa_begin_catch(i8* %tmp8) + tail call void @__cxa_end_catch() + br label %try.cont8 + +try.cont8: + %i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ] +; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2]], %[[DCALL_NORMAL_DEST_IC2]] ] + ret i32 %i.0 + +eh.resume: + %ehselector.slot.0 = phi i32 [ %tmp9, %lpad1 ], [ %tmp3, %lpad ] + %exn.slot.0 = phi i8* [ %tmp8, %lpad1 ], [ %tmp2, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val11 +} + +declare i32 @__gxx_personality_v0(...) 
+ +declare i32 @llvm.eh.typeid.for(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +!0 = !{!"invoke.ll:_ZL4bar1v"} +!1 = !{!"invoke.ll:_ZL4bar2v"} +!2 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1} +!3 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1} +; ICP-NOT: !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1} +; ICP-NOT: !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1} +; ICP: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0} Index: llvm/trunk/test/Transforms/PGOProfile/icp_mismatch_msg.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/icp_mismatch_msg.ll +++ llvm/trunk/test/Transforms/PGOProfile/icp_mismatch_msg.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -pgo-icall-prom -pass-remarks-missed=PGOIndirectCallPromotion -S 2>& 1 | FileCheck %s + +; CHECK: remark: :0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch +; CHECK: remark: :0:0: Cannot promote indirect call to 11517462787082255043 with count of 2345: Cannot find the target +; CHECK: remark: :0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 ()* null, align 8 +@foo2 = common global i32 ()* null, align 8 +@foo3 = common global i32 ()* null, align 8 + +define i32 @func4(i32 %i) { +entry: + ret i32 %i +} + +define void @func2() { +entry: + ret void +} + +define i32 @bar() { +entry: + %tmp = load i32 ()*, i32 ()** @foo, align 8 + %call = call i32 %tmp(), !prof !1 + %tmp2 = load i32 ()*, i32 ()** @foo2, align 8 + %call1 = call i32 %tmp2(), !prof !2 + %add = add nsw i32 %call1, %call + %tmp3 = load i32 ()*, i32 ()** @foo3, align 8 + %call2 = call i32 %tmp3(), !prof !3 + %add2 = add nsw i32 %add, %call2 + ret i32 %add2 +} + +!1 = !{!"VP", i32 0, i64 1801, i64 
7651369219802541373, i64 1234, i64 -4377547752858689819, i64 567} +!2 = !{!"VP", i32 0, i64 3023, i64 -6929281286627296573, i64 2345, i64 -4377547752858689819, i64 678} +!3 = !{!"VP", i32 0, i64 7890, i64 -4377547752858689819, i64 7890} Index: llvm/trunk/test/Transforms/PGOProfile/icp_vararg.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/icp_vararg.ll +++ llvm/trunk/test/Transforms/PGOProfile/icp_vararg.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 (i32, ...)* null, align 8 + +define i32 @va_func(i32 %num, ...) { +entry: + ret i32 0 +} + +define i32 @bar() #1 { +entry: + %tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM:if.true.direct_targ: +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) @va_func(i32 3, i32 12, i32 22, i32 4) +; ICALL-PROM: br label %if.end.icp + %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4), !prof !1 +; ICALL-PROM:if.false.orig_indirect: +; ICALL-PROM: %call = call i32 (i32, ...) 
%tmp(i32 3, i32 12, i32 22, i32 4) +; ICALL-PROM: br label %if.end.icp + ret i32 %call +; ICALL-PROM:if.end.icp: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ] +; ICALL-PROM: ret i32 [[PHI_RET]] + +} + +!1 = !{!"VP", i32 0, i64 12345, i64 989055279648259519, i64 12345} +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0} Index: llvm/trunk/test/Transforms/PGOProfile/indirect_call_promotion.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/indirect_call_promotion.ll +++ llvm/trunk/test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=PGOIndirectCallPromotion -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; PASS-REMARK: remark: :0:0: Promote indirect call to func4 with count 1030 out of 1600 +; PASS-REMARK: remark: :0:0: Promote indirect call to func2 with count 410 out of 570 +; PASS-REMARK: remark: :0:0: Promote indirect call to func3 with count 150 out of 160 +; PASS-REMARK: remark: :0:0: Promote indirect call to func1 with count 10 out of 10 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32 ()* null, align 8 + +define i32 @func1() { +entry: + ret i32 0 +} + +define i32 @func2() { +entry: + ret i32 1 +} + +define i32 @func3() { +entry: + ret i32 2 +} + +define i32 @func4() { +entry: + ret i32 3 +} + +define i32 @bar() { +entry: + %tmp = load i32 ()*, i32 ()** @foo, align 8 +; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8* +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*) +; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof 
[[BRANCH_WEIGHT:![0-9]+]] +; ICALL-PROM: if.true.direct_targ: +; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4() +; ICALL-PROM: br label %if.end.icp + %call = call i32 %tmp(), !prof !1 +; ICALL-PROM: if.false.orig_indirect: +; ICALL-PROM: %call = call i32 %tmp(), !prof [[NEW_VP_METADATA:![0-9]+]] + ret i32 %call +; ICALL-PROM: if.end.icp: +; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ] +; ICALL-PROM: ret i32 [[PHI_RET]] +} + +!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} + +; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 570} +; ICALL-PROM: [[NEW_VP_METADATA]] = !{!"VP", i32 0, i64 570, i64 -4377547752858689819, i64 410}