Index: include/llvm/Transforms/Utils/CallPromotionUtils.h =================================================================== --- include/llvm/Transforms/Utils/CallPromotionUtils.h +++ include/llvm/Transforms/Utils/CallPromotionUtils.h @@ -29,26 +29,100 @@ bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason = nullptr); +/// CallPromotionInfo holds information created during the promotion and +/// possible versioning of a call site. This information is needed for the call +/// site to be demoted and made to be indirect once again. If demotion is not +/// required, a CallPromotionInfo object need not be created. +struct CallPromotionInfo { + + /// Indicates whether the call site was versioned during promotion or not. + /// Versioning creates an if-then-else structure where the new direct call + /// site is in the "then" block and the original indirect call site is in the + /// "else" block. + bool Versioned = false; + + /// The called value of the original indirect call site. + Value *CalledValue = nullptr; + + /// The !callees metadata attached to the original indirect call. + MDNode *Callees = nullptr; + + /// A cast instruction created for the return value of the promoted call + /// site, if required. + CastInst *RetCast = nullptr; + + /// Holds cast instructions created for the arguments of the promoted call + /// site, if required. + SmallVector ArgCasts; +}; + /// Promote the given indirect call site to unconditionally call \p Callee. /// /// This function promotes the given call site, returning the direct call or -/// invoke instruction. If the function type of the call site doesn't match that -/// of the callee, bitcast instructions are inserted where appropriate. If \p -/// RetBitCast is non-null, it will be used to store the return value bitcast, -/// if created. +/// invoke instruction. If the function type of the call site doesn't match +/// that of the callee, bitcast instructions are inserted where appropriate. If +/// \p CPI is non-null, these bitcasts are collected in the provided call +/// promotion information object. Instruction *promoteCall(CallSite CS, Function *Callee, - CastInst **RetBitCast = nullptr); + CallPromotionInfo *CPI = nullptr); /// Promote the given indirect call site to conditionally call \p Callee. /// /// This function creates an if-then-else structure at the location of the call /// site. The original call site is moved into the "else" block. A clone of the -/// indirect call site is promoted, placed in the "then" block, and returned. If -/// \p BranchWeights is non-null, it will be used to set !prof metadata on the -/// new conditional branch. +/// indirect call site is promoted, placed in the "then" block, and returned. +/// If \p BranchWeights is non-null, it will be used to set !prof metadata on +/// the new conditional branch. If \p CPI is non-null, information will be +/// stored in the provided call promotion information object that will allow +/// the call site to be demoted, if required. Instruction *promoteCallWithIfThenElse(CallSite CS, Function *Callee, - MDNode *BranchWeights = nullptr); + MDNode *BranchWeights = nullptr, + CallPromotionInfo *CPI = nullptr); +/// Demote the given direct call site. +/// +/// This function is meant to be used in conjunction with \p promoteCall and +/// \promoteCallWithIfThenElse. As such, the call promotion information created +/// by these functions will be used to demote a call that was previously +/// promoted. Together, these functions can be used to temporarily promote a +/// call in order to facilitate analyzing the program as if the call were +/// direct. The indirect call or invoke instruction is returned. +Instruction *demoteCall(CallSite CS, CallPromotionInfo &CPI); + +/// Try to promote an indirect call site. +/// +/// This function attempts to promote the given indirect call site. Promotion +/// is attempted for call sites having !callees metadata. For indirect call +/// sites having a small number of possible callees, !callees metadata +/// indicates what those functions are. +/// +/// The approach begins by inspecting the !callees metadata attached to a call +/// site. There are a few cases to consider based on the number of possible +/// callees. The first two are trivial. First, if the metadata indicates that +/// there are zero possible callees, the call site must be dead. Such a call +/// site is not touched. Second, if the metadata indicates that there is only +/// one possible callee, it is promoted to a direct call site in place. The +/// last and most interesting case is when the metadata indicates that there is +/// more than one possible callee. +/// +/// When a call site has more than one possible callee, the call site is +/// promoted inside an if-then-else structure, where the "then" block contains +/// a new direct call to a possible callee, and the "else" block contains the +/// original indirect call site. The possible callee selected for the direct +/// call is the first callee given in the indirect call site's !callees +/// metadata that is not also in \p CalleesToIgnore. This container can be used +/// to record callees that are already known to be not profitable for +/// promotion. +/// +/// The function returns a pointer to the direct call or invoke instruction if +/// the given call site was promoted. Otherwise, it returns a null pointer. If +/// \p CPI is non-null and promotion is successful, information will be stored +/// in the provided call promotion information object that will allow the call +/// site to be demoted, if required. Demotion also removes the if-then-else +/// structure if if the call site was versioned. +Instruction *tryToPromoteCallSite(CallSite CS, + SmallPtrSetImpl &CalleesToIgnore, + CallPromotionInfo *CPI = nullptr); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -58,6 +58,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -93,6 +94,17 @@ DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); +/// Flag to limit indirect call promotion based on !callees metdata. +/// +/// For indirect call sites having a small number of possible callees, !callees +/// metadata indicates what those functions are. If such metadata is present, +/// the inliner attempts to promote and inline profitable callees by versioning +/// the call site. This flag can be used to limit the number of inlined callees +/// for a given indirect call site. +static cl::opt InlineMaxCalleesToPromote( + "inline-max-calls-to-promote", cl::init(4), cl::Hidden, + cl::desc("Limit the number of promotions for indirect call sites")); + namespace { enum class InlinerFunctionImportStatsOpts { @@ -449,6 +461,204 @@ return false; } +/// A class used to manage call promotion and demotion. +/// +/// This class facilitates inlining through indirect call sites. During +/// inlining, if an InlinerCallPromotionGuard object is constructed for a given +/// indirect call site, an attempt is made to promote the indirect call site. +/// If successful, the inliner will attempt to inline the resulting direct call +/// site's callee. If inlining fails for any reason, the promoted call site is +/// demoted back to an indirect call site. +/// +/// Promotion and demotion are handled by the guard's constructor and +/// destructor, respectively. If promotion is successful, and the inliner is +/// able to inline or otherwise remove the direct call site, the inliner +/// informs the guard of this transformation using the \p setCallSiteRemoved +/// function. When the guard is deleted or goes out of scope (e.g., when +/// inlining aborts), if the guard's corresponding call site has been promoted, +/// but its callee hasn't been removed, the call site will be demoted back to +/// the original indirect call site. +class InlinerCallPromotionGuard { +public: + InlinerCallPromotionGuard(SmallVectorImpl> &Calls, + int &Index, SmallPtrSetImpl &Visited, + FunctionAnalysisManager *FAM = nullptr, + CallGraph *CG = nullptr) + : Calls(Calls), Index(Index), Visited(Visited), FAM(FAM), CG(CG), + Caller(Calls[Index].first.getCaller()) { + + // If the current call site is indirect, we can try to promote it to see if + // we can inline its callee. + if (!Calls[Index].first.getCalledFunction()) + tryToPromoteCallSite(); + } + + ~InlinerCallPromotionGuard() { + + // If the current call site was promoted but we weren't able to inline its + // callee, restore the indirect call. + if (!CallSiteRemoved && CallSitePromoted) + demoteCall(); + } + + /// Inform the call promotion guard that the direct call site was removed, + /// either due to inlining or because it is dead. If the call site was + /// promoted, this will prevent the guard from trying to demote it. + void setCallSiteRemoved() { CallSiteRemoved = true; }; + +private: + /// The identified call sites in the caller. This list is updated when + /// promotion is successful. + SmallVectorImpl> &Calls; + + /// The index of the current call site in Calls. This is a reference so we + /// can decrement the index to revisit the current call site. We want to + /// revisit the current call site when it is demoted but has additional known + /// callees that haven't yet been visited. + int &Index; + + /// The known callees of the current call site that we have already analyzed. + SmallPtrSetImpl &Visited; + + /// A function analysis manager for the new pass manager. Promotion with + /// versioning changes the control-flow graph, so we will have to invalidate + /// or update some analyses. + FunctionAnalysisManager *FAM; + + /// The legacy call graph. For the legacy pass manager, we will need to + /// update the call graph when we promote or demote a call site. + CallGraph *CG; + + /// The caller function. + Function *Caller; + + /// Indicates if a call site was promoted. + bool CallSitePromoted = false; + + /// Indicates if a call site was removed, either due to inlining or because + /// it is dead. + bool CallSiteRemoved = false; + + /// Call promotion information to use for promotion and demotion. The + /// information is populated during promotion and then consumed during + /// demotion to restore the indirect call site. + CallPromotionInfo CPI; + + void tryToPromoteCallSite() { + + // The current indirect call site. + CallSite IndirectCS = Calls[Index].first; + + // Compute the number of callees that have already been inlined or deleted + // for this call site. The number of callees is equal to the number of + // callees in the set difference between the set of visited callees and the + // set of known callees given by the !callees metadata attached to the call + // site. If the number of callees reaches InlineMaxCalleesToPromote, we end + // promotions for this call site to avoid code bloat. + unsigned NumPromotedCallees = Visited.size(); + for (Function *F : IndirectCS.getKnownCallees()) + if (Visited.count(F)) + --NumPromotedCallees; + if (NumPromotedCallees == InlineMaxCalleesToPromote) + return; + + // Try to promote the call site. If unsuccessful, there's nothing more to + // do. The call promotion information will be saved in CPI if successful. + Instruction *DirectCSInst = + llvm::tryToPromoteCallSite(IndirectCS, Visited, &CPI); + if (!DirectCSInst) + return; + + // Promotion was successful. Create the direct call site from the returned + // instruction. + auto DirectCS = CallSite(DirectCSInst); + CallSitePromoted = true; + + if (CPI.Versioned) { + // Unfortunately, since versioning the call site modifies the + // control-flow graph, we either need to update cached analyses for the + // caller or invalidate them. For now, we just invalidate them. Some of + // the analyses are needed by getInlineCost (e.g., BlockFreqencyInfo). + if (FAM) + FAM->invalidate(*Caller, PreservedAnalyses::none()); + + // The direct call site is a new instruction. Add it to the list of Calls + // and swap the current call site with the direct call site. The current + // call site will now be the direct one, and the indirect one will be + // visited again. + Calls.push_back({DirectCS, Calls[Index].second}); + std::swap(Calls[Index], Calls.back()); + } + + // Update the legacy call graph to reflect the promotion. + if (CG) { + CallGraphNode *CallerN = (*CG)[Caller]; + CallGraphNode *CalleeN = (*CG)[DirectCS.getCalledFunction()]; + if (CPI.Versioned) { + CallerN->addCalledFunction(DirectCS, CalleeN); + CallerN->replaceCallEdge(IndirectCS, IndirectCS, + CG->getOrInsertNodeForCalleesMD( + IndirectCS.getInstruction()->getMetadata( + LLVMContext::MD_callees))); + } else { + CallerN->replaceCallEdge(IndirectCS, DirectCS, CalleeN); + } + } + } + + void demoteCall() { + + // If the call site was versioned and we need to update the legacy call + // graph, remove the edge for the direct call site. This instruction will + // be erased during demotion. + if (CG && CPI.Versioned) { + CallSite DirectCS = Calls[Index].first; + CallGraphNode *CallerN = (*CG)[Caller]; + CallerN->removeCallEdgeFor(DirectCS); + } + + // Demote the current call site using the call promotion information we + // created during promotion. + llvm::demoteCall(Calls[Index].first, CPI); + + // If we versioned the call site, move the indirect call site back to its + // original location in the Calls list. + if (CPI.Versioned) + std::swap(Calls[Index], Calls.back()); + + // If we need to update the legacy call graph, make the edge from the + // indirect call site connect to the external node with the correct + // !callees metadata. + if (CG) { + CallSite IndirectCS = Calls[Index].first; + CallGraphNode *CallerN = (*CG)[Caller]; + CallerN->replaceCallEdge(IndirectCS, IndirectCS, + CG->getOrInsertNodeForCalleesMD( + IndirectCS.getInstruction()->getMetadata( + LLVMContext::MD_callees))); + } + + if (!CPI.Versioned) + return; + + // Unfortunately, since versioning the call site modifies the + // control-flow graph, we either need to update cached analyses for the + // caller or invalidate them. For now, we just invalidate them. Some of + // the analyses are needed by getInlineCost (e.g., BlockFreqencyInfo). + if (FAM) + FAM->invalidate(*Caller, PreservedAnalyses::none()); + + // If the current call site has known callees that we haven't yet tried to + // promote, we want to visit it again. Decrement the index. + if (any_of(Calls[Index].first.getKnownCallees(), + [&](Function *F) { return !Visited.count(F); })) + --Index; + + // Remove the direct call site from the list. + Calls.pop_back(); + } +}; + bool LegacyInlinerBase::doInitialization(CallGraph &CG) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.setModuleInfo(CG.getModule()); @@ -541,6 +751,10 @@ InlinedArrayAllocasTy InlinedArrayAllocas; InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI); + // A cache of callees per indirect call site that we've attempted to inline. + // We don't want to consider them again. + DenseMap> AnalyzedCallees; + // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. bool Changed = false; @@ -550,28 +764,41 @@ // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. // CallSites may be modified inside so ranged for loop can not be used. - for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - CallSite CS = CallSites[CSi].first; + for (int CSi = 0; CSi != (int)CallSites.size(); ++CSi) { + + // Construct a call promotion guard and attempt to inline through + // indirect call sites. If the call site is indirect and has a known set + // of possible callees (e.g., as specified by !callees metadata), it will + // be promoted (with possible versioning). If inlining fails, the call + // site will be demoted. + InlinerCallPromotionGuard Guard( + CallSites, CSi, + AnalyzedCallees[CallSites[CSi].first.getInstruction()], nullptr, &CG); + CallSite CS = CallSites[CSi].first; + int InlineHistoryID = CallSites[CSi].second; Function *Caller = CS.getCaller(); - Function *Callee = CS.getCalledFunction(); + + // If the call site remains indirect, even after attempting to promote + // it, there's nothing we can do. + if (!CS.getCalledFunction()) + continue; // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) + Function *Callee = CS.getCalledFunction(); + if (Callee->isDeclaration()) continue; Instruction *Instr = CS.getInstruction(); bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); - int InlineHistoryID; if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to - // infinitely inline. - InlineHistoryID = CallSites[CSi].second; + // infinitel inline. if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; @@ -648,6 +875,11 @@ } } + // Inform the call promotion guard that the call site was removed. If the + // call site was promoted, this will prevent the guard from trying to + // demote it. + Guard.setCallSiteRemoved(); + // If we inlined or deleted the last possible call site to the function, // delete the function body now. if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && @@ -839,9 +1071,7 @@ // Instead we should do an actual RPO walk of the function body. for (Instruction &I : instructions(N.getFunction())) if (auto CS = CallSite(&I)) - if (Function *Callee = CS.getCalledFunction()) - if (!Callee->isDeclaration()) - Calls.push_back({CS, -1}); + Calls.push_back({CS, -1}); } if (Calls.empty()) return PreservedAnalyses::all(); @@ -910,15 +1140,37 @@ PSI, &ORE); }; + // A cache of callees per indirect call site that we've attempted to + // inline. We don't want to consider them again. + DenseMap> AnalyzedCallees; + // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) { + + // Construct a call promotion guard and attempt to inline through + // indirect call sites. If the call site is indirect and has a known set + // of possible callees (e.g., as specified by !callees metadata), it will + // be promoted (with possible versioning). If inlining fails, the call + // site will be demoted. + InlinerCallPromotionGuard Guard( + Calls, i, AnalyzedCallees[Calls[i].first.getInstruction()], &FAM); + int InlineHistoryID; CallSite CS; std::tie(CS, InlineHistoryID) = Calls[i]; + + // If the call site remains indirect, even after attempting to promote + // it, there's nothing we can do. + if (!CS.getCalledFunction()) + continue; + + // We can only inline direct calls to non-declarations. Function &Callee = *CS.getCalledFunction(); + if (Callee.isDeclaration()) + continue; if (InlineHistoryID != -1 && InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) @@ -966,6 +1218,11 @@ DidInline = true; InlinedCallees.insert(&Callee); + // Inform the call promotion guard that the call site was removed. If the + // call site was promoted, this will prevent the guard from trying to + // demote it. + Guard.setCallSiteRemoved(); + ORE.emit([&]() { bool AlwaysInline = OIC->isAlways(); StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; Index: lib/Transforms/Utils/CallPromotionUtils.cpp =================================================================== --- lib/Transforms/Utils/CallPromotionUtils.cpp +++ lib/Transforms/Utils/CallPromotionUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -159,7 +160,7 @@ /// %t1 = bitcast i32 %t0 to ... /// br label %normal_dst /// -static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) { +static void createRetBitCast(CallSite CS, Type *RetTy, CallPromotionInfo *CPI) { // Save the users of the calling instruction. These uses will be changed to // use the bitcast after we create it. @@ -179,8 +180,8 @@ // Bitcast the return value to the correct type. auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(), RetTy, "", InsertBefore); - if (RetBitCast) - *RetBitCast = Cast; + if (CPI) + CPI->RetCast = Cast; // Replace all the original uses of the calling instruction with the bitcast. for (User *U : UsersToUpdate) @@ -362,9 +363,17 @@ } Instruction *llvm::promoteCall(CallSite CS, Function *Callee, - CastInst **RetBitCast) { + CallPromotionInfo *CPI) { assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted"); + // If the client provided call promotion information, save the called value + // of the indirect call site and the !callees metata attached to it. + if (CPI) { + CPI->CalledValue = CS.getCalledValue(); + if (CS.getInstruction()->getMetadata(LLVMContext::MD_callees)) + CPI->Callees = CS.getInstruction()->getMetadata(LLVMContext::MD_callees); + } + // Set the called function of the call site to be the given callee. CS.setCalledFunction(Callee); @@ -397,27 +406,283 @@ auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "", CS.getInstruction()); CS.setArgument(ArgNo, Cast); + if (CPI) + CPI->ArgCasts.push_back(Cast); } } // If the return type of the call site doesn't match that of the callee, cast // the returned value to the appropriate type. if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) - createRetBitCast(CS, CallSiteRetTy, RetBitCast); + createRetBitCast(CS, CallSiteRetTy, CPI); + + return CS.getInstruction(); +} + +Instruction *llvm::demoteCall(CallSite CS, CallPromotionInfo &CPI) { + assert(CS.getCalledFunction() && "Only direct call sites can be demoted"); + + // Handle argument casts. For each cast instruction, replace all of its uses + // with its source operand. + for (CastInst *Cast : CPI.ArgCasts) { + while (!Cast->user_empty()) + Cast->user_back()->replaceUsesOfWith(Cast, Cast->getOperand(0)); + Cast->eraseFromParent(); + } + + // Handle the return value cast. Similar to arguments casts, we replace all + // of uses of the cast with its source operand. If the call site is an invoke + // instruction, the return value cast is placed along the edge between the + // block containing the invoke and it's normal destination. The block + // containing this cast is also erased. + if (CPI.RetCast) { + while (!CPI.RetCast->user_empty()) + CPI.RetCast->user_back()->replaceUsesOfWith(CPI.RetCast, + CPI.RetCast->getOperand(0)); + if (!isa(CS.getInstruction())) { + CPI.RetCast->eraseFromParent(); + } else { + // For invoke instructions, the edge between the block containing the + // invoke and the invoke's normal destination has been split (see + // createRetBitCast): + // + // orig_bb: + // %t0 = invoke i32 @func() + // to label %split_bb unwind label %unwind_dst + // + // ; The following block will be erased. + // split_bb: + // %t1 = bitcast i32 %t0 to ... + // br label %normal_dst + // + BasicBlock *SplitBlock = CPI.RetCast->getParent(); + BasicBlock *OrigBlock = SplitBlock->getSinglePredecessor(); + BasicBlock *NormalDest = SplitBlock->getSingleSuccessor(); + auto *OrigInvoke = cast(OrigBlock->getTerminator()); + OrigInvoke->setNormalDest(NormalDest); + SplitBlock->replaceAllUsesWith(OrigBlock); + SplitBlock->eraseFromParent(); + } + } + + if (!CPI.Versioned) { + // If the call site wasn't versioned, we demote the direct call site in + // place. For the versioning case, we simply erase the direct call site and + // move the indirect call site to the correct position. First, set the + // called value of the call site, and mutate its function type. + CS.setCalledFunction(CPI.CalledValue); + CS.mutateFunctionType(cast( + cast(CPI.CalledValue->getType())->getElementType())); + + // Attach !callees metadata to the now indirect call site. The metadata + // node should have been saved at the time the call site was promoted. + if (CPI.Callees) + CS.getInstruction()->setMetadata(LLVMContext::MD_callees, CPI.Callees); + return CS.getInstruction(); + } + + // A wrapper around llvm::MergeBlockIntoPredecessor that checks that the + // merging was successful. For our use, merging should not fail. + auto mergeBlockIntoPredecessor = [](BasicBlock *BB) { + bool Merged = MergeBlockIntoPredecessor(BB); + assert(Merged && "Block not merged into predecessor"); + (void)Merged; + }; + + // Reconstruct the control-flow created during promotion. We have an + // if-then-else structure where a new direct call site is in the "then" + // block, and the original indirect call site is in the "else" block. For + // call instructions, this looks like: + // + // orig_bb: + // %cond = icmp eq i32 ()* %ptr, @func + // br i1 %cond, %then_bb, %else_bb + // + // then_bb: + // %t1 = call i32 @func() + // br merge_bb + // + // else_bb: + // %t0 = call i32 %ptr() + // br merge_bb + // + // merge_bb: + // %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] + // + // For invoke instructions, this looks like: + // + // orig_bb: + // %cond = icmp eq i32 ()* %ptr, @func + // br i1 %cond, %then_bb, %else_bb + // + // then_bb: + // %t1 = invoke i32 @func() to label %merge_bb unwind label %unwind_dst + // + // else_bb: + // %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst + // + // merge_bb: + // %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] + // br %normal_dst + // + BasicBlock *ThenBlock = CS.getInstruction()->getParent(); + BasicBlock *OrigBlock = ThenBlock->getSinglePredecessor(); + TerminatorInst *OrigTerm = OrigBlock->getTerminator(); + Value *Cond = cast(OrigTerm)->getCondition(); + BasicBlock *ElseBlock = OrigTerm->getSuccessor(1); + + // The merge block depends on whether we have a call or an invoke + // instruction. + BasicBlock *MergeBlock = nullptr; + if (auto *PromotedInvoke = dyn_cast(CS.getInstruction())) + MergeBlock = PromotedInvoke->getNormalDest(); + else + MergeBlock = ElseBlock->getSingleSuccessor(); + + // The indirect call site we want to keep is in the "else" block. We will + // erase the direct call in the "then" block. + CS = CallSite(&ElseBlock->front()); + + // Attach !callees metadata to the now indirect call site. The metadata node + // should have been saved at the time the call site was promoted. + if (CPI.Callees) + CS.getInstruction()->setMetadata(LLVMContext::MD_callees, CPI.Callees); + + // Erase the condition and conditional branch from the end of the "original" + // block and replace it with an unconditional branch to the "else" block. + OrigTerm->eraseFromParent(); + if (auto *I = dyn_cast(Cond)) + I->eraseFromParent(); + BranchInst::Create(ElseBlock, OrigBlock); + + // We're going to delete the "then" block. First we need to fix-up the phi + // node in the "merge" block. + for (auto &Phi : MergeBlock->phis()) + Phi.removeIncomingValue(ThenBlock); + FoldSingleEntryPHINodes(MergeBlock); + + // Erase the terminator from the "then" block. For the invoke case, this will + // erase the direct call site. For the call case, this removes the branch so + // we can merge the "merge" block into a single predecessor. + ThenBlock->getTerminator()->eraseFromParent(); + + // Merge the "else" block into the "original" block. + mergeBlockIntoPredecessor(ElseBlock); + + // For the call case, merge the "merge" block into the "original" block and + // erase the "then" block. No other work is required. + auto *PromotedInvoke = dyn_cast(OrigBlock->getTerminator()); + if (!PromotedInvoke) { + mergeBlockIntoPredecessor(MergeBlock); + ThenBlock->eraseFromParent(); + return CS.getInstruction(); + } + + // For the invoke case, we need to fix-up the phi nodes in the normal and + // unwind destination blocks. First, for the normal destination, values + // coming from the "merge" block now come from the "original" block. + PromotedInvoke->setNormalDest(MergeBlock->getSingleSuccessor()); + for (auto &Phi : PromotedInvoke->getNormalDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(MergeBlock); + if (Idx == -1) + continue; + Phi.setIncomingBlock(Idx, OrigBlock); + } + + // For the unwind destination, there are now no longer any value coming from + // the "then" block. + for (auto &Phi : PromotedInvoke->getUnwindDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(ThenBlock); + if (Idx == -1) + continue; + Phi.removeIncomingValue(Idx); + } + + // Lastly, erase the "merge" and "then" blocks for the invoke case. + MergeBlock->eraseFromParent(); + ThenBlock->eraseFromParent(); return CS.getInstruction(); } Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee, - MDNode *BranchWeights) { + MDNode *BranchWeights, + CallPromotionInfo *CPI) { // Version the indirect call site. If the called value is equal to the given // callee, 'NewInst' will be executed, otherwise the original call site will // be executed. Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights); + // If the client requested call promotion information, indicate that the call + // site was versioned. + if (CPI) + CPI->Versioned = true; + // Promote 'NewInst' so that it directly calls the desired function. - return promoteCall(CallSite(NewInst), Callee); + return promoteCall(CallSite(NewInst), Callee, CPI); +} + +Instruction * +llvm::tryToPromoteCallSite(CallSite CS, + SmallPtrSetImpl &CalleesToIgnore, + CallPromotionInfo *CPI) { + assert(!CS.getCalledFunction() && "Can only promote indirect calls"); + + // If this call site has profile metadata attached to it, don't attempt + // promotion. We don't want to disagree with profile-guideded heuristics. + if (CS.getInstruction()->getMetadata(LLVMContext::MD_prof)) + return nullptr; + + // Collect the functions the call site might call. If the call site doesn't + // have !callees metadata, we don't know its possible callees, so there's + // nothing to do. + if (!CS.getInstruction()->getMetadata(LLVMContext::MD_callees)) + return nullptr; + SmallVector PossibleCallees = CS.getKnownCallees(); + + // If the metadata indicates that there is only one possible callee, we + // attempt to promote the call site. If successful, we clear the call site's + // !callees metadata. + if (PossibleCallees.size() == 1) { + Function *F = PossibleCallees[0]; + if (F->isDeclaration() || CalleesToIgnore.count(F) || + !isLegalToPromote(CS, F)) + return nullptr; + CalleesToIgnore.insert(F); + Instruction *Direct = promoteCall(CS, F, CPI); + return Direct; + } + + // If the metadata indicates that there is more than one possible callee, we + // will attempt to promote the call site by versioning it. + for (unsigned I = 0; I < PossibleCallees.size(); ++I) { + Function *F = PossibleCallees[I]; + + // Skip this function if we already know it to be unprofitable or it can't + // be promoted. + if (F->isDeclaration() || CalleesToIgnore.count(F) || + !isLegalToPromote(CS, F)) + continue; + CalleesToIgnore.insert(F); + + // Promote the indirect call site such that we conditionally execute the + // direct call. The original instruction is moved to the "else" block, and + // the new direct call in the "then" block is returned. + Instruction *Direct = promoteCallWithIfThenElse(CS, F, nullptr, CPI); + + // The original call site remains indirect, but we must remove the function + // from its !callees metadata. After the promotion, we know this call site + // cannot target it. + PossibleCallees.erase(PossibleCallees.begin() + I); + MDBuilder MDB(CS.getInstruction()->getContext()); + CS.getInstruction()->setMetadata(LLVMContext::MD_callees, + MDB.createCallees(PossibleCallees)); + + return Direct; + } + + return nullptr; } #undef DEBUG_TYPE Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -113,8 +113,8 @@ ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> -; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -98,8 +98,8 @@ ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> -; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. Index: test/Transforms/Inline/callees-metadata.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/callees-metadata.ll @@ -0,0 +1,224 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; A call instruction with empty !callees metadata. This is undefined behavior. +; The inliner doesn't mess with it. +; +; CHECK-LABEL: @call.0.0( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void %fp() +; CHECK-NEXT: ret void +; +define void @call.0.0(void ()* %fp) { +entry: + call void %fp(), !callees !{} + ret void +} + +; A call instruction with two possible callees that can both inlined. +; +; CHECK-LABEL: @call.2.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0 +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ %y, %[[ELSE]] ], [ %x, %[[THEN]] ] +; CHECK-NEXT: ret i64 [[TMP1]] +; +define i64 @call.2.2(i64 %x, i64 %y, i64 (i64, i64)* %fp) { +entry: + %tmp0 = call i64 %fp(i64 %x, i64 %y), + !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_def_1} + ret i64 %tmp0 +} + +; A call instruction with three possible callees, only one of which can be +; inlined. Since more than one possible callee remains after promotion, the +; cloned call is indirect. +; +; CHECK-LABEL: @call.3.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0 +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 %fp(i64 %x, i64 %y), !callees +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], %[[ELSE]] ], [ %x, %[[THEN]] ] +; CHECK-NEXT: ret i64 [[TMP2]] +; +define i64 @call.3.1(i64 %x, i64 %y, i64 (i64, i64)* %fp) { +entry: + %tmp0 = call i64 %fp(i64 %x, i64 %y), + !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_dec_1, + i64 (i64, i64)* @select_dec_0} + ret i64 %tmp0 +} + +; A call instruction with two possible callees, only one of which can be +; inlined. Although only one possible callee remains after promotion, the +; cloned call is not promoted since the callee cannot be inlined. +; +; CHECK-LABEL: @call.2.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0 +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 %fp(i64 %x, i64 %y), !callees +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], %[[ELSE]] ], [ %x, %[[THEN]] ] +; CHECK-NEXT: ret i64 [[TMP2]] +; +define i64 @call.2.1(i64 %x, i64 %y, i64 (i64, i64)* %fp) { +entry: + %tmp0 = call i64 %fp(i64 %x, i64 %y), + !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_dec_1} + ret i64 %tmp0 +} + +; A call instruction with two possible callees, only one of which can be +; inlined. Although only one possible callee remains after promotion, the +; cloned call is not promoted since the callee cannot be inlined. Bitcasts must +; be inserted for the promoted call's arguments and return value since the +; callee's function type doesn't match that of the function pointer. +; +; CHECK-LABEL: @call.2.1.bitcast( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32* (i32**)* %fp, bitcast (i64* (i64**)* @load_def to i32* (i32**)*) +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** %x to i64** +; CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i32* +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32* %fp(i32** %x), !callees +; CHECK-NEXT: br label %[[MERGE]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi i32* [ [[TMP4]], %[[ELSE]] ], [ [[TMP3]], %[[THEN]] ] +; CHECK-NEXT: ret i32* [[TMP5]] +; +define i32* @call.2.1.bitcast(i32** %x, i32* (i32**)* %fp) { +entry: + %tmp0 = call i32* %fp(i32** %x), + !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec} + ret i32* %tmp0 +} + +; An invoke instruction with two possible callees, only one of which can be +; inlined. Although only one possible callee remains after promotion, the +; cloned invoke is not promoted since the callee cannot be inlined. The normal +; destination has a phi node that must be fixed. +; +; CHECK-LABEL: @invoke.2.1( +; CHECK-NEXT: entry: +; CHECK: br i1 undef, label %[[TRY:.*]], label %[[NORMAL:.*]] +; CHECK: [[TRY]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64* (i64**)* %fp, @load_def +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** %x +; CHECK-NEXT: br label %[[MERGE]] +; CHECK: [[ELSE]]: +; CHECK: [[TMP2:%.*]] = invoke i64* %fp(i64** %x) +; CHECK-NEXT: to label %[[MERGE]] unwind label %[[EXCEPT:.*]], !callees +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi i64* [ [[TMP2]], %[[ELSE]] ], [ [[TMP1]], %[[THEN]] ] +; CHECK-NEXT: br label %[[NORMAL:.*]] +; CHECK: [[NORMAL]]: +; CHECK-NEXT: [[TMP4:%.*]] = phi i64* [ [[TMP3]], %[[MERGE]] ], [ null, %entry ] +; CHECK-NEXT: ret i64* [[TMP4]] +; CHECK: [[EXCEPT]]: +; CHECK-NEXT: [[TMP5:%.*]] = landingpad { i8*, i64 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: ret i64* null +; +define i64* @invoke.2.1(i64** %x, i64* (i64**)* %fp) personality i64 (...)* null { +entry: + br i1 undef, label %try, label %normal +try: + %tmp0 = invoke i64* %fp(i64 **%x) + to label %normal unwind label %except, + !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec} +normal: + %tmp1 = phi i64* [ %tmp0, %try ], [ null, %entry ] + ret i64* %tmp1 +except: + %tmp2 = landingpad {i8*, i64} + cleanup + ret i64* null +} + +; An invoke instruction with two possible callees, only one of which can be +; inlined. Although only one possible callee remains after promotion, the +; cloned invoke is not promoted since the callee cannot be inlined. The normal +; destination has a phi node that must be fixed. Additionally, bitcasts must be +; inserted for the promoted invoke's arguments and return value since the +; callee's function type doesn't match that of the function pointer. +; +; CHECK-LABEL: @invoke.2.1.bitcast( +; CHECK-NEXT: entry: +; CHECK: br i1 undef, label %[[TRY:.*]], label %[[NORMAL:.*]] +; CHECK: [[TRY]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32* (i32**)* %fp, bitcast (i64* (i64**)* @load_def to i32* (i32**)*) +; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** %x to i64** +; CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]] +; CHECK-NEXT: br label %[[SPLIT:.*]] +; CHECK: [[SPLIT]]: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i32* +; CHECK-NEXT: br label %[[MERGE:.*]] +; CHECK: [[ELSE]]: +; CHECK: [[TMP4:%.*]] = invoke i32* %fp(i32** %x) +; CHECK-NEXT: to label %[[MERGE]] unwind label %[[EXCEPT:.*]], !callees +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi i32* [ [[TMP4]], %[[ELSE]] ], [ [[TMP3]], %[[SPLIT]] ] +; CHECK-NEXT: br label %[[NORMAL]] +; CHECK: [[NORMAL]]: +; CHECK-NEXT: [[TMP6:%.*]] = phi i32* [ [[TMP5]], %[[MERGE]] ], [ null, %entry ] +; CHECK-NEXT: ret i32* [[TMP6]] +; CHECK: [[EXCEPT]]: +; CHECK-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: ret i32* null +; +define i32* @invoke.2.1.bitcast(i32** %x, i32* (i32**)* %fp) personality i32 (...)* null { +entry: + br i1 undef, label %try, label %normal +try: + %tmp0 = invoke i32* %fp(i32 **%x) + to label %normal unwind label %except, + !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec} +normal: + %tmp1 = phi i32* [ %tmp0, %try ], [ null, %entry ] + ret i32* %tmp1 +except: + %tmp2 = landingpad {i8*, i32} + cleanup + ret i32* null +} + +; The functions declarations and definitions below are used in the above tests +; and are not interesting. The !callees metadata that references the functions +; is expanded and attached to the call sites. +; +declare i64 @select_dec_0(i64 %x, i64 %y) +declare i64 @select_dec_1(i64 %x, i64 %y) +declare i64* @load_dec(i64** %x) + +define i64 @select_def_0(i64 %x, i64 %y) { ret i64 %x } +define i64 @select_def_1(i64 %x, i64 %y) { ret i64 %y } +define i64* @load_def(i64** %x) { + %tmp0 = load i64*, i64** %x + ret i64* %tmp0 +}