Index: include/llvm/Transforms/Utils/CallSitePromotion.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Utils/CallSitePromotion.h
@@ -0,0 +1,65 @@
+//===- CallSitePromotion.h - Utilities for call promotion -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_CALLSITEPROMOTION_H
+#define LLVM_TRANSFORMS_UTILS_CALLSITEPROMOTION_H
+
+#include "llvm/IR/InstrTypes.h"
+
+namespace llvm {
+
+class CallSite;
+
+/// Return true if the given indirect call site can be made to call the given
+/// function.
+///
+/// This function ensures that the number and type of the call site's arguments
+/// and return value match those of the given function. If the types do not
+/// match exactly, they must at least be bitcast compatible.
+bool canPromoteCallSite(CallSite CS, Function *Callee);
+
+/// Promote the given indirect call site to directly call \p Callee.
+///
+/// If the function type of the call site doesn't match that of the callee,
+/// bitcast instructions are inserted where appropriate. If \p Casts is
+/// non-null, these bitcasts are collected in the provided container.
+void makeCallSiteDirect(CallSite CS, Function *Callee,
+                        SmallVectorImpl<CastInst *> *Casts = nullptr);
+
+/// Demote the given direct call site to indirectly call \p CalledValue.
+///
+/// This function is meant to be used in conjunction with \p
+/// makeCallSiteDirect. As such, any bitcast instructions contained in \p
+/// Casts, which were created by \p makeCallSiteDirect, will be erased.
+void makeCallSiteIndirect(CallSite CS, Value *CalledValue,
+                          ArrayRef<CastInst *> Casts);
+
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition is determined by the given predicate and values.
+/// The original call site is moved into the "then" block, and a clone of the
+/// call site is placed in the "else" block. The cloned instruction is returned.
+/// If the call site happens to be an invoke instruction, further work is done
+/// to fix phi nodes in the invoke's normal and unwind destinations.
+///
+/// This function is intended to facilitate transformations such as indirect
+/// call promotion, where the callee can be changed based on some run-time
+/// condition.
+Instruction *versionCallSite(CallSite CS, CmpInst::Predicate Pred, Value *LHS,
+                             Value *RHS);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_CALLSITEPROMOTION_H
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -47,6 +47,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
@@ -57,6 +58,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallSitePromotion.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -434,6 +436,173 @@
   return IC;
 }
 
+/// Update the legacy call graph after the given call site is promoted.
+///
+/// The call site should be direct at this point. If it was versioned when it
+/// was promoted, \p Inst should refer to the newly created indirect call.
+static void updateLegacyCGAfterCSPromotion(CallGraph &CG, CallSite CS,
+                                           Instruction *Inst = nullptr) {
+  Function *Callee = CS.getCalledFunction();
+  assert(Callee && "Call was not promoted");
+  CallGraphNode *Caller = CG[CS.getInstruction()->getFunction()];
+
+  // The promoted call site had !callees metadata attached to it. This is
+  // represented in the legacy call graph by an external node. Find it.
+  CallGraphNode *External = nullptr;
+  for (CallGraphNode::iterator I = Caller->begin(), E = Caller->end(); I != E;
+       ++I)
+    if (I->first == CS.getInstruction()) {
+      External = I->second;
+      break;
+    }
+  assert(External && "External node for !callees metadata not found");
+
+  // The promoted call site now directly calls Callee. Additionally, there
+  // should no longer be an edge from the external node to Callee.
+  Caller->replaceCallEdge(CS, CS, CG[Callee]);
+  External->removeAnyCallEdgeTo(CG[Callee]);
+
+  // If the call site was versioned, Inst refers to the newly created indirect
+  // call. There should be an edge from this new call site to the external
+  // node.
+  if (Inst) {
+    assert(Inst->getMetadata(LLVMContext::MD_callees) &&
+           "Indirect call must have !callees metadata");
+    Caller->addCalledFunction(CallSite(Inst), External);
+  }
+}
+
+/// Try to promote an indirect call site.
+///
+/// This function attempts to promote the given indirect call site so that its
+/// callee (or potential callees) may be inlined. Promotion is attempted for
+/// call sites having !callees metadata. For indirect call sites having a small
+/// number of possible callees, !callees metadata indicates what those
+/// functions are.
+///
+/// The approach begins by inspecting the !callees metadata attached to a call
+/// site. There are a few cases to consider based on the number of possible
+/// callees. The first two are trivial. First, if the metadata indicates that
+/// there are zero possible callees, the call site must be dead, and so it is
+/// marked unreachable. Second, if the metadata indicates that there is only
+/// one possible callee, it is promoted to a direct call site regardless of
+/// whether inlining the callee is profitable or not. The last and most
+/// interesting case is when the metadata indicates that there is more than
+/// one possible callee.
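+///
+/// For illustration only (a sketch, not taken verbatim from the tests added
+/// below; "@callee" stands for whatever single function the metadata names),
+/// the single-callee case rewrites an indirect call such as
+///
+///   %r = call i64 %fp(i64 %x, i64 %y), !callees !{i64 (i64, i64)* @callee}
+///
+/// into a direct call with the metadata dropped:
+///
+///   %r = call i64 @callee(i64 %x, i64 %y)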
+///
+/// When a call site has more than one possible callee, the inliner is allowed
+/// to inline any of the callees according to its usual heuristics. This is
+/// done by iterating over the possible callees and computing their inline
+/// cost. If a profitable callee is found, the iteration terminates and the
+/// call site is immediately promoted inside an if-then-else structure, where
+/// the "then" block contains a direct call to the profitable callee, and the
+/// "else" block contains a clone of the original indirect call site. The
+/// callees metadata of the two call sites is updated to reflect the promotion.
+///
+/// The inline cost and call site clone are communicated back to the inliner.
+/// This allows the inliner to avoid recomputing the inline cost for the callee
+/// that was just promoted, and it allows it to add the new indirect call site
+/// to its work list to be processed once again. The new indirect call site
+/// will have one less possible callee. Processing the call sites in this way
+/// effectively inlines all the profitable callees of the original indirect
+/// call site. Callees for which the inline cost is computed but found to be
+/// unprofitable to inline are cached so that they will not be analyzed more
+/// than once.
+///
+/// The function returns true if the given call site was promoted. If the
+/// inline cost of the resulting direct call site was computed, it is stored in
+/// \p OIC. If promotion was performed by versioning the call site, the
+/// instruction clone is stored in \p Inst. The inliner should add this call
+/// site to its work list. Lastly, the callees for which the inline cost is
+/// computed are stored in \p Visited.
+static bool
+tryToPromoteCallSite(CallSite CS,
+                     function_ref<InlineCost(CallSite CS)> GetInlineCost,
+                     OptimizationRemarkEmitter &ORE, Optional<InlineCost> &OIC,
+                     Instruction *&Inst, SmallPtrSetImpl<Function *> &Visited) {
+  assert(!CS.getCalledFunction() && "Can only promote indirect calls");
+
+  // Collect the functions the call site might call. If the call site doesn't
+  // have !callees metadata, we don't know its possible callees, so there's
+  // nothing to do.
+  if (!CS.getInstruction()->getMetadata(LLVMContext::MD_callees))
+    return false;
+  SmallVector<Function *, 4> PossibleCallees = CS.getKnownCallees();
+
+  // If the metadata indicates that there are zero possible callees, the call
+  // site must be dead, so we mark it unreachable.
+  if (PossibleCallees.empty()) {
+    BasicBlock *BB = CS.getInstruction()->getParent();
+    BB->splitBasicBlock(CS.getInstruction());
+    BB->getTerminator()->eraseFromParent();
+    new UnreachableInst(BB->getContext(), BB);
+    return false;
+  }
+
+  // If the metadata indicates that there is only one possible callee, we
+  // attempt to promote the call site. If successful, we clear the call site's
+  // !callees metadata.
+  if (PossibleCallees.size() == 1) {
+    if (!canPromoteCallSite(CS, PossibleCallees[0]))
+      return false;
+    makeCallSiteDirect(CS, PossibleCallees[0]);
+    CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+    return true;
+  }
+
+  // If the metadata indicates that there is more than one possible callee, we
+  // attempt to promote the call site by versioning it.
+  for (unsigned I = 0; I < PossibleCallees.size(); ++I) {
+    Function *F = PossibleCallees[I];
+
+    // Skip this function if we already know it to be unprofitable, it's a
+    // declaration, or it can't be promoted.
+    if (Visited.count(F) || F->isDeclaration() || !canPromoteCallSite(CS, F))
+      continue;
+
+    Visited.insert(F);
+    Value *CalledValue = CS.getCalledValue();
+
+    // Compute the inline cost of the function. We do this by first
+    // speculatively modifying the call site so that it calls the desired
+    // function. We then compute the inline cost using the regular inline
+    // heuristic. Once the cost is computed, the call site is demoted to its
+    // original state. If inlining is not profitable, we move on to the next
+    // possible callee.
+    SmallVector<CastInst *, 4> Casts;
+    makeCallSiteDirect(CS, F, &Casts);
+    Optional<InlineCost> Cost = shouldInline(CS, GetInlineCost, ORE);
+    makeCallSiteIndirect(CS, CalledValue, Casts);
+    if (!Cost)
+      continue;
+
+    // Version the indirect call site. If 'CalledValue' is equal to 'F', the
+    // original call site will be executed, otherwise 'Inst' will be executed.
+    Inst = llvm::versionCallSite(CS, CmpInst::ICMP_EQ, CalledValue, F);
+
+    // The original call site is promoted so that it directly calls the desired
+    // function. The call site is now direct, so we clear its !callees
+    // metadata.
+    makeCallSiteDirect(CS, F, &Casts);
+    CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+
+    // The call site clone remains indirect, but we must remove the function
+    // from its !callees metadata. After the promotion, we know this call site
+    // cannot target it.
+    PossibleCallees.erase(PossibleCallees.begin() + I);
+    MDBuilder MDB(Inst->getContext());
+    Inst->setMetadata(LLVMContext::MD_callees,
+                      MDB.createCallees(PossibleCallees));
+
+    // Save the inline cost so the inliner doesn't have to recompute it. The
+    // function will be inlined into the promoted call site.
+    OIC.emplace(*Cost);
+    return true;
+  }
+
+  return false;
+}
+
 /// Return true if the specified inline history ID
 /// indicates an inline history that includes the specified function.
 static bool InlineHistoryIncludes(
@@ -541,6 +710,11 @@
   InlinedArrayAllocasTy InlinedArrayAllocas;
   InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
 
+  // A cache of callees per indirect call site for which we've computed an
+  // inline cost. Inlining these callees is not profitable, and we don't want
+  // to recompute the cost.
+  DenseMap<Instruction *, SmallPtrSet<Function *, 4>> UnprofitableCallees;
+
   // Now that we have all of the call sites, loop over them and inline them if
   // it looks profitable to do so.
   bool Changed = false;
@@ -554,10 +728,35 @@
     CallSite CS = CallSites[CSi].first;
 
     Function *Caller = CS.getCaller();
-    Function *Callee = CS.getCalledFunction();
+
+    // FIXME for new PM: because of the old PM we currently generate ORE and
+    // in turn BFI on demand. With the new PM, the ORE dependency should
+    // just become a regular analysis dependency.
+    OptimizationRemarkEmitter ORE(Caller);
+
+    // Try to inline through indirect call sites. If we have an indirect call
+    // site with a known set of possible callees (e.g., as specified by
+    // !callees metadata), promote the call site to directly call a function
+    // we know will be inlined. The inline cost, if computed during
+    // promotion, is saved to avoid recomputation. If the call site was
+    // versioned by the promotion, the new indirect call site is added to the
+    // work list.
+    Optional<InlineCost> OIC = None;
+    if (!CS.getCalledFunction()) {
+      Instruction *Inst = nullptr;
+      auto &Visited = UnprofitableCallees[CS.getInstruction()];
+      if (!tryToPromoteCallSite(CS, GetInlineCost, ORE, OIC, Inst, Visited))
+        continue;
+      updateLegacyCGAfterCSPromotion(CG, CS, Inst);
+      if (Inst) {
+        UnprofitableCallees[Inst] = Visited;
+        CallSites.push_back({CallSite(Inst), -1});
+      }
+    }
 
     // We can only inline direct calls to non-declarations.
-    if (!Callee || Callee->isDeclaration())
+    Function *Callee = CS.getCalledFunction();
+    if (Callee->isDeclaration())
       continue;
 
     Instruction *Instr = CS.getInstruction();
@@ -577,16 +776,14 @@
       continue;
     }
 
-    // FIXME for new PM: because of the old PM we currently generate ORE and
-    // in turn BFI on demand. With the new PM, the ORE dependency should
-    // just become a regular analysis dependency.
-    OptimizationRemarkEmitter ORE(Caller);
-
-    Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
-    // If the policy determines that we should inline this function,
-    // delete the call instead.
-    if (!OIC)
-      continue;
+    if (!OIC) {
+      Optional<InlineCost> Cost = shouldInline(CS, GetInlineCost, ORE);
+      // If the policy determines that we should inline this function,
+      // delete the call instead.
+      if (!Cost)
+        continue;
+      OIC.emplace(*Cost);
+    }
 
     // If this call site is dead and it is to a readonly function, we should
     // just delete the call instead of trying to inline it, regardless of
@@ -839,9 +1036,7 @@
       // Instead we should do an actual RPO walk of the function body.
      for (Instruction &I : instructions(N.getFunction()))
        if (auto CS = CallSite(&I))
-          if (Function *Callee = CS.getCalledFunction())
-            if (!Callee->isDeclaration())
-              Calls.push_back({CS, -1});
+          Calls.push_back({CS, -1});
  }
  if (Calls.empty())
    return PreservedAnalyses::all();
@@ -910,15 +1105,44 @@
                            PSI, &ORE);
    };
 
+    // A cache of callees per indirect call site for which we've computed an
+    // inline cost. Inlining these callees is not profitable, and we don't want
+    // to recompute the cost.
+    DenseMap<Instruction *, SmallPtrSet<Function *, 4>> UnprofitableCallees;
+
     // Now process as many calls as we have within this caller in the sequnece.
     // We bail out as soon as the caller has to change so we can update the
     // call graph and prepare the context of that new caller.
-    bool DidInline = false;
+    bool ChangedCallGraph = false;
     for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
       int InlineHistoryID;
       CallSite CS;
       std::tie(CS, InlineHistoryID) = Calls[i];
+
+      // Try to inline through indirect call sites. If we have an indirect call
+      // site with a known set of possible callees (e.g., as specified by
+      // !callees metadata), promote the call site to directly call a function
+      // we know will be inlined. The inline cost, if computed during
+      // promotion, is saved to avoid recomputation. If the call site was
+      // versioned by the promotion, the new indirect call site is added to the
+      // work list.
+      Optional<InlineCost> OIC = None;
+      if (!CS.getCalledFunction()) {
+        Instruction *Inst = nullptr;
+        auto &Visited = UnprofitableCallees[CS.getInstruction()];
+        if (!tryToPromoteCallSite(CS, GetInlineCost, ORE, OIC, Inst, Visited))
+          continue;
+        ChangedCallGraph = true;
+        if (Inst) {
+          UnprofitableCallees[Inst] = Visited;
+          Calls.push_back({CallSite(Inst), -1});
+        }
+      }
+
+      // We can only inline direct calls to non-declarations.
       Function &Callee = *CS.getCalledFunction();
+      if (Callee.isDeclaration())
+        continue;
 
       if (InlineHistoryID != -1 &&
           InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
@@ -937,10 +1161,13 @@
         continue;
       }
 
-      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
-      // Check whether we want to inline this callsite.
-      if (!OIC)
-        continue;
+      if (!OIC) {
+        Optional<InlineCost> Cost = shouldInline(CS, GetInlineCost, ORE);
+        // Check whether we want to inline this callsite.
+        if (!Cost)
+          continue;
+        OIC.emplace(*Cost);
+      }
 
       // Setup the data structure used to plumb customization into the
       // `InlineFunction` routine.
@@ -963,7 +1190,7 @@
         });
         continue;
       }
-      DidInline = true;
+      ChangedCallGraph = true;
       InlinedCallees.insert(&Callee);
 
       ORE.emit([&]() {
@@ -1026,7 +1253,7 @@
     // the outer loop.
     --i;
 
-    if (!DidInline)
+    if (!ChangedCallGraph)
       continue;
 
     Changed = true;
Index: lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- lib/Transforms/Utils/CMakeLists.txt
+++ lib/Transforms/Utils/CMakeLists.txt
@@ -5,6 +5,7 @@
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   BypassSlowDivision.cpp
+  CallSitePromotion.cpp
   CloneFunction.cpp
   CloneModule.cpp
   CodeExtractor.cpp
Index: lib/Transforms/Utils/CallSitePromotion.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Utils/CallSitePromotion.cpp
@@ -0,0 +1,226 @@
+//===- CallSitePromotion.cpp - Utilities for call promotion -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallSitePromotion.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "call-site-promotion"
+
+bool llvm::canPromoteCallSite(CallSite CS, Function *Callee) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // The callee's return value type must be bitcast compatible with the call
+  // site's type. It's not safe to promote the call site, otherwise.
+  if (!CastInst::castIsValid(Instruction::BitCast, CS.getInstruction(),
+                             Callee->getReturnType()))
+    return false;
+
+  // Additionally, the callee and call site must agree on the number of
+  // arguments.
+  if (CS.arg_size() != Callee->getFunctionType()->getNumParams())
+    return false;
+
+  // And the callee's formal argument types must be bitcast compatible with the
+  // corresponding actual argument types of the call site.
+  for (Use &U : CS.args()) {
+    unsigned ArgNo = CS.getArgumentNo(&U);
+    Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
+    if (!CastInst::castIsValid(Instruction::BitCast, U.get(), FormalTy))
+      return false;
+  }
+
+  return true;
+}
+
+void llvm::makeCallSiteDirect(CallSite CS, Function *Callee,
+                              SmallVectorImpl<CastInst *> *Casts) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // Set the called function of the call site to be the given callee.
+  CS.setCalledFunction(Callee);
+
+  // If the function type of the call site matches that of the callee, no
+  // additional work is required.
+  if (CS.getFunctionType() == Callee->getFunctionType())
+    return;
+
+  // Save the return types of the call site and callee.
+  Type *CallSiteRetTy = CS.getInstruction()->getType();
+  Type *CalleeRetTy = Callee->getReturnType();
+
+  // Change the function type of the call site to match that of the callee.
+  CS.mutateFunctionType(Callee->getFunctionType());
+
+  // Inspect the arguments of the call site. If an argument's type doesn't
+  // match the corresponding formal argument's type in the callee, bitcast it
+  // to the correct type.
+  for (Use &U : CS.args()) {
+    unsigned ArgNo = CS.getArgumentNo(&U);
+    Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
+    Type *ActualTy = U.get()->getType();
+    if (FormalTy != ActualTy) {
+      auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
+                                    CS.getInstruction());
+      CS.setArgument(ArgNo, Cast);
+      if (Casts)
+        Casts->push_back(Cast);
+    }
+  }
+
+  // If the return type of the call site matches that of the callee, no
+  // additional work is required.
+  if (CallSiteRetTy->isVoidTy() || CallSiteRetTy == CalleeRetTy)
+    return;
+
+  // Save the users of the calling instruction. These uses will be changed to
+  // use the bitcast after we create it.
+  SmallVector<User *, 16> UsersToUpdate;
+  for (User *U : CS.getInstruction()->users())
+    UsersToUpdate.push_back(U);
+
+  // Determine an appropriate location to create the bitcast for the return
+  // value. The location depends on if we have a call or invoke instruction.
+  Instruction *InsertBefore = nullptr;
+  if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
+    InsertBefore =
+        &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
+  else
+    InsertBefore = &*std::next(CS.getInstruction()->getIterator());
+
+  // Bitcast the return value to the correct type.
+  auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
+                                CallSiteRetTy, "", InsertBefore);
+  if (Casts)
+    Casts->push_back(Cast);
+
+  // Replace all the original uses of the calling instruction with the bitcast.
+  for (User *U : UsersToUpdate)
+    U->replaceUsesOfWith(CS.getInstruction(), Cast);
+}
+
+void llvm::makeCallSiteIndirect(CallSite CS, Value *CalledValue,
+                                ArrayRef<CastInst *> Casts) {
+  assert(CS.getCalledFunction() && "Only direct call sites can be demoted");
+
+  // For each cast instruction, replace all of its uses with its source
+  // operand.
+  for (CastInst *Cast : Casts) {
+    while (!Cast->user_empty())
+      Cast->user_back()->replaceUsesOfWith(Cast, Cast->getOperand(0));
+    Cast->eraseFromParent();
+  }
+
+  // Set the called value of the call site, and mutate its function type.
+  CS.setCalledFunction(CalledValue);
+  CS.mutateFunctionType(cast<FunctionType>(
+      cast<PointerType>(CalledValue->getType())->getElementType()));
+}
+
+Instruction *llvm::versionCallSite(CallSite CS, CmpInst::Predicate Pred,
+                                   Value *LHS, Value *RHS) {
+  IRBuilder<> Builder(CS.getInstruction());
+  Instruction *OrigInst = CS.getInstruction();
+  BasicBlock *OrigBlock = OrigInst->getParent();
+
+  // Create the compare. LHS and RHS must have the same type to be compared. If
+  // they don't, cast RHS to have LHS's type.
+  if (LHS->getType() != RHS->getType())
+    RHS = Builder.CreateBitCast(RHS, LHS->getType());
+  auto *Cond = Builder.CreateICmp(Pred, LHS, RHS);
+
+  // Create an if-then-else structure. The original instruction is moved into
+  // the "then" block, and a clone of the original instruction is placed in the
+  // "else" block.
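+  // As a rough sketch (illustrative only; the value and function names below
+  // are invented), versioning a call "%r = call i64 %fp(...)" on the condition
+  // "%fp == @callee" produces:
+  //
+  //     %cond = icmp eq i64 (i64, i64)* %fp, @callee
+  //     br i1 %cond, label %then, label %else
+  //   then:
+  //     %r0 = call i64 %fp(...)   ; the original call site, moved here
+  //     br label %merge
+  //   else:
+  //     %r1 = call i64 %fp(...)   ; the clone, returned to the caller
+  //     br label %merge
+  //   merge:
+  //     %r = phi i64 [ %r0, %then ], [ %r1, %else ]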
+  TerminatorInst *ThenTerm = nullptr;
+  TerminatorInst *ElseTerm = nullptr;
+  SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm,
+                                &ElseTerm);
+  BasicBlock *ThenBlock = ThenTerm->getParent();
+  BasicBlock *ElseBlock = ElseTerm->getParent();
+  BasicBlock *MergeBlock = OrigInst->getParent();
+  Instruction *NewInst = OrigInst->clone();
+  OrigInst->moveBefore(ThenTerm);
+  NewInst->insertBefore(ElseTerm);
+
+  // If the original call site is an invoke instruction, we have extra work to
+  // do since invoke instructions are terminating. We have to fix-up phi nodes
+  // in the invoke's normal and unwind destinations.
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+    auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+    // Invoke instructions are terminating, so we don't need the terminator
+    // instructions that were just created.
+    ThenTerm->eraseFromParent();
+    ElseTerm->eraseFromParent();
+
+    // Branch from the "merge" block to the original normal destination.
+    Builder.SetInsertPoint(MergeBlock);
+    Builder.CreateBr(OrigInvoke->getNormalDest());
+
+    // Fix-up phi nodes in the normal destination. Values coming from the
+    // original block will now be coming from the "merge" block.
+    for (auto &I : *OrigInvoke->getNormalDest()) {
+      auto *Phi = dyn_cast<PHINode>(&I);
+      if (!Phi)
+        break;
+      int Idx = Phi->getBasicBlockIndex(OrigBlock);
+      if (Idx == -1)
+        continue;
+      Phi->setIncomingBlock(Idx, MergeBlock);
+    }
+
+    // Now set the normal destinations of the invoke instructions to be the
+    // "merge" block.
+    OrigInvoke->setNormalDest(MergeBlock);
+    NewInvoke->setNormalDest(MergeBlock);
+
+    // Fix-up phi nodes in the unwind destination. Values coming from the
+    // original block will now be coming from either the "then" block or the
+    // "else" block.
+    for (auto &I : *OrigInvoke->getUnwindDest()) {
+      auto *Phi = dyn_cast<PHINode>(&I);
+      if (!Phi)
+        break;
+      int Idx = Phi->getBasicBlockIndex(OrigBlock);
+      if (Idx == -1)
+        continue;
+      auto *V = Phi->getIncomingValue(Idx);
+      Phi->setIncomingBlock(Idx, ThenBlock);
+      Phi->addIncoming(V, ElseBlock);
+    }
+  }
+
+  // If the callee returns a value, we now have to merge the value of the
+  // original instruction and its clone. Create a new phi node at the front of
+  // the "merge" block, and replace uses of the original instruction with this
+  // phi.
+  if (!OrigInst->getType()->isVoidTy()) {
+    Builder.SetInsertPoint(&MergeBlock->front());
+    PHINode *Phi = Builder.CreatePHI(NewInst->getType(), 0);
+    SmallVector<User *, 16> UsersToUpdate;
+    for (User *U : OrigInst->users())
+      UsersToUpdate.push_back(U);
+    for (User *U : UsersToUpdate)
+      U->replaceUsesOfWith(OrigInst, Phi);
+    Phi->addIncoming(OrigInst, ThenBlock);
+    Phi->addIncoming(NewInst, ElseBlock);
+  }
+  return NewInst;
+}
+
+#undef DEBUG_TYPE
Index: test/Other/new-pm-defaults.ll
===================================================================
--- test/Other/new-pm-defaults.ll
+++ test/Other/new-pm-defaults.ll
@@ -101,8 +101,8 @@
 ; CHECK-O-NEXT: Starting CGSCC pass manager run.
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
-; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
+; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
Index: test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- test/Other/new-pm-thinlto-defaults.ll
+++ test/Other/new-pm-thinlto-defaults.ll
@@ -96,8 +96,8 @@
 ; CHECK-O-NEXT: Starting CGSCC pass manager run.
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
-; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
+; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
Index: test/Transforms/Inline/callees-metadata.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/callees-metadata.ll
@@ -0,0 +1,195 @@
+; RUN: opt < %s -inline -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline,function(simplify-cfg))' -S | FileCheck %s
+
+; A call instruction with empty !callees metadata. This is undefined behavior,
+; so the call is marked unreachable.
+;
+; CHECK-LABEL: @call.0.0(
+; CHECK-NEXT: unreachable
+define void @call.0.0(void ()* %fp) {
+  call void %fp(), !callees !{}
+  ret void
+}
+
+; A call instruction with two possible callees that can both be inlined.
+;
+; CHECK-LABEL: @call.2.2(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 %x, i64 %y
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+define i64 @call.2.2(i64 %x, i64 %y, i64 (i64, i64)* %fp) {
+  %tmp0 = call i64 %fp(i64 %x, i64 %y),
+    !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_def_1}
+  ret i64 %tmp0
+}
+
+; A call instruction with three possible callees, only one of which can be
+; inlined. Since more than one possible callee remains after promotion, the
+; cloned call is indirect.
+;
+; CHECK-LABEL: @call.3.1(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0
+; CHECK-NEXT: br i1 [[TMP0]], label %[[MERGE:.*]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 %fp(i64 %x, i64 %y), !callees
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], %[[ELSE]] ], [ %x, {{.*}} ]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+define i64 @call.3.1(i64 %x, i64 %y, i64 (i64, i64)* %fp) {
+  %tmp0 = call i64 %fp(i64 %x, i64 %y),
+    !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_dec_1,
+               i64 (i64, i64)* @select_dec_0}
+  ret i64 %tmp0
+}
+
+; A call instruction with two possible callees, only one of which can be
+; inlined. Since only one possible callee remains after promotion, the cloned
+; call is promoted as well.
+;
+; CHECK-LABEL: @call.2.1(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 (i64, i64)* %fp, @select_def_0
+; CHECK-NEXT: br i1 [[TMP0]], label %[[MERGE:.*]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @select_dec_1(i64 %x, i64 %y)
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], %[[ELSE]] ], [ %x, {{.*}} ]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+define i64 @call.2.1(i64 %x, i64 %y, i64 (i64, i64)* %fp) {
+  %tmp0 = call i64 %fp(i64 %x, i64 %y),
+    !callees !{i64 (i64, i64)* @select_def_0, i64 (i64, i64)* @select_dec_1}
+  ret i64 %tmp0
+}
+
+; A call instruction with two possible callees, only one of which can be
+; inlined. Since only one possible callee remains after promotion, the cloned
+; call is promoted as well. Bitcasts must be inserted for the promoted call's
+; arguments and return value since the callee's function type doesn't match
+; that of the function pointer.
+;
+; CHECK-LABEL: @call.2.1.bitcast(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32* (i32**)* %fp, bitcast (i64* (i64**)* @load_def to i32* (i32**)*)
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** %x to i64**
+; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]]
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64* @load_dec(i64** [[TMP1]])
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64* [ [[TMP3]], %[[ELSE]] ], [ [[TMP2]], %[[THEN]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to i32*
+; CHECK-NEXT: ret i32* [[TMP5]]
+;
+define i32* @call.2.1.bitcast(i32** %x, i32* (i32**)* %fp) {
+  %tmp0 = call i32* %fp(i32** %x),
+    !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec}
+  ret i32* %tmp0
+}
+
+; An invoke instruction with two possible callees, only one of which can be
+; inlined. Since only one possible callee remains after promotion, the cloned
+; invoke is promoted as well. The normal destination has a phi node that must
+; be fixed.
+;
+; CHECK-LABEL: @invoke.2.1(
+; CHECK: br i1 undef, label %[[TRY:.*]], label %[[NORMAL:.*]]
+; CHECK: [[TRY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64* (i64**)* %op, @load_def
+; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** %x
+; CHECK-NEXT: br label %[[NORMAL]]
+; CHECK: [[ELSE]]:
+; CHECK: [[TMP2:%.*]] = invoke i64* @load_dec(i64** %x)
+; CHECK-NEXT: to label %[[NORMAL]] unwind label %[[EXCEPT:.*]]
+; CHECK: [[NORMAL]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i64* [ null, %entry ], [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
+; CHECK-NEXT: ret i64* [[TMP3]]
+; CHECK: [[EXCEPT]]:
+; CHECK-NEXT: [[TMP4:%.*]] = landingpad { i8*, i64 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: ret i64* null
+;
+define i64* @invoke.2.1(i64** %x, i64* (i64**)* %op) personality i64 (...)* null {
+entry:
+  br i1 undef, label %try, label %normal
+try:
+  %tmp0 = invoke i64* %op(i64 **%x)
+    to label %normal unwind label %except,
+    !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec}
+normal:
+  %tmp1 = phi i64* [ %tmp0, %try ], [ null, %entry ]
+  ret i64* %tmp1
+except:
+  %tmp2 = landingpad {i8*, i64}
+    cleanup
+  ret i64* null
+}
+
+; An invoke instruction with two possible callees, only one of which can be
+; inlined. Since only one possible callee remains after promotion, the cloned
+; invoke is promoted as well. The normal destination has a phi node that must
+; be fixed. Additionally, bitcasts must be inserted for the promoted invoke's
+; arguments and return value since the callee's function type doesn't match
+; that of the function pointer.
+;
+; CHECK-LABEL: @invoke.2.1.bitcast(
+; CHECK: br i1 undef, label %[[TRY:.*]], label %[[NORMAL:.*]]
+; CHECK: [[TRY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32* (i32**)* %op, bitcast (i64* (i64**)* @load_def to i32* (i32**)*)
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** %x to i64**
+; CHECK-NEXT: br i1 [[TMP0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]]
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK: [[TMP3:%.*]] = invoke i64* @load_dec(i64** [[TMP1]])
+; CHECK-NEXT: to label %[[MERGE]] unwind label %[[EXCEPT:.*]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64* [ [[TMP2]], %[[THEN]] ], [ [[TMP3]], %[[ELSE]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to i32*
+; CHECK-NEXT: br label %[[NORMAL]]
+; CHECK: [[NORMAL]]:
+; CHECK-NEXT: [[TMP6:%.*]] = phi i32* [ [[TMP5]], %[[MERGE]] ], [ null, %entry ]
+; CHECK-NEXT: ret i32* [[TMP6]]
+; CHECK: [[EXCEPT]]:
+; CHECK-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: ret i32* null
+;
+define i32* @invoke.2.1.bitcast(i32** %x, i32* (i32**)* %op) personality i32 (...)* null {
+entry:
+  br i1 undef, label %try, label %normal
+try:
+  %tmp0 = invoke i32* %op(i32 **%x)
+    to label %normal unwind label %except,
+    !callees !{i64* (i64**)* @load_def, i64* (i64**)* @load_dec}
+normal:
+  %tmp1 = phi i32* [ %tmp0, %try ], [ null, %entry ]
+  ret i32* %tmp1
+except:
+  %tmp2 = landingpad {i8*, i32}
+    cleanup
+  ret i32* null
+}
+
+; The function declarations and definitions below are used in the above tests
+; and are not interesting. The !callees metadata that references the functions
+; is expanded and attached to the call sites.
+;
+declare i64 @select_dec_0(i64 %x, i64 %y)
+declare i64 @select_dec_1(i64 %x, i64 %y)
+declare i64* @load_dec(i64** %x)
+
+define i64 @select_def_0(i64 %x, i64 %y) { ret i64 %x }
+define i64 @select_def_1(i64 %x, i64 %y) { ret i64 %y }
+define i64* @load_def(i64** %x) {
+  %tmp0 = load i64*, i64** %x
+  ret i64* %tmp0
+}
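For reference, here is a minimal usage sketch of the utilities introduced by the patch (an editorial illustration, not part of the patch itself). The helper name promoteSpeculatively is invented; it assumes an indirect CallSite CS and a Function *F taken from the call site's !callees metadata, and it mirrors the order of operations used by tryToPromoteCallSite in Inliner.cpp above.

// Illustrative sketch only; uses only the API declared in CallSitePromotion.h.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/CallSitePromotion.h"
using namespace llvm;

static void promoteSpeculatively(CallSite CS, Function *F) {
  if (!canPromoteCallSite(CS, F))
    return;
  Value *CalledValue = CS.getCalledValue();

  // Speculatively promote the call site (e.g., to compute an inline cost),
  // then restore it, erasing any compensating bitcasts that were created.
  SmallVector<CastInst *, 4> Casts;
  makeCallSiteDirect(CS, F, &Casts);
  // ... inspect the now-direct call site here ...
  makeCallSiteIndirect(CS, CalledValue, Casts);

  // Version the call site on a run-time test of the called value, and promote
  // the copy guarded by the test. The returned clone remains indirect.
  Instruction *Clone = versionCallSite(CS, CmpInst::ICMP_EQ, CalledValue, F);
  makeCallSiteDirect(CS, F, &Casts);
  (void)Clone;
}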