Index: include/llvm/Analysis/InlineCost.h =================================================================== --- include/llvm/Analysis/InlineCost.h +++ include/llvm/Analysis/InlineCost.h @@ -20,6 +20,7 @@ namespace llvm { class AssumptionCacheTracker; +class BlockFrequencyInfo; class CallSite; class DataLayout; class Function; @@ -38,6 +39,21 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024; } +/// \brief Block frequency analysis for multiple functions. +/// This class mimics block frequency analysis on CGSCC level. Block frequency +/// info is computed on demand and cached unless they are invalidated. +class BlockFrequencyAnalysis { +private: + DenseMap BFM; + +public: + ~BlockFrequencyAnalysis(); + /// \brief Returns BlockFrequencyInfo for a function. + BlockFrequencyInfo *getBlockFrequencyInfo(Function *); + /// \brief Invalidates block freqquency info for a function. + void invalidateBlockFrequencyInfo(Function *); +}; + /// \brief Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or @@ -111,7 +127,8 @@ /// inlining the callsite. It is an expensive, heavyweight call. InlineCost getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, + BlockFrequencyAnalysis *BFA); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -120,7 +137,10 @@ // InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, + BlockFrequencyAnalysis *BFA); + +bool IsColdCallSite(CallSite CS, BlockFrequencyAnalysis *BFA); int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel); Index: include/llvm/Transforms/IPO/InlinerPass.h =================================================================== --- include/llvm/Transforms/IPO/InlinerPass.h +++ include/llvm/Transforms/IPO/InlinerPass.h @@ -26,6 +26,15 @@ class InlineCost; template class SmallPtrSet; +// Functor invoked when a block is cloned during inlining. +typedef std::function + BlockCloningFunctor; +// Functor invoked when a function is inlined inside the basic block +// containing the call. +typedef std::function FunctionCloningFunctor; +// Functor invoked when a function gets deleted during inlining. +typedef std::function FunctionDeletedFunctor; + /// Inliner - This class contains all of the helper code which is used to /// perform the inlining operations that do not depend on the policy. /// @@ -72,6 +81,16 @@ protected: AssumptionCacheTracker *ACT; + // The getXXXFunctor methods return a nullptr. Classes that inherit + // Inliner override them to update analysis information. + virtual BlockCloningFunctor getBlockCloningFunctor(CallSite &CS) { + return nullptr; + } + virtual FunctionCloningFunctor getFunctionCloningFunctor() { return nullptr; } + virtual FunctionDeletedFunctor getFunctionDeletedFunctor() { return nullptr; } + + BlockCloningFunctor BCF; + FunctionCloningFunctor FCF; }; } // End llvm namespace Index: include/llvm/Transforms/Utils/Cloning.h =================================================================== --- include/llvm/Transforms/Utils/Cloning.h +++ include/llvm/Transforms/Utils/Cloning.h @@ -48,6 +48,9 @@ class AssumptionCacheTracker; class DominatorTree; +typedef std::function + BlockCloningFunctor; + /// Return an exact copy of the specified module /// std::unique_ptr CloneModule(const Module *M); @@ -152,7 +155,8 @@ ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = nullptr); + ClonedCodeInfo *CodeInfo = nullptr, + BlockCloningFunctor Ftor = nullptr); /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The @@ -167,23 +171,27 @@ /// void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl &Returns, + SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr, - Instruction *TheCall = nullptr); + Instruction *TheCall = nullptr, + BlockCloningFunctor Ftor = nullptr); /// InlineFunctionInfo - This class captures the data input to the /// InlineFunction call, and records the auxiliary results produced by it. class InlineFunctionInfo { public: explicit InlineFunctionInfo(CallGraph *cg = nullptr, - AssumptionCacheTracker *ACT = nullptr) - : CG(cg), ACT(ACT) {} + AssumptionCacheTracker *ACT = nullptr, + BlockCloningFunctor Ftor = nullptr) + : CG(cg), ACT(ACT), Ftor(Ftor) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; AssumptionCacheTracker *ACT; + // Functor that is invoked when a block is cloned into the new function. + BlockCloningFunctor Ftor; /// StaticAllocas - InlineFunction fills this in with all static allocas that /// get copied into the caller. Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -18,13 +18,18 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstVisitor.h" @@ -65,8 +70,19 @@ "inlinecold-threshold", cl::Hidden, cl::init(225), cl::desc("Threshold for inlining functions with cold attribute")); +static cl::opt HotCallSitePercent( + "hot-callsite-percent", cl::Hidden, cl::init(2), cl::ZeroOrMore, + cl::desc("Percentage hotness required for a callsite to be hot. ")); + +static cl::opt + HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(650), + cl::ZeroOrMore, + cl::desc("Threshold for hot callsites ")); + namespace { +const int ColdCallSiteCountThreshold = 10; + class CallAnalyzer : public InstVisitor { typedef InstVisitor Base; friend class InstVisitor; @@ -85,6 +101,7 @@ // easily cacheable. Instead, use the cover function paramHasAttr. CallSite CandidateCS; + BlockFrequencyAnalysis *BFA; int Threshold; int Cost; @@ -194,17 +211,19 @@ public: CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - Function &Callee, int Threshold, CallSite CSArg) - : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold), - Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + Function &Callee, int Threshold, CallSite CSArg, + BlockFrequencyAnalysis *BFA) + : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA), + Threshold(Threshold), Cost(0), IsCallerRecursive(false), + IsRecursiveCall(false), ExposesReturnsTwice(false), + HasDynamicAlloca(false), ContainsNoDuplicateCall(false), + HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -597,6 +616,22 @@ FunctionCount = Callee.getEntryCount().getValue(); MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); } + Instruction *Call = CS.getInstruction(); + BasicBlock *CallBB = Call->getParent(); + BlockFrequencyInfo *CallerBFI = + BFA->getBlockFrequencyInfo(CallBB->getParent()); + Optional CallerEntryCount = Caller->getEntryCount(); + if (HasPGOCounts) { + uint64_t CallerEntryFreq = CallerBFI->getEntryFreq(); + uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); + if (Caller->getEntryCount()) { + uint64_t CallerEntryCount = Caller->getEntryCount().getValue(); + uint64_t CallSiteCount = + CallerEntryCount * CallSiteFreq / CallerEntryFreq; + if (CallSiteCount * 100 >= HotCallSitePercent * MaxFunctionCount) + Threshold = HotCallSiteThreshold; + } + } // Listen to the inlinehint attribute or profile based hotness information // when it would increase the threshold and the caller does not need to @@ -913,7 +948,8 @@ // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); + CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS, + BFA); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1423,9 +1459,31 @@ InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { + AssumptionCacheTracker *ACT, + BlockFrequencyAnalysis *BFA) { return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI, - ACT); + ACT, BFA); +} + +bool llvm::IsColdCallSite(CallSite CS, BlockFrequencyAnalysis *BFA) { + Function *Caller = CS.getCaller(); + BasicBlock *CallSiteBlock = CS.getInstruction()->getParent(); + BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(Caller); + uint64_t CallSiteFreq = BFI->getBlockFreq(CallSiteBlock).getFrequency(); + // If the callsite has a frequency of 0, the call is cold. + // FIXME: We could perhaps bump it up to a value > 0 + if (CallSiteFreq == 0) + return true; + Optional CallerCount = Caller->getEntryCount(); + if (!CallerCount.hasValue()) + return false; + // If we use PGO, we can use the callsite count to estimate the coldness + uint64_t CallerEntryFreq = BFI->getEntryFreq(); + uint64_t CallSiteCount = + CallerCount.getValue() * CallSiteFreq / CallerEntryFreq; + if (CallSiteCount < ColdCallSiteCountThreshold) + return true; + return false; } int llvm::computeThresholdFromOptLevels(unsigned OptLevel, @@ -1444,7 +1502,8 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { + AssumptionCacheTracker *ACT, + BlockFrequencyAnalysis *BFA) { // Cannot inline indirect calls. if (!Callee) @@ -1477,7 +1536,7 @@ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS); + CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1525,3 +1584,38 @@ return true; } + +////// Block frequency analysis for functions in the SCC +BlockFrequencyAnalysis::~BlockFrequencyAnalysis() { + for (auto &Entry : BFM) { + delete Entry.second; + } +} + +/// \brief Get BlockFrequencyInfo for a function. +BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) { + auto Iter = BFM.find(F); + if (Iter != BFM.end()) + return Iter->second; + // We need to create a BlockFrequencyInfo object for F and store it. + // + // First create a dominator tree + DominatorTree DT; + DT.recalculate(*F); + // Then get a LoopInfo object + LoopInfo LI; + LI.analyze(DT); + // Then compute BranchProbabilityInfo + BranchProbabilityInfo BPI; + BPI.calculate(*F, LI); + // Finally compute BlockFrequencyInfo + BlockFrequencyInfo *BFI = new BlockFrequencyInfo; + BFI->calculate(*F, BPI, LI); + BFM[F] = BFI; + return BFI; +} + +/// \brief Invalidate BlockFrequencyInfo for a function. +void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) { + BFM.erase(F); +} Index: lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- lib/Transforms/IPO/InlineSimple.cpp +++ lib/Transforms/IPO/InlineSimple.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -23,10 +24,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; +using namespace std::placeholders; #define DEBUG_TYPE "inline" @@ -43,14 +46,42 @@ // --inline-threshold flag, // user specified value. int DefaultThreshold; + std::unique_ptr BFA; + void InvalidateAnalysis(CallGraphSCC &SCC); + void InvalidateFunctionAnalysis(Function *F); + void UpdateEntryCount(BasicBlock *CallBB, Function *Callee); + void UpdateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, + const BasicBlock *NewBB); + +protected: + BlockCloningFunctor getBlockCloningFunctor(CallSite &CS) override { + BlockCloningFunctor BCF = + std::bind(&SimpleInliner::UpdateBlockFreq, this, CS, _1, _2); + return BCF; + } + + FunctionCloningFunctor getFunctionCloningFunctor() override { + FunctionCloningFunctor FCF = + std::bind(&SimpleInliner::UpdateEntryCount, this, _1, _2); + return FCF; + } + + FunctionDeletedFunctor getFunctionDeletedFunctor() override { + FunctionDeletedFunctor FDF = + std::bind(&SimpleInliner::InvalidateFunctionAnalysis, this, _1); + return FDF; + } public: SimpleInliner() - : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) { + : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()), + BFA(new BlockFrequencyAnalysis()) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } - SimpleInliner(int Threshold) : Inliner(ID), DefaultThreshold(Threshold) { + SimpleInliner(int Threshold) + : Inliner(ID), DefaultThreshold(Threshold), + BFA(new BlockFrequencyAnalysis()) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } @@ -59,7 +90,7 @@ InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT); + return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, BFA.get()); } bool runOnSCC(CallGraphSCC &SCC) override; @@ -95,10 +126,71 @@ bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { TTIWP = &getAnalysis(); - return Inliner::runOnSCC(SCC); + bool RetVal = Inliner::runOnSCC(SCC); + InvalidateAnalysis(SCC); + return RetVal; } void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); Inliner::getAnalysisUsage(AU); } + +/// \brief Update the frequency of a block that is cloned into the caller. +/// This is invoked when OrigBB from the callee is cloned into NewBB in the +/// caller. +void SimpleInliner::UpdateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, + const BasicBlock *NewBB) { + Instruction *Call = CS.getInstruction(); + BasicBlock *CallBB = Call->getParent(); + BlockFrequencyInfo *CalleeBFI = + BFA->getBlockFrequencyInfo(CS.getCalledFunction()); + BlockFrequencyInfo *CallerBFI = + BFA->getBlockFrequencyInfo(CallBB->getParent()); + // Find the number of times OrigBB is executed per invocation of the callee + // and multiply by the number of times callee is executed in the caller. + // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq. + uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); + uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq(); + BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB); + CallerBFI->setBlockFreq(NewBB, (double)(Freq.getFrequency()) / + CalleeEntryFreq * CallSiteFreq); +} + +/// \brief Update Callee's entry count after it got inlined at a callsite. +void SimpleInliner::UpdateEntryCount(BasicBlock *CallBB, Function *Callee) { + // If the callee has a original count of N, and the estimated count of + // callsite is M, the new callee count is set to N - M. M is estimated from + // the caller's entry count, its entry block frequency and the block frequency + // of the callsite. + Function *Caller = CallBB->getParent(); + Optional CalleeCount = Callee->getEntryCount(); + Optional CallerCount = Caller->getEntryCount(); + if (!CalleeCount.hasValue() || !CallerCount.hasValue()) + return; + BlockFrequencyInfo *CallerBFI = + BFA->getBlockFrequencyInfo(CallBB->getParent()); + uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); + uint64_t CallerEntryFreq = CallerBFI->getEntryFreq(); + uint64_t CallSiteCount = CallerCount.getValue() * + (CallSiteFreq + CallerEntryFreq / 2) / + CallerEntryFreq; + // Since CallSiteCount is an estimate, it could exceed the original callee + // count and has to be set to 0. + if (CallSiteCount > CalleeCount.getValue()) + Callee->setEntryCount(0); + else + Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount); +} + +void SimpleInliner::InvalidateAnalysis(CallGraphSCC &SCC) { + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + InvalidateFunctionAnalysis(F); + } +} +void SimpleInliner::InvalidateFunctionAnalysis(Function *F) { + if (F) { + BFA->invalidateBlockFrequencyInfo(F); + } +} Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -258,7 +258,7 @@ Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } - + // Try to detect the case where the current inlining candidate caller (call // it B) is a static or linkonce-ODR function and is an inlining candidate // elsewhere, and the current candidate callee (call it C) is large enough @@ -418,7 +418,6 @@ InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -447,6 +446,7 @@ CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { + BasicBlock *CallSiteBlock = CS.getInstruction()->getParent(); // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; @@ -474,6 +474,7 @@ Caller->getName())); continue; } + InlineFunctionInfo InlineInfo(&CG, ACT, getBlockCloningFunctor(CS)); // Attempt to inline the function. if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, @@ -484,6 +485,10 @@ Caller->getName())); continue; } + FunctionCloningFunctor FCF = getFunctionCloningFunctor(); + if (FCF) + FCF(CallSiteBlock, Callee); + ++NumInlined; // Report the inline decision. @@ -522,7 +527,11 @@ CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); + FunctionDeletedFunctor FDF = getFunctionDeletedFunctor(); + Function *F = CG.removeFunctionFromModule(CalleeNode); + if (FDF) + FDF(F); + delete F; ++NumDeleted; } @@ -649,8 +658,12 @@ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); + FunctionDeletedFunctor FDF = getFunctionDeletedFunctor(); for (CallGraphNode *CGN : FunctionsToRemove) { - delete CG.removeFunctionFromModule(CGN); + Function *F = CG.removeFunctionFromModule(CGN); + if (FDF) + FDF(F); + delete F; ++NumDeleted; } return true; Index: lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- lib/Transforms/Utils/CloneFunction.cpp +++ lib/Transforms/Utils/CloneFunction.cpp @@ -277,9 +277,10 @@ /// The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector &ToClone); + std::vector &ToClone, + BlockCloningFunctor Ftor = nullptr); }; } @@ -287,7 +288,8 @@ /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector &ToClone){ + std::vector &ToClone, + BlockCloningFunctor Ftor) { WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? @@ -424,18 +426,19 @@ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } + // Call Ftor to tell BB has been cloned to NewBB + if (Ftor) + Ftor(BB, NewBB); } /// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. -void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, - const Instruction *StartingInst, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo) { +void llvm::CloneAndPruneIntoFromInst( + Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, + ValueToValueMapTy &VMap, bool ModuleLevelChanges, + SmallVectorImpl &Returns, const char *NameSuffix, + ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; @@ -461,11 +464,11 @@ // Clone the entry block, and anything recursively reachable from it. std::vector CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, BB->begin(), CloneWorklist); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor); } // Loop over all of the basic blocks in the old function. If the block was @@ -666,15 +669,14 @@ /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - Instruction *TheCall) { +void llvm::CloneAndPruneFunctionInto( + Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall, + BlockCloningFunctor Ftor) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo); + ModuleLevelChanges, Returns, NameSuffix, CodeInfo, + Ftor); } /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1319,7 +1319,7 @@ // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - + const Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! @@ -1473,7 +1473,7 @@ // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall); + &InlinedFunctionInfo, TheCall, IFI.Ftor); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock;