Index: llvm/lib/Analysis/InlineOrder.cpp =================================================================== --- llvm/lib/Analysis/InlineOrder.cpp +++ llvm/lib/Analysis/InlineOrder.cpp @@ -22,7 +22,7 @@ #define DEBUG_TYPE "inline-order" -enum class InlinePriorityMode : int { Size, Cost, CostBenefit, ML }; +enum class InlinePriorityMode : int { Size, Cost, CostBenefit, ML, TopDown }; static cl::opt UseInlinePriority( "inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden, @@ -33,8 +33,9 @@ "Use inline cost priority."), clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit", "Use cost-benefit ratio."), - clEnumValN(InlinePriorityMode::ML, "ml", - "Use ML."))); + clEnumValN(InlinePriorityMode::ML, "ml", "Use ML."), + clEnumValN(InlinePriorityMode::TopDown, "top-down", + "Use callgraph top-down priority."))); static cl::opt ModuleInlinerTopPriorityThreshold( "moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0), @@ -279,6 +280,104 @@ const InlineParams &Params; }; +/// Top Down Inline Order +/// The order in which inlining decisions are made impacts the explorable +/// decision space for the inliner. +/// For example with: +/// A B +/// \ / +/// C +/// | +/// D +/// In this case, if we were to first make a decision on the C->D call site +/// we would lose the ability to make separate decisions for the A->C->D and +/// B->C->D call paths. And a final inlining graph like: +/// ACD B +/// | +/// C +/// | +/// D +/// Could not be generated since we have two different inlining decisions for CD. +/// With the top-down inliner, we can explore all possible inlining decisions. +class TopDownInlineOrder : public InlineOrder> { + using T = std::pair; + + // We keep track of how many places a function has been called. This is used + // to order call sites in a top-down fashion by selecting the call site with + // the least number of calls to its caller at each step. + + // Check which call sites caller has least calls. + bool hasLowerPriority(const CallBase *L, const CallBase *R) const { + int LeftCount = 0; + const auto LeftI = NodeCallCount.find(L->getCaller()); + if (LeftI != NodeCallCount.end()) + LeftCount = LeftI->second; + + int RightCount = 0; + const auto RightI = NodeCallCount.find(R->getCaller()); + if (RightI != NodeCallCount.end()) + RightCount = RightI->second; + + return LeftCount > RightCount; + } + +public: + TopDownInlineOrder() : NodeCallCount{} { + isLess = [&](const CallBase *L, const CallBase *R) { + return hasLowerPriority(L, R); + }; + } + + size_t size() override { return Heap.size(); } + + void push(const T &Elt) override { + CallBase *CB = Elt.first; + const int InlineHistoryID = Elt.second; + + // We keep track of how many places a function is called by other functions/ + // This count can be used to determine the top of the call graph by ordering + // the heap form least calls to most calls. + Function *Callee = CB->getCalledFunction(); + if (NodeCallCount.find(Callee) == NodeCallCount.end()) { + NodeCallCount[Callee] = 0; + } + NodeCallCount[Callee]++; + + Heap.push_back(CB); + std::push_heap(Heap.begin(), Heap.end(), isLess); + + InlineHistoryMap[CB] = InlineHistoryID; + } + + T pop() override { + assert(size() > 0); + + CallBase *CB = Heap.front(); + T Result = std::make_pair(CB, InlineHistoryMap[CB]); + InlineHistoryMap.erase(CB); + std::pop_heap(Heap.begin(), Heap.end(), isLess); + Heap.pop_back(); + NodeCallCount[CB->getCalledFunction()]--; + + return Result; + } + + void erase_if(function_ref Pred) override { + auto PredWrapper = [=](CallBase *CB) -> bool { + return Pred(std::make_pair(CB, 0)); + }; + llvm::erase_if(Heap, PredWrapper); + std::make_heap(Heap.begin(), Heap.end(), isLess); + } + +private: + SmallVector Heap; + std::function isLess; + DenseMap InlineHistoryMap; + // A map from a function to the number of call sites it is statically called from. + DenseMap NodeCallCount; +}; + } // namespace std::unique_ptr>> @@ -295,11 +394,14 @@ case InlinePriorityMode::CostBenefit: LLVM_DEBUG( dbgs() << " Current used priority: cost-benefit priority ---- \n"); - return std::make_unique>(FAM, Params); case InlinePriorityMode::ML: LLVM_DEBUG( dbgs() << " Current used priority: ML priority ---- \n"); return std::make_unique>(FAM, Params); + case InlinePriorityMode::TopDown: + LLVM_DEBUG( + dbgs() << " Current used priority: top-down priority ---- \n"); + return std::make_unique(); } return nullptr; } Index: llvm/test/Transforms/Inline/module-inliner-basic.ll =================================================================== --- llvm/test/Transforms/Inline/module-inliner-basic.ll +++ llvm/test/Transforms/Inline/module-inliner-basic.ll @@ -3,6 +3,7 @@ ; RUN: opt -passes=module-inline -inline-priority-mode=cost -S < %s | FileCheck %s ; RUN: opt -passes=module-inline -inline-priority-mode=cost-benefit -S < %s | FileCheck %s ; RUN: opt -passes=module-inline -inline-priority-mode=ml -S < %s | FileCheck %s +; RUN: opt -passes=module-inline -inline-priority-mode=top-down -S < %s | FileCheck %s define i32 @callee(i32 %a) { entry: Index: llvm/test/Transforms/Inline/top-down-inliner-multiple-heads.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/top-down-inliner-multiple-heads.ll @@ -0,0 +1,33 @@ +; RUN: opt -passes=module-inline -inline-priority-mode=top-down --debug -S < %s 2>&1 | FileCheck %s + +; Call graph: +; A->C<-B +; | +; v +; D +; | +; v +; E + +define void @A() { + call void @C() + ret void +} +define internal void @B() { + call void @C() + ret void +} +define internal void @C() { + call void @D() + ret void +} +define internal void @D() { + call void @E() + ret void +} +define internal void @E() { + ret void +} + +; Since we're inlining top-down A and B are the only valid callers +; CHECK-NOT: Analyzing call of {{C|D|E}}... (caller:{{C|D}}) Index: llvm/test/Transforms/Inline/top-down-inliner-recursion.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/top-down-inliner-recursion.ll @@ -0,0 +1,33 @@ +; RUN: opt -passes=module-inline -inline-priority-mode=top-down --debug -S < %s 2>&1 | FileCheck %s + +; Call graph: +; A +; | +; v +; B<-\ +; | | +; v | +; C--/ + +define void @A() { + call void @B(); + ret void +} +define internal void @B() { + call void @C() + ret void +} +define internal void @C() { + call void @B() + ret void +} + +; Check the top part of the call graph +; CHECK: Analyzing call of B... (caller:A) +; CHECK: Analyzing call of C... (caller:A) +; Failed recursive call +; CHECK: Inlining calls in: A +; The inlining order in the SCC may be arbitrary +; CHECK: Analyzing call of {{B|C}}... (caller:{{B|C}}) +; CHECK: Analyzing call of {{B|C}}... (caller:{{B|C}}) +; CHECK: Analyzing call of {{B|C}}... (caller:{{B|C}}) Index: llvm/test/Transforms/Inline/top-down-inliner-simple.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/top-down-inliner-simple.ll @@ -0,0 +1,34 @@ +; RUN: opt -passes=module-inline -inline-priority-mode=top-down --debug -S < %s 2>&1 | FileCheck %s + +; Call graph: +; /--A--\ +; | | +; v v +; B->D<-C +; | +; v +; E + +define void @A() { + call void @B() + call void @C() + ret void +} +define internal void @B() { + call void @D() + ret void +} +define internal void @C() { + call void @D() + ret void +} +define internal void @D() { + call void @E() + ret void +} +define internal void @E() { + ret void +} + +; Since we're inlining top-down A is the only valid caller +; CHECK-NOT: Analyzing call of {{B|C|D|E}}... (caller:{{B|C|D}})