Index: include/llvm/Transforms/IPO/InlinerPass.h
===================================================================
--- include/llvm/Transforms/IPO/InlinerPass.h
+++ include/llvm/Transforms/IPO/InlinerPass.h
@@ -59,6 +59,10 @@
   ///
   unsigned getInlineThreshold(CallSite CS) const;
 
+  // Find blocks post-dominated by an unreachable-terminated block in their
+  // normal execution paths.
+  void collectBlocksLeadingToUnreachable(Function *F);
+
   /// getInlineCost - This method must be implemented by the subclass to
   /// determine the cost of inlining the specified call site. If the cost
   /// returned is greater than the current inline threshold, the call site is
@@ -81,6 +85,8 @@
   // InsertLifetime - Insert @llvm.lifetime intrinsics.
   bool InsertLifetime;
 
+  SmallPtrSet<BasicBlock *, 16> BlocksLeadingToUnreachable;
+
   /// shouldInline - Return true if the inliner should attempt to
   /// inline at the given CallSite.
   bool shouldInline(CallSite CS);
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -62,6 +63,10 @@
 ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
               cl::desc("Threshold for inlining functions with cold attribute"));
 
+static cl::opt<int> UnreachableThreshold(
+    "inlineunreachable-threshold", cl::Hidden, cl::init(0),
+    cl::desc("Threshold for inlining call sites leading to unreachable."));
+
 // Threshold to use when optsize is specified (and there is no -inline-limit).
 const int OptSizeThreshold = 75;
 
@@ -274,10 +279,54 @@
   return true;
 }
 
+// Until analysis passes such as BPI and BFI are available in the inliner,
+// this function identifies blocks leading to an unreachable-terminated block.
+// Unlike BranchProbabilityInfo::calcUnreachableHeuristics(), this function
+// considers blocks post-dominated by unreachable only in their normal
+// execution path (i.e., unwind edges of InvokeInsts are not considered).
+// FIXME: When the inliner is hooked up with BPI, we may need to check the
+// edge weights to all successors to identify blocks post-dominated by blocks
+// that are very unlikely to be taken.
+void Inliner::collectBlocksLeadingToUnreachable(Function *F) {
+  // Walk the basic blocks in post-order so that we can build up state about
+  // the successors of a block iteratively.
+  for (auto BB : post_order(&F->getEntryBlock())) {
+    TerminatorInst *TI = BB->getTerminator();
+    if (TI->getNumSuccessors() == 0 && isa<UnreachableInst>(TI)) {
+      BlocksLeadingToUnreachable.insert(BB);
+      continue;
+    }
+
+    // Don't follow the unwind edges of invokes: an unwind edge is extremely
+    // unlikely to be taken, just like an edge leading to an unreachable.
+    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      if (BlocksLeadingToUnreachable.count(II->getNormalDest()))
+        BlocksLeadingToUnreachable.insert(BB);
+      continue;
+    }
+
+    unsigned NumUnreachableEdges = 0;
+    for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+      if (BlocksLeadingToUnreachable.count(*I))
+        ++NumUnreachableEdges;
+
+    // If all successors lead to unreachable, this block does too.
+    if (NumUnreachableEdges && NumUnreachableEdges == TI->getNumSuccessors())
+      BlocksLeadingToUnreachable.insert(BB);
+  }
+}
+
 unsigned Inliner::getInlineThreshold(CallSite CS) const {
   int Threshold = InlineThreshold; // -inline-threshold or else selected by
                                    // overall opt level
 
+  // If the normal execution path leads to an unreachable-terminated block,
+  // there is little point in inlining the call site unless the inline cost
+  // is obviously low.
+  if (BlocksLeadingToUnreachable.count(CS.getInstruction()->getParent()) &&
+      Threshold > UnreachableThreshold)
+    Threshold = UnreachableThreshold;
+
   // If -inline-threshold is not given, listen to the optsize attribute when it
   // would decrease the threshold.
   Function *Caller = CS.getCaller();
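Note: the marking logic in collectBlocksLeadingToUnreachable above is small enough to model outside of LLVM. The following is a minimal standalone C++ sketch of the same post-order marking over a toy CFG. It is illustrative only; the Block struct and both function names are hypothetical and are not part of this patch or of LLVM's API.

#include <unordered_set>
#include <vector>

// Toy CFG block. Succs holds all successor edges; InvokeNormalDest is
// non-null iff the block ends in an invoke, whose unwind successor is then
// deliberately ignored, as in the patch. EndsInUnreachable models a block
// terminated by an UnreachableInst.
struct Block {
  std::vector<Block *> Succs;
  Block *InvokeNormalDest = nullptr;
  bool EndsInUnreachable = false;
};

// Post-order DFS so that every successor is visited before its predecessors.
static void postOrder(Block *B, std::unordered_set<Block *> &Seen,
                      std::vector<Block *> &Out) {
  if (!Seen.insert(B).second)
    return;
  for (Block *S : B->Succs)
    postOrder(S, Seen, Out);
  Out.push_back(B);
}

// Same marking rules as Inliner::collectBlocksLeadingToUnreachable:
// 1. a block terminated by unreachable is marked;
// 2. an invoke block is marked iff its normal destination is marked;
// 3. any other block is marked iff it has successors and all of them are
//    marked.
std::unordered_set<Block *> collectLeadingToUnreachable(Block *Entry) {
  std::unordered_set<Block *> Seen, Marked;
  std::vector<Block *> PO;
  postOrder(Entry, Seen, PO);
  for (Block *B : PO) {
    if (B->EndsInUnreachable) {
      Marked.insert(B);
    } else if (B->InvokeNormalDest) {
      if (Marked.count(B->InvokeNormalDest))
        Marked.insert(B);
    } else if (!B->Succs.empty()) {
      bool AllMarked = true;
      for (Block *S : B->Succs)
        if (!Marked.count(S))
          AllMarked = false;
      if (AllMarked)
        Marked.insert(B);
    }
  }
  return Marked;
}

As in the patch, an invoke block consults only its normal destination, so a block whose sole non-exceptional continuation ends in unreachable is still marked even though its unwind path may continue normally.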
@@ -349,7 +398,7 @@
                      Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
-  
+
   // Try to detect the case where the current inlining candidate caller (call
   // it B) is a static or linkonce-ODR function and is an inlining candidate
   // elsewhere, and the current candidate callee (call it C) is large enough
@@ -470,11 +519,20 @@
   // index into the InlineHistory vector.
   SmallVector<std::pair<Function *, int>, 8> InlineHistory;
 
+  BlocksLeadingToUnreachable.clear();
+
   for (CallGraphNode *Node : SCC) {
     Function *F = Node->getFunction();
     if (!F) continue;
-    
-    for (BasicBlock &BB : *F)
+
+    bool HasCallSites = false;
+    bool HasUnreachableTerminatedBlock = false;
+    for (BasicBlock &BB : *F) {
+      if (!HasUnreachableTerminatedBlock) {
+        TerminatorInst *TI = BB.getTerminator();
+        if (TI->getNumSuccessors() == 0 && isa<UnreachableInst>(TI))
+          HasUnreachableTerminatedBlock = true;
+      }
       for (Instruction &I : BB) {
         CallSite CS(cast<Value>(&I));
         // If this isn't a call, or it is a call to an intrinsic, it can
@@ -490,7 +548,11 @@
           continue;
 
         CallSites.push_back(std::make_pair(CS, -1));
+        HasCallSites = true;
       }
+    }
+    if (HasCallSites && HasUnreachableTerminatedBlock)
+      collectBlocksLeadingToUnreachable(F);
   }
 
   DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
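Note: with the default -inlineunreachable-threshold of 0, a call site in a marked block is inlined only when the cost analysis finds it essentially free, which is what the tests below exercise with @smallFunction versus @simpleFunction. Below is a hypothetical distillation of how the lowering caps compose, assuming the cl::opt defaults above; pickThreshold and its parameters are illustrative, not LLVM API.

#include <algorithm>

// Hypothetical model of the threshold selection after this patch: each
// special case may only lower the base threshold, never raise it. The
// constants mirror the cl::opt defaults in the patch (0 for call sites
// leading to unreachable, 75 for optsize callers).
int pickThreshold(int Base, bool LeadsToUnreachable, bool CallerOptSize) {
  int Threshold = Base;                  // -inline-threshold / opt level
  if (LeadsToUnreachable)
    Threshold = std::min(Threshold, 0);  // -inlineunreachable-threshold
  if (CallerOptSize)
    Threshold = std::min(Threshold, 75); // OptSizeThreshold
  return Threshold;
}

The real Inliner::getInlineThreshold applies further attribute-based adjustments (for example, the cold-callee threshold visible in the context above); the sketch deliberately omits them.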
Index: test/Transforms/Inline/inline_unreachable.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline_unreachable.ll
@@ -0,0 +1,135 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+@a = global i32 4
+@_ZTIi = external global i8*
+
+; CHECK-LABEL: callSimpleFunction
+; CHECK: call i32 @simpleFunction
+define i32 @callSimpleFunction(i32 %idx, i32 %limit) {
+entry:
+  %cmp = icmp sge i32 %idx, %limit
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call i32 @simpleFunction(i32 %idx)
+  br label %invoke.cont
+
+invoke.cont:
+  unreachable
+
+if.end:
+  ret i32 %idx
+}
+
+; CHECK-LABEL: callSmallFunction
+; CHECK-NOT: call i32 @smallFunction
+define i32 @callSmallFunction(i32 %idx, i32 %limit) {
+entry:
+  %cmp = icmp sge i32 %idx, %limit
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call i32 @smallFunction(i32 %idx)
+  br label %invoke.cont
+
+invoke.cont:
+  unreachable
+
+if.end:
+  ret i32 %idx
+}
+
+; CHECK-LABEL: throwSimpleException
+; CHECK: invoke i32 @simpleFunction
+define i32 @throwSimpleException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %cmp = icmp sge i32 %idx, %limit
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %exception = call i8* @__cxa_allocate_exception(i64 1) #0
+  invoke i32 @simpleFunction(i32 %idx)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %if.then
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1
+  unreachable
+
+lpad:                                             ; preds = %if.then
+  %ll = landingpad { i8*, i32 }
+          cleanup
+  ret i32 %idx
+
+if.end:                                           ; preds = %entry
+  ret i32 %idx
+}
+
+; CHECK-LABEL: throwSmallException
+; CHECK-NOT: invoke i32 @smallFunction
+define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %cmp = icmp sge i32 %idx, %limit
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %exception = call i8* @__cxa_allocate_exception(i64 1) #0
+  invoke i32 @smallFunction(i32 %idx)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %if.then
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1
+  unreachable
+
+lpad:                                             ; preds = %if.then
+  %ll = landingpad { i8*, i32 }
+          cleanup
+  ret i32 %idx
+
+if.end:                                           ; preds = %entry
+  ret i32 %idx
+}
+
+define i32 @simpleFunction(i32 %a) #0 {
+entry:
+  %a1 = load volatile i32, i32* @a
+  %x1 = add i32 %a1, %a1
+  %a2 = load volatile i32, i32* @a
+  %x2 = add i32 %x1, %a2
+  %a3 = load volatile i32, i32* @a
+  %x3 = add i32 %x2, %a3
+  %a4 = load volatile i32, i32* @a
+  %x4 = add i32 %x3, %a4
+  %a5 = load volatile i32, i32* @a
+  %x5 = add i32 %x4, %a5
+  %a6 = load volatile i32, i32* @a
+  %x6 = add i32 %x5, %a6
+  %a7 = load volatile i32, i32* @a
+  %x7 = add i32 %x6, %a7
+  %a8 = load volatile i32, i32* @a
+  %x8 = add i32 %x7, %a8
+  %a9 = load volatile i32, i32* @a
+  %x9 = add i32 %x8, %a9
+  %a10 = load volatile i32, i32* @a
+  %x10 = add i32 %x9, %a10
+  %a11 = load volatile i32, i32* @a
+  %x11 = add i32 %x10, %a11
+  %a12 = load volatile i32, i32* @a
+  %x12 = add i32 %x11, %a12
+  %add = add i32 %x12, %a
+  ret i32 %add
+}
+
+define i32 @smallFunction(i32 %a) {
+entry:
+  %r = load volatile i32, i32* @a
+  ret i32 %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { noreturn }
+
+declare i8* @__cxa_allocate_exception(i64)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_throw(i8*, i8*, i8*)