Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -331,7 +331,7 @@ // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to // approximate both the size and runtime cost (Note that in the current // inline cost analysis, there is no clear distinction there either). - static int computeBBInlineCost(BasicBlock *BB); + static int computeBBInlineCost(BasicBlock *BB, bool IsEntryBB = false); std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); std::unique_ptr<FunctionOutliningMultiRegionInfo> @@ -831,7 +831,7 @@ // TODO: Ideally we should share Inliner's InlineCost Analysis code. // For now use a simplified version. The returned 'InlineCost' will be used // to esimate the size cost as well as runtime cost of the BB. -int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { +int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB, bool IsEntryBB) { int InlineCost = 0; const DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -852,6 +852,11 @@ break; } + // PHIs in the entry BB to the region will be moved outside the region by + // CodeExtractor, so skip them when computing the region cost. 
+ if (IsEntryBB && isa<PHINode>(I)) + continue; + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I); if (IntrInst) { if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || @@ -1185,7 +1190,7 @@ std::vector<BasicBlock *> ToExtract; ToExtract.push_back(ClonedOI->NonReturnBlock); OutlinedRegionCost += - PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock); + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock, true); for (BasicBlock &BB : *ClonedFunc) if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) { ToExtract.push_back(&BB); Index: test/Transforms/CodeExtractor/PartialInlineEntryPHICost.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/PartialInlineEntryPHICost.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -partial-inliner -S | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s + +; Check that we do not overcompute the outlined region cost, where the PHIs in +; the outlined region entry (BB4) are moved outside the region by CodeExtractor. + +define i32 @bar(i32 %arg) { +bb: + %tmp = icmp slt i32 %arg, 0 + br i1 %tmp, label %bb1, label %bb2 + +bb1: + br i1 undef, label %bb4, label %bb2 + +bb2: ; preds = %bb, %bb1 + br i1 undef, label %bb4, label %bb5 + +bb4: ; preds = %bb1, %bb2 + %xx1 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + %xx2 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + %xx3 = phi i32 [ 1, %bb1 ], [ 9, %bb2 ] + tail call void (...) @foo() #2 + br label %bb5 + +bb5: ; preds = %bb4, %bb2 + %tmp6 = phi i32 [ 1, %bb2 ], [ 9, %bb4 ] + ret i32 %tmp6 +} + +declare void @foo(...) + +define i32 @dummy_caller(i32 %arg) { +bb: +; CHECK-LABEL: @dummy_caller +; CHECK: br i1 +; CHECK: br i1 +; CHECK: call void @bar.1. + %tmp = tail call i32 @bar(i32 %arg) + ret i32 %tmp +}