Index: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
+++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
@@ -38,6 +38,10 @@
     DisablePartialInlining("disable-partial-inlining", cl::init(false),
                            cl::Hidden, cl::desc("Disable partial ininling"));
 
+static cl::opt<unsigned> MaxNumInlineBlocks(
+    "max-num-inline-blocks", cl::init(5), cl::Hidden,
+    cl::desc("Max Number of Blocks  To be Partially Inlined"));
+
 // Command line option to set the maximum number of partial inlining allowed
 // for the module. The default value of -1 means no limit.
 static cl::opt<int> MaxNumPartialInlining(
@@ -45,11 +49,33 @@
     cl::desc("Max number of partial inlining. The default is unlimited"));
 
 namespace {
+
+struct FunctionOutliningInfo {
+  FunctionOutliningInfo()
+      : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
+        ReturnBlockPreds() {}
+  // Returns the number of blocks to be inlined: all blocks in Entries
+  // plus the one return block.
+  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
+
+  // A set of blocks including the function entry that guard
+  // the region to be outlined.
+  SmallVector<BasicBlock *, 4> Entries;
+  // The return block that is not included in the outlined region.
+  BasicBlock *ReturnBlock;
+  // The dominating block of the region to be outlined.
+  BasicBlock *NonReturnBlock;
+  // The set of blocks in Entries that are predecessors of ReturnBlock.
+  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
+};
+
 struct PartialInlinerImpl {
   PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
   bool run(Module &M);
   Function *unswitchFunction(Function *F);
 
+  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
+
 private:
   InlineFunctionInfo IFI;
   int NumPartialInlining = 0;
@@ -59,6 +85,7 @@
             NumPartialInlining >= MaxNumPartialInlining);
   }
 };
+
 struct PartialInlinerLegacyPass : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
   PartialInlinerLegacyPass() : ModulePass(ID) {
@@ -83,75 +110,249 @@
 };
 }
 
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
-  // First, verify that this function is an unswitching candidate...
-  if (F->hasAddressTaken())
-    return nullptr;
-
+std::unique_ptr<FunctionOutliningInfo>
+PartialInlinerImpl::computeOutliningInfo(Function *F) {
   BasicBlock *EntryBlock = &F->front();
   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
   if (!BR || BR->isUnconditional())
-    return nullptr;
+    return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Returns true if Succ is BB's successor
+  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
+    return is_contained(successors(BB), Succ);
+  };
+
+  auto SuccSize = [](BasicBlock *BB) {
+    return std::distance(succ_begin(BB), succ_end(BB));
+  };
+
+  auto IsReturnBlock = [](BasicBlock *BB) {
+    TerminatorInst *TI = BB->getTerminator();
+    return isa<ReturnInst>(TI);
+  };
+
+  auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+    if (IsReturnBlock(Succ1))
+      return std::make_tuple(Succ1, Succ2);
+    if (IsReturnBlock(Succ2))
+      return std::make_tuple(Succ2, Succ1);
+
+    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+  };
+
+  // Detect a triangular shape: one successor branches to the other one.
+  auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+    if (IsSuccessor(Succ1, Succ2))
+      return std::make_tuple(Succ1, Succ2);
+    if (IsSuccessor(Succ2, Succ1))
+      return std::make_tuple(Succ2, Succ1);
+
+    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+  };
+
+  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+      llvm::make_unique<FunctionOutliningInfo>();
+
+  BasicBlock *CurrEntry = EntryBlock;
+  bool CandidateFound = false;
+  do {
+    // The number of blocks to be inlined has already reached
+    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
+    // disables partial inlining for the function.
+    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
+      break;
+
+    if (SuccSize(CurrEntry) != 2)
+      break;
+
+    BasicBlock *Succ1 = *succ_begin(CurrEntry);
+    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
+
+    BasicBlock *ReturnBlock, *NonReturnBlock;
+    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+
+    if (ReturnBlock) {
+      OutliningInfo->Entries.push_back(CurrEntry);
+      OutliningInfo->ReturnBlock = ReturnBlock;
+      OutliningInfo->NonReturnBlock = NonReturnBlock;
+      CandidateFound = true;
+      break;
+    }
+
+    BasicBlock *CommSucc;
+    BasicBlock *OtherSucc;
+    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
+
+    if (!CommSucc)
+      break;
 
-  BasicBlock *ReturnBlock = nullptr;
-  BasicBlock *NonReturnBlock = nullptr;
-  unsigned ReturnCount = 0;
-  for (BasicBlock *BB : successors(EntryBlock)) {
-    if (isa<ReturnInst>(BB->getTerminator())) {
-      ReturnBlock = BB;
-      ReturnCount++;
-    } else
-      NonReturnBlock = BB;
+    OutliningInfo->Entries.push_back(CurrEntry);
+    CurrEntry = OtherSucc;
+
+  } while (true);
+
+  if (!CandidateFound)
+    return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Do sanity check of the entries: there should not
+  // be any successors (not in the entry set) other than
+  // {ReturnBlock, NonReturnBlock}
+  assert(OutliningInfo->Entries[0] == &F->front());
+  DenseSet<BasicBlock *> Entries;
+  for (BasicBlock *E : OutliningInfo->Entries)
+    Entries.insert(E);
+
+  // Returns true if BB has a predecessor outside Entries. Entries is captured
+  // by reference so blocks added while growing the region are seen here.
+  auto HasNonEntryPred = [&Entries](BasicBlock *BB) {
+    for (auto Pred : predecessors(BB)) {
+      if (!Entries.count(Pred))
+        return true;
+    }
+    return false;
+  };
+  auto CheckAndNormalizeCandidate =
+      [&Entries, &HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
+        for (BasicBlock *E : OutliningInfo->Entries) {
+          for (auto Succ : successors(E)) {
+            if (Entries.count(Succ))
+              continue;
+            if (Succ == OutliningInfo->ReturnBlock)
+              OutliningInfo->ReturnBlockPreds.push_back(E);
+            else if (Succ != OutliningInfo->NonReturnBlock)
+              return false;
+          }
+          // There should not be any outside incoming edges either:
+          if (HasNonEntryPred(E))
+            return false;
+        }
+        return true;
+      };
+
+  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
+    return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Now further grow the candidate's inlining region by
+  // peeling off dominating blocks from the outlining region:
+  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
+    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
+    if (SuccSize(Cand) != 2)
+      break;
+
+    if (HasNonEntryPred(Cand))
+      break;
+
+    BasicBlock *Succ1 = *succ_begin(Cand);
+    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
+
+    BasicBlock *ReturnBlock, *NonReturnBlock;
+    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
+      break;
+
+    if (NonReturnBlock->getSinglePredecessor() != Cand)
+      break;
+
+    // Now grow and update OutliningInfo:
+    OutliningInfo->Entries.push_back(Cand);
+    OutliningInfo->NonReturnBlock = NonReturnBlock;
+    OutliningInfo->ReturnBlockPreds.push_back(Cand);
+    Entries.insert(Cand);
   }
 
-  if (ReturnCount != 1)
+  return OutliningInfo;
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+  if (F->hasAddressTaken())
+    return nullptr;
+
+  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+      computeOutliningInfo(F);
+
+  if (!OutliningInfo)
     return nullptr;
 
   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
   Function *DuplicateFunction = CloneFunction(F, VMap);
   DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
-  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
-  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
-  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);
+  BasicBlock *NewReturnBlock =
+      cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
+  BasicBlock *NewNonReturnBlock =
+      cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]);
+  DenseSet<BasicBlock *> NewEntries;
+  for (BasicBlock *BB : OutliningInfo->Entries) {
+    NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+  }
 
   // Go ahead and update all uses to the duplicate, so that we can just
   // use the inliner functionality when we're done hacking.
   F->replaceAllUsesWith(DuplicateFunction);
 
+  auto getFirstPHI = [](BasicBlock *BB) {
+    BasicBlock::iterator I = BB->begin();
+    PHINode *FirstPhi = nullptr;
+    while (I != BB->end()) {
+      PHINode *Phi = dyn_cast<PHINode>(I);
+      if (!Phi)
+        break;
+      if (!FirstPhi) {
+        FirstPhi = Phi;
+        break;
+      }
+    }
+    return FirstPhi;
+  };
   // Special hackery is needed with PHI nodes that have inputs from more than
   // one extracted block.  For simplicity, just split the PHIs into a two-level
   // sequence of PHIs, some of which will go in the extracted region, and some
   // of which will go outside.
   BasicBlock *PreReturn = NewReturnBlock;
-  NewReturnBlock = NewReturnBlock->splitBasicBlock(
-      NewReturnBlock->getFirstNonPHI()->getIterator());
-  BasicBlock::iterator I = PreReturn->begin();
-  Instruction *Ins = &NewReturnBlock->front();
-  while (I != PreReturn->end()) {
-    PHINode *OldPhi = dyn_cast<PHINode>(I);
-    if (!OldPhi)
-      break;
-
-    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
-    OldPhi->replaceAllUsesWith(RetPhi);
-    Ins = NewReturnBlock->getFirstNonPHI();
-
-    RetPhi->addIncoming(&*I, PreReturn);
-    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
-                        NewEntryBlock);
-    OldPhi->removeIncomingValue(NewEntryBlock);
-
-    ++I;
+  // only split block when necessary:
+  PHINode *FirstPhi = getFirstPHI(PreReturn);
+  unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
+  if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
+
+    NewReturnBlock = NewReturnBlock->splitBasicBlock(
+        NewReturnBlock->getFirstNonPHI()->getIterator());
+    BasicBlock::iterator I = PreReturn->begin();
+    Instruction *Ins = &NewReturnBlock->front();
+    while (I != PreReturn->end()) {
+      PHINode *OldPhi = dyn_cast<PHINode>(I);
+      if (!OldPhi)
+        break;
+
+      PHINode *RetPhi =
+          PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+      OldPhi->replaceAllUsesWith(RetPhi);
+      Ins = NewReturnBlock->getFirstNonPHI();
+
+      RetPhi->addIncoming(&*I, PreReturn);
+      for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
+        BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+        RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
+        OldPhi->removeIncomingValue(NewE);
+      }
+      ++I;
+    }
+    for (auto E : OutliningInfo->ReturnBlockPreds) {
+      BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+      NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+    }
   }
-  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
 
+  // Returns true if the block is to be partial inlined into the caller
+  // (i.e. not to be extracted to the out of line function)
+  auto ToBeInlined = [=](BasicBlock *BB) {
+    return BB == NewReturnBlock || NewEntries.count(BB);
+  };
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
   ToExtract.push_back(NewNonReturnBlock);
   for (BasicBlock &BB : *DuplicateFunction)
-    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
-        &BB != NewNonReturnBlock)
+    if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
       ToExtract.push_back(&BB);
 
   // The CodeExtractor needs a dominator tree.
@@ -183,6 +384,7 @@
 
     if (IsLimitReached())
       continue;
+
     NumPartialInlining++;
 
     OptimizationRemarkEmitter ORE(CS.getCaller());
Index: llvm/trunk/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
 
 ; This test checks to make sure that CodeExtractor updates
 ;  the exit branch probabilities for multiple exit blocks.
@@ -22,7 +22,7 @@
 
 ; CHECK-LABEL: @dummyCaller
 ; CHECK: call
-; CHECK-NEXT: br i1 {{.*}}!prof [[COUNT1:![0-9]+]]
+; CHECK-NEXT: br i1 {{.*}}return.i{{.*}}return.2{{.*}}!prof [[COUNT1:![0-9]+]]
 }
 
 !llvm.module.flags = !{!0}
@@ -31,4 +31,4 @@
 !2 = !{!"branch_weights", i32 5, i32 5}
 !3 = !{!"branch_weights", i32 4, i32 1}
 
-; CHECK: [[COUNT1]] = !{!"branch_weights", i32 8, i32 31}
+; CHECK: [[COUNT1]] = !{!"branch_weights", i32 31, i32 8}
Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAnd.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAnd.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAnd.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb5
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1, %bb
+  %tmp6 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp6
+}
+
+declare i32 @channels(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT: br i1
+; LIMIT-NOT: br
+; LIMIT: call void @bar.1_
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAndOr.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAndOr.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAndOr.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb4
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @n() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb7, label %bb4
+
+bb4:                                              ; preds = %bb1, %bb
+  %tmp5 = tail call i32 (...) @m() #2
+  %tmp6 = icmp slt i32 %tmp5, %arg
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb4, %bb1
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb4
+  %tmp9 = phi i32 [ 0, %bb7 ], [ 1, %bb4 ]
+  ret i32 %tmp9
+}
+
+declare i32 @n(...) local_unnamed_addr #1
+
+declare i32 @m(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT-NOT: br i1
+; LIMIT: call i32 @bar
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind } 
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOr.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOr.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOr.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #1
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1, %bb
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1
+  %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ]
+  ret i32 %.0
+}
+
+declare i32 @channels(...) local_unnamed_addr
+
+declare void @foo(...) local_unnamed_addr
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.2_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT-NOT: br
+; LIMIT: call i32 @bar(
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+define i32 @bar_multi_ret(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #1
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1, %bb
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  %tmp4 = icmp slt i32 %arg, 10
+  br i1 %tmp4, label %bb6, label %bb5
+bb6:
+  tail call void (...) @foo() #1
+  %tmp5 = icmp slt i32 %arg, 3
+  br i1 %tmp5, label %bb7, label %bb5
+bb7:
+  tail call void (...) @foo() #1
+  br label %bb8
+bb8:
+  ret i32 0 
+
+bb5:                                              ; preds = %bb4, %bb1, %bb6
+  %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ], [0, %bb6]
+  ret i32 %.0
+}
+
+define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call {{.*}} @bar_multi_ret.1_
+  %tmp = tail call i32 @bar_multi_ret(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { noinline nounwind uwtable }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 300576)"}
Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck  --check-prefix=LIMIT3 %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT2 %s
+
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @n() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb1, %bb
+  %tmp5 = tail call i32 (...) @m() #2
+  %tmp6 = icmp sgt i32 %tmp5, %arg
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb4
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb4, %bb1
+  %tmp9 = phi i32 [ 0, %bb7 ], [ 1, %bb4 ], [ 1, %bb1 ]
+  ret i32 %tmp9
+}
+
+declare i32 @n(...) local_unnamed_addr #1
+
+declare i32 @m(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT3-LABEL: @dummy_caller
+; LIMIT3: br i1
+; LIMIT3: br i1
+; LIMIT3-NOT: br i1
+; LIMIT3: call void @bar.1_
+; LIMIT2-LABEL: @dummy_caller
+; LIMIT2-NOT: br i1
+; LIMIT2: call i32 @bar(
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind } 
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
Index: llvm/trunk/test/Transforms/CodeExtractor/SingleCondition.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/SingleCondition.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/SingleCondition.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -partial-inliner -S  | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S  | FileCheck %s
+
+define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
+entry:
+  br i1 %cond, label %if.then, label %return
+if.then:
+  ; Dummy store to have more than 0 uses
+  store i32 10, i32* %align.val, align 4
+  br label %return
+return:             ; preds = %if.then, %entry
+  ret i32 0
+}
+
+define internal i32 @dummyCaller(i1 %cond, i32* align 2 %align.val) {
+entry:
+; CHECK-LABEL: @dummyCaller
+; CHECK: br
+; CHECK: call void @inlinedFunc.1_ 
+  %val = call i32 @inlinedFunc(i1 %cond, i32* %align.val)
+  ret i32 %val
+}
+
Index: llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll
+++ llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll
@@ -9,13 +9,11 @@
 ; CHECK-LABEL: define internal void @tinkywinky.1_ontrue() {
 ; CHECK-NEXT: newFuncRoot:
 ; CHECK-NEXT:   br label %ontrue
-; CHECK: .exitStub:
+; CHECK: onfalse{{.*}}:
 ; CHECK-NEXT:   ret void
 ; CHECK: ontrue:
 ; CHECK-NEXT:   call void @patatino()
-; CHECK-NEXT:   br label %onfalse
-; CHECK: onfalse:
-; CHECK-NEXT:   br label %.exitStub
+; CHECK-NEXT:   br label %onfalse{{.*}}
 ; CHECK-NEXT: }
 
 declare void @patatino()