Index: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
+++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
@@ -389,22 +389,50 @@
   uint32_t BestWeight = 0;
   uint32_t WeightScale = 0;
   uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
-  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+
+  // Adjust sum of weights by excluding weights on edges pointing to blocks that
+  // is either not in BlockFilter or is already in the current chain. Consider
+  // the following CFG:
+  //
+  //     --->A
+  //     |  / \
+  //     | B   C
+  //     |  \ / \
+  //     ----D   E
+  //
+  // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+  // A->C is chosen as a fall-through, D won't be selected as a successor of C
+  // due to CFG constraint (the probability of C->D is not greater than
+  // HotProb). If we exclude E that is not in BlockFilter when calculating the
+  // probability of C->D, D will be selected and we will get A C D B as the
+  // layout of this loop.
+  uint32_t AdjustedSumWeight = SumWeight;
+  SmallVector<MachineBasicBlock *, 4> Successors;
   for (MachineBasicBlock *Succ : BB->successors()) {
-    if (BlockFilter && !BlockFilter->count(Succ))
-      continue;
-    BlockChain &SuccChain = *BlockToChain[Succ];
-    if (&SuccChain == &Chain) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Already merged!\n");
-      continue;
-    }
-    if (Succ != *SuccChain.begin()) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
-      continue;
+    bool SkipSucc = false;
+    if (BlockFilter && !BlockFilter->count(Succ)) {
+      SkipSucc = true;
+    } else {
+      BlockChain *SuccChain = BlockToChain[Succ];
+      if (SuccChain == &Chain) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ)
+                     << " -> Already merged!\n");
+        SkipSucc = true;
+      } else if (Succ != *SuccChain->begin()) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
+        continue;
+      }
     }
+    if (SkipSucc)
+      AdjustedSumWeight -= MBPI->getEdgeWeight(BB, Succ) / WeightScale;
+    else
+      Successors.push_back(Succ);
+  }
 
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock *Succ : Successors) {
     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
-    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+    BranchProbability SuccProb(SuccWeight / WeightScale, AdjustedSumWeight);
 
     // If we outline optional branches, look whether Succ is unavoidable, i.e.
     // dominates all terminators of the MachineFunction. If it does, other
@@ -432,6 +460,7 @@
 
     // Only consider successors which are either "hot", or wouldn't violate
     // any CFG constraints.
+    BlockChain &SuccChain = *BlockToChain[Succ];
     if (SuccChain.LoopPredecessors != 0) {
       if (SuccProb < HotProb) {
         DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
@@ -441,8 +470,9 @@
 
       // Make sure that a hot successor doesn't have a globally more
       // important predecessor.
+      BranchProbability RealSuccProb(SuccWeight / WeightScale, SumWeight);
       BlockFrequency CandidateEdgeFreq =
-          MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+          MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
       bool BadCFGConflict = false;
       for (MachineBasicBlock *Pred : Succ->predecessors()) {
         if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
Index: llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
+++ llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+
+define void @foo() {
+; Test that when determining the edge probability from a node in an inner loop
+; to a node in an outer loop, the weights on edges in the inner loop should be
+; ignored if we are building the chain for the outer loop.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %while.body, label %if.end.1, !prof !1
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.then.1, label %while.cond
+
+if.then.1:
+  call void @d()
+  br label %while.cond
+
+while.cond:
+  %call3 = call zeroext i1 @a()
+  br i1 %call3, label %while.body, label %if.end
+
+if.end.1:
+  call void @d()
+  br label %if.end
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @bar() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its peer loop, the weights on edges in the first loop should be
+; ignored.
+;
+; CHECK-LABEL: bar:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then, label %while.body, !prof !2
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %while.body, label %if.end, !prof !2
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @par() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its outer loop, the weights on edges in the outer loop should be
+; ignored if we are building the chain for the inner loop.
+;
+; CHECK-LABEL: par:
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq b
+
+entry:
+  br label %if.cond
+
+if.cond:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !3
+
+if.then:
+  call void @b()
+  br label %if.end
+
+if.else:
+  call void @c()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.end, label %exit, !prof !4
+
+if.end:
+  call void @d()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.cond, label %if.end.2, !prof !2
+
+if.end.2:
+  call void @e()
+  br label %if.cond
+
+exit:
+  ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+
+!1 = !{!"branch_weights", i32 10, i32 1}
+!2 = !{!"branch_weights", i32 100, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 100}
+!4 = !{!"branch_weights", i32 1, i32 1}