Index: lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- lib/CodeGen/MachineBlockPlacement.cpp
+++ lib/CodeGen/MachineBlockPlacement.cpp
@@ -452,6 +452,7 @@
   void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
                   BlockFilterSet *BlockFilter = nullptr);
+  bool hasRarePredecessors(MachineBasicBlock *Latch, MachineBasicBlock *Exit);
   MachineBasicBlock *findBestLoopTop(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
@@ -1749,16 +1750,48 @@
                << getBlockName(*Chain.begin()) << "\n");
 }
 
+// Check whether the latch block has rarer predecessors than the exit block.
+// The rare predecessor frequency is defined as the total predecessor
+// frequency minus the max predecessor frequency. It is the number of taken
+// branches saved when the latch is moved to the top of the loop.
+bool
+MachineBlockPlacement::hasRarePredecessors(MachineBasicBlock *Latch,
+                                           MachineBasicBlock *Exit)
+{
+  BlockFrequency MaxPredFreq;
+  BlockFrequency TotalPredFreq;
+  for (MachineBasicBlock *Pred : Latch->predecessors()) {
+    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+    TotalPredFreq += PredFreq;
+    if (PredFreq > MaxPredFreq)
+      MaxPredFreq = PredFreq;
+  }
+  BlockFrequency ReducedBranches = TotalPredFreq - MaxPredFreq;
+  BlockFrequency ExitFreq = MBFI->getBlockFreq(Exit);
+  return ReducedBranches < ExitFreq;
+}
+
 /// \brief Find the best loop top block for layout.
 ///
 /// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// out at the top of the loop. This looks for two patterns:
+///
+/// 1. a latch block
+///    its only successor is the loop header
+///
+///    Because such a block will always result in an unconditional jump
+///    (for the backedge), rotating it in front of the loop header is always
+///    profitable.
+///
+/// 2. a latch block
+///    it has two successors: one is the loop header, the other is an exit
+///    it has more than one predecessor
+///
+///    If it is below one of its predecessors P, only P can fall through to
+///    it; all other predecessors need a jump to it, followed by a conditional
+///    jump to the loop header. If it is moved before the loop header, all its
+///    predecessors jump to it and then fall through to the loop header. So
+///    all its predecessors except P save one taken branch.
 MachineBasicBlock *
 MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
                                        const BlockFilterSet &LoopBlockSet) {
@@ -1790,9 +1823,26 @@
     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has "
                  << Pred->succ_size() << " successors, ";
           MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
-    if (Pred->succ_size() > 1)
+    if (Pred->succ_size() > 2)
       continue;
 
+    if (Pred->succ_size() == 2) {
+      // The candidate should have an exit edge.
+      MachineBasicBlock *OutBB = *Pred->succ_begin();
+      if (OutBB == L.getHeader())
+        OutBB = *Pred->succ_rbegin();
+      if (LoopBlockSet.count(OutBB))
+        continue;
+
+      // It must have more than one predecessor.
+ if (Pred->pred_size() == 1) + continue; + + // Move the latch to top must reduce taken branches. + if (hasRarePredecessors(Pred, OutBB)) + continue; + } + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); if (!BestPred || PredFreq > BestPredFreq || (!(PredFreq < BestPredFreq) && Index: test/CodeGen/AArch64/neg-imm.ll =================================================================== --- test/CodeGen/AArch64/neg-imm.ll +++ test/CodeGen/AArch64/neg-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s ; LSR used to pick a sub-optimal solution due to the target responding ; conservatively to isLegalAddImmediate for negative values. Index: test/CodeGen/AArch64/tailmerging_in_mbp.ll =================================================================== --- test/CodeGen/AArch64/tailmerging_in_mbp.ll +++ test/CodeGen/AArch64/tailmerging_in_mbp.ll @@ -1,9 +1,8 @@ ; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s ; CHECK-LABEL: test: -; CHECK: LBB0_7: -; CHECK: b.hi -; CHECK-NEXT: b +; CHECK-LABEL: %cond.false12.i +; CHECK: b.gt ; CHECK-NEXT: LBB0_8: ; CHECK-NEXT: mov x8, x9 ; CHECK-NEXT: LBB0_9: Index: test/CodeGen/AMDGPU/branch-uniformity.ll =================================================================== --- test/CodeGen/AMDGPU/branch-uniformity.ll +++ test/CodeGen/AMDGPU/branch-uniformity.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: {{^}}main: ; CHECK: ; %LOOP49 ; CHECK: v_cmp_ne_u32_e32 vcc, -; CHECK: s_cbranch_vccnz +; CHECK: s_cbranch_vccz ; CHECK: ; %ENDIF53 define amdgpu_vs float @main(i32 %in) { main_body: Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -205,6 +205,11 @@ ; Make sure scc liveness is updated if sor_b64 is removed ; GCN-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -215,10 +220,6 @@ ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword Index: test/CodeGen/AMDGPU/global_smrd_cfg.ll =================================================================== --- test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; 
##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: Index: test/CodeGen/AMDGPU/hoist-cond.ll =================================================================== --- test/CodeGen/AMDGPU/hoist-cond.ll +++ test/CodeGen/AMDGPU/hoist-cond.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s ; Check that invariant compare is hoisted out of the loop. ; At the same time condition shall not be serialized into a VGPR and deserialized later Index: test/CodeGen/AMDGPU/loop_break.ll =================================================================== --- test/CodeGen/AMDGPU/loop_break.ll +++ test/CodeGen/AMDGPU/loop_break.ll @@ -1,5 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break Index: test/CodeGen/AMDGPU/madmk.ll =================================================================== --- test/CodeGen/AMDGPU/madmk.ll +++ test/CodeGen/AMDGPU/madmk.ll @@ -186,9 +186,9 @@ } ; SI-LABEL: {{^}}kill_madmk_verifier_error: +; SI: s_or_b64 ; SI: s_xor_b64 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} -; SI: s_or_b64 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { bb: br label %bb2 Index: test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- test/CodeGen/AMDGPU/multilevel-break.ll +++ test/CodeGen/AMDGPU/multilevel-break.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; OPT-LABEL: {{^}}define amdgpu_vs void @multi_else_break( ; OPT: main_body: Index: test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- test/CodeGen/AMDGPU/valu-i1.ll +++ test/CodeGen/AMDGPU/valu-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/ARM/code-placement.ll =================================================================== --- test/CodeGen/ARM/code-placement.ll +++ test/CodeGen/ARM/code-placement.ll @@ -38,8 +38,9 @@ br i1 %0, label %bb5, label %bb.nph15 bb1: ; preds = %bb2.preheader, %bb1 +; CHECK: LBB1_[[BB3:.]]: @ %bb3 ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader -; CHECK: blt LBB1_[[BB3:.]] +; CHECK: blt LBB1_[[BB3]] %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; [#uses=2] %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; [#uses=1] @@ -53,7 +54,6 @@ 
bb3: ; preds = %bb1, %bb2.preheader ; CHECK: LBB1_[[BB1:.]]: @ %bb1 ; CHECK: bne LBB1_[[BB1]] -; CHECK: LBB1_[[BB3]]: @ %bb3 %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; [#uses=2] %3 = add i32 %pass.011, 1 ; [#uses=2] %exitcond18 = icmp eq i32 %3, %passes ; [#uses=1] Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -183,7 +183,7 @@ ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb r{{.*}}, [r0, #8] -; CHECK-APPLE: ble +; CHECK-APPLE: b ; CHECK-APPLE: mov r8, [[ID]] ; CHECK-O0-LABEL: foo_loop: Index: test/CodeGen/PowerPC/cmp_elimination.ll =================================================================== --- test/CodeGen/PowerPC/cmp_elimination.ll +++ test/CodeGen/PowerPC/cmp_elimination.ll @@ -718,13 +718,14 @@ define void @func28(i32 signext %a) { ; CHECK-LABEL: @func28 ; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]] -; CHECK: .[[LABEL1:[A-Z0-9_]+]]: +; CHECK: .[[LABEL2:[A-Z0-9_]+]]: +; CHECK: cmpwi [[REG1]], [[REG2]] +; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp -; CHECK: bne 0, .[[LABEL2:[A-Z0-9_]+]] +; CHECK: bne 0, .[[LABEL2]] ; CHECK: bl dummy1 -; CHECK: .[[LABEL2]]: -; CHECK: cmpwi [[REG1]], [[REG2]] -; CHECK: bgt 0, .[[LABEL1]] +; CHECK: b .[[LABEL2]] +; CHECK: .[[LABEL1]]: ; CHECK: blr entry: br label %do.body Index: test/CodeGen/SystemZ/atomicrmw-minmax-01.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-01.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-01.ll @@ -1,8 +1,8 @@ ; Test 8-bit atomic min/max operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2 ; Check signed minimum. ; - CHECK is for the main loop. Index: test/CodeGen/SystemZ/atomicrmw-minmax-02.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-02.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-02.ll @@ -1,8 +1,8 @@ ; Test 8-bit atomic min/max operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2 ; Check signed minimum. ; - CHECK is for the main loop. Index: test/CodeGen/SystemZ/loop-01.ll =================================================================== --- test/CodeGen/SystemZ/loop-01.ll +++ test/CodeGen/SystemZ/loop-01.ll @@ -1,7 +1,7 @@ ; Test loop tuning. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-block-placement \ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13 ; Test that strength reduction is applied to addresses with a scale factor, Index: test/CodeGen/SystemZ/loop-02.ll =================================================================== --- test/CodeGen/SystemZ/loop-02.ll +++ test/CodeGen/SystemZ/loop-02.ll @@ -1,7 +1,7 @@ ; Test BRCTH. ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \ -; RUN: -no-integrated-as | FileCheck %s +; RUN: -no-integrated-as -disable-block-placement | FileCheck %s ; Test a loop that should be converted into dbr form and then use BRCTH. define void @f2(i32 *%src, i32 *%dest) { Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu| FileCheck %s -; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s declare i8* @malloc(i64) declare void @free(i8*) Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -124,7 +124,7 @@ ret i32 %sum } -!0 = !{!"branch_weights", i32 4, i32 64} +!0 = !{!"branch_weights", i32 1, i32 64} define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. 
@@ -961,11 +961,11 @@ ; CHECK: %while.cond.outer ; Third rotated loop top ; CHECK: .p2align +; CHECK: %if.end20 ; CHECK: %while.cond ; CHECK: %while.body ; CHECK: %land.lhs.true ; CHECK: %if.then19 -; CHECK: %if.end20 ; CHECK: %if.then8 ; CHECK: ret Index: test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir =================================================================== --- test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir +++ test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir @@ -27,12 +27,13 @@ # return result; # } # -# CHECK: 49: eb 4a jmp 74 -# CHECK: 57: eb 3c jmp 60 -# CHECK: 65: eb 2e jmp 46 -# CHECK: 73: eb 20 jmp 32 -# CHECK: 81: eb 12 jmp 18 -# CHECK: 93: 7f 8b jg -117 +# CHECK: 35: 77 e9 ja -23 +# CHECK: 45: eb d9 jmp -39 +# CHECK: 4a: eb d4 jmp -44 +# CHECK: 4f: eb cf jmp -49 +# CHECK: 54: eb ca jmp -54 +# CHECK: 59: eb c5 jmp -59 +# CHECK: 64: eb ba jmp -70 # Test 2: # @@ -57,11 +58,11 @@ # return w; # } # -# CHECK: 129: eb 13 jmp 19 -# CHECK: 12e: eb a0 jmp -96 -# CHECK: 132: eb 9c jmp -100 -# CHECK: 137: eb 97 jmp -105 -# CHECK: 13c: eb 92 jmp -110 +# CHECK: f9: eb 13 jmp 19 +# CHECK: fe: eb a0 jmp -96 +# CHECK: 102: eb 9c jmp -100 +# CHECK: 107: eb 97 jmp -105 +# CHECK: 10c: eb 92 jmp -110 --- | ; ModuleID = 'D:\iusers\opaparo\dev_test\branch_instruction_and_target_split_perf_nops.ll' source_filename = "D:\5C\5Ciusers\5C\5Copaparo\5C\5Cdev_test\5C\5Cbranch_instruction_and_target_split_perf_nops.c" Index: test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll =================================================================== --- test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll +++ test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -1,13 +1,12 @@ ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s define void @foo() { -; Test that when determining the edge probability from a node in an inner loop -; to a node in an outer loop, the weights on edges in the inner loop should be -; ignored if we are building the chain for the outer loop. +; After moving the latch to the top of loop, there is no fall through from the +; latch to outer loop. ; ; CHECK-LABEL: foo: -; CHECK: callq c ; CHECK: callq b +; CHECK: callq c entry: %call = call zeroext i1 @a() Index: test/CodeGen/X86/code_placement_loop_rotation2.ll =================================================================== --- test/CodeGen/X86/code_placement_loop_rotation2.ll +++ test/CodeGen/X86/code_placement_loop_rotation2.ll @@ -5,13 +5,13 @@ ; Test a nested loop case when profile data is not available. 
; ; CHECK-LABEL: foo: +; CHECK: callq h ; CHECK: callq b +; CHECK: callq g +; CHECK: callq f ; CHECK: callq c ; CHECK: callq d ; CHECK: callq e -; CHECK: callq f -; CHECK: callq g -; CHECK: callq h entry: br label %header Index: test/CodeGen/X86/pr5145.ll =================================================================== --- test/CodeGen/X86/pr5145.ll +++ test/CodeGen/X86/pr5145.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s +; RUN: llc -disable-block-placement -mtriple=x86_64-- < %s | FileCheck %s @sc8 = external global i8 define void @atomic_maxmin_i8() { Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-O0 %s -; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-APPLE %s +; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-i386 %s declare i8* @malloc(i64) declare void @free(i8*) Index: test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -73,11 +73,11 @@ ; CHECK-LABEL: loop_shared_header ; CHECK: # %entry ; CHECK: # %shared_preheader +; CHECK: # %outer_loop_latch ; CHECK: # %shared_loop_header ; CHECK: # %inner_loop_body ; CHECK: # %outer_loop_latch ; CHECK: # %merge_predecessor_split -; CHECK: # %outer_loop_latch ; CHECK: # %cleanup define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 { entry: Index: test/CodeGen/X86/tail-dup-repeat.ll =================================================================== --- test/CodeGen/X86/tail-dup-repeat.ll +++ test/CodeGen/X86/tail-dup-repeat.ll @@ -12,7 +12,9 @@ entry: br label %for.cond -; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond +; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %dup1 + +; CHECK: # %for.cond for.cond: ; preds = %dup1, %entry br i1 %a1, label %land.lhs.true, label %if.end56 @@ -28,11 +30,9 @@ br label %dup1 ; CHECK: # %if.end70 -; CHECK-NEXT: # in Loop: ; CHECK-NEXT: movl $12, (%rdx) -; CHECK-NEXT: movl $2, (%rcx) -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .[[HEADER]] +; CHECK: movl $2, (%rcx) +; CHECK-NEXT: jmp .[[HEADER]] if.end70: ; preds = %if.end56 store i32 12, i32* %a4, align 8 br label %dup2 Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck %s 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; This test checks that x86-cmov-converter optimization transform CMOV Index: test/DebugInfo/X86/dbg-value-transfer-order.ll =================================================================== --- test/DebugInfo/X86/dbg-value-transfer-order.ll +++ test/DebugInfo/X86/dbg-value-transfer-order.ll @@ -24,6 +24,12 @@ ; with the Orders insertion point vector. ; CHECK-LABEL: f: # @f +; CHECK: .LBB0_3: +; Check that this DEBUG_VALUE comes before the left shift. +; CHECK: #DEBUG_VALUE: bit_offset <- $ecx +; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 +; CHECK: movl $1, %[[reg:[^ ]*]] +; CHECK: shll %cl, %[[reg]] ; CHECK: .LBB0_1: # %while.body ; CHECK: movl $32, %ecx ; CHECK: testl {{.*}} @@ -31,12 +37,7 @@ ; CHECK: # %bb.2: # %if.then ; CHECK: callq if_then ; CHECK: movl %eax, %ecx -; CHECK: .LBB0_3: # %if.end -; Check that this DEBUG_VALUE comes before the left shift. -; CHECK: #DEBUG_VALUE: bit_offset <- $ecx -; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 -; CHECK: movl $1, %[[reg:[^ ]*]] -; CHECK: shll %cl, %[[reg]] +; CHECK: jmp .LBB0_3 ; ModuleID = 't.c' source_filename = "t.c"
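Editorial note, not part of the patch above: a minimal, illustrative C++ sketch of the loop shape that pattern 2 in findBestLoopTop targets. The function and names below are invented for the example, and the exact machine CFG a compiler builds depends on the optimization pipeline.

// Illustrative only. The loop latch (the ++i / i < n block) is the join
// point of the if/else, so it has two in-loop predecessors plus a
// conditional exit edge -- the shape handled by pattern 2.
int sum_signs(const int *a, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    if (a[i] > 0)       // both arms of this branch feed the latch block
      sum += a[i];
    else
      sum -= a[i];
  }
  // With the latch laid out at the bottom, one arm falls through into it and
  // the back edge up to the header is a taken branch each time the loop
  // repeats. With the latch rotated to the top, both arms branch to it, the
  // back edge falls through into the header, and only the loop exit is a
  // taken branch.
  return sum;
}

The hasRarePredecessors heuristic weighs exactly this trade-off: the taken branches saved (total predecessor frequency minus the hottest predecessor's frequency) against the exit edge that becomes a taken branch, and the rotation is skipped when the saving is smaller than the exit frequency.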