Index: lib/CodeGen/MachineCSE.cpp
===================================================================
--- lib/CodeGen/MachineCSE.cpp
+++ lib/CodeGen/MachineCSE.cpp
@@ -473,11 +473,34 @@
   }
 
   // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
-  // it unless the defined value is already used in the BB of the new use.
+  // it unless:
+  // a) the defined value is already used in the BB of the new use or
+  // b) all uses of Reg are in one BB whose only successor already contains
+  //    a use of the common subexpression
+
+  // First find the one BB holding all uses of Reg; it must have one successor
+  MachineBasicBlock *BBUses = nullptr;
+  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+    if (BBUses && BBUses != UseMI.getParent()) {
+      BBUses = nullptr;
+      break;
+    }
+    if (!BBUses) {
+      if (UseMI.getParent()->succ_size() != 1)
+        break;
+      BBUses = UseMI.getParent();
+    }
+  }
+
   bool HasPHI = false;
-  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
-    HasPHI |= UseMI.isPHI();
-    if (UseMI.getParent() == MI->getParent())
+  for (MachineInstr &UseCSMI : MRI->use_nodbg_instructions(CSReg)) {
+    HasPHI |= UseCSMI.isPHI();
+    // a) the defined value is already used in the BB of the new use
+    if (UseCSMI.getParent() == MI->getParent())
+      return true;
+
+    // b) CSReg is already used in the only successor of BBUses
+    if (BBUses && UseCSMI.getParent() == *BBUses->succ_begin())
       return true;
   }
 
Index: test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs | FileCheck %s
+
+; Check that the redundant immediate MOV instruction (a by-product of
+; handling phi nodes) is removed by the machine CSE heuristic and is
+; therefore not repeated in %bb.1.
+
+; CHECK-LABEL: {{^}}mov_opt:
+; CHECK: v_mov_b32_e32 {{v[0-9]+}}, 1.0
+; CHECK: %bb.1:
+; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 1.0
+; CHECK: BB0_2:
+
+define amdgpu_ps void @mov_opt(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp eq i32 %arg1, 0
+  br i1 %tmp, label %bb3, label %bb10
+
+bb3:                                              ; preds = %bb
+  %tmp4 = icmp eq i32 %arg2, 0
+  br i1 %tmp4, label %bb5, label %bb10
+
+bb5:                                              ; preds = %bb3
+  %tmp6 = getelementptr <{ [4294967295 x i32] }>, <{ [4294967295 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %arg
+  %tmp7 = load i32, i32 addrspace(6)* %tmp6
+  %tmp8 = icmp eq i32 %tmp7, 1
+  br i1 %tmp8, label %bb10, label %bb9
+
+bb9:                                              ; preds = %bb5
+  br label %bb10
+
+bb10:                                             ; preds = %bb9, %bb5, %bb3, %bb
+  %tmp11 = phi float [ 1.000000e+00, %bb3 ], [ 0.000000e+00, %bb9 ], [ 1.000000e+00, %bb ], [ undef, %bb5 ]
+  call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %tmp11, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
+  ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { inaccessiblememonly nounwind }
Index: test/CodeGen/AMDGPU/cse-phi-incoming-val.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/cse-phi-incoming-val.mir
@@ -0,0 +1,73 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass machine-cse -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# Check that the redundant immediate MOV instruction (a by-product of
+# handling phi nodes) in bb.1 is removed by the machine-cse pass,
+# leaving only the copy in bb.0.
+
+# GCN-LABEL: name: cse_phi_incoming_val
+# GCN: bb.0:
+# GCN: V_MOV_B32_e32 1065353216
+# GCN: bb.1:
+# GCN-NOT: V_MOV_B32_e32 1065353216
+# GCN: bb.2:
+
+---
+name: cse_phi_incoming_val
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.6
+    liveins: $vgpr0, $sgpr0, $sgpr1
+    %5:sgpr_32 = COPY $sgpr1
+    %4:sgpr_32 = COPY $sgpr0
+    %3:vgpr_32 = COPY $vgpr0
+    %7:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %8:sreg_32_xm0 = S_MOV_B32 0
+    S_CMP_LG_U32 %4:sgpr_32, killed %8:sreg_32_xm0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.6, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.5
+
+    %9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %10:sreg_32_xm0 = S_MOV_B32 0
+    S_CMP_LG_U32 %5:sgpr_32, killed %10:sreg_32_xm0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.5, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.4
+
+    %11:sreg_32_xm0 = S_MOV_B32 2
+    %12:vgpr_32 = V_LSHLREV_B32_e64 %11:sreg_32_xm0, %3:vgpr_32, implicit $exec
+
+    %17:sreg_64 = V_CMP_NE_U32_e64 killed %11:sreg_32_xm0, %12:vgpr_32, implicit $exec
+
+    %0:sreg_64 = SI_IF killed %17:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.4
+
+  bb.4:
+    successors: %bb.5
+
+    SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    %19:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+  bb.5:
+    successors: %bb.6
+
+    %1:vgpr_32 = PHI %9:vgpr_32, %bb.1, %19:vgpr_32, %bb.4
+
+  bb.6:
+
+    %2:vgpr_32 = PHI %7:vgpr_32, %bb.0, %1:vgpr_32, %bb.5
+    %20:vgpr_32 = IMPLICIT_DEF
+    %21:vgpr_32 = IMPLICIT_DEF
+    %22:vgpr_32 = IMPLICIT_DEF
+    EXP 40, %2:vgpr_32, %20:vgpr_32, %21:vgpr_32, %22:vgpr_32, 0, 0, 15, implicit $exec
+    S_ENDPGM 0
+---
+
Index: test/CodeGen/AMDGPU/multilevel-break.ll
===================================================================
--- test/CodeGen/AMDGPU/multilevel-break.ll
+++ test/CodeGen/AMDGPU/multilevel-break.ll
@@ -100,9 +100,9 @@
 
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
 ; GCN:      s_mov_b64          [[OLD_LEFT]], [[LEFT]]
+; GCN:      s_mov_b64
 
 ; GCN: ; %LeafBlock1
-; GCN:      s_mov_b64
 ; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
 ; GCN: ; %case1