diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -206,17 +206,22 @@
   bool Changed = false;
   std::vector<DominatorTree::UpdateType> Updates;
 
+  // TODO: For now, if any exit block is not uniformly reached, we unify all
+  // of the exit blocks, including those that are uniformly reached. This
+  // works around a limitation of the structurizer, which cannot handle
+  // multiple function exits. Once the structurizer is able to handle
+  // multiple function exits, we should only unify UnreachableBlocks that
+  // are not uniformly reachable.
+  bool HasDivergentExitBlock = llvm::any_of(
+      PDT.roots(), [&](auto BB) { return !isUniformlyReached(DA, *BB); });
+
   for (BasicBlock *BB : PDT.roots()) {
     if (isa<ReturnInst>(BB->getTerminator())) {
-      if (!isUniformlyReached(DA, *BB))
+      if (HasDivergentExitBlock)
         ReturningBlocks.push_back(BB);
     } else if (isa<UnreachableInst>(BB->getTerminator())) {
-      // TODO: For now we unify UnreachableBlocks even though they are uniformly
-      // reachable. This is to workaround the limitation of structurizer, which
-      // can not handle multiple function exits. After structurizer is able to
-      // handle multiple function exits, we should only unify UnreachableBlocks
-      // that are not uniformly reachable.
-      UnreachableBlocks.push_back(BB);
+      if (HasDivergentExitBlock)
+        UnreachableBlocks.push_back(BB);
     } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
 
       ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
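(For illustration only; the sketch below is not part of the patch. Assuming the usual AMDGPU convention that an inreg argument is uniform (SGPR) while a plain argument is divergent (per-lane VGPR), this hypothetical function has one uniformly reached exit, %exit.a, and two divergently reached exits, %trap and %exit.b. Because at least one exit is not uniformly reached, HasDivergentExitBlock is true and all three exits are now collected for unification. Conversely, if every exit were uniformly reached, the pass would now leave even the unreachable blocks alone, which is what the test updates below reflect.)

  define void @sketch(i32 inreg %u, i32 %d) {
  entry:
    %uc = icmp eq i32 %u, 0        ; uniform condition (scalar argument)
    br i1 %uc, label %exit.a, label %body
  body:
    %dc = icmp eq i32 %d, 0        ; divergent condition (per-lane argument)
    br i1 %dc, label %trap, label %exit.b
  trap:                            ; exit, not uniformly reached
    unreachable
  exit.a:                          ; exit, uniformly reached
    ret void
  exit.b:                          ; exit, not uniformly reached
    ret void
  }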
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
@@ -50,28 +50,20 @@
 define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
 ; GCN-LABEL: sgpr_trunc_brcond:
 ; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_dword s1, s[0:1], 0x9
-; GCN-NEXT: s_mov_b32 s0, -1
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_xor_b32 s1, s1, -1
-; GCN-NEXT: s_and_b32 s1, s1, 1
-; GCN-NEXT: s_cmp_lg_u32 s1, 0
-; GCN-NEXT: s_cbranch_scc0 .LBB3_2
-; GCN-NEXT: ; %bb.1: ; %bb1
-; GCN-NEXT: v_mov_b32_e32 v0, 1
-; GCN-NEXT: s_mov_b32 s0, 0
-; GCN-NEXT: flat_store_dword v[0:1], v0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: .LBB3_2: ; %Flow
 ; GCN-NEXT: s_xor_b32 s0, s0, -1
 ; GCN-NEXT: s_and_b32 s0, s0, 1
 ; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc1 .LBB3_4
-; GCN-NEXT: ; %bb.3: ; %bb0
+; GCN-NEXT: s_cbranch_scc1 .LBB3_2
+; GCN-NEXT: ; %bb.1: ; %bb0
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: flat_store_dword v[0:1], v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: .LBB3_4: ; %UnifiedUnreachableBlock
+; GCN-NEXT: .LBB3_2: ; %bb1
+; GCN-NEXT: v_mov_b32_e32 v0, 1
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
 entry:
   %trunc = trunc i32 %cond to i1
   br i1 %trunc, label %bb0, label %bb1
@@ -90,27 +82,19 @@
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_and_b32 s1, s0, s1
-; GCN-NEXT: s_xor_b32 s1, s1, -1
-; GCN-NEXT: s_and_b32 s1, s1, 1
-; GCN-NEXT: s_mov_b32 s0, -1
-; GCN-NEXT: s_cmp_lg_u32 s1, 0
-; GCN-NEXT: s_cbranch_scc0 .LBB4_2
-; GCN-NEXT: ; %bb.1: ; %bb1
-; GCN-NEXT: v_mov_b32_e32 v0, 1
-; GCN-NEXT: s_mov_b32 s0, 0
-; GCN-NEXT: flat_store_dword v[0:1], v0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: .LBB4_2: ; %Flow
+; GCN-NEXT: s_and_b32 s0, s0, s1
 ; GCN-NEXT: s_xor_b32 s0, s0, -1
 ; GCN-NEXT: s_and_b32 s0, s0, 1
 ; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc1 .LBB4_4
-; GCN-NEXT: ; %bb.3: ; %bb0
+; GCN-NEXT: s_cbranch_scc1 .LBB4_2
+; GCN-NEXT: ; %bb.1: ; %bb0
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: flat_store_dword v[0:1], v0
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: .LBB4_4: ; %UnifiedUnreachableBlock
+; GCN-NEXT: .LBB4_2: ; %bb1
+; GCN-NEXT: v_mov_b32_e32 v0, 1
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
 entry:
   %trunc0 = trunc i32 %cond0 to i1
   %trunc1 = trunc i32 %cond1 to i1
diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
--- a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
+++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
@@ -112,16 +112,17 @@
 ; CHECK-NEXT: s_cbranch_vccnz .LBB0_22
 ; CHECK-NEXT: ; %bb.18: ; %loop.exit.guard5
 ; CHECK-NEXT: s_and_b64 vcc, exec, s[18:19]
-; CHECK-NEXT: s_cbranch_vccnz .LBB0_22
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_23
 ; CHECK-NEXT: ; %bb.19: ; %bb17
 ; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
 ; CHECK-NEXT: s_cbranch_vccz .LBB0_21
 ; CHECK-NEXT: ; %bb.20: ; %bb19
 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
 ; CHECK-NEXT: s_cbranch_vccz .LBB0_22
-; CHECK-NEXT: .LBB0_21: ; %bb21
+; CHECK-NEXT: .LBB0_21: ; %bb18
 ; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: .LBB0_22: ; %UnifiedUnreachableBlock
+; CHECK-NEXT: .LBB0_22: ; %bb20
+; CHECK-NEXT: .LBB0_23: ; %bb12
 bb:
   br label %bb6
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
+
+; A test with a divergent unreachable block and a uniform return block. The
+; compiler needs to create a region that includes both so that StructurizeCFG
+; correctly transforms the CFG, and SI Annotate Control Flow then does not
+; fail during annotation.
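+;
+; Rough shape of the CFG below, for orientation (derived from the IR and
+; its checks): the entry switch is on a value loaded from a constant
+; address, so it is uniform and the "ret void" in sw.bb is uniformly
+; reached. The later switches are on the divergent argument %0 and lead to
+; the two "unreachable" terminators in do.body and do.body.i.i.i.i. Since
+; at least one exit is divergently reached, every exit is funneled through
+; UnifiedUnreachableBlock and UnifiedReturnBlock, as the IR checks verify.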
+ +define void @my_func(i32 %0) { +; IR-LABEL: @my_func( +; IR-NEXT: entry: +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) null, align 8 +; IR-NEXT: br label [[NODEBLOCK:%.*]] +; IR: NodeBlock: +; IR-NEXT: [[PIVOT:%.*]] = icmp sge i32 [[TMP1]], 1 +; IR-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]] +; IR: LeafBlock1: +; IR-NEXT: [[SWITCHLEAF2:%.*]] = icmp ne i32 [[TMP1]], 1 +; IR-NEXT: br label [[FLOW]] +; IR: Flow: +; IR-NEXT: [[TMP2:%.*]] = phi i1 [ [[SWITCHLEAF2]], [[LEAFBLOCK1]] ], [ false, [[NODEBLOCK]] ] +; IR-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[LEAFBLOCK1]] ], [ true, [[NODEBLOCK]] ] +; IR-NEXT: br i1 [[TMP3]], label [[LEAFBLOCK:%.*]], label [[FLOW11:%.*]] +; IR: LeafBlock: +; IR-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP1]], 0 +; IR-NEXT: br i1 [[SWITCHLEAF]], label [[SW_BB2:%.*]], label [[FLOW12:%.*]] +; IR: Flow11: +; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW12]] ], [ false, [[FLOW]] ] +; IR-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW12]] ], [ [[TMP2]], [[FLOW]] ] +; IR-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]]) +; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0 +; IR-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1 +; IR-NEXT: br i1 [[TMP7]], label [[DO_BODY:%.*]], label [[FLOW17:%.*]] +; IR: sw.bb2: +; IR-NEXT: br label [[NODEBLOCK7:%.*]] +; IR: Flow12: +; IR-NEXT: [[TMP9]] = phi i1 [ [[TMP24:%.*]], [[FLOW15:%.*]] ], [ false, [[LEAFBLOCK]] ] +; IR-NEXT: [[TMP10]] = phi i1 [ [[TMP25:%.*]], [[FLOW15]] ], [ true, [[LEAFBLOCK]] ] +; IR-NEXT: br label [[FLOW11]] +; IR: NodeBlock7: +; IR-NEXT: [[PIVOT8:%.*]] = icmp sge i32 [[TMP0:%.*]], 2 +; IR-NEXT: [[TMP11:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[PIVOT8]]) +; IR-NEXT: [[TMP12:%.*]] = extractvalue { i1, i64 } [[TMP11]], 0 +; IR-NEXT: [[TMP13:%.*]] = extractvalue { i1, i64 } [[TMP11]], 1 +; IR-NEXT: br i1 [[TMP12]], label [[LEAFBLOCK5:%.*]], label [[FLOW13:%.*]] +; IR: LeafBlock5: +; IR-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i32 [[TMP0]], 2 +; IR-NEXT: br label [[FLOW13]] +; IR: Flow13: +; IR-NEXT: [[TMP14:%.*]] = phi i1 [ true, [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ] +; IR-NEXT: [[TMP15:%.*]] = phi i1 [ [[SWITCHLEAF6]], [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ] +; IR-NEXT: [[TMP16:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP13]]) +; IR-NEXT: [[TMP17:%.*]] = extractvalue { i1, i64 } [[TMP16]], 0 +; IR-NEXT: [[TMP18:%.*]] = extractvalue { i1, i64 } [[TMP16]], 1 +; IR-NEXT: br i1 [[TMP17]], label [[LEAFBLOCK3:%.*]], label [[FLOW14:%.*]] +; IR: LeafBlock3: +; IR-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 0 +; IR-NEXT: [[SWITCHLEAF4_INV:%.*]] = xor i1 [[SWITCHLEAF4]], true +; IR-NEXT: br label [[FLOW14]] +; IR: Flow14: +; IR-NEXT: [[TMP19:%.*]] = phi i1 [ [[SWITCHLEAF4_INV]], [[LEAFBLOCK3]] ], [ [[TMP14]], [[FLOW13]] ] +; IR-NEXT: [[TMP20:%.*]] = phi i1 [ [[SWITCHLEAF4]], [[LEAFBLOCK3]] ], [ [[TMP15]], [[FLOW13]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP18]]) +; IR-NEXT: [[TMP21:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP20]]) +; IR-NEXT: [[TMP22:%.*]] = extractvalue { i1, i64 } [[TMP21]], 0 +; IR-NEXT: [[TMP23:%.*]] = extractvalue { i1, i64 } [[TMP21]], 1 +; IR-NEXT: br i1 [[TMP22]], label [[LAND_LHS_TRUE_I:%.*]], label [[FLOW15]] +; IR: land.lhs.true.i: +; IR-NEXT: br label [[LEAFBLOCK9:%.*]] +; IR: Flow15: +; IR-NEXT: [[TMP24]] = phi i1 [ [[TMP29:%.*]], [[FLOW16:%.*]] ], [ false, [[FLOW14]] ] +; IR-NEXT: [[TMP25]] = phi i1 [ [[TMP30:%.*]], [[FLOW16]] ], [ [[TMP19]], 
[[FLOW14]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP23]]) +; IR-NEXT: br label [[FLOW12]] +; IR: LeafBlock9: +; IR-NEXT: [[SWITCHLEAF10:%.*]] = icmp sgt i32 [[TMP0]], 1 +; IR-NEXT: [[TMP26:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[SWITCHLEAF10]]) +; IR-NEXT: [[TMP27:%.*]] = extractvalue { i1, i64 } [[TMP26]], 0 +; IR-NEXT: [[TMP28:%.*]] = extractvalue { i1, i64 } [[TMP26]], 1 +; IR-NEXT: br i1 [[TMP27]], label [[DO_BODY_I_I_I_I:%.*]], label [[FLOW16]] +; IR: do.body.i.i.i.i: +; IR-NEXT: tail call fastcc void null() +; IR-NEXT: br label [[FLOW16]] +; IR: Flow16: +; IR-NEXT: [[TMP29]] = phi i1 [ true, [[DO_BODY_I_I_I_I]] ], [ false, [[LEAFBLOCK9]] ] +; IR-NEXT: [[TMP30]] = phi i1 [ false, [[DO_BODY_I_I_I_I]] ], [ true, [[LEAFBLOCK9]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP28]]) +; IR-NEXT: br label [[FLOW15]] +; IR: do.body: +; IR-NEXT: tail call fastcc void null() +; IR-NEXT: br label [[FLOW17]] +; IR: Flow17: +; IR-NEXT: [[TMP31:%.*]] = phi i1 [ true, [[DO_BODY]] ], [ [[TMP4]], [[FLOW11]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]]) +; IR-NEXT: [[TMP32:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP31]]) +; IR-NEXT: [[TMP33:%.*]] = extractvalue { i1, i64 } [[TMP32]], 0 +; IR-NEXT: [[TMP34:%.*]] = extractvalue { i1, i64 } [[TMP32]], 1 +; IR-NEXT: br i1 [[TMP33]], label [[UNIFIEDUNREACHABLEBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] +; IR: UnifiedUnreachableBlock: +; IR-NEXT: call void @llvm.amdgcn.unreachable() +; IR-NEXT: br label [[UNIFIEDRETURNBLOCK]] +; IR: UnifiedReturnBlock: +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP34]]) +; IR-NEXT: ret void +; +; GCN-LABEL: my_func: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_load_dword s10, s[4:5], 0x0 +; GCN-NEXT: s_mov_b64 s[8:9], -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lt_i32 s10, 1 +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_cbranch_scc1 .LBB0_7 +; GCN-NEXT: ; %bb.1: ; %LeafBlock1 +; GCN-NEXT: s_cmp_lg_u32 s10, 1 +; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-NEXT: s_mov_b64 vcc, exec +; GCN-NEXT: s_cbranch_execz .LBB0_8 +; GCN-NEXT: .LBB0_2: ; %Flow11 +; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] +; GCN-NEXT: .LBB0_3: ; %do.body +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec +; GCN-NEXT: .LBB0_4: ; %Flow17 +; GCN-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-NEXT: ; %bb.5: ; %UnifiedUnreachableBlock +; GCN-NEXT: ; divergent unreachable +; GCN-NEXT: ; %bb.6: ; %UnifiedReturnBlock +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: .LBB0_7: ; %Flow +; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; GCN-NEXT: s_cbranch_vccnz .LBB0_2 +; GCN-NEXT: .LBB0_8: ; %LeafBlock +; GCN-NEXT: s_cmp_eq_u32 s10, 0 +; GCN-NEXT: s_cbranch_scc1 .LBB0_10 +; GCN-NEXT: ; %bb.9: +; GCN-NEXT: s_mov_b64 s[6:7], -1 +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] +; GCN-NEXT: s_cbranch_execnz .LBB0_3 +; GCN-NEXT: s_branch .LBB0_4 +; GCN-NEXT: .LBB0_10: ; %NodeBlock7 +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0 +; GCN-NEXT: s_mov_b64 s[8:9], 0 +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GCN-NEXT: ; %bb.11: ; %LeafBlock5 +; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: ; %bb.12: ; %Flow13 +; GCN-NEXT: s_andn2_saveexec_b64 s[10:11], s[4:5] 
+; GCN-NEXT: ; %bb.13: ; %LeafBlock3 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec +; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[12:13], vcc, exec +; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5] +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GCN-NEXT: ; %bb.14: ; %Flow14 +; GCN-NEXT: s_or_b64 exec, exec, s[10:11] +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[8:9] +; GCN-NEXT: s_cbranch_execz .LBB0_18 +; GCN-NEXT: ; %bb.15: ; %LeafBlock9 +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0 +; GCN-NEXT: s_mov_b64 s[8:9], -1 +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: s_xor_b64 s[8:9], exec, -1 +; GCN-NEXT: ; %bb.17: ; %Flow16 +; GCN-NEXT: s_or_b64 exec, exec, s[12:13] +; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec +; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GCN-NEXT: .LBB0_18: ; %Flow15 +; GCN-NEXT: s_or_b64 exec, exec, s[10:11] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] +; GCN-NEXT: s_cbranch_execnz .LBB0_3 +; GCN-NEXT: s_branch .LBB0_4 +entry: + %1 = load i32, ptr addrspace(4) null, align 8 + switch i32 %1, label %do.body [ + i32 1, label %sw.bb + i32 0, label %sw.bb2 + ] + +sw.bb: + ret void + +sw.bb2: + switch i32 %0, label %do.body [ + i32 0, label %land.lhs.true.i + i32 2, label %land.lhs.true.i + ] + +land.lhs.true.i: + switch i32 %0, label %do.body.i.i.i.i [ + i32 0, label %do.body + i32 1, label %do.body + ] + +do.body.i.i.i.i: + tail call fastcc void null() + unreachable + +do.body: + tail call fastcc void null() + unreachable + +} diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1292,45 +1292,40 @@ ; SI-LABEL: no_skip_no_successors: ; SI: ; %bb.0: ; %bb ; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 -; SI-NEXT: s_mov_b64 s[2:3], exec -; SI-NEXT: s_mov_b64 s[0:1], -1 ; SI-NEXT: s_and_b64 vcc, exec, s[4:5] ; SI-NEXT: s_cbranch_vccz .LBB12_3 -; SI-NEXT: ; %bb.1: ; %Flow -; SI-NEXT: s_and_b64 vcc, exec, s[0:1] -; SI-NEXT: s_cbranch_vccnz .LBB12_4 -; SI-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock -; SI-NEXT: .LBB12_3: ; %bb3 -; SI-NEXT: s_branch .LBB12_2 -; SI-NEXT: .LBB12_4: ; %bb6 +; SI-NEXT: ; %bb.1: ; %bb6 +; SI-NEXT: s_mov_b64 s[2:3], exec ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; SI-NEXT: s_cbranch_scc0 .LBB12_6 -; SI-NEXT: ; %bb.5: ; %bb6 +; SI-NEXT: s_cbranch_scc0 .LBB12_5 +; SI-NEXT: ; %bb.2: ; %bb6 ; SI-NEXT: s_mov_b64 exec, 0 -; SI-NEXT: .LBB12_6: +; SI-NEXT: .LBB12_3: ; %bb3 +; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 +; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 +; SI-NEXT: s_and_b64 vcc, exec, vcc +; SI-NEXT: ; %bb.4: ; %bb5 +; SI-NEXT: .LBB12_5: ; SI-NEXT: s_mov_b64 exec, 0 ; SI-NEXT: exp null off, off, off, off done vm ; SI-NEXT: s_endpgm ; ; GFX10-WAVE64-LABEL: no_skip_no_successors: ; GFX10-WAVE64: ; %bb.0: ; %bb -; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0 -; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec -; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], -1 +; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 +; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5] ; GFX10-WAVE64-NEXT: s_cbranch_vccz 
.LBB12_3 -; GFX10-WAVE64-NEXT: ; %bb.1: ; %Flow -; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB12_4 -; GFX10-WAVE64-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock -; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 -; GFX10-WAVE64-NEXT: s_branch .LBB12_2 -; GFX10-WAVE64-NEXT: .LBB12_4: ; %bb6 +; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 +; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_6 -; GFX10-WAVE64-NEXT: ; %bb.5: ; %bb6 +; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 +; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 -; GFX10-WAVE64-NEXT: .LBB12_6: +; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 +; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 +; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 +; GFX10-WAVE64-NEXT: .LBB12_5: ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm ; GFX10-WAVE64-NEXT: s_endpgm @@ -1338,46 +1333,42 @@ ; GFX10-WAVE32-LABEL: no_skip_no_successors: ; GFX10-WAVE32: ; %bb.0: ; %bb ; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 -; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 -; GFX10-WAVE32-NEXT: s_mov_b32 s1, -1 ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 -; GFX10-WAVE32-NEXT: ; %bb.1: ; %Flow -; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 -; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB12_4 -; GFX10-WAVE32-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock -; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 -; GFX10-WAVE32-NEXT: s_branch .LBB12_2 -; GFX10-WAVE32-NEXT: .LBB12_4: ; %bb6 -; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo -; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_6 -; GFX10-WAVE32-NEXT: ; %bb.5: ; %bb6 +; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 +; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo +; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo +; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 +; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 -; GFX10-WAVE32-NEXT: .LBB12_6: +; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 +; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 +; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 +; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 +; GFX10-WAVE32-NEXT: .LBB12_5: ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm ; GFX10-WAVE32-NEXT: s_endpgm ; ; GFX11-LABEL: no_skip_no_successors: ; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0 -; GFX11-NEXT: s_mov_b64 s[2:3], exec +; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX11-NEXT: s_mov_b64 s[0:1], -1 +; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5] ; GFX11-NEXT: s_cbranch_vccz .LBB12_3 -; GFX11-NEXT: ; %bb.1: ; %Flow -; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX11-NEXT: s_cbranch_vccnz .LBB12_4 -; GFX11-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock -; GFX11-NEXT: .LBB12_3: ; %bb3 -; GFX11-NEXT: s_branch .LBB12_2 -; GFX11-NEXT: .LBB12_4: ; %bb6 +; GFX11-NEXT: ; %bb.1: ; %bb6 +; GFX11-NEXT: s_mov_b64 s[2:3], exec +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec -; GFX11-NEXT: s_cbranch_scc0 .LBB12_6 -; GFX11-NEXT: ; %bb.5: ; %bb6 +; GFX11-NEXT: s_cbranch_scc0 .LBB12_5 +; GFX11-NEXT: ; %bb.2: ; %bb6 ; GFX11-NEXT: s_mov_b64 exec, 0 -; GFX11-NEXT: .LBB12_6: +; GFX11-NEXT: .LBB12_3: ; %bb3 +; GFX11-NEXT: 
v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX11-NEXT: ; %bb.4: ; %bb5 +; GFX11-NEXT: .LBB12_5: ; GFX11-NEXT: s_mov_b64 exec, 0 ; GFX11-NEXT: exp mrt0 off, off, off, off done ; GFX11-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -7,57 +7,135 @@ ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s33, 16 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_mov_b32_e32 v41, v31 +; GCN-NEXT: s_mov_b32 s42, s15 +; GCN-NEXT: s_mov_b32 s43, s14 +; GCN-NEXT: s_mov_b32 s44, s13 +; GCN-NEXT: s_mov_b32 s45, s12 +; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_mov_b64 s[8:9], -1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: s_cbranch_vccz .LBB0_3 -; GCN-NEXT: ; %bb.1: ; %bb4 -; GCN-NEXT: s_cmp_lg_u32 s16, 9 -; GCN-NEXT: s_cbranch_scc1 .LBB0_4 -; GCN-NEXT: ; %bb.2: ; %bb7 -; GCN-NEXT: s_getpc_b64 s[16:17] -; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: s_branch .LBB0_7 -; GCN-NEXT: .LBB0_3: ; %bb2 -; GCN-NEXT: s_cmp_eq_u32 s16, 21 -; GCN-NEXT: s_cbranch_scc1 .LBB0_6 -; GCN-NEXT: .LBB0_4: ; %bb9 +; GCN-NEXT: s_mov_b64 s[46:47], 0 +; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_cbranch_vccz .LBB0_9 +; GCN-NEXT: ; %bb.1: ; %Flow +; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; GCN-NEXT: s_cbranch_vccz .LBB0_10 +; GCN-NEXT: .LBB0_2: ; %Flow1 +; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-NEXT: s_cbranch_vccnz .LBB0_4 +; GCN-NEXT: .LBB0_3: ; %bb9 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 +; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN-NEXT: 
s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-NEXT: s_cbranch_execnz .LBB0_7 -; GCN-NEXT: ; %bb.5: ; %bb9.bb12_crit_edge -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-NEXT: .LBB0_6: ; %bb12 +; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_andn2_b64 s[6:7], s[46:47], exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[46:47], s[6:7], s[8:9] +; GCN-NEXT: .LBB0_4: ; %Flow2 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[46:47] +; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GCN-NEXT: s_cbranch_execz .LBB0_6 +; GCN-NEXT: ; %bb.5: ; %bb12 ; GCN-NEXT: v_mov_b32_e32 v2, 0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 -; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock +; GCN-NEXT: .LBB0_6: ; %Flow3 +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-NEXT: s_cbranch_vccnz .LBB0_8 +; GCN-NEXT: ; %bb.7: ; %bb7 +; GCN-NEXT: s_getpc_b64 s[16:17] +; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 +; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN-NEXT: s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: v_mov_b32_e32 v31, v41 +; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 16 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: .LBB0_9: ; %bb2 +; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 21, v0 +; GCN-NEXT: s_mov_b64 vcc, exec +; GCN-NEXT: s_cbranch_execnz .LBB0_2 +; GCN-NEXT: .LBB0_10: ; %bb4 +; GCN-NEXT: s_mov_b64 s[4:5], -1 +; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 9, v0 +; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-NEXT: s_cbranch_vccz .LBB0_3 +; GCN-NEXT: s_branch .LBB0_4 ; SI-OPT-LABEL: @widget( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16