diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
@@ -246,6 +247,7 @@
 
   SmallVector<RegionNode *, 8> Order;
   BBSet Visited;
+  BBSet FlowSet;
 
   SmallVector<WeakVH, 8> AffectedPhis;
   BBPhiMap DeletedPhis;
@@ -643,6 +645,8 @@
     if (!DeletedPhis.count(To))
       continue;
 
+    SmallVector<BasicBlock *> UndefBlks;
+    bool CachedUndefs = false;
     PhiMap &Map = DeletedPhis[To];
     for (const auto &PI : Map) {
       PHINode *Phi = PI.first;
@@ -651,15 +655,51 @@
       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
       Updater.AddAvailableValue(To, Undef);
 
-      NearestCommonDominator Dominator(DT);
-      Dominator.addBlock(To);
+      SmallSet<BasicBlock *, 8> Incomings;
       for (const auto &VI : PI.second) {
+        Incomings.insert(VI.first);
         Updater.AddAvailableValue(VI.first, VI.second);
-        Dominator.addAndRememberBlock(VI.first);
       }
 
-      if (!Dominator.resultIsRememberedBlock())
-        Updater.AddAvailableValue(Dominator.result(), Undef);
+      if (!CachedUndefs) {
+        // Build the undef blocks cache.
+        SmallSet<BasicBlock *, 8> VisitedBlock;
+        SmallVector<BasicBlock *, 8> Stack;
+        if (To == ParentRegion->getExit()) {
+          for (auto P : predecessors(To)) {
+            if (ParentRegion->contains(P))
+              Stack.push_back(P);
+          }
+        } else {
+          append_range(Stack, predecessors(To));
+        }
+
+        while (!Stack.empty() && !Incomings.empty()) {
+          BasicBlock *Current = Stack.pop_back_val();
+          if (VisitedBlock.contains(Current))
+            continue;
+
+          VisitedBlock.insert(Current);
+          if (!FlowSet.contains(Current)) {
+            if (Incomings.contains(Current)) {
+              Incomings.erase(Current);
+            } else {
+              // If this is not flow node and not the incoming block before
+              // structurization, then this block does not contribute any value
+              // to the phi.
+              UndefBlks.push_back(Current);
+            }
+            continue;
+          }
+
+          for (auto P : predecessors(Current))
+            Stack.push_back(P);
+        }
+        CachedUndefs = true;
+      }
+
+      for (auto UB : UndefBlks)
+        Updater.AddAvailableValue(UB, Undef);
 
       for (BasicBlock *FI : From)
         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
@@ -759,6 +799,7 @@
                        Order.back()->getEntry();
   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
                                         Func, Insert);
+  FlowSet.insert(Flow);
   DT->addNewBlock(Flow, Dominator);
   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
   return Flow;
@@ -1103,6 +1144,7 @@
   Loops.clear();
   LoopPreds.clear();
   LoopConds.clear();
+  FlowSet.clear();
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -9,34 +9,32 @@
 ; OPT-NEXT:  main_body:
 ; OPT-NEXT:    br label [[LOOP_OUTER:%.*]]
 ; OPT:       LOOP.outer:
-; OPT-NEXT:    [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP10:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
-; OPT-NEXT:    [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP4:%.*]], [[FLOW1]] ]
+; OPT-NEXT:    [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
+; OPT-NEXT:    [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP3:%.*]], [[FLOW1]] ]
 ; OPT-NEXT:    br label [[LOOP:%.*]]
 ; OPT:       LOOP:
-; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
-; OPT-NEXT:    [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP4]], [[FLOW]] ]
-; OPT-NEXT:    [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP5:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
+; OPT-NEXT:    [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP3]], [[FLOW]] ]
 ; OPT-NEXT:    [[TMP48:%.*]] = icmp slt i32 [[TMP45]], [[UB:%.*]]
-; OPT-NEXT:    [[TMP1:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
-; OPT-NEXT:    [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP1]], 0
-; OPT-NEXT:    [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP1]], 1
-; OPT-NEXT:    br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
+; OPT-NEXT:    [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
+; OPT-NEXT:    [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
+; OPT-NEXT:    [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
+; OPT-NEXT:    br i1 [[TMP1]], label [[ENDIF:%.*]], label [[FLOW]]
 ; OPT:       Flow:
-; OPT-NEXT:    [[TMP4]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ]
-; OPT-NEXT:    [[TMP5]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ undef, [[LOOP]] ]
-; OPT-NEXT:    [[TMP6:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
-; OPT-NEXT:    [[TMP7:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
-; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
-; OPT-NEXT:    [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP7]], i64 [[PHI_BROKEN]])
-; OPT-NEXT:    [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
-; OPT-NEXT:    [[TMP10]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN2]])
-; OPT-NEXT:    br i1 [[TMP9]], label [[FLOW1]], label [[LOOP]]
+; OPT-NEXT:    [[TMP3]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ undef, [[LOOP]] ]
+; OPT-NEXT:    [[TMP4:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
+; OPT-NEXT:    [[TMP5:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
+; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
+; OPT-NEXT:    [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
+; OPT-NEXT:    [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
+; OPT-NEXT:    [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
+; OPT-NEXT:    br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]]
 ; OPT:       Flow1:
-; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
-; OPT-NEXT:    [[TMP11:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP10]])
-; OPT-NEXT:    br i1 [[TMP11]], label [[IF:%.*]], label [[LOOP_OUTER]]
+; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
+; OPT-NEXT:    [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
+; OPT-NEXT:    br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]]
 ; OPT:       IF:
-; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
+; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
 ; OPT-NEXT:    ret void
 ; OPT:       ENDIF:
 ; OPT-NEXT:    [[TMP47]] = add i32 [[TMP45]], 1
@@ -156,7 +154,7 @@
 ; OPT-NEXT:    [[CMP2]] = icmp sge i32 [[TMP]], [[LOAD2]]
 ; OPT-NEXT:    br label [[FLOW3]]
 ; OPT:       Flow5:
-; OPT-NEXT:    [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
+; OPT-NEXT:    [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ undef, [[LEAFBLOCK]] ]
 ; OPT-NEXT:    [[TMP10]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP7]], [[LEAFBLOCK]] ]
 ; OPT-NEXT:    br label [[FLOW4]]
 ; OPT:       bb9:
diff --git a/llvm/test/CodeGen/AMDGPU/while-break.ll b/llvm/test/CodeGen/AMDGPU/while-break.ll
--- a/llvm/test/CodeGen/AMDGPU/while-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/while-break.ll
@@ -10,7 +10,6 @@
 ; GCN-NEXT:  .LBB0_1: ; %Flow2
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, v5
 ; GCN-NEXT:    s_and_b32 s2, exec_lo, s3
 ; GCN-NEXT:    s_or_b32 s0, s2, s0
 ; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
@@ -20,22 +19,18 @@
 ; GCN-NEXT:    s_add_i32 s1, s1, 1
 ; GCN-NEXT:    s_mov_b32 s2, 0
 ; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s1, v2
-; GCN-NEXT:    ; implicit-def: $vgpr4
 ; GCN-NEXT:    s_and_saveexec_b32 s3, vcc_lo
 ; GCN-NEXT:    s_xor_b32 s3, exec_lo, s3
 ; GCN-NEXT:  ; %bb.3: ; %else
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v3
-; GCN-NEXT:    v_mov_b32_e32 v4, v1
 ; GCN-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
 ; GCN-NEXT:  ; %bb.4: ; %Flow
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_or_saveexec_b32 s3, s3
-; GCN-NEXT:    v_mov_b32_e32 v5, v4
-; GCN-NEXT:    s_xor_b32 exec_lo, exec_lo, s3
+; GCN-NEXT:    s_andn2_saveexec_b32 s3, s3
 ; GCN-NEXT:  ; %bb.5: ; %if
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    v_add_f32_e32 v5, 1.0, v1
+; GCN-NEXT:    v_add_f32_e32 v1, 1.0, v1
 ; GCN-NEXT:    s_or_b32 s2, s2, exec_lo
 ; GCN-NEXT:  ; %bb.6: ; %Flow1
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
@@ -46,12 +41,11 @@
 ; GCN-NEXT:  ; %bb.7: ; %latch
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v0
-; GCN-NEXT:    v_mov_b32_e32 v4, v5
 ; GCN-NEXT:    s_orn2_b32 s3, vcc_lo, exec_lo
 ; GCN-NEXT:    s_branch .LBB0_1
 ; GCN-NEXT:  .LBB0_8: ; %end
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GCN-NEXT:    v_mov_b32_e32 v0, v4
+; GCN-NEXT:    v_mov_b32_e32 v0, v1
 ; GCN-NEXT:    ; return to shader part epilog
 entry:
   br label %header
diff --git a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
--- a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
+++ b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
@@ -68,7 +68,7 @@
 ; CHECK:       cond.end61:
 ; CHECK-NEXT:    br label [[FLOW7]]
 ; CHECK:       Flow14:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW15:%.*]] ], [ [[TMP17:%.*]], [[LOOP_EXIT_GUARD1]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW15:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ]
 ; CHECK-NEXT:    [[TMP16:%.*]] = phi i1 [ [[TMP21:%.*]], [[FLOW15]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ]
 ; CHECK-NEXT:    br label [[FLOW13:%.*]]
 ; CHECK:       if.then69:
@@ -102,7 +102,7 @@
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ; CHECK:       Flow12:
-; CHECK-NEXT:    [[TMP17]] = phi i1 [ true, [[LOR_RHS]] ], [ undef, [[WHILE_COND]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i1 [ true, [[LOR_RHS]] ], [ undef, [[WHILE_COND]] ]
 ; CHECK-NEXT:    [[TMP18:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ]
 ; CHECK-NEXT:    [[TMP19:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[IRR_GUARD]], label [[FLOW13]]