Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
===================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements a pass that transforms irreducible control flow
-/// into reducible control flow. Irreducible control flow means multiple-entry
+/// This file implements a pass that transforms irreducible control flow into
+/// reducible control flow. Irreducible control flow means multiple-entry
 /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
 /// due to being unnatural.
 ///
@@ -17,12 +17,36 @@
 /// it linearizes control flow, turning diamonds into two triangles, which is
 /// both unnecessary and undesirable for WebAssembly.
 ///
-/// TODO: The transformation implemented here handles all irreducible control
-/// flow, without exponential code-size expansion, though it does so by creating
-/// inefficient code in many cases. Ideally, we should add other
-/// transformations, including code-duplicating cases, which can be more
-/// efficient in common cases, and they can fall back to this conservative
-/// implementation as needed.
+/// The big picture: Ignoring natural loops (seeing them monolithically), we
+/// find all the blocks which can return to themselves ("loopers"). Loopers
+/// reachable from the non-loopers are loop entries: if there are 2 or more,
+/// then we have irreducible control flow. We fix that as follows: a new block
+/// is created that can dispatch to each of the loop entries, based on the
+/// value of a label "helper" variable, and we replace direct branches to the
+/// entries with assignments to the label variable and a branch to the dispatch
+/// block. Then the dispatch block is the single entry in a new natural loop.
+///
+/// This is similar to what the Relooper [1] does, both identify looping code
+/// that requires multiple entries, and resolve it in a similar way. In
+/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// also that like the Relooper, we implement a "minimal" intervention: we only
+/// use the "label" helper for the blocks we absolutely must and no others. We
+/// also prioritize code size and do not perform node splitting (i.e. we don't
+/// duplicate code in order to resolve irreducibility).
+///
+/// The difference between this code and the Relooper is that the Relooper also
+/// generates ifs and loops and works in a recursive manner, knowing at each
+/// point what the entries are, and recursively breaks down the problem. Here
+/// we just want to resolve irreducible control flow, and we also want to use
+/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
+/// identify natural loops, etc., and we start with the whole CFG and must
+/// identify both the looping code and its entries.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object oriented
+/// programming systems languages and applications companion (SPLASH '11). ACM,
+/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
 ///
 //===----------------------------------------------------------------------===//
 
@@ -46,141 +70,192 @@
 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
 
 namespace {
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
-  StringRef getPassName() const override {
-    return "WebAssembly Fix Irreducible Control Flow";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<MachineDominatorTree>();
-    AU.addPreserved<MachineDominatorTree>();
-    AU.addRequired<MachineLoopInfo>();
-    AU.addPreserved<MachineLoopInfo>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop);
 
+class LoopFixer {
 public:
-  static char ID; // Pass identification, replacement for typeid
-  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
-} // end anonymous namespace
+  LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
+      : MF(MF), MLI(MLI), Loop(Loop) {}
 
-char WebAssemblyFixIrreducibleControlFlow::ID = 0;
-INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
-                "Removes irreducible control flow", false, false)
+  // Run the fixer on the given inputs. Returns whether changes were made.
+  bool run();
 
-FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
-  return new WebAssemblyFixIrreducibleControlFlow();
-}
-
-namespace {
-
-/// A utility for walking the blocks of a loop, handling a nested inner
-/// loop as a monolithic conceptual block.
-class MetaBlock {
-  MachineBasicBlock *Block;
-  SmallVector<MachineBasicBlock *, 2> Preds;
-  SmallVector<MachineBasicBlock *, 2> Succs;
-
-public:
-  explicit MetaBlock(MachineBasicBlock *MBB)
-      : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()),
-        Succs(MBB->succ_begin(), MBB->succ_end()) {}
-
-  explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) {
-    Loop->getExitBlocks(Succs);
-    for (MachineBasicBlock *Pred : Block->predecessors())
-      if (!Loop->contains(Pred))
-        Preds.push_back(Pred);
+private:
+  MachineFunction &MF;
+  MachineLoopInfo &MLI;
+  MachineLoop *Loop;
+
+  MachineBasicBlock *Header;
+  SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+
+  using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+  DenseMap<MachineBasicBlock *, BlockSet> Reachable;
+
+  // The worklist contains pairs of recent additions, (a, b), where we just
+  // added a link a => b.
+  using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+  SmallVector<BlockPair, 4> WorkList;
+
+  // Get a canonical block to represent a block or a loop: the block, or if in
+  // an inner loop, the loop header, of it in an outer loop scope, we can
+  // ignore it. We need to call this on all blocks we work on.
+  MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
+    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+    if (InnerLoop == Loop) {
+      return MBB;
+    } else {
+      // This is either in an outer or an inner loop, and not in ours.
+      if (!LoopBlocks.count(MBB)) {
+        // It's in outer code, ignore it.
+        return nullptr;
+      }
+      assert(InnerLoop);
+      // It's in an inner loop, canonicalize it to the header of that loop.
+      return InnerLoop->getHeader();
+    }
   }
 
-  MachineBasicBlock *getBlock() const { return Block; }
-
-  const SmallVectorImpl<MachineBasicBlock *> &predecessors() const {
-    return Preds;
-  }
-  const SmallVectorImpl<MachineBasicBlock *> &successors() const {
-    return Succs;
+  // For a successor we can additionally ignore it if it's a branch back to a
+  // natural loop top, as when we are in the scope of a loop, we just care
+  // about internal irreducibility, and can ignore the loop we are in. We need
+  // to call this on all blocks in a context where they are a successor.
+  MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
+    if (Loop && MBB == Loop->getHeader()) {
+      // Ignore branches going to the loop's natural header.
+      return nullptr;
+    }
+    return canonicalize(MBB);
   }
 
-  bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; }
-  bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; }
+  // Potentially insert a new reachable edge, and if so, note it as further
+  // work.
+  void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+    assert(MBB == canonicalize(MBB));
+    assert(Succ);
+    // Succ may not be interesting as a sucessor.
+    Succ = canonicalizeSuccessor(Succ);
+    if (!Succ)
+      return;
+    if (Reachable[MBB].insert(Succ).second) {
+      // For there to be further work, it means that we have
+      //   X => MBB => Succ
+      // for some other X, and in that case X => Succ would be a new edge for
+      // us to discover later. However, if we don't care about MBB as a
+      // successor, then we don't care about that anyhow.
+      if (canonicalizeSuccessor(MBB)) {
+        WorkList.emplace_back(MBB, Succ);
+      }
+    }
+  }
 };
 
-class SuccessorList final : public MetaBlock {
-  size_t Index;
-  size_t Num;
-
-public:
-  explicit SuccessorList(MachineBasicBlock *MBB)
-      : MetaBlock(MBB), Index(0), Num(successors().size()) {}
+bool LoopFixer::run() {
+  Header = Loop ? Loop->getHeader() : &*MF.begin();
 
-  explicit SuccessorList(MachineLoop *Loop)
-      : MetaBlock(Loop), Index(0), Num(successors().size()) {}
+  // Identify all the blocks in this loop scope.
+  if (Loop) {
+    for (auto *MBB : Loop->getBlocks()) {
+      LoopBlocks.insert(MBB);
+    }
+  } else {
+    for (auto &MBB : MF) {
+      LoopBlocks.insert(&MBB);
+    }
+  }
 
-  bool HasNext() const { return Index != Num; }
+  // Compute which (canonicalized) blocks each block can reach.
 
-  MachineBasicBlock *Next() {
-    assert(HasNext());
-    return successors()[Index++];
+  // Add all the initial work.
+  for (auto *MBB : LoopBlocks) {
+    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+
+    if (InnerLoop == Loop) {
+      for (auto *Succ : MBB->successors()) {
+        maybeInsert(MBB, Succ);
+      }
+    } else {
+      // It can't be in an outer loop - we loop on LoopBlocks - and so it must
+      // be an inner loop.
+      assert(InnerLoop);
+      // Check if we are the canonical block for this loop.
+      if (canonicalize(MBB) != MBB) {
+        continue;
+      }
+      // The successors are those of the loop.
+      SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+      InnerLoop->getExitBlocks(ExitBlocks);
+      for (auto *Succ : ExitBlocks) {
+        maybeInsert(MBB, Succ);
+      }
+    }
   }
-};
 
-} // end anonymous namespace
+  // Do work until we are all done.
+  while (!WorkList.empty()) {
+    MachineBasicBlock *MBB;
+    MachineBasicBlock *Succ;
+    std::tie(MBB, Succ) = WorkList.pop_back_val();
+    // The worklist item is an edge we just added, so it must have valid blocks
+    // (and not something canonicalized to nullptr).
+    assert(MBB);
+    assert(Succ);
+    // The successor in that pair must also be a valid successor.
+    assert(MBB == canonicalizeSuccessor(MBB));
+    // We recently added MBB => Succ, and that means we may have enabled
+    // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
+    // is correct for both a block and a block representing a loop, as the loop
+    // is natural and so the predecessors are all predecessors of the loop
+    // header, which is the block we have here.
+    for (auto *Pred : MBB->predecessors()) {
+      // Canonicalize, make sure it's relevant, and check it's not the same
+      // block (an update to the block itself doesn't help compute that same
+      // block).
+      Pred = canonicalize(Pred);
+      if (Pred && Pred != MBB) {
+        maybeInsert(Pred, Succ);
+      }
+    }
+  }
 
-bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
-                                                     MachineLoopInfo &MLI,
-                                                     MachineLoop *Loop) {
-  MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
-  SetVector<MachineBasicBlock *> RewriteSuccs;
-
-  // DFS through Loop's body, looking for irreducible control flow. Loop is
-  // natural, and we stay in its body, and we treat any nested loops
-  // monolithically, so any cycles we encounter indicate irreducibility.
-  SmallPtrSet<MachineBasicBlock *, 8> OnStack;
-  SmallPtrSet<MachineBasicBlock *, 8> Visited;
-  SmallVector<SuccessorList, 4> LoopWorklist;
-  LoopWorklist.push_back(SuccessorList(Header));
-  OnStack.insert(Header);
-  Visited.insert(Header);
-  while (!LoopWorklist.empty()) {
-    SuccessorList &Top = LoopWorklist.back();
-    if (Top.HasNext()) {
-      MachineBasicBlock *Next = Top.Next();
-      if (Next == Header || (Loop && !Loop->contains(Next)))
-        continue;
-      if (LLVM_LIKELY(OnStack.insert(Next).second)) {
-        if (!Visited.insert(Next).second) {
-          OnStack.erase(Next);
-          continue;
-        }
-        MachineLoop *InnerLoop = MLI.getLoopFor(Next);
-        if (InnerLoop != Loop)
-          LoopWorklist.push_back(SuccessorList(InnerLoop));
-        else
-          LoopWorklist.push_back(SuccessorList(Next));
-      } else {
-        RewriteSuccs.insert(Top.getBlock());
+  // It's now trivial to identify the loopers.
+  SmallPtrSet<MachineBasicBlock *, 4> Loopers;
+  for (auto MBB : LoopBlocks) {
+    if (Reachable[MBB].count(MBB)) {
+      Loopers.insert(MBB);
+    }
+  }
+  // The header cannot be a looper. At the toplevel, LLVM does not allow the
+  // entry to be in a loop, and in a natural loop we should ignore the header.
+  assert(Loopers.count(Header) == 0);
+
+  // Find the entries, loopers reachable from non-loopers.
+  SmallPtrSet<MachineBasicBlock *, 4> Entries;
+  SmallVector<MachineBasicBlock *, 4> SortedEntries;
+  for (auto *Looper : Loopers) {
+    for (auto *Pred : Looper->predecessors()) {
+      Pred = canonicalize(Pred);
+      if (Pred && !Loopers.count(Pred)) {
+        Entries.insert(Looper);
+        SortedEntries.push_back(Looper);
+        break;
       }
-      continue;
     }
-    OnStack.erase(Top.getBlock());
-    LoopWorklist.pop_back();
   }
 
-  // Most likely, we didn't find any irreducible control flow.
-  if (LLVM_LIKELY(RewriteSuccs.empty()))
+  // Check if we found irreducible control flow.
+  if (LLVM_LIKELY(Entries.size() <= 1))
     return false;
 
-  LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n");
+  // Sort the entries to ensure a deterministic build.
+  llvm::sort(SortedEntries,
+             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+               auto ANum = A->getNumber();
+               auto BNum = B->getNumber();
+               assert(ANum != -1 && BNum != -1);
+               assert(ANum != BNum);
+               return ANum < BNum;
+             });
 
-  // Ok. We have irreducible control flow! Create a dispatch block which will
-  // contains a jump table to any block in the problematic set of blocks.
+  // Create a dispatch block which will contain a jump table to the entries.
   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
   MF.insert(MF.end(), Dispatch);
   MLI.changeLoopFor(Dispatch, Loop);
@@ -196,43 +271,43 @@
   unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
   MIB.addReg(Reg);
 
-  // Collect all the blocks which need to have their successors rewritten,
-  // add the successors to the jump table, and remember their index.
+  // Compute the indices in the superheader, one for each bad block, and
+  // add them as successors.
   DenseMap<MachineBasicBlock *, unsigned> Indices;
-  SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
-                                                   RewriteSuccs.end());
-  while (!SuccWorklist.empty()) {
-    MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
+  for (auto *MBB : SortedEntries) {
     auto Pair = Indices.insert(std::make_pair(MBB, 0));
-    if (!Pair.second)
+    if (!Pair.second) {
       continue;
+    }
 
     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
-    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index
-                      << "\n");
-
     Pair.first->second = Index;
-    for (auto Pred : MBB->predecessors())
-      RewriteSuccs.insert(Pred);
 
     MIB.addMBB(MBB);
     Dispatch->addSuccessor(MBB);
+  }
 
-    MetaBlock Meta(MBB);
-    for (auto *Succ : Meta.successors())
-      if (Succ != Header && (!Loop || Loop->contains(Succ)))
-        SuccWorklist.push_back(Succ);
+  // Rewrite the problematic successors for every block that wants to reach the
+  // bad blocks. For simplicity, we just introduce a new block for every edge
+  // we need to rewrite. (Fancier things are possible.)
+
+  SmallVector<MachineBasicBlock *, 4> AllPreds;
+  for (auto *MBB : SortedEntries) {
+    for (auto *Pred : MBB->predecessors()) {
+      if (Pred != Dispatch) {
+        AllPreds.push_back(Pred);
+      }
+    }
   }
 
-  // Rewrite the problematic successors for every block in RewriteSuccs.
-  // For simplicity, we just introduce a new block for every edge we need to
-  // rewrite. Fancier things are possible.
-  for (MachineBasicBlock *MBB : RewriteSuccs) {
+  for (MachineBasicBlock *MBB : AllPreds) {
     DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
     for (auto *Succ : MBB->successors()) {
-      if (!Indices.count(Succ))
+      if (!Entries.count(Succ)) {
         continue;
+      }
 
+      // This is a successor we need to rewrite.
       MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
       MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
                                              : MF.end(),
@@ -266,6 +341,55 @@
   return true;
 }
 
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly Fix Irreducible Control Flow";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
+    // Visit the function body, which is identified as a null loop.
+    if (LoopFixer(MF, MLI, nullptr).run()) {
+      return true;
+    }
+
+    // Visit all the loops.
+    SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
+    while (!Worklist.empty()) {
+      MachineLoop *Loop = Worklist.pop_back_val();
+      Worklist.append(Loop->begin(), Loop->end());
+      if (LoopFixer(MF, MLI, Loop).run()) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyFixIrreducibleControlFlow::ID = 0;
+INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
+                "Removes irreducible control flow", false, false)
+
+FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
+  return new WebAssemblyFixIrreducibleControlFlow();
+}
+
 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
     MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
@@ -275,24 +399,19 @@
   bool Changed = false;
   auto &MLI = getAnalysis<MachineLoopInfo>();
 
-  // Visit the function body, which is identified as a null loop.
-  Changed |= VisitLoop(MF, MLI, nullptr);
-
-  // Visit all the loops.
-  SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
-  while (!Worklist.empty()) {
-    MachineLoop *CurLoop = Worklist.pop_back_val();
-    Worklist.append(CurLoop->begin(), CurLoop->end());
-    Changed |= VisitLoop(MF, MLI, CurLoop);
-  }
-
-  // If we made any changes, completely recompute everything.
-  if (LLVM_UNLIKELY(Changed)) {
-    LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n");
+  // When we modify something, bail out and recompute MLI, then start again, as
+  // we create a new natural loop when we resolve irreducible control flow, and
+  // other loops may become nested in it, etc. In practice this is not an issue
+  // because irreducible control flow is rare, only very few cycles are needed
+  // here.
+  while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
+    // We rewrote part of the function; recompute MLI and start again.
+    LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
     MF.getRegInfo().invalidateLiveness();
     MF.RenumberBlocks();
     getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
     MLI.runOnMachineFunction(MF);
+    Changed = true;
   }
 
   return Changed;
Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @__gxx_personality_v0(...)
+
+; Check an interesting case of complex control flow due to exceptions CFG rewriting.
+; There should *not* be any irreducible control flow here.
+
+; CHECK-LABEL: crashy:
+; CHECK-NOT: br_table
+
+; Function Attrs: minsize noinline optsize
+define void @crashy() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void undef()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+  invoke void undef()
+          to label %invoke.cont4 unwind label %lpad3
+
+invoke.cont4: ; preds = %invoke.cont
+  %call.i82 = invoke i8* undef()
+          to label %invoke.cont6 unwind label %lpad3
+
+invoke.cont6: ; preds = %invoke.cont4
+  invoke void undef()
+          to label %invoke.cont13 unwind label %lpad12
+
+invoke.cont13: ; preds = %invoke.cont6
+  br label %for.cond
+
+for.cond: ; preds = %for.cond.backedge, %invoke.cont13
+  br i1 undef, label %exit2, label %land.lhs
+
+land.lhs: ; preds = %for.cond
+  %call.i.i.i.i92 = invoke i32 undef()
+          to label %exit1 unwind label %lpad16.loopexit
+
+exit1: ; preds = %land.lhs
+  br label %exit2
+
+exit2: ; preds = %exit1, %for.cond
+  %call.i.i12.i.i93 = invoke i32 undef()
+          to label %exit3 unwind label %lpad16.loopexit
+
+exit3: ; preds = %exit2
+  invoke void undef()
+          to label %invoke.cont23 unwind label %lpad22
+
+invoke.cont23: ; preds = %exit3
+  invoke void undef()
+          to label %invoke.cont25 unwind label %lpad22
+
+invoke.cont25: ; preds = %invoke.cont23
+  %call.i.i137 = invoke i32 undef()
+          to label %invoke.cont29 unwind label %lpad16.loopexit
+
+lpad: ; preds = %entry
+  %0 = landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+lpad3: ; preds = %invoke.cont4, %invoke.cont
+  %1 = landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+lpad12: ; preds = %invoke.cont6
+  %2 = landingpad { i8*, i32 }
+          cleanup
+  resume { i8*, i32 } undef
+
+lpad16.loopexit: ; preds = %if.then, %invoke.cont29, %invoke.cont25, %exit2, %land.lhs
+  %lpad.loopexit = landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+lpad22: ; preds = %invoke.cont23, %exit3
+  %3 = landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+invoke.cont29: ; preds = %invoke.cont25
+  invoke void undef()
+          to label %invoke.cont33 unwind label %lpad16.loopexit
+
+invoke.cont33: ; preds = %invoke.cont29
+  br label %for.inc
+
+for.inc: ; preds = %invoke.cont33
+  %cmp.i.i141 = icmp eq i8* undef, undef
+  br i1 %cmp.i.i141, label %if.then, label %if.end.i.i146
+
+if.then: ; preds = %for.inc
+  %call.i.i148 = invoke i32 undef()
+          to label %for.cond.backedge unwind label %lpad16.loopexit
+
+for.cond.backedge: ; preds = %if.end.i.i146, %if.then
+  br label %for.cond
+
+if.end.i.i146: ; preds = %for.inc
+  call void undef()
+  br label %for.cond.backedge
+}
+
Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+
+; Test an interesting pattern of nested irreducibility.
+; Just check we resolve all the irreducibility here (if not we'd crash).
+
+; CHECK-LABEL: tre_parse:
+
+define void @tre_parse() {
+entry:
+  br label %for.cond.outer
+
+for.cond.outer:                                   ; preds = %do.body14, %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %for.cond.outer
+  %nbranch.0 = phi i32* [ null, %for.cond.outer ], [ %call188, %for.cond.backedge ]
+  switch i8 undef, label %if.else [
+    i8 40, label %do.body14
+    i8 41, label %if.then63
+  ]
+
+do.body14:                                        ; preds = %for.cond
+  br label %for.cond.outer
+
+if.then63:                                        ; preds = %for.cond
+  unreachable
+
+if.else:                                          ; preds = %for.cond
+  switch i8 undef, label %if.then84 [
+    i8 92, label %if.end101
+    i8 42, label %if.end101
+  ]
+
+if.then84:                                        ; preds = %if.else
+  switch i8 undef, label %cleanup.thread [
+    i8 43, label %if.end101
+    i8 63, label %if.end101
+    i8 123, label %if.end101
+  ]
+
+if.end101:                                        ; preds = %if.then84, %if.then84, %if.then84, %if.else, %if.else
+  unreachable
+
+cleanup.thread:                                   ; preds = %if.then84
+  %call188 = tail call i32* undef(i32* %nbranch.0)
+  switch i8 undef, label %for.cond.backedge [
+    i8 92, label %land.lhs.true208
+    i8 0, label %if.else252
+  ]
+
+land.lhs.true208:                                 ; preds = %cleanup.thread
+  unreachable
+
+for.cond.backedge:                                ; preds = %cleanup.thread
+  br label %for.cond
+
+if.else252:                                       ; preds = %cleanup.thread
+  unreachable
+}
Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; Test an interesting pattern of nested irreducibility.
+; Just check we resolve all the irreducibility here (if not we'd crash).
+
+; CHECK-LABEL: func_2:
+
+; Function Attrs: noinline nounwind optnone
+define void @func_2() {
+entry:
+  br i1 undef, label %lbl_937, label %if.else787
+
+lbl_937:                                          ; preds = %for.body978, %entry
+  br label %if.end965
+
+if.else787:                                       ; preds = %entry
+  br label %if.end965
+
+if.end965:                                        ; preds = %if.else787, %lbl_937
+  br label %for.cond967
+
+for.cond967:                                      ; preds = %for.end1035, %if.end965
+  br label %for.cond975
+
+for.cond975:                                      ; preds = %if.end984, %for.cond967
+  br i1 undef, label %for.body978, label %for.end1035
+
+for.body978:                                      ; preds = %for.cond975
+  br i1 undef, label %lbl_937, label %if.end984
+
+if.end984:                                        ; preds = %for.body978
+  br label %for.cond975
+
+for.end1035:                                      ; preds = %for.cond975
+  br label %for.cond967
+}
Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll
@@ -9,7 +9,7 @@
 
 ; CHECK-LABEL: test0:
 ; CHECK: f64.load
-; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}}
+; CHECK: i32.const $[[REG:[^,]+]]=
 ; CHECK: br_table  $[[REG]],
 define void @test0(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
 bb:
@@ -50,7 +50,7 @@
 
 ; CHECK-LABEL: test1:
 ; CHECK: f64.load
-; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}}
+; CHECK: i32.const $[[REG:[^,]+]]=
 ; CHECK: br_table  $[[REG]],
 define void @test1(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
 bb:
@@ -92,3 +92,128 @@
 bb19:
   ret void
 }
+
+; A simple loop 2 blocks that are both entries.
+
+; CHECK-LABEL: test2:
+; CHECK: br_if
+; CHECK: i32.const $[[REG:[^,]+]]=
+; CHECK: br_table  $[[REG]],
+define internal i32 @test2(i32) noinline {
+entry:
+  br label %A0
+
+A0:
+  %a0a = tail call i32 @test2(i32 1)
+  %a0b = icmp eq i32 %a0a, 0
+  br i1 %a0b, label %A1, label %A2
+
+A1:
+  %a1a = tail call i32 @test2(i32 2)
+  %a1b = icmp eq i32 %a1a, 0
+  br i1 %a1b, label %A1, label %A2
+
+A2:
+  %a2a = tail call i32 @test2(i32 3)
+  %a2b = icmp eq i32 %a2a, 0
+  br i1 %a2b, label %A1, label %A2
+}
+
+; An interesting loop with inner loop and if-else structure too.
+
+; CHECK-LABEL: test3:
+; CHECK: br_if
+define void @test3(i32 %ws) {
+entry:
+  %ws.addr = alloca i32, align 4
+  store volatile i32 %ws, i32* %ws.addr, align 4
+  %0 = load volatile i32, i32* %ws.addr, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %wynn
+
+if.end:                                           ; preds = %entry
+  %1 = load volatile i32, i32* %ws.addr, align 4
+  %tobool1 = icmp ne i32 %1, 0
+  br i1 %tobool1, label %if.end9, label %if.then2
+
+if.then2:                                         ; preds = %if.end
+  br label %for.cond
+
+for.cond:                                         ; preds = %wynn, %if.then7, %if.then2
+  %2 = load volatile i32, i32* %ws.addr, align 4
+  %tobool3 = icmp ne i32 %2, 0
+  br i1 %tobool3, label %if.then4, label %if.end5
+
+if.then4:                                         ; preds = %for.cond
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then4, %for.cond
+  %3 = load volatile i32, i32* %ws.addr, align 4
+  %tobool6 = icmp ne i32 %3, 0
+  br i1 %tobool6, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.end5
+  br label %for.cond
+
+if.end8:                                          ; preds = %if.end5
+  br label %wynn
+
+wynn:                                             ; preds = %if.end8, %if.then
+  br label %for.cond
+
+if.end9:                                          ; preds = %if.end
+  ret void
+}
+
+; Multi-level irreducibility, after reducing in the main scope we must then
+; reduce in the inner loop that we just created.
+; CHECK: br_table
+; CHECK: br_table
+define void @pi_next() {
+entry:
+  br i1 undef, label %sw.bb5, label %return
+
+sw.bb5:                                           ; preds = %entry
+  br i1 undef, label %if.then.i49, label %if.else.i52
+
+if.then.i49:                                      ; preds = %sw.bb5
+  br label %for.inc197.i
+
+if.else.i52:                                      ; preds = %sw.bb5
+  br label %for.cond57.i
+
+for.cond57.i:                                     ; preds = %for.inc205.i, %if.else.i52
+  store i32 0, i32* undef, align 4
+  br label %for.cond65.i
+
+for.cond65.i:                                     ; preds = %for.inc201.i, %for.cond57.i
+  br i1 undef, label %for.body70.i, label %for.inc205.i
+
+for.body70.i:                                     ; preds = %for.cond65.i
+  br label %for.cond76.i
+
+for.cond76.i:                                     ; preds = %for.inc197.i, %for.body70.i
+  %0 = phi i32 [ %inc199.i, %for.inc197.i ], [ 0, %for.body70.i ]
+  %cmp81.i = icmp slt i32 %0, 0
+  br i1 %cmp81.i, label %for.body82.i, label %for.inc201.i
+
+for.body82.i:                                     ; preds = %for.cond76.i
+  br label %for.inc197.i
+
+for.inc197.i:                                     ; preds = %for.body82.i, %if.then.i49
+  %inc199.i = add nsw i32 undef, 1
+  br label %for.cond76.i
+
+for.inc201.i:                                     ; preds = %for.cond76.i
+  br label %for.cond65.i
+
+for.inc205.i:                                     ; preds = %for.cond65.i
+  br label %for.cond57.i
+
+return:                                           ; preds = %entry
+  ret void
+}
+