Index: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
+++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt
@@ -9,6 +9,7 @@
 add_public_tablegen_target(WebAssemblyCommonTableGen)
 
 add_llvm_target(WebAssemblyCodeGen
+  Relooper.cpp
   WebAssemblyAsmPrinter.cpp
   WebAssemblyFrameLowering.cpp
   WebAssemblyISelDAGToDAG.cpp
Index: llvm/trunk/lib/Target/WebAssembly/Relooper.h
===================================================================
--- llvm/trunk/lib/Target/WebAssembly/Relooper.h
+++ llvm/trunk/lib/Target/WebAssembly/Relooper.h
@@ -0,0 +1,214 @@
+//===-- Relooper.h - Top-level interface for WebAssembly  ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This defines an optimized C++ implemention of the Relooper
+/// algorithm, originally developed as part of Emscripten, which
+/// generates a structured AST from arbitrary control flow.
+///
+//===-------------------------------------------------------------------===//
+
+#include <llvm/ADT/MapVector.h>
+#include <llvm/ADT/SetVector.h>
+#include <llvm/Support/Casting.h>
+
+#include <cassert>
+#include <cstdio>
+#include <cstdarg>
+#include <deque>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+
+namespace llvm {
+
+namespace Relooper {
+
+struct Block;
+struct Shape;
+
+///
+/// Info about a branching from one block to another
+///
+struct Branch {
+  enum FlowType {
+    Direct = 0, // We will directly reach the right location through other
+                // means, no need for continue or break
+    Break = 1,
+    Continue = 2,
+    Nested = 3 // This code is directly reached, but we must be careful to
+               // ensure it is nested in an if - it is not reached
+    // unconditionally, other code paths exist alongside it that we need to make
+    // sure do not intertwine
+  };
+  Shape
+      *Ancestor; // If not nullptr, this shape is the relevant one for purposes
+                 // of getting to the target block. We break or continue on it
+  Branch::FlowType
+      Type;     // If Ancestor is not nullptr, this says whether to break or
+                // continue
+  bool Labeled; // If a break or continue, whether we need to use a label
+  const char *Condition; // The condition for which we branch. For example,
+                         // "my_var == 1". Conditions are checked one by one.
+                         // One of the conditions should have nullptr as the
+                         // condition, in which case it is the default
+                         // FIXME: move from char* to LLVM data structures
+  const char *Code; // If provided, code that is run right before the branch is
+                    // taken. This is useful for phis
+                    // FIXME: move from char* to LLVM data structures
+
+  Branch(const char *ConditionInit, const char *CodeInit = nullptr);
+  ~Branch();
+};
+
+typedef SetVector<Block *> BlockSet;
+typedef MapVector<Block *, Branch *> BlockBranchMap;
+typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap;
+
+///
+/// Represents a basic block of code - some instructions that end with a
+/// control flow modifier (a branch, return or throw).
+///
+struct Block {
+  // Branches become processed after we finish the shape relevant to them. For
+  // example, when we recreate a loop, branches to the loop start become
+  // continues and are now processed. When we calculate what shape to generate
+  // from a set of blocks, we ignore processed branches. Blocks own the Branch
+  // objects they use, and destroy them when done.
+  OwningBlockBranchMap BranchesOut;
+  BlockSet BranchesIn;
+  OwningBlockBranchMap ProcessedBranchesOut;
+  BlockSet ProcessedBranchesIn;
+  Shape *Parent; // The shape we are directly inside
+  int Id; // A unique identifier, defined when added to relooper. Note that this
+          // uniquely identifies a *logical* block - if we split it, the two
+          // instances have the same content *and* the same Id
+  const char *Code;      // The string representation of the code in this block.
+                         // Owning pointer (we copy the input)
+                         // FIXME: move from char* to LLVM data structures
+  const char *BranchVar; // A variable whose value determines where we go; if
+                         // this is not nullptr, emit a switch on that variable
+                         // FIXME: move from char* to LLVM data structures
+  bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching
+                               // us requires setting the label variable
+
+  Block(const char *CodeInit, const char *BranchVarInit);
+  ~Block();
+
+  void AddBranchTo(Block *Target, const char *Condition,
+                   const char *Code = nullptr);
+};
+
+///
+/// Represents a structured control flow shape
+///
+struct Shape {
+  int Id; // A unique identifier. Used to identify loops, labels are Lx where x
+          // is the Id. Defined when added to relooper
+  Shape *Next;    // The shape that will appear in the code right after this one
+  Shape *Natural; // The shape that control flow gets to naturally (if there is
+                  // Next, then this is Next)
+
+  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+  enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop };
+
+private:
+  ShapeKind Kind;
+
+public:
+  ShapeKind getKind() const { return Kind; }
+
+  Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {}
+};
+
+///
+/// Simple: No control flow at all, just instructions.
+///
+struct SimpleShape : public Shape {
+  Block *Inner;
+
+  SimpleShape() : Shape(SK_Simple), Inner(nullptr) {}
+
+  static bool classof(const Shape *S) { return S->getKind() == SK_Simple; }
+};
+
+///
+/// A shape that may be implemented with a labeled loop.
+///
+struct LabeledShape : public Shape {
+  bool Labeled; // If we have a loop, whether it needs to be labeled
+
+  LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {}
+};
+
+// Blocks with the same id were split and are identical, so we just care about
+// ids in Multiple entries
+typedef std::map<int, Shape *> IdShapeMap;
+
+///
+/// Multiple: A shape with more than one entry. If the next block to
+///           be entered is among them, we run it and continue to
+///           the next shape, otherwise we continue immediately to the
+///           next shape.
+///
+struct MultipleShape : public LabeledShape {
+  IdShapeMap InnerMap; // entry block ID -> shape
+  int Breaks; // If we have branches on us, we need a loop (or a switch). This
+              // is a counter of requirements,
+              // if we optimize it to 0, the loop is unneeded
+  bool UseSwitch; // Whether to switch on label as opposed to an if-else chain
+
+  MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {}
+
+  static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; }
+};
+
+///
+/// Loop: An infinite loop.
+///
+struct LoopShape : public LabeledShape {
+  Shape *Inner;
+
+  LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {}
+
+  static bool classof(const Shape *S) { return S->getKind() == SK_Loop; }
+};
+
+///
+/// Implements the relooper algorithm for a function's blocks.
+///
+/// Implementation details: The Relooper instance has
+/// ownership of the blocks and shapes, and frees them when done.
+///
+struct Relooper {
+  std::deque<Block *> Blocks;
+  std::deque<Shape *> Shapes;
+  Shape *Root;
+  bool MinSize;
+  int BlockIdCounter;
+  int ShapeIdCounter;
+
+  Relooper();
+  ~Relooper();
+
+  void AddBlock(Block *New, int Id = -1);
+
+  // Calculates the shapes
+  void Calculate(Block *Entry);
+
+  // Sets us to try to minimize size
+  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
+};
+
+typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
+
+} // namespace Relooper
+
+} // namespace llvm
Index: llvm/trunk/lib/Target/WebAssembly/Relooper.cpp
===================================================================
--- llvm/trunk/lib/Target/WebAssembly/Relooper.cpp
+++ llvm/trunk/lib/Target/WebAssembly/Relooper.cpp
@@ -0,0 +1,906 @@
+//===-- Relooper.cpp - Top-level interface for WebAssembly  ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This implements the Relooper algorithm. This implementation includes
+/// optimizations added since the original academic paper [1] was published.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object
+/// oriented programming systems languages and applications companion
+/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
+///
+//===-------------------------------------------------------------------===//
+
+#include "Relooper.h"
+
+#include <llvm/ADT/STLExtras.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/CommandLine.h>
+
+#include <cstring>
+#include <cstdlib>
+#include <functional>
+#include <list>
+#include <stack>
+#include <string>
+
+using namespace llvm;
+
+static cl::opt<int> RelooperSplittingFactor(
+    "relooper-splitting-factor",
+    cl::desc(
+        "How much to discount code size when deciding whether to split a node"),
+    cl::init(5));
+
+static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
+    "relooper-multiple-switch-threshold",
+    cl::desc(
+        "How many entries to allow in a multiple before we use a switch"),
+    cl::init(10));
+
+static cl::opt<unsigned> RelooperNestingLimit(
+    "relooper-nesting-limit",
+    cl::desc(
+        "How much nesting is acceptable"),
+    cl::init(20));
+
+namespace llvm {
+
+namespace Relooper {
+
+template <class T, class U>
+static bool contains(const T &container, const U &contained) {
+  return container.count(contained);
+}
+
+// Branch
+
+Branch::Branch(const char *ConditionInit, const char *CodeInit)
+    : Ancestor(nullptr), Labeled(true) {
+  // FIXME: move from char* to LLVM data structures
+  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
+  Code = CodeInit ? strdup(CodeInit) : nullptr;
+}
+
+Branch::~Branch() {
+  // FIXME: move from char* to LLVM data structures
+  free(static_cast<void *>(const_cast<char *>(Condition)));
+  free(static_cast<void *>(const_cast<char *>(Code)));
+}
+
+// Block
+
+Block::Block(const char *CodeInit, const char *BranchVarInit)
+    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
+  // FIXME: move from char* to LLVM data structures
+  Code = strdup(CodeInit);
+  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
+}
+
+Block::~Block() {
+  // FIXME: move from char* to LLVM data structures
+  free(static_cast<void *>(const_cast<char *>(Code)));
+  free(static_cast<void *>(const_cast<char *>(BranchVar)));
+}
+
+void Block::AddBranchTo(Block *Target, const char *Condition,
+                        const char *Code) {
+  assert(
+      !contains(BranchesOut,
+                Target)); // cannot add more than one branch to the same target
+  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
+}
+
+// Relooper
+
+Relooper::Relooper()
+    : Root(nullptr), MinSize(false), BlockIdCounter(1),
+      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
+}
+
+Relooper::~Relooper() {
+  for (auto Curr : Blocks)
+    delete Curr;
+  for (auto Curr : Shapes)
+    delete Curr;
+}
+
+void Relooper::AddBlock(Block *New, int Id) {
+  New->Id = Id == -1 ? BlockIdCounter++ : Id;
+  Blocks.push_back(New);
+}
+
+struct RelooperRecursor {
+  Relooper *Parent;
+  RelooperRecursor(Relooper *ParentInit) : Parent(ParentInit) {}
+};
+
+typedef std::list<Block *> BlockList;
+
+void Relooper::Calculate(Block *Entry) {
+  // Scan and optimize the input
+  struct PreOptimizer : public RelooperRecursor {
+    PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {}
+    BlockSet Live;
+
+    void FindLive(Block *Root) {
+      BlockList ToInvestigate;
+      ToInvestigate.push_back(Root);
+      while (!ToInvestigate.empty()) {
+        Block *Curr = ToInvestigate.front();
+        ToInvestigate.pop_front();
+        if (contains(Live, Curr))
+          continue;
+        Live.insert(Curr);
+        for (const auto &iter : Curr->BranchesOut)
+          ToInvestigate.push_back(iter.first);
+      }
+    }
+
+    // If a block has multiple entries but no exits, and it is small enough, it
+    // is useful to split it. A common example is a C++ function where
+    // everything ends up at a final exit block and does some RAII cleanup.
+    // Without splitting, we will be forced to introduce labelled loops to
+    // allow reaching the final block
+    void SplitDeadEnds() {
+      unsigned TotalCodeSize = 0;
+      for (const auto &Curr : Live) {
+        TotalCodeSize += strlen(Curr->Code);
+      }
+      BlockSet Splits;
+      BlockSet Removed;
+      for (const auto &Original : Live) {
+        if (Original->BranchesIn.size() <= 1 ||
+            !Original->BranchesOut.empty())
+          continue; // only dead ends, for now
+        if (contains(Original->BranchesOut, Original))
+          continue; // cannot split a looping node
+        if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
+            TotalCodeSize / RelooperSplittingFactor)
+          continue; // if splitting increases raw code size by a significant
+                    // amount, abort
+        // Split the node (for simplicity, we replace all the blocks, even
+        // though we could have reused the original)
+        for (const auto &Prior : Original->BranchesIn) {
+          Block *Split = new Block(Original->Code, Original->BranchVar);
+          Parent->AddBlock(Split, Original->Id);
+          Split->BranchesIn.insert(Prior);
+          std::unique_ptr<Branch> Details;
+          Details.swap(Prior->BranchesOut[Original]);
+          Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
+                                                          Details->Code);
+          for (const auto &iter : Original->BranchesOut) {
+            Block *Post = iter.first;
+            Branch *Details = iter.second.get();
+            Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
+                                                           Details->Code);
+            Post->BranchesIn.insert(Split);
+          }
+          Splits.insert(Split);
+          Removed.insert(Original);
+        }
+        for (const auto &iter : Original->BranchesOut) {
+          Block *Post = iter.first;
+          Post->BranchesIn.remove(Original);
+        }
+      }
+      for (const auto &iter : Splits)
+        Live.insert(iter);
+      for (const auto &iter : Removed)
+        Live.remove(iter);
+    }
+  };
+  PreOptimizer Pre(this);
+  Pre.FindLive(Entry);
+
+  // Add incoming branches from live blocks, ignoring dead code
+  for (unsigned i = 0; i < Blocks.size(); i++) {
+    Block *Curr = Blocks[i];
+    if (!contains(Pre.Live, Curr))
+      continue;
+    for (const auto &iter : Curr->BranchesOut)
+      iter.first->BranchesIn.insert(Curr);
+  }
+
+  if (!MinSize)
+    Pre.SplitDeadEnds();
+
+  // Recursively process the graph
+
+  struct Analyzer : public RelooperRecursor {
+    Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {}
+
+    // Add a shape to the list of shapes in this Relooper calculation
+    void Notice(Shape *New) {
+      New->Id = Parent->ShapeIdCounter++;
+      Parent->Shapes.push_back(New);
+    }
+
+    // Create a list of entries from a block. If LimitTo is provided, only
+    // results in that set will appear
+    void GetBlocksOut(Block *Source, BlockSet &Entries,
+                      BlockSet *LimitTo = nullptr) {
+      for (const auto &iter : Source->BranchesOut)
+        if (!LimitTo || contains(*LimitTo, iter.first))
+          Entries.insert(iter.first);
+    }
+
+    // Converts/processes all branchings to a specific target
+    void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
+                   BlockSet &From) {
+      for (auto iter = Target->BranchesIn.begin();
+           iter != Target->BranchesIn.end();) {
+        Block *Prior = *iter;
+        if (!contains(From, Prior)) {
+          iter++;
+          continue;
+        }
+        std::unique_ptr<Branch> PriorOut;
+        PriorOut.swap(Prior->BranchesOut[Target]);
+        PriorOut->Ancestor = Ancestor;
+        PriorOut->Type = Type;
+        if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
+          Multiple->Breaks++; // We are breaking out of this Multiple, so need a
+                              // loop
+        iter++; // carefully increment iter before erasing
+        Target->BranchesIn.remove(Prior);
+        Target->ProcessedBranchesIn.insert(Prior);
+        Prior->ProcessedBranchesOut[Target].swap(PriorOut);
+      }
+    }
+
+    Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
+      SimpleShape *Simple = new SimpleShape;
+      Notice(Simple);
+      Simple->Inner = Inner;
+      Inner->Parent = Simple;
+      if (Blocks.size() > 1) {
+        Blocks.remove(Inner);
+        GetBlocksOut(Inner, NextEntries, &Blocks);
+        BlockSet JustInner;
+        JustInner.insert(Inner);
+        for (const auto &iter : NextEntries)
+          Solipsize(iter, Branch::Direct, Simple, JustInner);
+      }
+      return Simple;
+    }
+
+    Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
+                    BlockSet &NextEntries) {
+      // Find the inner blocks in this loop. Proceed backwards from the entries
+      // until
+      // you reach a seen block, collecting as you go.
+      BlockSet InnerBlocks;
+      BlockSet Queue = Entries;
+      while (!Queue.empty()) {
+        Block *Curr = *(Queue.begin());
+        Queue.remove(*Queue.begin());
+        if (!contains(InnerBlocks, Curr)) {
+          // This element is new, mark it as inner and remove from outer
+          InnerBlocks.insert(Curr);
+          Blocks.remove(Curr);
+          // Add the elements prior to it
+          for (const auto &iter : Curr->BranchesIn)
+            Queue.insert(iter);
+        }
+      }
+      assert(!InnerBlocks.empty());
+
+      for (const auto &Curr : InnerBlocks) {
+        for (const auto &iter : Curr->BranchesOut) {
+          Block *Possible = iter.first;
+          if (!contains(InnerBlocks, Possible))
+            NextEntries.insert(Possible);
+        }
+      }
+
+      LoopShape *Loop = new LoopShape();
+      Notice(Loop);
+
+      // Solipsize the loop, replacing with break/continue and marking branches
+      // as Processed (will not affect later calculations)
+      // A. Branches to the loop entries become a continue to this shape
+      for (const auto &iter : Entries)
+        Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
+      // B. Branches to outside the loop (a next entry) become breaks on this
+      // shape
+      for (const auto &iter : NextEntries)
+        Solipsize(iter, Branch::Break, Loop, InnerBlocks);
+      // Finish up
+      Shape *Inner = Process(InnerBlocks, Entries, nullptr);
+      Loop->Inner = Inner;
+      return Loop;
+    }
+
+    // For each entry, find the independent group reachable by it. The
+    // independent group is the entry itself, plus all the blocks it can
+    // reach that cannot be directly reached by another entry. Note that we
+    // ignore directly reaching the entry itself by another entry.
+    //   @param Ignore - previous blocks that are irrelevant
+    void FindIndependentGroups(BlockSet &Entries,
+                               BlockBlockSetMap &IndependentGroups,
+                               BlockSet *Ignore = nullptr) {
+      typedef std::map<Block *, Block *> BlockBlockMap;
+
+      struct HelperClass {
+        BlockBlockSetMap &IndependentGroups;
+        BlockBlockMap Ownership; // For each block, which entry it belongs to.
+                                 // We have reached it from there.
+
+        HelperClass(BlockBlockSetMap &IndependentGroupsInit)
+            : IndependentGroups(IndependentGroupsInit) {}
+        void InvalidateWithChildren(Block *New) {
+          // Being in the list means you need to be invalidated
+          BlockList ToInvalidate;
+          ToInvalidate.push_back(New);
+          while (!ToInvalidate.empty()) {
+            Block *Invalidatee = ToInvalidate.front();
+            ToInvalidate.pop_front();
+            Block *Owner = Ownership[Invalidatee];
+            // Owner may have been invalidated, do not add to
+            // IndependentGroups!
+            if (contains(IndependentGroups, Owner))
+              IndependentGroups[Owner].remove(Invalidatee);
+            if (Ownership[Invalidatee]) { // may have been seen before and
+                                          // invalidated already
+              Ownership[Invalidatee] = nullptr;
+              for (const auto &iter : Invalidatee->BranchesOut) {
+                Block *Target = iter.first;
+                BlockBlockMap::iterator Known = Ownership.find(Target);
+                if (Known != Ownership.end()) {
+                  Block *TargetOwner = Known->second;
+                  if (TargetOwner)
+                    ToInvalidate.push_back(Target);
+                }
+              }
+            }
+          }
+        }
+      };
+      HelperClass Helper(IndependentGroups);
+
+      // We flow out from each of the entries, simultaneously.
+      // When we reach a new block, we add it as belonging to the one we got to
+      // it from.
+      // If we reach a new block that is already marked as belonging to someone,
+      // it is reachable by two entries and is not valid for any of them.
+      // Remove it and all it can reach that have been visited.
+
+      // Being in the queue means we just added this item, and
+      // we need to add its children
+      BlockList Queue;
+      for (const auto &Entry : Entries) {
+        Helper.Ownership[Entry] = Entry;
+        IndependentGroups[Entry].insert(Entry);
+        Queue.push_back(Entry);
+      }
+      while (!Queue.empty()) {
+        Block *Curr = Queue.front();
+        Queue.pop_front();
+        Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
+                                               // map if we are in the queue
+        if (!Owner)
+          continue; // we have been invalidated meanwhile after being reached
+                    // from two entries
+        // Add all children
+        for (const auto &iter : Curr->BranchesOut) {
+          Block *New = iter.first;
+          BlockBlockMap::iterator Known = Helper.Ownership.find(New);
+          if (Known == Helper.Ownership.end()) {
+            // New node. Add it, and put it in the queue
+            Helper.Ownership[New] = Owner;
+            IndependentGroups[Owner].insert(New);
+            Queue.push_back(New);
+            continue;
+          }
+          Block *NewOwner = Known->second;
+          if (!NewOwner)
+            continue; // We reached an invalidated node
+          if (NewOwner != Owner)
+            // Invalidate this and all reachable that we have seen - we reached
+            // this from two locations
+            Helper.InvalidateWithChildren(New);
+          // otherwise, we have the same owner, so do nothing
+        }
+      }
+
+      // Having processed all the interesting blocks, we remain with just one
+      // potential issue:
+      // If a->b, and a was invalidated, but then b was later reached by
+      // someone else, we must invalidate b. To check for this, we go over all
+      // elements in the independent groups, if an element has a parent which
+      // does *not* have the same owner, we/ must remove it and all its
+      // children.
+
+      for (const auto &iter : Entries) {
+        BlockSet &CurrGroup = IndependentGroups[iter];
+        BlockList ToInvalidate;
+        for (const auto &iter : CurrGroup) {
+          Block *Child = iter;
+          for (const auto &iter : Child->BranchesIn) {
+            Block *Parent = iter;
+            if (Ignore && contains(*Ignore, Parent))
+              continue;
+            if (Helper.Ownership[Parent] != Helper.Ownership[Child])
+              ToInvalidate.push_back(Child);
+          }
+        }
+        while (!ToInvalidate.empty()) {
+          Block *Invalidatee = ToInvalidate.front();
+          ToInvalidate.pop_front();
+          Helper.InvalidateWithChildren(Invalidatee);
+        }
+      }
+
+      // Remove empty groups
+      for (const auto &iter : Entries)
+        if (IndependentGroups[iter].empty())
+          IndependentGroups.erase(iter);
+    }
+
+    Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
+                        BlockBlockSetMap &IndependentGroups, Shape *Prev,
+                        BlockSet &NextEntries) {
+      bool Fused = isa<SimpleShape>(Prev);
+      MultipleShape *Multiple = new MultipleShape();
+      Notice(Multiple);
+      BlockSet CurrEntries;
+      for (auto &iter : IndependentGroups) {
+        Block *CurrEntry = iter.first;
+        BlockSet &CurrBlocks = iter.second;
+        // Create inner block
+        CurrEntries.clear();
+        CurrEntries.insert(CurrEntry);
+        for (const auto &CurrInner : CurrBlocks) {
+          // Remove the block from the remaining blocks
+          Blocks.remove(CurrInner);
+          // Find new next entries and fix branches to them
+          for (auto iter = CurrInner->BranchesOut.begin();
+               iter != CurrInner->BranchesOut.end();) {
+            Block *CurrTarget = iter->first;
+            auto Next = iter;
+            Next++;
+            if (!contains(CurrBlocks, CurrTarget)) {
+              NextEntries.insert(CurrTarget);
+              Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
+            }
+            iter = Next; // increment carefully because Solipsize can remove us
+          }
+        }
+        Multiple->InnerMap[CurrEntry->Id] =
+            Process(CurrBlocks, CurrEntries, nullptr);
+        // If we are not fused, then our entries will actually be checked
+        if (!Fused)
+          CurrEntry->IsCheckedMultipleEntry = true;
+      }
+      // Add entries not handled as next entries, they are deferred
+      for (const auto &Entry : Entries)
+        if (!contains(IndependentGroups, Entry))
+          NextEntries.insert(Entry);
+      // The multiple has been created, we can decide how to implement it
+      if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
+        Multiple->UseSwitch = true;
+        Multiple->Breaks++; // switch captures breaks
+      }
+      return Multiple;
+    }
+
+    // Main function.
+    // Process a set of blocks with specified entries, returns a shape
+    // The Make* functions receive a NextEntries. If they fill it with data,
+    // those are the entries for the ->Next block on them, and the blocks
+    // are what remains in Blocks (which Make* modify). In this way
+    // we avoid recursing on Next (imagine a long chain of Simples, if we
+    // recursed we could blow the stack).
+    Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
+      BlockSet *Entries = &InitialEntries;
+      BlockSet TempEntries[2];
+      int CurrTempIndex = 0;
+      BlockSet *NextEntries;
+      Shape *Ret = nullptr;
+
+      auto Make = [&](Shape *Temp) {
+        if (Prev)
+          Prev->Next = Temp;
+        if (!Ret)
+          Ret = Temp;
+        Prev = Temp;
+        Entries = NextEntries;
+      };
+
+      while (1) {
+        CurrTempIndex = 1 - CurrTempIndex;
+        NextEntries = &TempEntries[CurrTempIndex];
+        NextEntries->clear();
+
+        if (Entries->empty())
+          return Ret;
+        if (Entries->size() == 1) {
+          Block *Curr = *(Entries->begin());
+          if (Curr->BranchesIn.empty()) {
+            // One entry, no looping ==> Simple
+            Make(MakeSimple(Blocks, Curr, *NextEntries));
+            if (NextEntries->empty())
+              return Ret;
+            continue;
+          }
+          // One entry, looping ==> Loop
+          Make(MakeLoop(Blocks, *Entries, *NextEntries));
+          if (NextEntries->empty())
+            return Ret;
+          continue;
+        }
+
+        // More than one entry, try to eliminate through a Multiple groups of
+        // independent blocks from an entry/ies. It is important to remove
+        // through multiples as opposed to looping since the former is more
+        // performant.
+        BlockBlockSetMap IndependentGroups;
+        FindIndependentGroups(*Entries, IndependentGroups);
+
+        if (!IndependentGroups.empty()) {
+          // We can handle a group in a multiple if its entry cannot be reached
+          // by another group.
+          // Note that it might be reachable by itself - a loop. But that is
+          // fine, we will create a loop inside the multiple block (which
+          // is the performant order to do it).
+          for (auto iter = IndependentGroups.begin();
+               iter != IndependentGroups.end();) {
+            Block *Entry = iter->first;
+            BlockSet &Group = iter->second;
+            auto curr = iter++; // iterate carefully, we may delete
+            for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
+                 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
+              Block *Origin = *iterBranch;
+              if (!contains(Group, Origin)) {
+                // Reached from outside the group, so we cannot handle this
+                IndependentGroups.erase(curr);
+                break;
+              }
+            }
+          }
+
+          // As an optimization, if we have 2 independent groups, and one is a
+          // small dead end, we can handle only that dead end.
+          // The other then becomes a Next - without nesting in the code and
+          // recursion in the analysis.
+          // TODO: if the larger is the only dead end, handle that too
+          // TODO: handle >2 groups
+          // TODO: handle not just dead ends, but also that do not branch to the
+          // NextEntries. However, must be careful there since we create a
+          // Next, and that Next can prevent eliminating a break (since we no
+          // longer naturally reach the same place), which may necessitate a
+          // one-time loop, which makes the unnesting pointless.
+          if (IndependentGroups.size() == 2) {
+            // Find the smaller one
+            auto iter = IndependentGroups.begin();
+            Block *SmallEntry = iter->first;
+            auto SmallSize = iter->second.size();
+            iter++;
+            Block *LargeEntry = iter->first;
+            auto LargeSize = iter->second.size();
+            if (SmallSize != LargeSize) { // ignore the case where they are
+                                          // identical - keep things symmetrical
+                                          // there
+              if (SmallSize > LargeSize) {
+                Block *Temp = SmallEntry;
+                SmallEntry = LargeEntry;
+                LargeEntry = Temp; // Note: we did not flip the Sizes too, they
+                                   // are now invalid. TODO: use the smaller
+                                   // size as a limit?
+              }
+              // Check if dead end
+              bool DeadEnd = true;
+              BlockSet &SmallGroup = IndependentGroups[SmallEntry];
+              for (const auto &Curr : SmallGroup) {
+                for (const auto &iter : Curr->BranchesOut) {
+                  Block *Target = iter.first;
+                  if (!contains(SmallGroup, Target)) {
+                    DeadEnd = false;
+                    break;
+                  }
+                }
+                if (!DeadEnd)
+                  break;
+              }
+              if (DeadEnd)
+                IndependentGroups.erase(LargeEntry);
+            }
+          }
+
+          if (!IndependentGroups.empty())
+            // Some groups removable ==> Multiple
+            Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
+                              *NextEntries));
+            if (NextEntries->empty())
+              return Ret;
+            continue;
+        }
+        // No independent groups, must be loopable ==> Loop
+        Make(MakeLoop(Blocks, *Entries, *NextEntries));
+        if (NextEntries->empty())
+          return Ret;
+        continue;
+      }
+    }
+  };
+
+  // Main
+
+  BlockSet AllBlocks;
+  for (const auto &Curr : Pre.Live) {
+    AllBlocks.insert(Curr);
+  }
+
+  BlockSet Entries;
+  Entries.insert(Entry);
+  Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
+  assert(Root);
+
+  ///
+  /// Relooper post-optimizer
+  ///
+  struct PostOptimizer {
+    Relooper *Parent;
+    std::stack<Shape *> LoopStack;
+
+    PostOptimizer(Relooper *ParentInit) : Parent(ParentInit) {}
+
+    void ShapeSwitch(Shape* var,
+                     std::function<void (SimpleShape*)> simple,
+                     std::function<void (MultipleShape*)> multiple,
+                     std::function<void (LoopShape*)> loop) {
+      switch (var->getKind()) {
+        case Shape::SK_Simple: {
+          simple(cast<SimpleShape>(var));
+          break;
+        }
+        case Shape::SK_Multiple: {
+          multiple(cast<MultipleShape>(var));
+          break;
+        }
+        case Shape::SK_Loop: {
+          loop(cast<LoopShape>(var));
+          break;
+        }
+        default: llvm_unreachable("invalid shape");
+      }
+    }
+
+    // Find the blocks that natural control flow can get us directly to, or
+    // through a multiple that we ignore
+    void FollowNaturalFlow(Shape *S, BlockSet &Out) {
+      ShapeSwitch(S, [&](SimpleShape* Simple) {
+        Out.insert(Simple->Inner);
+      }, [&](MultipleShape* Multiple) {
+        for (const auto &iter : Multiple->InnerMap) {
+          FollowNaturalFlow(iter.second, Out);
+        }
+        FollowNaturalFlow(Multiple->Next, Out);
+      }, [&](LoopShape* Loop) {
+        FollowNaturalFlow(Loop->Inner, Out);
+      });
+    }
+
+    void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
+      if (Root->Next) {
+        Root->Natural = Root->Next;
+        FindNaturals(Root->Next, Otherwise);
+      } else {
+        Root->Natural = Otherwise;
+      }
+
+      ShapeSwitch(Root, [](SimpleShape* Simple) {
+      }, [&](MultipleShape* Multiple) {
+        for (const auto &iter : Multiple->InnerMap) {
+          FindNaturals(iter.second, Root->Natural);
+        }
+      }, [&](LoopShape* Loop){
+        FindNaturals(Loop->Inner, Loop->Inner);
+      });
+    }
+
+    // Remove unneeded breaks and continues.
+    // A flow operation is trivially unneeded if the shape we naturally get to
+    // by normal code execution is the same as the flow forces us to.
+    void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
+                             LoopShape *LastLoop = nullptr,
+                             unsigned Depth = 0) {
+      BlockSet NaturalBlocks;
+      FollowNaturalFlow(Natural, NaturalBlocks);
+      Shape *Next = Root;
+      while (Next) {
+        Root = Next;
+        Next = nullptr;
+        ShapeSwitch(
+            Root,
+            [&](SimpleShape* Simple) {
+              if (Simple->Inner->BranchVar)
+                LastLoop =
+                    nullptr; // a switch clears out the loop (TODO: only for
+                             // breaks, not continue)
+
+              if (Simple->Next) {
+                if (!Simple->Inner->BranchVar &&
+                    Simple->Inner->ProcessedBranchesOut.size() == 2 &&
+                    Depth < RelooperNestingLimit) {
+                  // If there is a next block, we already know at Simple
+                  // creation time to make direct branches, and we can do
+                  // nothing more in general. But, we try to optimize the
+                  // case of a break and a direct: This would normally be
+                  //   if (break?) { break; } ..
+                  // but if we make sure to nest the else, we can save the
+                  // break,
+                  //   if (!break?) { .. }
+                  // This is also better because the more canonical nested
+                  // form is easier to further optimize later. The
+                  // downside is more nesting, which adds to size in builds with
+                  // whitespace.
+                  // Note that we avoid switches, as it complicates control flow
+                  // and is not relevant for the common case we optimize here.
+                  bool Found = false;
+                  bool Abort = false;
+                  for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+                    Block *Target = iter.first;
+                    Branch *Details = iter.second.get();
+                    if (Details->Type == Branch::Break) {
+                      Found = true;
+                      if (!contains(NaturalBlocks, Target))
+                        Abort = true;
+                    } else if (Details->Type != Branch::Direct)
+                      Abort = true;
+                  }
+                  if (Found && !Abort) {
+                    for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+                      Branch *Details = iter.second.get();
+                      if (Details->Type == Branch::Break) {
+                        Details->Type = Branch::Direct;
+                        if (MultipleShape *Multiple =
+                                dyn_cast<MultipleShape>(Details->Ancestor))
+                          Multiple->Breaks--;
+                      } else {
+                        assert(Details->Type == Branch::Direct);
+                        Details->Type = Branch::Nested;
+                      }
+                    }
+                  }
+                  Depth++; // this optimization increases depth, for us and all
+                           // our next chain (i.e., until this call returns)
+                }
+                Next = Simple->Next;
+              } else {
+                // If there is no next then Natural is where we will
+                // go to by doing nothing, so we can potentially optimize some
+                // branches to direct.
+                for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+                  Block *Target = iter.first;
+                  Branch *Details = iter.second.get();
+                  if (Details->Type != Branch::Direct &&
+                      contains(NaturalBlocks,
+                               Target)) { // note: cannot handle split blocks
+                    Details->Type = Branch::Direct;
+                    if (MultipleShape *Multiple =
+                            dyn_cast<MultipleShape>(Details->Ancestor))
+                      Multiple->Breaks--;
+                  } else if (Details->Type == Branch::Break && LastLoop &&
+                             LastLoop->Natural == Details->Ancestor->Natural) {
+                    // it is important to simplify breaks, as simpler breaks
+                    // enable other optimizations
+                    Details->Labeled = false;
+                    if (MultipleShape *Multiple =
+                            dyn_cast<MultipleShape>(Details->Ancestor))
+                      Multiple->Breaks--;
+                  }
+                }
+              }
+            }, [&](MultipleShape* Multiple)
+            {
+              for (const auto &iter : Multiple->InnerMap) {
+                RemoveUnneededFlows(iter.second, Multiple->Next,
+                                    Multiple->Breaks ? nullptr : LastLoop,
+                                    Depth + 1);
+              }
+              Next = Multiple->Next;
+            }, [&](LoopShape* Loop)
+            {
+              RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
+              Next = Loop->Next;
+            });
+      }
+    }
+
+    // After we know which loops exist, we can calculate which need to be
+    // labeled
+    void FindLabeledLoops(Shape *Root) {
+      Shape *Next = Root;
+      while (Next) {
+        Root = Next;
+        Next = nullptr;
+
+        ShapeSwitch(
+            Root,
+            [&](SimpleShape *Simple) {
+          MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
+          // If we are fusing a Multiple with a loop into this Simple, then
+          // visit it now
+          if (Fused && Fused->Breaks)
+            LoopStack.push(Fused);
+          if (Simple->Inner->BranchVar)
+            LoopStack.push(nullptr); // a switch means breaks are now useless,
+                                     // push a dummy
+          if (Fused) {
+            if (Fused->UseSwitch)
+              LoopStack.push(nullptr); // a switch means breaks are now
+                                       // useless, push a dummy
+            for (const auto &iter : Fused->InnerMap) {
+              FindLabeledLoops(iter.second);
+            }
+          }
+          for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+            Branch *Details = iter.second.get();
+            if (Details->Type == Branch::Break ||
+                Details->Type == Branch::Continue) {
+              assert(!LoopStack.empty());
+              if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
+                if (MultipleShape *Multiple =
+                        dyn_cast<MultipleShape>(Details->Ancestor)) {
+                  Multiple->Labeled = true;
+                } else {
+                  LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
+                  Loop->Labeled = true;
+                }
+              } else {
+                Details->Labeled = false;
+              }
+            }
+            if (Fused && Fused->UseSwitch)
+              LoopStack.pop();
+            if (Simple->Inner->BranchVar)
+              LoopStack.pop();
+            if (Fused && Fused->Breaks)
+              LoopStack.pop();
+            if (Fused)
+              Next = Fused->Next;
+            else
+              Next = Root->Next;
+          }
+          }
+          , [&](MultipleShape* Multiple) {
+            if (Multiple->Breaks)
+              LoopStack.push(Multiple);
+            for (const auto &iter : Multiple->InnerMap)
+              FindLabeledLoops(iter.second);
+            if (Multiple->Breaks)
+              LoopStack.pop();
+            Next = Root->Next;
+          }
+          , [&](LoopShape* Loop) {
+            LoopStack.push(Loop);
+            FindLabeledLoops(Loop->Inner);
+            LoopStack.pop();
+            Next = Root->Next;
+          });
+      }
+    }
+
+    void Process(Shape * Root) {
+      FindNaturals(Root);
+      RemoveUnneededFlows(Root);
+      FindLabeledLoops(Root);
+    }
+  };
+
+  PostOptimizer(this).Process(Root);
+}
+
+} // namespace Relooper
+
+} // namespace llvm