Index: llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -16,12 +16,14 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
 
 #include "LiveDebugValues.h"
 
@@ -589,6 +591,7 @@
   /// Used as the result type for the variable value dataflow problem.
   using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
 
+  MachineDominatorTree *DomTree;
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
   const TargetFrameLowering *TFI;
@@ -736,8 +739,19 @@
   /// live-out arrays to the (initialized to zero) multidimensional arrays in
   /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
   /// number, the inner by LocIdx.
-  void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
-                    SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+  void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+                         ValueIDNum **MOutLocs,
+                         SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+  /// Calculate the iterated-dominance-frontier for a set of defs, using the
+  /// existing LLVM facilities for this. Works for a single "value" or
+  /// machine/variable location.
+  /// \p AllBlocks Set of blocks where we might consume the value.
+  /// \p DefBlocks Set of blocks where the value/location is defined.
+  /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+  void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+                         const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+                         SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
 
   /// Perform a control flow join (lattice value meet) of the values in machine
   /// locations at \p MBB. Follows the algorithm described in the file-comment,
@@ -746,16 +760,15 @@
   /// \p InLocs. \returns two bools -- the first indicates whether a change
   /// was made, the second whether a lattice downgrade occurred. If the latter
   /// is true, revisiting this block is necessary.
-  std::tuple<bool, bool>
-  mlocJoin(MachineBasicBlock &MBB,
-           SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
-           ValueIDNum **OutLocs, ValueIDNum *InLocs);
+  bool mlocJoin(MachineBasicBlock &MBB,
+                SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+                ValueIDNum **OutLocs, ValueIDNum *InLocs);
 
   /// Solve the variable value dataflow problem, for a single lexical scope.
   /// Uses the algorithm from the file comment to resolve control flow joins,
   /// although there are extra hacks, see vlocJoin. Reads the
   /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
-  /// mlocDataflow) and reads the variable values transfer function from
+  /// buildMLocValueMap) and reads the variable values transfer function from
   /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
   /// with the live-ins permanently stored to \p Output once the fixedpoint is
   /// reached.
@@ -832,8 +845,9 @@
   /// RPOT block ordering.
   void initialSetup(MachineFunction &MF);
 
-  bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC,
-                    unsigned InputBBLimit, unsigned InputDbgValLimit) override;
+  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+                    TargetPassConfig *TPC, unsigned InputBBLimit,
+                    unsigned InputDbgValLimit) override;
 
 public:
   /// Default construct and initialize the pass.
Index: llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
 /// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
 ///
 /// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable. The resulting map of instruction-to-value is then translated
-/// into a register (or spill) location for each variable over each instruction.
+/// control flow conflicts between them. The problem is SSA construction, where
+/// each debug instruction assigns the *value* that a variable has, and every
+/// instruction where the variable is in scope uses that variable. The resulting
+/// map of instruction-to-value is then translated into a register (or spill)
+/// location for each variable over each instruction.
 ///
-/// This pass determines which DBG_VALUE dominates which instructions, or if
-/// none do, where values must be merged (like PHI nodes). The added
-/// complication is that because codegen has already finished, a PHI node may
-/// be needed for a variable location to be correct, but no register or spill
-/// slot merges the necessary values. In these circumstances, the variable
-/// location is dropped.
+/// The primary difference from normal SSA construction is that we cannot
+/// _create_ PHI values that contain variable values. CodeGen has already
+/// completed, and we can't alter it just to make debug-info complete. Thus:
+/// we can identify function positions where we would like a PHI value for a
+/// variable, but must search the MachineFunction to see whether such a PHI is
+/// available. If no such PHI exists, the variable location must be dropped.
 ///
-/// What makes this analysis non-trivial is loops: we cannot tell in advance
-/// whether a variable location is live throughout a loop, or whether its
-/// location is clobbered (or redefined by another DBG_VALUE), without
-/// exploring all the way through.
-///
-/// To make this simpler we perform two kinds of analysis. First, we identify
+/// To achieve this, we perform two kinds of analysis. First, we identify
 /// every value defined by every instruction (ignoring those that only move
-/// another value), then compute a map of which values are available for each
-/// instruction. This is stronger than a reaching-def analysis, as we create
-/// PHI values where other values merge.
-///
-/// Secondly, for each variable, we effectively re-construct SSA using each
-/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
-/// first analysis from the location they refer to. We can then compute the
-/// dominance frontiers of where a variable has a value, and create PHI nodes
-/// where they merge.
-/// This isn't precisely SSA-construction though, because the function shape
-/// is pre-defined. If a variable location requires a PHI node, but no
-/// PHI for the relevant values is present in the function (as computed by the
-/// first analysis), the location must be dropped.
-///
-/// Once both are complete, we can pass back over all instructions knowing:
-///  * What _value_ each variable should contain, either defined by an
-///    instruction or where control flow merges
-///  * What the location of that value is (if any).
-/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
-/// a value moves location. After this pass runs, all variable locations within
-/// a block should be specified by DBG_VALUEs within that block, allowing
-/// DbgEntityHistoryCalculator to focus on individual blocks.
-///
-/// This pass is able to go fast because the size of the first
-/// reaching-definition analysis is proportional to the working-set size of
-/// the function, which the compiler tries to keep small. (It's also
-/// proportional to the number of blocks). Additionally, we repeatedly perform
-/// the second reaching-definition analysis with only the variables and blocks
-/// in a single lexical scope, exploiting their locality.
-///
-/// Determining where PHIs happen is trickier with this approach, and it comes
-/// to a head in the major problem for LiveDebugValues: is a value live-through
-/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
-/// facts about a function, however this analysis needs to generate new value
-/// numbers at joins.
-///
-/// To do this, consider a lattice of all definition values, from instructions
-/// and from PHIs. Each PHI is characterised by the RPO number of the block it
-/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
-/// with non-PHI values at the top, and any PHI value in the last block (by RPO
-/// order) at the bottom.
-///
-/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below,
-/// "rank" always refers to the former).
-///
-/// At any join, for each register, we consider:
-///  * All incoming values, and
-///  * The PREVIOUS live-in value at this join.
-/// If all incoming values agree: that's the live-in value. If they do not, the
-/// incoming values are ranked according to the partial order, and the NEXT
-/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
-/// the same rank are ignored as conflicting). If there are no candidate values,
-/// or if the rank of the live-in would be lower than the rank of the current
-/// blocks PHIs, create a new PHI value.
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any un-necessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
 ///
-/// Intuitively: if it's not immediately obvious what value a join should result
-/// in, we iteratively descend from instruction-definitions down through PHI
-/// values, getting closer to the current block each time. If the current block
-/// is a loop head, this ordering is effectively searching outer levels of
-/// loops, to find a value that's live-through the current loop.
+/// Secondly, for each variable we perform the same analysis, where each debug
+/// instruction is considered a def, and every instruction where the variable
+/// is in lexical scope as a use. Value propagation is used again to eliminate
+/// any un-necessary PHIs. This gives us a map of each variable to the value
+/// it should have in a block.
 ///
-/// If there is no value that's live-through this loop, a PHI is created for
-/// this location instead. We can't use a lower-ranked PHI because by definition
-/// it doesn't dominate the current block. We can't create a PHI value any
-/// earlier, because we risk creating a PHI value at a location where values do
-/// not in fact merge, thus misrepresenting the truth, and not making the true
-/// live-through value for variable locations.
+/// Once both are complete, we have two maps for each block:
+///  * Variables to the values they should have,
+///  * Values to the register / spill slot they are located in.
+/// After which we can marry-up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value.  The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
 ///
-/// This algorithm applies to both calculating the availability of values in
-/// the first analysis, and the location of variables in the second. However
-/// for the second we add an extra dimension of pain: creating a variable
-/// location PHI is only valid if, for each incoming edge,
-///  * There is a value for the variable on the incoming edge, and
-///  * All the edges have that value in the same register.
-/// Or put another way: we can only create a variable-location PHI if there is
-/// a matching machine-location PHI, each input to which is the variables value
-/// in the predecessor block.
-///
-/// To accommodate this difference, each point on the lattice is split in
-/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
-/// have a location determined are "definite" PHIs, and no further work is
-/// needed. Otherwise, a location that all non-backedge predecessors agree
-/// on is picked and propagated as a "proposed" PHI value. If that PHI value
-/// is truly live-through, it'll appear on the loop backedges on the next
-/// dataflow iteration, after which the block live-in moves to be a "definite"
-/// PHI. If it's not truly live-through, the variable value will be downgraded
-/// further as we explore the lattice, or remains "proposed" and is considered
-/// invalid once dataflow completes.
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
 ///
 /// ### Terminology
 ///
@@ -128,15 +62,13 @@
 /// contain the appropriate variable value. A value that is a PHI node is
 /// occasionally called an mphi.
 ///
-/// The first dataflow problem is the "machine value location" problem,
+/// The first SSA problem is the "machine value location" problem,
 /// because we're determining which machine locations contain which values.
 /// The "locations" are constant: what's unknown is what value they contain.
 ///
-/// The second dataflow problem (the one for variables) is the "variable value
+/// The second SSA problem (the one for variables) is the "variable value
 /// problem", because it's determining what values a variable has, rather than
-/// what location those values are placed in. Unfortunately, it's not that
-/// simple, because producing a PHI value always involves picking a location.
-/// This is an imperfection that we just have to accept, at least for now.
+/// what location those values are placed in.
 ///
 /// TODO:
 ///   Overlapping fragments
@@ -155,6 +87,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -1784,23 +1717,21 @@
   }
 }
 
-std::tuple<bool, bool>
-InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
-                           SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
-                           ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+bool InstrRefBasedLDV::mlocJoin(
+    MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+    ValueIDNum **OutLocs, ValueIDNum *InLocs) {
   LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
   bool Changed = false;
   bool DowngradeOccurred = false;
 
-  // Collect predecessors that have been visited. Anything that hasn't been
-  // visited yet is a backedge on the first iteration, and the meet of it's
-  // lattice value for all locations will be unaffected.
+  // Handle value-propagation when control flow merges on entry to a block. For
+  // any location without a PHI already placed, the location has the same value
+  // as its predecessors. If a PHI is placed, test to see whether it's now a
+  // redundant PHI that we can eliminate.
+
   SmallVector<const MachineBasicBlock *, 8> BlockOrders;
-  for (auto Pred : MBB.predecessors()) {
-    if (Visited.count(Pred)) {
-      BlockOrders.push_back(Pred);
-    }
-  }
+  for (auto Pred : MBB.predecessors())
+    BlockOrders.push_back(Pred);
 
   // Visit predecessors in RPOT order.
   auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
@@ -1810,83 +1741,61 @@
 
   // Skip entry block.
   if (BlockOrders.size() == 0)
-    return std::tuple<bool, bool>(false, false);
+    return false;
 
-  // Step through all machine locations, then look at each predecessor and
-  // detect disagreements.
+  // Step through all machine locations, look at each predecessor and test
+  // whether we can eliminate redundant PHIs.
   unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
   for (auto Location : MTracker->locations()) {
     LocIdx Idx = Location.Idx;
+
     // Pick out the first predecessors live-out value for this location. It's
-    // guaranteed to be not a backedge, as we order by RPO.
-    ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+    // guaranteed to not be a backedge, as we order by RPO.
+    ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+    // If we've already eliminated a PHI here, do no further checking, just
+    // propagate the first live-in value into this block.
+    if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) {
+      if (InLocs[Idx.asU64()] != FirstVal) {
+        InLocs[Idx.asU64()] = FirstVal;
+        Changed |= true;
+      }
+      continue;
+    }
 
-    // Some flags for whether there's a disagreement, and whether it's a
-    // disagreement with a backedge or not.
+    // We're now examining a PHI to see whether it's un-necessary. Loop around
+    // the other live-in values and test whether they're all the same.
     bool Disagree = false;
-    bool NonBackEdgeDisagree = false;
-
-    // Loop around everything that wasn't 'base'.
     for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
-      auto *MBB = BlockOrders[I];
-      if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
-        // Live-out of a predecessor disagrees with the first predecessor.
-        Disagree = true;
+      const MachineBasicBlock *PredMBB = BlockOrders[I];
+      const ValueIDNum &PredLiveOut =
+          OutLocs[PredMBB->getNumber()][Idx.asU64()];
 
-        // Test whether it's a disagreemnt in the backedges or not.
-        if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
-          NonBackEdgeDisagree = true;
-      }
-    }
+      // Incoming values agree, continue trying to eliminate this PHI.
+      if (FirstVal == PredLiveOut)
+        continue;
 
-    bool OverRide = false;
-    if (Disagree && !NonBackEdgeDisagree) {
-      // Only the backedges disagree. Consider demoting the livein
-      // lattice value, as per the file level comment. The value we consider
-      // demoting to is the value that the non-backedge predecessors agree on.
-      // The order of values is that non-PHIs are \top, a PHI at this block
-      // \bot, and phis between the two are ordered by their RPO number.
-      // If there's no agreement, or we've already demoted to this PHI value
-      // before, replace with a PHI value at this block.
-
-      // Calculate order numbers: zero means normal def, nonzero means RPO
-      // number.
-      unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
-      if (!BaseVal.isPHI())
-        BaseBlockRPONum = 0;
-
-      ValueIDNum &InLocID = InLocs[Idx.asU64()];
-      unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
-      if (!InLocID.isPHI())
-        InLocRPONum = 0;
-
-      // Should we ignore the disagreeing backedges, and override with the
-      // value the other predecessors agree on (in "base")?
-      unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
-      if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
-        // Override.
-        OverRide = true;
-        DowngradeOccurred = true;
-      }
+      // We can also accept a PHI value that feeds back into itself.
+      if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx))
+        continue;
+
+      // Live-out of a predecessor disagrees with the first predecessor.
+      Disagree = true;
     }
-    // else: if we disagree in the non-backedges, then this is definitely
-    // a control flow merge where different values merge. Make it a PHI.
 
-    // Generate a phi...
-    ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
-    ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
-    if (InLocs[Idx.asU64()] != NewVal) {
+    // No disagreement? No PHI. Otherwise, leave the PHI in live-ins.
+    if (!Disagree) {
+      InLocs[Idx.asU64()] = FirstVal;
       Changed |= true;
-      InLocs[Idx.asU64()] = NewVal;
     }
   }
 
   // TODO: Reimplement NumInserted and NumRemoved.
-  return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+  return Changed;
 }
 
-void InstrRefBasedLDV::mlocDataflow(
-    ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+void InstrRefBasedLDV::buildMLocValueMap(
+    MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
     SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
   std::priority_queue<unsigned int, std::vector<unsigned int>,
                       std::greater<unsigned int>>
@@ -1897,20 +1806,59 @@
   // but this is probably not worth it.
   SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
 
-  // Initialize worklist with every block to be visited.
+  // Initialize worklist with every block to be visited. Also produce list of
+  // all blocks.
+  SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
   for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
     Worklist.push(I);
     OnWorklist.insert(OrderToBB[I]);
+    AllBlocks.insert(OrderToBB[I]);
   }
 
+  // Initialize entry block to PHIs. These represent arguments.
+  for (auto Location : MTracker->locations())
+    MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+
   MTracker->reset();
 
-  // Set inlocs for entry block -- each as a PHI at the entry block. Represents
-  // the incoming value to the function.
-  MTracker->setMPhis(0);
-  for (auto Location : MTracker->locations())
-    MInLocs[0][Location.Idx.asU64()] = Location.Value;
+  // Start by placing PHIs, using the usual SSA constructor algorithm. Consider
+  // any machine-location that isn't live-through a block to be def'd in that
+  // block.
+  for (auto Location : MTracker->locations()) {
+    // Collect the set of defs.
+    SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+    for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+      MachineBasicBlock *MBB = OrderToBB[I];
+      const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
+      if (TransferFunc.find(Location.Idx) != TransferFunc.end())
+        DefBlocks.insert(MBB);
+    }
+
+    // The entry block defs the location too: it's the live-in / argument value.
+    // Only insert if there are other defs though; everything is trivially live
+    // through otherwise.
+    if (!DefBlocks.empty())
+      DefBlocks.insert(&*MF.begin());
 
+    // Ask the SSA construction algorithm where we should put PHIs.
+    SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+    BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks);
+
+    // Install those PHI values into the live-in value array.
+    for (const MachineBasicBlock *MBB : PHIBlocks) {
+      MInLocs[MBB->getNumber()][Location.Idx.asU64()] =
+          ValueIDNum(MBB->getNumber(), 0, Location.Idx);
+    }
+  }
+
+  // Propagate values to eliminate redundant PHIs. At the same time, this
+  // produces the table of Block x Location => Value for the entry to each
+  // block.
+  // The kind of PHIs we can eliminate are, for example, where one path in a
+  // conditional spills and restores a register, and the register still has
+  // the same value once control flow joins, unbeknowns to the PHI placement
+  // code. Propagating values allows us to identify such un-necessary PHIs and
+  // remove them.
   SmallPtrSet<const MachineBasicBlock *, 16> Visited;
   while (!Worklist.empty() || !Pending.empty()) {
     // Vector for storing the evaluated block transfer function.
@@ -1922,16 +1870,10 @@
       Worklist.pop();
 
       // Join the values in all predecessor blocks.
-      bool InLocsChanged, DowngradeOccurred;
-      std::tie(InLocsChanged, DowngradeOccurred) =
-          mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+      bool InLocsChanged;
+      InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
       InLocsChanged |= Visited.insert(MBB).second;
 
-      // If a downgrade occurred, book us in for re-examination on the next
-      // iteration.
-      if (DowngradeOccurred && OnPending.insert(MBB).second)
-        Pending.push(BBToOrder[MBB]);
-
       // Don't examine transfer function if we've visited this loc at least
       // once, and inlocs haven't changed.
       if (!InLocsChanged)
@@ -1976,8 +1918,8 @@
         continue;
 
       // All successors should be visited: put any back-edges on the pending
-      // list for the next dataflow iteration, and any other successors to be
-      // visited this iteration, if they're not going to be already.
+      // list for the next pass-through, and any other successors to be
+      // visited this pass, if they're not going to be already.
       for (auto s : MBB->successors()) {
         // Does branching to this successor represent a back-edge?
         if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -2000,8 +1942,55 @@
     assert(Pending.empty() && "Pending should be empty");
   }
 
-  // Once all the live-ins don't change on mlocJoin(), we've reached a
-  // fixedpoint.
+  // Once all the live-ins don't change on mlocJoin(), we've eliminated all
+  // redundant PHIs.
+}
+
+// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
+// template specialisations for graph traits and a successor enumerator.
+namespace llvm {
+template <> struct GraphTraits<MachineBasicBlock> {
+  using NodeRef = MachineBasicBlock *;
+  using ChildIteratorType = MachineBasicBlock::succ_iterator;
+
+  static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
+  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
+
+template <> struct GraphTraits<const MachineBasicBlock> {
+  using NodeRef = const MachineBasicBlock *;
+  using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
+
+  static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
+  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
+
+using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
+using MachineDomTreeChildGetter =
+    typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
+
+template <>
+typename MachineDomTreeChildGetter::ChildrenTy
+MachineDomTreeChildGetter::get(const NodeRef &N) {
+  return {N->succ_begin(), N->succ_end()};
+}
+} // namespace llvm
+
+void InstrRefBasedLDV::BlockPHIPlacement(
+    const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+    const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+    SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
+  // Apply IDF calculator to the designated set of location defs, storing
+  // required PHIs into PHIBlocks. Uses the dominator tree stored in the
+  // InstrRefBasedLDV object.
+  IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo;
+  IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo);
+
+  IDF.setLiveInBlocks(AllBlocks);
+  IDF.setDefiningBlocks(DefBlocks);
+  IDF.calculate(PHIBlocks);
 }
 
 bool InstrRefBasedLDV::vlocDowngradeLattice(
@@ -2754,7 +2743,9 @@
 
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
-bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC,
+bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
+                                    MachineDominatorTree *DomTree,
+                                    TargetPassConfig *TPC,
                                     unsigned InputBBLimit,
                                     unsigned InputDbgValLimit) {
   // No subprogram means this function contains no debuginfo.
@@ -2764,6 +2755,7 @@
   LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
   this->TPC = TPC;
 
+  this->DomTree = DomTree;
   TRI = MF.getSubtarget().getRegisterInfo();
   TII = MF.getSubtarget().getInstrInfo();
   TFI = MF.getSubtarget().getFrameLowering();
@@ -2801,6 +2793,7 @@
   ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
   unsigned NumLocs = MTracker->getNumLocs();
   for (int i = 0; i < MaxNumBlocks; ++i) {
+    // These all auto-initialize to ValueIDNum::EmptyValue
     MOutLocs[i] = new ValueIDNum[NumLocs];
     MInLocs[i] = new ValueIDNum[NumLocs];
   }
@@ -2809,7 +2802,7 @@
   // storing the computed live-ins / live-outs into the array-of-arrays. We use
   // both live-ins and live-outs for decision making in the variable value
   // dataflow problem.
-  mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+  buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
 
   // Patch up debug phi numbers, turning unknown block-live-in values into
   // either live-through machine values, or PHIs.
Index: llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
 #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
 
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 
@@ -23,8 +24,8 @@
 // implementation.
 class LDVImpl {
 public:
-  virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC,
-                            unsigned InputBBLimit,
+  virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+                            TargetPassConfig *TPC, unsigned InputBBLimit,
                             unsigned InputDbgValLimit) = 0;
   virtual ~LDVImpl() {}
 };
Index: llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -75,6 +75,7 @@
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -110,5 +111,8 @@
       TheImpl = llvm::makeVarLocBasedLiveDebugValues();
   }
 
-  return TheImpl->ExtendRanges(MF, TPC, InputBBLimit, InputDbgValueLimit);
+  MachineDominatorTree *DomTree = &getAnalysis<MachineDominatorTree>();
+
+  return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
+                               InputDbgValueLimit);
 }
Index: llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1015,8 +1015,9 @@
   /// had their instruction creation deferred.
   void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
 
-  bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC,
-                    unsigned InputBBLimit, unsigned InputDbgValLimit) override;
+  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+                    TargetPassConfig *TPC, unsigned InputBBLimit,
+                    unsigned InputDbgValLimit) override;
 
 public:
   /// Default construct and initialize the pass.
@@ -2108,9 +2109,11 @@
 
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
-bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC,
-                                  unsigned InputBBLimit,
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
+                                  MachineDominatorTree *DomTree,
+                                  TargetPassConfig *TPC, unsigned InputBBLimit,
                                   unsigned InputDbgValLimit) {
+  (void)DomTree;
   LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
 
   if (!MF.getFunction().getSubprogram())
Index: llvm/unittests/CodeGen/InstrRefLDVTest.cpp
===================================================================
--- llvm/unittests/CodeGen/InstrRefLDVTest.cpp
+++ llvm/unittests/CodeGen/InstrRefLDVTest.cpp
@@ -29,10 +29,16 @@
 
 class InstrRefLDVTest : public testing::Test {
 public:
+  friend class InstrRefBasedLDV;
+
+  using MLocTransferMap = InstrRefBasedLDV::MLocTransferMap;
+
   // Boilerplate,
   LLVMContext Ctx;
   Module Mod;
+  std::unique_ptr<TargetMachine> Machine;
   std::unique_ptr<MachineFunction> MF;
+  std::unique_ptr<MachineDominatorTree> DomTree;
   DICompileUnit *OurCU;
   DIFile *OurFile;
   DISubprogram *OurFunc;
@@ -42,33 +48,36 @@
 
   DebugLoc OutermostLoc, InBlockLoc, NotNestedBlockLoc, InlinedLoc;
 
-  MachineBasicBlock *MBB1, *MBB2, *MBB3, *MBB4;
+  MachineBasicBlock *MBB1, *MBB2, *MBB3, *MBB4, *MBB5;
+
+  std::unique_ptr<InstrRefBasedLDV> LDV;
+  MLocTracker *MTracker = nullptr;
 
   InstrRefLDVTest() : Ctx(), Mod("beehives", Ctx) {
+  }
+
+  void SetUp() {
     // Boilerplate that creates a MachineFunction and associated blocks.
-    MF = createMachineFunction(Ctx, Mod);
-    llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
-    auto BB1 = BasicBlock::Create(Ctx, "a", &F);
-    auto BB2 = BasicBlock::Create(Ctx, "b", &F);
-    auto BB3 = BasicBlock::Create(Ctx, "c", &F);
-    auto BB4 = BasicBlock::Create(Ctx, "d", &F);
-    IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4);
-    IRB1.CreateBr(BB2);
-    IRB2.CreateBr(BB3);
-    IRB3.CreateBr(BB4);
-    IRB4.CreateRetVoid();
-    MBB1 = MF->CreateMachineBasicBlock(BB1);
-    MF->insert(MF->end(), MBB1);
-    MBB2 = MF->CreateMachineBasicBlock(BB2);
-    MF->insert(MF->end(), MBB2);
-    MBB3 = MF->CreateMachineBasicBlock(BB3);
-    MF->insert(MF->end(), MBB3);
-    MBB4 = MF->CreateMachineBasicBlock(BB4);
-    MF->insert(MF->end(), MBB4);
-    MBB1->addSuccessor(MBB2);
-    MBB1->addSuccessor(MBB3);
-    MBB2->addSuccessor(MBB4);
-    MBB3->addSuccessor(MBB4);
+
+    Mod.setDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128");
+    Triple TargetTriple("x86_64--");
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
+    if (!T)
+      GTEST_SKIP();
+
+    TargetOptions Options;
+    Machine = std::unique_ptr<TargetMachine>(T->createTargetMachine(
+        "X86", "", "", Options, None, None, CodeGenOpt::Aggressive));
+
+    auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+    auto F = Function::Create(Type, GlobalValue::ExternalLinkage, "Test", &Mod);
+
+    unsigned FunctionNum = 42;
+    MachineModuleInfo MMI((LLVMTargetMachine*)&*Machine);
+    const TargetSubtargetInfo &STI = *Machine->getSubtargetImpl(*F);
+
+    MF = std::make_unique<MachineFunction>(*F, (LLVMTargetMachine&)*Machine, STI, FunctionNum, MMI);
 
     // Create metadata: CU, subprogram, some blocks and an inline function
     // scope.
@@ -80,7 +89,7 @@
     OurFunc =
         DIB.createFunction(OurCU, "bees", "", OurFile, 1, OurSubT, 1,
                            DINode::FlagZero, DISubprogram::SPFlagDefinition);
-    F.setSubprogram(OurFunc);
+    F->setSubprogram(OurFunc);
     OurBlock = DIB.createLexicalBlock(OurFunc, OurFile, 2, 3);
     AnotherBlock = DIB.createLexicalBlock(OurFunc, OurFile, 2, 6);
     ToInlineFunc =
@@ -97,4 +106,949 @@
 
     DIB.finalize();
   }
+
+  Register getRegByName(const char *WantedName) {
+    auto *TRI = MF->getRegInfo().getTargetRegisterInfo();
+    // Slow, but works
+    for (unsigned int I = 1; I < TRI->getNumRegs(); ++I) {
+      const char *Name = TRI->getName(I);
+      if (strcmp(WantedName, Name) == 0)
+        return I;
+    }
+
+    // If this ever fails, something is very wrong with this unit test.
+    return 0;
+  }
+
+  InstrRefBasedLDV *setupLDVObj() {
+    // Create a new LDV object, and plug some relevant object ptrs into it.
+    LDV = std::make_unique<InstrRefBasedLDV>();
+    const TargetSubtargetInfo &STI = MF->getSubtarget();
+    LDV->TII = STI.getInstrInfo();
+    LDV->TRI = STI.getRegisterInfo();
+    LDV->TFI = STI.getFrameLowering();
+    LDV->MFI = &MF->getFrameInfo();
+
+    DomTree = std::make_unique<MachineDominatorTree>(*MF);
+    LDV->DomTree = &*DomTree;
+
+    // Future work: unit tests for mtracker / vtracker / ttracker.
+
+    // Setup things like the artifical block map, and BlockNo <=> RPO Order
+    // mappings.
+    LDV->initialSetup(*MF);
+    addMTracker();
+    return &*LDV;
+  }
+
+  void addMTracker() {
+    ASSERT_TRUE(LDV);
+    // Add a machine-location-tracking object to LDV. Don't initialize any
+    // register locations within it though.
+    const TargetSubtargetInfo &STI = MF->getSubtarget();
+    LDV->MTracker =
+        new MLocTracker(*MF, *LDV->TII, *LDV->TRI, *STI.getTargetLowering());
+    MTracker = LDV->MTracker;
+  }
+
+  // Some routines for bouncing into LDV,
+  void buildMLocValueMap(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+                         SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+    LDV->buildMLocValueMap(*MF, MInLocs, MOutLocs, MLocTransfer);
+  }
+
+  void initValueArray(ValueIDNum **Nums, unsigned Blks, unsigned Locs) {
+    for (unsigned int I = 0; I < Blks; ++I)
+      for (unsigned int J = 0; J < Locs; ++J)
+        Nums[I][J] = ValueIDNum::EmptyValue;
+  }
 };
+
+TEST_F(InstrRefLDVTest, MLocSingleBlock) {
+  // Test some very simple properties about interpreting the transfer function.
+
+  // Add an entry block with nothing but 'ret void' in it.
+  Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "entry", &F);
+  IRBuilder<> IRB(BB1);
+  IRB.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+  // We should start with a single location, the stack pointer.
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+
+  // Set up live-in and live-out tables for this function: two locations (we
+  // add one later) in a single block.
+  ValueIDNum InLocs[2], OutLocs[2];
+  ValueIDNum *InLocsPtr[1] = {&InLocs[0]};
+  ValueIDNum *OutLocsPtr[1] = {&OutLocs[0]};
+
+  // Transfer function: nothing.
+  SmallVector<MLocTransferMap, 1> TransferFunc = {{}};
+
+  // Try and build value maps...
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+
+  // The result should be that RSP is marked as a live-in-PHI -- this represents
+  // an argument. And as there's no transfer function, the block live-out should
+  // be the same.
+  EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc));
+  EXPECT_EQ(OutLocs[0], ValueIDNum(0, 0, RspLoc));
+
+  // Try again, this time initialising the in-locs to be defined by an
+  // instruction. The entry block should always be re-assigned to be the
+  // arguments.
+  initValueArray(InLocsPtr, 1, 2);
+  initValueArray(OutLocsPtr, 1, 2);
+  InLocs[0] = ValueIDNum(0, 1, RspLoc);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc));
+  EXPECT_EQ(OutLocs[0], ValueIDNum(0, 0, RspLoc));
+
+  // Now insert something into the transfer function to assign to the single
+  // machine location.
+  TransferFunc[0].insert({0, ValueIDNum(0, 1, RspLoc)});
+  initValueArray(InLocsPtr, 1, 2);
+  initValueArray(OutLocsPtr, 1, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc));
+  EXPECT_EQ(OutLocs[0], ValueIDNum(0, 1, RspLoc));
+
+  // Add a new register to be tracked, and insert it into the transfer function
+  // as a copy. The output of $rax should be the live-in value of $rsp.
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+  TransferFunc[0].insert({RaxLoc, ValueIDNum(0, 0, RspLoc)});
+  initValueArray(InLocsPtr, 1, 2);
+  initValueArray(OutLocsPtr, 1, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc));
+  EXPECT_EQ(InLocs[1], ValueIDNum(0, 0, RaxLoc));
+  EXPECT_EQ(OutLocs[0], ValueIDNum(0, 1, RspLoc));
+  EXPECT_EQ(OutLocs[1], ValueIDNum(0, 0, RspLoc)); // Rax contains RspLoc
+}
+
+TEST_F(InstrRefLDVTest, MLocDiamondBlocks) {
+  // Test that information flows from the entry block to two successors.
+
+  //        entry
+  //        /  \
+  //      br1  br2
+  //        \  /
+  //         ret
+  llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "a", &F);
+  auto BB2 = BasicBlock::Create(Ctx, "b", &F);
+  auto BB3 = BasicBlock::Create(Ctx, "c", &F);
+  auto BB4 = BasicBlock::Create(Ctx, "d", &F);
+  IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4);
+  IRB1.CreateBr(BB2);
+  IRB2.CreateBr(BB3);
+  IRB3.CreateBr(BB4);
+  IRB4.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MBB2 = MF->CreateMachineBasicBlock(BB2);
+  MF->insert(MF->end(), MBB2);
+  MBB3 = MF->CreateMachineBasicBlock(BB3);
+  MF->insert(MF->end(), MBB3);
+  MBB4 = MF->CreateMachineBasicBlock(BB4);
+  MF->insert(MF->end(), MBB4);
+  MBB1->addSuccessor(MBB2);
+  MBB1->addSuccessor(MBB3);
+  MBB2->addSuccessor(MBB4);
+  MBB3->addSuccessor(MBB4);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+
+  ValueIDNum InLocs[4][2], OutLocs[4][2];
+  ValueIDNum *InLocsPtr[4] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3]};
+  ValueIDNum *OutLocsPtr[4] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3]};
+
+  // Transfer function: start with nothing.
+  SmallVector<MLocTransferMap, 1> TransferFunc;
+  TransferFunc.resize(4);
+
+  // Name some values.
+  ValueIDNum LiveInRsp(0, 0, RspLoc);
+  ValueIDNum RspDefInBlk0(0, 1, RspLoc);
+  ValueIDNum RspDefInBlk1(1, 1, RspLoc);
+  ValueIDNum RspDefInBlk2(2, 1, RspLoc);
+  ValueIDNum RspPHIInBlk3(3, 0, RspLoc);
+  ValueIDNum LiveInRax(0, 0, RaxLoc);
+  ValueIDNum RaxLiveInBlk1(1, 0, RaxLoc);
+  ValueIDNum RaxLiveInBlk2(2, 0, RaxLoc);
+
+  // With no transfer function, the live-in values to the entry block should
+  // propagate to all live-outs and the live-ins to the two successor blocks.
+  // IN ADDITION: this checks that the exit block doesn't get a PHI put in it.
+  initValueArray(InLocsPtr, 4, 2);
+  initValueArray(OutLocsPtr, 4, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  // (Skipped writing out locations for $rax).
+
+  // Check that a def of $rsp in the entry block will likewise reach all the
+  // successors.
+  TransferFunc[0].insert({RspLoc, RspDefInBlk0});
+  initValueArray(InLocsPtr, 4, 2);
+  initValueArray(OutLocsPtr, 4, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[0][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk0);
+
+  // Def in one branch of the diamond means that we need a PHI in the ret block
+  TransferFunc[1].insert({RspLoc, RspDefInBlk1});
+  initValueArray(InLocsPtr, 4, 2);
+  initValueArray(OutLocsPtr, 4, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  // This value map: like above, where RspDefInBlk0 is propagated through one
+  // branch of the diamond, but is def'ed in the live-outs of the other. The
+  // ret / merging block should have a PHI in its live-ins.
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[0][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3);
+
+  // If we have differeing defs in either side of the diamond, we should
+  // continue to produce a PHI,
+  TransferFunc[2].insert({RspLoc, RspDefInBlk2});
+  initValueArray(InLocsPtr, 4, 2);
+  initValueArray(OutLocsPtr, 4, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[0][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3);
+
+  // If we have defs of the same value on either side of the branch, a PHI will
+  // initially be created, however value propagation should then eliminate it.
+  // Encode this by copying the live-in value to $rax, and copying it to $rsp
+  // from $rax in each branch of the diamond. We don't allow the definition of
+  // arbitary values in transfer functions.
+  TransferFunc[1].clear();
+  TransferFunc[2].clear();
+  TransferFunc[0].insert({RaxLoc, LiveInRsp});
+  TransferFunc[1].insert({RspLoc, RaxLiveInBlk1});
+  TransferFunc[2].insert({RspLoc, RaxLiveInBlk2});
+  initValueArray(InLocsPtr, 4, 2);
+  initValueArray(OutLocsPtr, 4, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk0);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], RspDefInBlk0);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+}
+
+TEST_F(InstrRefLDVTest, MLocSimpleLoop) {
+  //    entry
+  //     |
+  //     |/-----\
+  //    loopblk |
+  //     |\-----/
+  //     |
+  //     ret
+  llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "entry", &F);
+  auto BB2 = BasicBlock::Create(Ctx, "loop", &F);
+  auto BB3 = BasicBlock::Create(Ctx, "ret", &F);
+  IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3);
+  IRB1.CreateBr(BB2);
+  IRB2.CreateBr(BB3);
+  IRB3.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MBB2 = MF->CreateMachineBasicBlock(BB2);
+  MF->insert(MF->end(), MBB2);
+  MBB3 = MF->CreateMachineBasicBlock(BB3);
+  MF->insert(MF->end(), MBB3);
+  MBB1->addSuccessor(MBB2);
+  MBB2->addSuccessor(MBB3);
+  MBB2->addSuccessor(MBB2);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+
+  ValueIDNum InLocs[3][2], OutLocs[3][2];
+  ValueIDNum *InLocsPtr[3] = {InLocs[0], InLocs[1], InLocs[2]};
+  ValueIDNum *OutLocsPtr[3] = {OutLocs[0], OutLocs[1], OutLocs[2]};
+
+  SmallVector<MLocTransferMap, 1> TransferFunc;
+  TransferFunc.resize(3);
+
+  // Name some values.
+  ValueIDNum LiveInRsp(0, 0, RspLoc);
+  ValueIDNum RspPHIInBlk1(1, 0, RspLoc);
+  ValueIDNum RspDefInBlk1(1, 1, RspLoc);
+  ValueIDNum LiveInRax(0, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc);
+
+  // First off: everything though be live-through.
+  initValueArray(InLocsPtr, 3, 2);
+  initValueArray(OutLocsPtr, 3, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+
+  // Add a def of $rsp to the loop block: it should be in the live-outs, but
+  // should cause a PHI to be placed in the live-ins. Test the transfer function
+  // by copying that PHI into $rax in the loop, then back to $rsp in the ret
+  // block.
+  TransferFunc[1].insert({RspLoc, RspDefInBlk1});
+  TransferFunc[1].insert({RaxLoc, RspPHIInBlk1});
+  TransferFunc[2].insert({RspLoc, RaxPHIInBlk2});
+  initValueArray(InLocsPtr, 3, 2);
+  initValueArray(OutLocsPtr, 3, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk1);
+  // Check rax as well,
+  EXPECT_EQ(InLocs[0][1], LiveInRax);
+  EXPECT_EQ(InLocs[1][1], RaxPHIInBlk1);
+  EXPECT_EQ(InLocs[2][1], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[0][1], LiveInRax);
+  EXPECT_EQ(OutLocs[1][1], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][1], RspPHIInBlk1);
+
+  // As with the diamond case, a PHI will be created if there's a (implicit)
+  // def in the entry block and loop block; but should be value propagated away
+  // if it copies in the same value. Copy live-in $rsp to $rax, then copy it
+  // into $rsp in the loop. Encoded as copying the live-in $rax value in block 1
+  // to $rsp.
+  TransferFunc[1].clear();
+  TransferFunc[2].clear();
+  TransferFunc[0].insert({RaxLoc, LiveInRsp});
+  TransferFunc[1].insert({RspLoc, RaxPHIInBlk1});
+  initValueArray(InLocsPtr, 3, 2);
+  initValueArray(OutLocsPtr, 3, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  // rax,
+  EXPECT_EQ(InLocs[0][1], LiveInRax);
+  EXPECT_EQ(InLocs[1][1], LiveInRsp);
+  EXPECT_EQ(InLocs[2][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][1], LiveInRsp);
+}
+
+TEST_F(InstrRefLDVTest, MLocNestedLoop) {
+  //    entry
+  //     |
+  //    loop1
+  //     ^\
+  //     | \    /-\
+  //     |  loop2  |
+  //     |  /   \-/
+  //     ^ /
+  //     join
+  //     |
+  //     ret
+  llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "entry", &F);
+  auto BB2 = BasicBlock::Create(Ctx, "loop1", &F);
+  auto BB3 = BasicBlock::Create(Ctx, "loop2", &F);
+  auto BB4 = BasicBlock::Create(Ctx, "join", &F);
+  auto BB5 = BasicBlock::Create(Ctx, "ret", &F);
+  IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5);
+  IRB1.CreateBr(BB2);
+  IRB2.CreateBr(BB3);
+  IRB3.CreateBr(BB4);
+  IRB4.CreateBr(BB5);
+  IRB5.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MBB2 = MF->CreateMachineBasicBlock(BB2);
+  MF->insert(MF->end(), MBB2);
+  MBB3 = MF->CreateMachineBasicBlock(BB3);
+  MF->insert(MF->end(), MBB3);
+  MBB4 = MF->CreateMachineBasicBlock(BB4);
+  MF->insert(MF->end(), MBB4);
+  MBB5 = MF->CreateMachineBasicBlock(BB5);
+  MF->insert(MF->end(), MBB5);
+  MBB1->addSuccessor(MBB2);
+  MBB2->addSuccessor(MBB3);
+  MBB3->addSuccessor(MBB3);
+  MBB3->addSuccessor(MBB4);
+  MBB4->addSuccessor(MBB2);
+  MBB4->addSuccessor(MBB5);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+
+  ValueIDNum InLocs[5][2], OutLocs[5][2];
+  ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3],
+                              InLocs[4]};
+  ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3],
+                               OutLocs[4]};
+
+  SmallVector<MLocTransferMap, 1> TransferFunc;
+  TransferFunc.resize(5);
+
+  ValueIDNum LiveInRsp(0, 0, RspLoc);
+  ValueIDNum RspPHIInBlk1(1, 0, RspLoc);
+  ValueIDNum RspDefInBlk1(1, 1, RspLoc);
+  ValueIDNum RspPHIInBlk2(2, 0, RspLoc);
+  ValueIDNum RspDefInBlk2(2, 1, RspLoc);
+  ValueIDNum RspDefInBlk3(3, 1, RspLoc);
+  ValueIDNum LiveInRax(0, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc);
+
+  // Like the other tests: first ensure that if there's nothing in the transfer
+  // function, then everything is live-through (check $rsp).
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(InLocs[4][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][0], LiveInRsp);
+
+  // A def in the inner loop means we should get PHIs at the heads of both
+  // loops. Live-outs of the last three blocks will be the def, as it dominates
+  // those.
+  TransferFunc[2].insert({RspLoc, RspDefInBlk2});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk2);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk2);
+
+  // Adding a def to the outer loop header shouldn't affect this much -- the
+  // live-out of block 1 changes.
+  TransferFunc[1].insert({RspLoc, RspDefInBlk1});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk2);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk2);
+
+  // Likewise, putting a def in the outer loop tail shouldn't affect where
+  // the PHIs go, and should propagate into the ret block.
+  TransferFunc[3].insert({RspLoc, RspDefInBlk3});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk2);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // However: if we erase the inner-loop def, then we just have defs in the
+  // head and tail of the outer loop. The inner loop should be live-through.
+  TransferFunc[2].clear();
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // Check that this still works if we copy RspDefInBlk1 to $rax and then
+  // copy it back into $rsp in the inner loop.
+  TransferFunc[1].insert({RaxLoc, RspDefInBlk1});
+  TransferFunc[2].insert({RspLoc, RaxPHIInBlk2});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+  // Look at raxes value in the relevant blocks,
+  EXPECT_EQ(InLocs[2][1], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[1][1], RspDefInBlk1);
+
+  // If we have a single def in the tail of the outer loop, that should produce
+  // a PHI at the loop head, and be live-through the inner loop.
+  TransferFunc[1].clear();
+  TransferFunc[2].clear();
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // And if we copy from $rsp to $rax in block 2, it should resolve to the PHI
+  // in block 1, and we should keep that value in rax until the ret block.
+  // There'll be a PHI in block 1 and 2, because we're putting a def in the
+  // inner loop.
+  TransferFunc[2].insert({RaxLoc, RspPHIInBlk2});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  // Examining the values of rax,
+  EXPECT_EQ(InLocs[0][1], LiveInRax);
+  EXPECT_EQ(InLocs[1][1], RaxPHIInBlk1);
+  EXPECT_EQ(InLocs[2][1], RaxPHIInBlk2);
+  EXPECT_EQ(InLocs[3][1], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[4][1], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[0][1], LiveInRax);
+  EXPECT_EQ(OutLocs[1][1], RaxPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][1], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[3][1], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[4][1], RspPHIInBlk1);
+}
+
+TEST_F(InstrRefLDVTest, MLocNoDominatingLoop) {
+  //           entry
+  //            / \
+  //           /   \
+  //          /     \
+  //        head1   head2
+  //        ^  \   /   ^
+  //        ^   \ /    ^
+  //        \-joinblk -/
+  //             |
+  //            ret
+  llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "entry", &F);
+  auto BB2 = BasicBlock::Create(Ctx, "head1", &F);
+  auto BB3 = BasicBlock::Create(Ctx, "head2", &F);
+  auto BB4 = BasicBlock::Create(Ctx, "joinblk", &F);
+  auto BB5 = BasicBlock::Create(Ctx, "ret", &F);
+  IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5);
+  IRB1.CreateBr(BB2);
+  IRB2.CreateBr(BB3);
+  IRB3.CreateBr(BB4);
+  IRB4.CreateBr(BB5);
+  IRB5.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MBB2 = MF->CreateMachineBasicBlock(BB2);
+  MF->insert(MF->end(), MBB2);
+  MBB3 = MF->CreateMachineBasicBlock(BB3);
+  MF->insert(MF->end(), MBB3);
+  MBB4 = MF->CreateMachineBasicBlock(BB4);
+  MF->insert(MF->end(), MBB4);
+  MBB5 = MF->CreateMachineBasicBlock(BB5);
+  MF->insert(MF->end(), MBB5);
+  MBB1->addSuccessor(MBB2);
+  MBB1->addSuccessor(MBB3);
+  MBB2->addSuccessor(MBB4);
+  MBB3->addSuccessor(MBB4);
+  MBB4->addSuccessor(MBB2);
+  MBB4->addSuccessor(MBB3);
+  MBB4->addSuccessor(MBB5);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+
+  ValueIDNum InLocs[5][2], OutLocs[5][2];
+  ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3],
+                              InLocs[4]};
+  ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3],
+                               OutLocs[4]};
+
+  SmallVector<MLocTransferMap, 1> TransferFunc;
+  TransferFunc.resize(5);
+
+  ValueIDNum LiveInRsp(0, 0, RspLoc);
+  ValueIDNum RspPHIInBlk1(1, 0, RspLoc);
+  ValueIDNum RspDefInBlk1(1, 1, RspLoc);
+  ValueIDNum RspPHIInBlk2(2, 0, RspLoc);
+  ValueIDNum RspDefInBlk2(2, 1, RspLoc);
+  ValueIDNum RspPHIInBlk3(3, 0, RspLoc);
+  ValueIDNum RspDefInBlk3(3, 1, RspLoc);
+  ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc);
+
+  // As ever, test that everything is live-through if there are no defs.
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(InLocs[4][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][0], LiveInRsp);
+
+  // Putting a def in the 'join' block will cause us to have two distinct
+  // PHIs in each loop head, then on entry to the join block.
+  TransferFunc[3].insert({RspLoc, RspDefInBlk3});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // We should get the same behaviour if we put the def in either of the
+  // loop heads -- it should force the other head to be a PHI.
+  TransferFunc[3].clear();
+  TransferFunc[1].insert({RspLoc, RspDefInBlk1});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(InLocs[4][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspPHIInBlk3);
+
+  // Check symmetry,
+  TransferFunc[1].clear();
+  TransferFunc[2].insert({RspLoc, RspDefInBlk2});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(InLocs[4][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspPHIInBlk3);
+
+  // Test some scenarios where there _shouldn't_ be any PHIs created at heads.
+  // These are those PHIs are created, but value propagation eliminates them.
+  // For example, lets copy rsp-livein to $rsp inside each loop head, so that
+  // there's no need for a PHI in the join block. Put a def of $rsp in block 3
+  // to force PHIs elsewhere.
+  TransferFunc[2].clear();
+  TransferFunc[0].insert({RaxLoc, LiveInRsp});
+  TransferFunc[1].insert({RspLoc, RaxPHIInBlk1});
+  TransferFunc[2].insert({RspLoc, RaxPHIInBlk2});
+  TransferFunc[3].insert({RspLoc, RspDefInBlk3});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // In fact, if we eliminate the def in block 3, none of those PHIs are
+  // necessary, as we're just repeatedly copying LiveInRsp into $rsp. They
+  // should all be value propagated out.
+  TransferFunc[3].clear();
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(InLocs[4][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][0], LiveInRsp);
+}
+
+TEST_F(InstrRefLDVTest, MLocBadlyNestedLoops) {
+  //           entry
+  //             |
+  //           loop1 -o
+  //             | ^
+  //             | ^
+  //           loop2 -o
+  //             | ^
+  //             | ^
+  //           loop3 -o
+  //             |
+  //            ret
+  //
+  // NB: the loop blocks self-loop, which is a bit too fiddly to draw on
+  // accurately.
+  llvm::Function &F = const_cast<llvm::Function &>(MF->getFunction());
+  auto BB1 = BasicBlock::Create(Ctx, "entry", &F);
+  auto BB2 = BasicBlock::Create(Ctx, "loop1", &F);
+  auto BB3 = BasicBlock::Create(Ctx, "loop2", &F);
+  auto BB4 = BasicBlock::Create(Ctx, "loop3", &F);
+  auto BB5 = BasicBlock::Create(Ctx, "ret", &F);
+  IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5);
+  IRB1.CreateBr(BB2);
+  IRB2.CreateBr(BB3);
+  IRB3.CreateBr(BB4);
+  IRB4.CreateBr(BB5);
+  IRB5.CreateRetVoid();
+  MBB1 = MF->CreateMachineBasicBlock(BB1);
+  MF->insert(MF->end(), MBB1);
+  MBB2 = MF->CreateMachineBasicBlock(BB2);
+  MF->insert(MF->end(), MBB2);
+  MBB3 = MF->CreateMachineBasicBlock(BB3);
+  MF->insert(MF->end(), MBB3);
+  MBB4 = MF->CreateMachineBasicBlock(BB4);
+  MF->insert(MF->end(), MBB4);
+  MBB5 = MF->CreateMachineBasicBlock(BB5);
+  MF->insert(MF->end(), MBB5);
+  MBB1->addSuccessor(MBB2);
+  MBB2->addSuccessor(MBB2);
+  MBB2->addSuccessor(MBB3);
+  MBB3->addSuccessor(MBB2);
+  MBB3->addSuccessor(MBB3);
+  MBB3->addSuccessor(MBB4);
+  MBB4->addSuccessor(MBB3);
+  MBB4->addSuccessor(MBB4);
+  MBB4->addSuccessor(MBB5);
+  MF->RenumberBlocks();
+
+  setupLDVObj();
+
+  ASSERT_TRUE(MTracker->getNumLocs() == 1);
+  LocIdx RspLoc(0);
+  Register RAX = getRegByName("RAX");
+  LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX);
+
+  ValueIDNum InLocs[5][2], OutLocs[5][2];
+  ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3],
+                              InLocs[4]};
+  ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3],
+                               OutLocs[4]};
+
+  SmallVector<MLocTransferMap, 1> TransferFunc;
+  TransferFunc.resize(5);
+
+  ValueIDNum LiveInRsp(0, 0, RspLoc);
+  ValueIDNum RspPHIInBlk1(1, 0, RspLoc);
+  ValueIDNum RspDefInBlk1(1, 1, RspLoc);
+  ValueIDNum RspPHIInBlk2(2, 0, RspLoc);
+  ValueIDNum RspPHIInBlk3(3, 0, RspLoc);
+  ValueIDNum RspDefInBlk3(3, 1, RspLoc);
+  ValueIDNum LiveInRax(0, 0, RaxLoc);
+  ValueIDNum RaxPHIInBlk3(3, 0, RaxLoc);
+
+  // As ever, test that everything is live-through if there are no defs.
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], LiveInRsp);
+  EXPECT_EQ(InLocs[2][0], LiveInRsp);
+  EXPECT_EQ(InLocs[3][0], LiveInRsp);
+  EXPECT_EQ(InLocs[4][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][0], LiveInRsp);
+
+  // A def in loop3 should cause PHIs in every loop block: they're all
+  // reachable from each other.
+  TransferFunc[3].insert({RspLoc, RspDefInBlk3});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk3);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk3);
+
+  // A def in loop1 should cause a PHI in loop1, but not the other blocks.
+  // loop2 and loop3 are dominated by the def in loop1, so they should have
+  // that value live-through.
+  TransferFunc[3].clear();
+  TransferFunc[1].insert({RspLoc, RspDefInBlk1});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[3][0], RspDefInBlk1);
+  EXPECT_EQ(InLocs[4][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[3][0], RspDefInBlk1);
+  EXPECT_EQ(OutLocs[4][0], RspDefInBlk1);
+
+  // As with earlier tricks: copy $rsp to $rax in the entry block, then $rax
+  // to $rsp in block 3. The only def of $rsp is simply copying the same value
+  // back into itself, and the value of $rsp is LiveInRsp all the way through.
+  // PHIs should be created, then value-propagated away...  however this
+  // doesn't work in practice.
+  // Consider the entry to loop3: we can determine that there's an incoming
+  // PHI value from loop2, and LiveInRsp from the self-loop. This would still
+  // justify having a PHI on entry to loop3. The only way to completely
+  // value-propagate these PHIs away would be to speculatively explore what
+  // PHIs could be eliminated and what that would lead to; which is
+  // combinatorially complex.
+  // Happily:
+  //  a) In this scenario, we always have a tracked location for LiveInRsp
+  //     anyway, so there's no loss in availability,
+  //  b) Only DBG_PHIs of a register would be vunlerable to this scenario, and
+  //     even then only if a true PHI became a DBG_PHI and was then optimised
+  //     through branch folding to no longer be at a CFG join,
+  //  c) The register allocator can spot this kind of redundant COPY easily,
+  //     and eliminate it.
+  //
+  // This unit test left in as a reference for the limitations of this
+  // approach. PHIs will be left in $rsp on entry to each block.
+  TransferFunc[1].clear();
+  TransferFunc[0].insert({RaxLoc, LiveInRsp});
+  TransferFunc[3].insert({RspLoc, RaxPHIInBlk3});
+  initValueArray(InLocsPtr, 5, 2);
+  initValueArray(OutLocsPtr, 5, 2);
+  buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc);
+  EXPECT_EQ(InLocs[0][0], LiveInRsp);
+  EXPECT_EQ(InLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(InLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(InLocs[3][0], RspPHIInBlk3);
+  EXPECT_EQ(InLocs[4][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1);
+  EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2);
+  EXPECT_EQ(OutLocs[3][0], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][0], LiveInRsp);
+  // Check $rax's value. It should have $rsps value from the entry block
+  // onwards.
+  EXPECT_EQ(InLocs[0][1], LiveInRax);
+  EXPECT_EQ(InLocs[1][1], LiveInRsp);
+  EXPECT_EQ(InLocs[2][1], LiveInRsp);
+  EXPECT_EQ(InLocs[3][1], LiveInRsp);
+  EXPECT_EQ(InLocs[4][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[0][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[1][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[2][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[3][1], LiveInRsp);
+  EXPECT_EQ(OutLocs[4][1], LiveInRsp);
+}