Index: llvm/lib/CodeGen/LiveDebugValues.cpp
===================================================================
--- llvm/lib/CodeGen/LiveDebugValues.cpp
+++ llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -26,12 +26,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/CoalescingBitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/UniqueVector.h"
 #include "llvm/CodeGen/LexicalScopes.h"
@@ -111,6 +111,7 @@
 namespace {
 
 using DefinedRegsSet = SmallSet<Register, 32>;
+using VarLocSet = CoalescingBitVector<uint64_t>;
 
 /// A type-checked pair of {Register Location (or 0), Index}, used to index
 /// into a \ref VarLocMap. This can be efficiently converted to a 64-bit int
@@ -133,6 +134,10 @@
     return (static_cast<uint64_t>(Location) << 32) | Index;
   }
 
+  static uint64_t rawIndexForReg(uint32_t Reg) {
+    return LocIndex(Reg, 0).getAsRawInteger();
+  }
+
   static LocIndex fromRawInteger(uint64_t ID) {
     return {static_cast<uint32_t>(ID >> 32), static_cast<uint32_t>(ID)};
   }
@@ -145,6 +150,7 @@
   const TargetFrameLowering *TFI;
   BitVector CalleeSavedRegs;
   LexicalScopes LS;
+  VarLocSet::Allocator Alloc;
 
   enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
 
@@ -434,18 +440,28 @@
   };
 
   class VarLocMap {
-    UniqueVector<VarLoc> VarLocs;
+    std::map<VarLoc, uint32_t> Var2Index;
+    SmallDenseMap<uint32_t, std::vector<VarLoc>> Loc2Vars;
 
   public:
     LocIndex insert(const VarLoc &VL) {
-      uint32_t Index = VarLocs.insert(VL);
-      return {0, Index};
+      uint32_t Location = VL.isDescribedByReg();
+      uint32_t &Index = Var2Index[VL];
+      if (!Index) {
+        auto &Vars = Loc2Vars[Location];
+        Vars.push_back(VL);
+        Index = Vars.size();
+      }
+      return {Location, Index - 1};
     }
 
-    const VarLoc &operator[](LocIndex ID) const { return VarLocs[ID.Index]; }
+    const VarLoc &operator[](LocIndex ID) const {
+      auto LocIt = Loc2Vars.find(ID.Location);
+      assert(LocIt != Loc2Vars.end() && "Location not tracked");
+      return LocIt->second[ID.Index];
+    }
   };
 
-  using VarLocSet = SparseBitVector<>;
   using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
   struct TransferDebugPair {
     MachineInstr *TransferInst; /// Instruction where this transfer occurs.
@@ -484,7 +500,8 @@
     OverlapMap &OverlappingFragments;
 
   public:
-    OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {}
+    OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap)
+        : VarLocs(Alloc), OverlappingFragments(_OLapMap) {}
 
     const VarLocSet &getVarLocs() const { return VarLocs; }
 
@@ -525,6 +542,27 @@
     }
   };
 
+  /// Collect all VarLoc IDs from \p CollectFrom for VarLocs which are located
+  /// in \p Reg. Insert collected IDs in \p Collected.
+  void collectIDsForReg(VarLocSet &Collected, uint32_t Reg,
+                        const VarLocSet &CollectFrom) const;
+
+  /// Get the registers which are used by VarLocs tracked by \p CollectFrom.
+  std::vector<uint32_t> getUsedRegs(const VarLocSet &CollectFrom) const;
+
+
+  VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) {
+    auto Result = Locs.try_emplace(MBB, Alloc);
+    return Result.first->second;
+  }
+
+  const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB,
+                                   const VarLocInMBB &Locs) const {
+    auto It = Locs.find(MBB);
+    assert(It != Locs.end() && "MBB not in map");
+    return It->second;
+  }
+
   /// Tests whether this instruction is a spill to a stack location.
   bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
 
@@ -566,7 +604,7 @@
                         VarLocMap &VarLocIDs, const VarLoc &EntryVL);
   void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
                        VarLocMap &VarLocIDs, TransferMap &Transfers,
-                       SparseBitVector<> &KillSet);
+                       VarLocSet &KillSet);
   void recordEntryValue(const MachineInstr &MI,
                         const DefinedRegsSet &DefinedRegs,
                         OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
@@ -711,6 +749,42 @@
   return llvm::None;
 }
 
+void LiveDebugValues::collectIDsForReg(VarLocSet &Collected, uint32_t Reg,
+                                       const VarLocSet &CollectFrom) const {
+  // The half-open range [FirstIndexForReg, FirstInvalidIndex) contains all
+  // possible VarLoc IDs for VarLocs which live in Reg.
+  uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg);
+  uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1);
+  // Iterate through that half-open interval and collect all the set IDs.
+  for (auto It = CollectFrom.find(FirstIndexForReg), End = CollectFrom.end();
+       It != End && *It < FirstInvalidIndex; ++It)
+    Collected.set(*It);
+}
+
+std::vector<uint32_t>
+LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom) const {
+  std::vector<uint32_t> UsedRegs;
+  // All register-based VarLocs are assigned indices greater than or equal to
+  // FirstRegIndex.
+  uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1);
+  for (auto It = CollectFrom.find(FirstRegIndex), End = CollectFrom.end();
+       It != End;) {
+    // We found a VarLoc ID for a VarLoc that lives in a register. Figure out
+    // which register and add it to UsedRegs.
+    uint32_t FoundReg = LocIndex::fromRawInteger(*It).Location;
+    assert((UsedRegs.empty() || FoundReg != UsedRegs.back()) &&
+           "Duplicate used reg");
+    UsedRegs.push_back(FoundReg);
+
+    // Skip to the next /set/ register. Note that this finds a lower bound, so
+    // even if there aren't any VarLocs living in `FoundReg+1`, we're still
+    // guaranteed to move on to the next register (or to end()).
+    uint64_t NextRegIndex = LocIndex::rawIndexForReg(FoundReg + 1);
+    It = CollectFrom.find(NextRegIndex);
+  }
+  return UsedRegs;
+}
+
 //===----------------------------------------------------------------------===//
 //            Debug Range Extension Implementation
 //===----------------------------------------------------------------------===//
@@ -723,7 +797,9 @@
                                        raw_ostream &Out) const {
   Out << '\n' << msg << '\n';
   for (const MachineBasicBlock &BB : MF) {
-    const VarLocSet &L = V.lookup(&BB);
+    if (!V.count(&BB))
+      continue;
+    const VarLocSet &L = getVarLocsInMBB(&BB, V);
     if (L.empty())
       continue;
     Out << "MBB: " << BB.getNumber() << ":\n";
@@ -863,7 +939,7 @@
                                       OpenRangesSet &OpenRanges,
                                       VarLocMap &VarLocIDs,
                                       TransferMap &Transfers,
-                                      SparseBitVector<> &KillSet) {
+                                      VarLocSet &KillSet) {
   for (uint64_t ID : KillSet) {
     LocIndex Idx = LocIndex::fromRawInteger(ID);
     const VarLoc &VL = VarLocIDs[Idx];
@@ -971,55 +1047,50 @@
   MachineFunction *MF = MI.getMF();
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
-  SparseBitVector<> KillSet;
+
+  // Find the regs killed by MI, and find regmasks of preserved regs.
   // Max out the number of statically allocated elements in `DeadRegs`, as this
-  // prevents fallback to std::set::count() operations. As often as possible, we
-  // want a linear scan here.
-  SmallSet<unsigned, 32> DeadRegs;
-  SmallVector<const uint32_t *, 4> DeadRegMasks;
+  // prevents fallback to std::set::count() operations.
+  SmallSet<uint32_t, 32> DeadRegs;
+  SmallVector<const uint32_t *, 4> RegMasks;
   for (const MachineOperand &MO : MI.operands()) {
-    // Determine whether the operand is a register def.  Assume that call
-    // instructions never clobber SP, because some backends (e.g., AArch64)
-    // never list SP in the regmask.
+    // Determine whether the operand is a register def.
     if (MO.isReg() && MO.isDef() && MO.getReg() &&
         Register::isPhysicalRegister(MO.getReg()) &&
         !(MI.isCall() && MO.getReg() == SP)) {
-      // Remove ranges of all aliased registers. Note: the actual removal is
-      // done after we finish visiting MachineOperands, for performance reasons.
+      // Remove ranges of all aliased registers.
       for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
         // FIXME: Can we break out of this loop early if no insertion occurs?
         DeadRegs.insert(*RAI);
     } else if (MO.isRegMask()) {
-      // Remove ranges of all clobbered registers. Register masks don't usually
-      // list SP as preserved.  While the debug info may be off for an
-      // instruction or two around callee-cleanup calls, transferring the
-      // DEBUG_VALUE across the call is still a better user experience. Note:
-      // the actual removal is done after we finish visiting MachineOperands,
-      // for performance reasons.
-      DeadRegMasks.push_back(MO.getRegMask());
+      RegMasks.push_back(MO.getRegMask());
     }
   }
-  // For performance reasons, it's critical to iterate over the open var locs
-  // at most once.
-  for (uint64_t ID : OpenRanges.getVarLocs()) {
-    LocIndex Idx = LocIndex::fromRawInteger(ID);
-    unsigned Reg = VarLocIDs[Idx].isDescribedByReg();
-    if (!Reg)
-      continue;
 
-    if (DeadRegs.count(Reg)) {
-      KillSet.set(ID);
+  // Erase VarLocs which reside in one of the dead registers. For performance
+  // reasons, it's critical to not iterate over the full set of open VarLocs.
+  // Iterate over the set of dying/used regs instead.
+  VarLocSet KillSet(Alloc);
+  for (uint32_t DeadReg : DeadRegs)
+    collectIDsForReg(KillSet, DeadReg, OpenRanges.getVarLocs());
+  for (uint32_t Reg : getUsedRegs(OpenRanges.getVarLocs())) {
+    // The VarLocs residing in this register are already in the kill set.
+    if (DeadRegs.count(Reg))
       continue;
-    }
 
+    // Remove ranges of all clobbered registers. Register masks don't usually
+    // list SP as preserved. Assume that call instructions never clobber SP,
+    // because some backends (e.g., AArch64) never list SP in the regmask. While
+    // the debug info may be off for an instruction or two around
+    // callee-cleanup calls, transferring the DEBUG_VALUE across the call is
+    // still a better user experience.
     if (Reg == SP)
       continue;
-    bool AnyRegMaskKillsReg =
-        any_of(DeadRegMasks, [Reg](const uint32_t *RegMask) {
-          return MachineOperand::clobbersPhysReg(RegMask, Reg);
-        });
+    bool AnyRegMaskKillsReg = any_of(RegMasks, [Reg](const uint32_t *RegMask) {
+      return MachineOperand::clobbersPhysReg(RegMask, Reg);
+    });
     if (AnyRegMaskKillsReg)
-      KillSet.set(ID);
+      collectIDsForReg(KillSet, Reg, OpenRanges.getVarLocs());
   }
   OpenRanges.erase(KillSet, VarLocIDs);
 
@@ -1119,7 +1190,7 @@
   // First, if there are any DBG_VALUEs pointing at a spill slot that is
   // written to, then close the variable location. The value in memory
   // will have changed.
-  VarLocSet KillSet;
+  VarLocSet KillSet(Alloc);
   if (isSpillInstruction(MI, MF)) {
     Loc = extractSpillBaseRegAndOffset(MI);
     for (uint64_t ID : OpenRanges.getVarLocs()) {
@@ -1264,7 +1335,7 @@
     dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ":  ";
     VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI);
   });
-  VarLocSet &VLS = OutLocs[CurMBB];
+  VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
   Changed = VLS != OpenRanges.getVarLocs();
   // New OutLocs set may be different due to spill, restore or register
   // copy instruction processing.
@@ -1356,7 +1427,7 @@
   LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
   bool Changed = false;
 
-  VarLocSet InLocsT; // Temporary incoming locations.
+  VarLocSet InLocsT(Alloc); // Temporary incoming locations.
 
   // For all predecessors of this MBB, find the set of VarLocs that
   // can be joined.
@@ -1399,7 +1470,7 @@
   }
 
   // Filter out DBG_VALUES that are out of scope.
-  VarLocSet KillSet;
+  VarLocSet KillSet(Alloc);
   bool IsArtificial = ArtificialBlocks.count(&MBB);
   if (!IsArtificial) {
     for (uint64_t ID : InLocsT) {
@@ -1421,19 +1492,20 @@
   assert((NumVisited || MBB.pred_empty()) &&
          "Should have processed at least one predecessor");
 
-  VarLocSet &ILS = InLocs[&MBB];
-  VarLocSet &Pending = PendingInLocs[&MBB];
+  VarLocSet &ILS = getVarLocsInMBB(&MBB, InLocs);
+  VarLocSet &Pending = getVarLocsInMBB(&MBB, PendingInLocs);
 
   // New locations will have DBG_VALUE insts inserted at the start of the
   // block, after location propagation has finished. Record the insertions
   // that we need to perform in the Pending set.
   VarLocSet Diff = InLocsT;
   Diff.intersectWithComplement(ILS);
-  Pending |= Diff;
-  ILS |= Diff;
-  unsigned JustInserted = Diff.count();
-  NumInserted += JustInserted;
-  Changed = JustInserted > 0;
+  if (!Diff.empty()) {
+    Pending.set(Diff);
+    ILS.set(Diff);
+    NumInserted += Diff.size();
+    Changed = true;
+  }
 
   // We may have lost locations by learning about a predecessor that either
   // loses or moves a variable. Find any locations in ILS that are not in the
@@ -1442,7 +1514,7 @@
   Removed.intersectWithComplement(InLocsT);
   Pending.intersectWithComplement(Removed);
   ILS.intersectWithComplement(Removed);
-  unsigned JustRemoved = Removed.count();
+  unsigned JustRemoved = Removed.size();
   NumRemoved += JustRemoved;
   Changed |= JustRemoved > 0;
 
@@ -1565,7 +1637,7 @@
 
   VarLocMap VarLocIDs;         // Map VarLoc<>unique ID for use in bitvectors.
   OverlapMap OverlapFragments; // Map of overlapping variable fragments.
-  OpenRangesSet OpenRanges(OverlapFragments);
+  OpenRangesSet OpenRanges(Alloc, OverlapFragments);
                               // Ranges that are open until end of bb.
   VarLocInMBB OutLocs;        // Ranges that exist beyond bb.
   VarLocInMBB InLocs;         // Ranges that are incoming after joining.
@@ -1605,7 +1677,7 @@
 
   // Initialize per-block structures and scan for fragment overlaps.
   for (auto &MBB : MF) {
-    PendingInLocs[&MBB] = VarLocSet();
+    PendingInLocs.try_emplace(&MBB, Alloc);
 
     for (auto &MI : MBB) {
       if (MI.isDebugValue())
@@ -1657,7 +1729,8 @@
         // examine spill, copy and restore instructions to see whether they
         // operate with registers that correspond to user variables.
         // First load any pending inlocs.
-        OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
+        OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, PendingInLocs),
+                                    VarLocIDs);
         for (auto &MI : *MBB)
           process(MI, OpenRanges, VarLocIDs, Transfers);
         OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
Index: llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
===================================================================
--- llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
+++ llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
@@ -4,6 +4,8 @@
 # block structure relevant to the debug entry values propagation.
 #
 # CHECK: ![[ARG_B:.*]] = !DILocalVariable(name: "b"
+# CHECK: ![[ARG_C:.*]] = !DILocalVariable(name: "c"
+# CHECK: ![[ARG_Q:.*]] = !DILocalVariable(name: "q"
 # CHECK: bb.0.entry
 # CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression()
 # CHECK: bb.1.if.then
@@ -21,8 +23,9 @@
 # CHECK-NEXT: $ebx = MOV32ri 2
 # CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1)
 # CHECK: bb.3.if.end
+# CHECK-NEXT: DBG_VALUE $edx, $noreg, ![[ARG_Q]], !DIExpression()
+# CHECK-NEXT: DBG_VALUE $ebp, $noreg, ![[ARG_C]], !DIExpression()
 # CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1)
-#
 --- |
   ; ModuleID = 'test.c'
   source_filename = "test.c"
Index: llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir
===================================================================
--- llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir
+++ llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir
@@ -10,8 +10,8 @@
 # Verify that DW_OP_LLVM_entry_values are generated for parameters with multiple
 # DBG_VALUEs at entry block.
 # CHECK: DBG_VALUE $edi, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}}
-# CHECK: DBG_VALUE $edx, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}}
 # CHECK: DBG_VALUE $esi, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}}
+# CHECK: DBG_VALUE $edx, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}}
 
 --- |
   ; ModuleID = 'multiple-param-dbg-value-entry.ll'