Index: llvm/lib/CodeGen/LiveDebugValues.cpp =================================================================== --- llvm/lib/CodeGen/LiveDebugValues.cpp +++ llvm/lib/CodeGen/LiveDebugValues.cpp @@ -26,12 +26,12 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/CoalescingBitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -111,6 +111,7 @@ namespace { using DefinedRegsSet = SmallSet; +using VarLocSet = CoalescingBitVector; /// A type-checked pair of {Register Location (or 0), Index}, used to index /// into a \ref VarLocMap. This can be efficiently converted to a 64-bit int @@ -120,6 +121,10 @@ /// Why encode a location /into/ the VarLocMap index? This makes it possible /// to find the open VarLocs killed by a register def very quickly. This is a /// performance-critical operation for LiveDebugValues. +/// +/// TODO: Consider adding reserved intervals for kinds of VarLocs other than +/// RegisterKind, like SpillLocKind or EntryValueKind, to optimize iteration +/// over open locations. struct LocIndex { uint32_t Location; // Physical registers live in the range [1;2^30) (see // \ref MCRegister), so we have plenty of range left here @@ -136,6 +141,12 @@ static LocIndex fromRawInteger(uint64_t ID) { return {static_cast(ID >> 32), static_cast(ID)}; } + + /// Get the start of the interval reserved for VarLocs of kind RegisterKind + /// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1. + static uint64_t rawIndexForReg(uint32_t Reg) { + return LocIndex(Reg, 0).getAsRawInteger(); + } }; class LiveDebugValues : public MachineFunctionPass { @@ -145,6 +156,7 @@ const TargetFrameLowering *TFI; BitVector CalleeSavedRegs; LexicalScopes LS; + VarLocSet::Allocator Alloc; enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore }; @@ -434,18 +446,28 @@ }; class VarLocMap { - UniqueVector VarLocs; + std::map Var2Index; + SmallDenseMap> Loc2Vars; public: LocIndex insert(const VarLoc &VL) { - uint32_t Index = VarLocs.insert(VL); - return {0, Index}; + uint32_t Location = VL.isDescribedByReg(); + uint32_t &Index = Var2Index[VL]; + if (!Index) { + auto &Vars = Loc2Vars[Location]; + Vars.push_back(VL); + Index = Vars.size(); + } + return {Location, Index - 1}; } - const VarLoc &operator[](LocIndex ID) const { return VarLocs[ID.Index]; } + const VarLoc &operator[](LocIndex ID) const { + auto LocIt = Loc2Vars.find(ID.Location); + assert(LocIt != Loc2Vars.end() && "Location not tracked"); + return LocIt->second[ID.Index]; + } }; - using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap; struct TransferDebugPair { MachineInstr *TransferInst; /// Instruction where this transfer occurs. @@ -484,7 +506,8 @@ OverlapMap &OverlappingFragments; public: - OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {} + OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap) + : VarLocs(Alloc), OverlappingFragments(_OLapMap) {} const VarLocSet &getVarLocs() const { return VarLocs; } @@ -525,6 +548,28 @@ } }; + /// Collect all VarLoc IDs from \p CollectFrom for VarLocs which are located + /// in \p Reg, of kind RegisterKind. Insert collected IDs in \p Collected. + void collectIDsForReg(VarLocSet &Collected, uint32_t Reg, + const VarLocSet &CollectFrom) const; + + /// Get the registers which are used by VarLocs of kind RegisterKind tracked + /// by \p CollectFrom. + void getUsedRegs(const VarLocSet &CollectFrom, + SmallVectorImpl &UsedRegs) const; + + VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) { + auto Result = Locs.try_emplace(MBB, Alloc); + return Result.first->second; + } + + const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, + const VarLocInMBB &Locs) const { + auto It = Locs.find(MBB); + assert(It != Locs.end() && "MBB not in map"); + return It->second; + } + /// Tests whether this instruction is a spill to a stack location. bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); @@ -566,7 +611,7 @@ VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - SparseBitVector<> &KillSet); + VarLocSet &KillSet); void recordEntryValue(const MachineInstr &MI, const DefinedRegsSet &DefinedRegs, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); @@ -711,6 +756,40 @@ return llvm::None; } +void LiveDebugValues::collectIDsForReg(VarLocSet &Collected, uint32_t Reg, + const VarLocSet &CollectFrom) const { + // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all + // possible VarLoc IDs for VarLocs of kind RegisterKind which live in Reg. + uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg); + uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1); + // Iterate through that half-open interval and collect all the set IDs. + for (auto It = CollectFrom.find(FirstIndexForReg), End = CollectFrom.end(); + It != End && *It < FirstInvalidIndex; ++It) + Collected.set(*It); +} + +void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom, + SmallVectorImpl &UsedRegs) const { + // All register-based VarLocs are assigned indices greater than or equal to + // FirstRegIndex. + uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1); + for (auto It = CollectFrom.find(FirstRegIndex), End = CollectFrom.end(); + It != End;) { + // We found a VarLoc ID for a VarLoc that lives in a register. Figure out + // which register and add it to UsedRegs. + uint32_t FoundReg = LocIndex::fromRawInteger(*It).Location; + assert((UsedRegs.empty() || FoundReg != UsedRegs.back()) && + "Duplicate used reg"); + UsedRegs.push_back(FoundReg); + + // Skip to the next /set/ register. Note that this finds a lower bound, so + // even if there aren't any VarLocs living in `FoundReg+1`, we're still + // guaranteed to move on to the next register (or to end()). + uint64_t NextRegIndex = LocIndex::rawIndexForReg(FoundReg + 1); + It = CollectFrom.find(NextRegIndex); + } +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -723,7 +802,9 @@ raw_ostream &Out) const { Out << '\n' << msg << '\n'; for (const MachineBasicBlock &BB : MF) { - const VarLocSet &L = V.lookup(&BB); + if (!V.count(&BB)) + continue; + const VarLocSet &L = getVarLocsInMBB(&BB, V); if (L.empty()) continue; Out << "MBB: " << BB.getNumber() << ":\n"; @@ -863,7 +944,7 @@ OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - SparseBitVector<> &KillSet) { + VarLocSet &KillSet) { for (uint64_t ID : KillSet) { LocIndex Idx = LocIndex::fromRawInteger(ID); const VarLoc &VL = VarLocIDs[Idx]; @@ -971,56 +1052,56 @@ MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - SparseBitVector<> KillSet; + + // Find the regs killed by MI, and find regmasks of preserved regs. // Max out the number of statically allocated elements in `DeadRegs`, as this - // prevents fallback to std::set::count() operations. As often as possible, we - // want a linear scan here. - SmallSet DeadRegs; - SmallVector DeadRegMasks; + // prevents fallback to std::set::count() operations. + SmallSet DeadRegs; + SmallVector RegMasks; for (const MachineOperand &MO : MI.operands()) { - // Determine whether the operand is a register def. Assume that call - // instructions never clobber SP, because some backends (e.g., AArch64) - // never list SP in the regmask. + // Determine whether the operand is a register def. if (MO.isReg() && MO.isDef() && MO.getReg() && Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { - // Remove ranges of all aliased registers. Note: the actual removal is - // done after we finish visiting MachineOperands, for performance reasons. + // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) // FIXME: Can we break out of this loop early if no insertion occurs? DeadRegs.insert(*RAI); } else if (MO.isRegMask()) { + RegMasks.push_back(MO.getRegMask()); + } + } + + // Erase VarLocs which reside in one of the dead registers. For performance + // reasons, it's critical to not iterate over the full set of open VarLocs. + // Iterate over the set of dying/used regs instead. + VarLocSet KillSet(Alloc); + for (uint32_t DeadReg : DeadRegs) + collectIDsForReg(KillSet, DeadReg, OpenRanges.getVarLocs()); + if (!RegMasks.empty()) { + SmallVector UsedRegs; + getUsedRegs(OpenRanges.getVarLocs(), UsedRegs); + for (uint32_t Reg : UsedRegs) { + // The VarLocs residing in this register are already in the kill set. + if (DeadRegs.count(Reg)) + continue; + // Remove ranges of all clobbered registers. Register masks don't usually - // list SP as preserved. While the debug info may be off for an - // instruction or two around callee-cleanup calls, transferring the - // DEBUG_VALUE across the call is still a better user experience. Note: - // the actual removal is done after we finish visiting MachineOperands, - // for performance reasons. - DeadRegMasks.push_back(MO.getRegMask()); + // list SP as preserved. Assume that call instructions never clobber SP, + // because some backends (e.g., AArch64) never list SP in the regmask. + // While the debug info may be off for an instruction or two around + // callee-cleanup calls, transferring the DEBUG_VALUE across the call is + // still a better user experience. + if (Reg == SP) + continue; + bool AnyRegMaskKillsReg = + any_of(RegMasks, [Reg](const uint32_t *RegMask) { + return MachineOperand::clobbersPhysReg(RegMask, Reg); + }); + if (AnyRegMaskKillsReg) + collectIDsForReg(KillSet, Reg, OpenRanges.getVarLocs()); } } - // For performance reasons, it's critical to iterate over the open var locs - // at most once. - for (uint64_t ID : OpenRanges.getVarLocs()) { - LocIndex Idx = LocIndex::fromRawInteger(ID); - unsigned Reg = VarLocIDs[Idx].isDescribedByReg(); - if (!Reg) - continue; - - if (DeadRegs.count(Reg)) { - KillSet.set(ID); - continue; - } - - if (Reg == SP) - continue; - bool AnyRegMaskKillsReg = - any_of(DeadRegMasks, [Reg](const uint32_t *RegMask) { - return MachineOperand::clobbersPhysReg(RegMask, Reg); - }); - if (AnyRegMaskKillsReg) - KillSet.set(ID); - } OpenRanges.erase(KillSet, VarLocIDs); if (auto *TPC = getAnalysisIfAvailable()) { @@ -1119,7 +1200,7 @@ // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, then close the variable location. The value in memory // will have changed. - VarLocSet KillSet; + VarLocSet KillSet(Alloc); if (isSpillInstruction(MI, MF)) { Loc = extractSpillBaseRegAndOffset(MI); for (uint64_t ID : OpenRanges.getVarLocs()) { @@ -1264,7 +1345,7 @@ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI); }); - VarLocSet &VLS = OutLocs[CurMBB]; + VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs); Changed = VLS != OpenRanges.getVarLocs(); // New OutLocs set may be different due to spill, restore or register // copy instruction processing. @@ -1356,7 +1437,7 @@ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; - VarLocSet InLocsT; // Temporary incoming locations. + VarLocSet InLocsT(Alloc); // Temporary incoming locations. // For all predecessors of this MBB, find the set of VarLocs that // can be joined. @@ -1399,7 +1480,7 @@ } // Filter out DBG_VALUES that are out of scope. - VarLocSet KillSet; + VarLocSet KillSet(Alloc); bool IsArtificial = ArtificialBlocks.count(&MBB); if (!IsArtificial) { for (uint64_t ID : InLocsT) { @@ -1421,32 +1502,28 @@ assert((NumVisited || MBB.pred_empty()) && "Should have processed at least one predecessor"); - VarLocSet &ILS = InLocs[&MBB]; - VarLocSet &Pending = PendingInLocs[&MBB]; + VarLocSet &ILS = getVarLocsInMBB(&MBB, InLocs); + VarLocSet &Pending = getVarLocsInMBB(&MBB, PendingInLocs); // New locations will have DBG_VALUE insts inserted at the start of the // block, after location propagation has finished. Record the insertions // that we need to perform in the Pending set. VarLocSet Diff = InLocsT; Diff.intersectWithComplement(ILS); - for (auto ID : Diff) { - Pending.set(ID); - ILS.set(ID); - ++NumInserted; - Changed = true; - } + Pending |= Diff; + ILS |= Diff; + NumInserted += Diff.count(); + Changed |= !Diff.empty(); // We may have lost locations by learning about a predecessor that either // loses or moves a variable. Find any locations in ILS that are not in the // new in-locations, and delete those. VarLocSet Removed = ILS; Removed.intersectWithComplement(InLocsT); - for (auto ID : Removed) { - Pending.reset(ID); - ILS.reset(ID); - ++NumRemoved; - Changed = true; - } + Pending.intersectWithComplement(Removed); + ILS.intersectWithComplement(Removed); + NumRemoved += Removed.count(); + Changed |= !Removed.empty(); return Changed; } @@ -1567,7 +1644,7 @@ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors. OverlapMap OverlapFragments; // Map of overlapping variable fragments. - OpenRangesSet OpenRanges(OverlapFragments); + OpenRangesSet OpenRanges(Alloc, OverlapFragments); // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. @@ -1607,7 +1684,7 @@ // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { - PendingInLocs[&MBB] = VarLocSet(); + PendingInLocs.try_emplace(&MBB, Alloc); for (auto &MI : MBB) { if (MI.isDebugValue()) @@ -1659,7 +1736,8 @@ // examine spill, copy and restore instructions to see whether they // operate with registers that correspond to user variables. // First load any pending inlocs. - OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); + OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, PendingInLocs), + VarLocIDs); for (auto &MI : *MBB) process(MI, OpenRanges, VarLocIDs, Transfers); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); Index: llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir =================================================================== --- llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir +++ llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir @@ -4,6 +4,8 @@ # block structure relevant to the debug entry values propagation. # # CHECK: ![[ARG_B:.*]] = !DILocalVariable(name: "b" +# CHECK: ![[ARG_C:.*]] = !DILocalVariable(name: "c" +# CHECK: ![[ARG_Q:.*]] = !DILocalVariable(name: "q" # CHECK: bb.0.entry # CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() # CHECK: bb.1.if.then @@ -21,8 +23,9 @@ # CHECK-NEXT: $ebx = MOV32ri 2 # CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) # CHECK: bb.3.if.end +# CHECK-NEXT: DBG_VALUE $edx, $noreg, ![[ARG_Q]], !DIExpression() +# CHECK-NEXT: DBG_VALUE $ebp, $noreg, ![[ARG_C]], !DIExpression() # CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) -# --- | ; ModuleID = 'test.c' source_filename = "test.c" Index: llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir =================================================================== --- llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir +++ llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir @@ -10,8 +10,8 @@ # Verify that DW_OP_LLVM_entry_values are generated for parameters with multiple # DBG_VALUEs at entry block. # CHECK: DBG_VALUE $edi, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}} -# CHECK: DBG_VALUE $edx, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}} # CHECK: DBG_VALUE $esi, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}} +# CHECK: DBG_VALUE $edx, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location {{.*}} --- | ; ModuleID = 'multiple-param-dbg-value-entry.ll'