diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -647,11 +647,15 @@
 // Vendor extensions:
 // Extensions for GNU-style thread-local storage.
 HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU)
+// The GNU implicit pointer extension.
+HANDLE_DW_OP(0xf2, GNU_implicit_pointer, 0, GNU)
 // The GNU entry value extension.
 HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU)
+// The GNU typed register value extension.
+HANDLE_DW_OP(0xf5, GNU_regval_type, 0, GNU)
 // Extensions for Fission proposal.
 HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
 HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
 
 // DWARF languages.
 HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -119,6 +119,12 @@
       return Op;
     }
 
+    /// Returns an iterator positioned Add bytes after the end of the current
+    /// operation.
+    iterator skip_bytes(uint32_t Add) {
+      return iterator(Expr, Op.EndOffset + Add);
+    }
+
     class Operation &operator*() {
       return Op;
     }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -494,8 +494,20 @@
     return createStringError(inconvertibleErrorCode(), "No %s",
                              dwarf::AttributeString(Attr).data());
 
-  if (Optional<uint64_t> Off = Location->getAsSectionOffset())
-    return U->findLoclistFromOffset(*Off);
+  if (Optional<uint64_t> Off = Location->getAsSectionOffset()) {
+    uint64_t Offset = *Off;
+    if (Location->getForm() == DW_FORM_loclistx) {
+      // A DW_FORM_loclistx value is an index into the unit's table of
+      // location list offsets, so translate it into a section offset first.
+      auto LoclistOffset = U->getLoclistOffset(*Off);
+      if (!LoclistOffset)
+        return createStringError(inconvertibleErrorCode(),
+                                 "Invalid location list index %" PRIu64,
+                                 *Off);
+      Offset = *LoclistOffset + U->getLocSectionBase();
+    }
+    return U->findLoclistFromOffset(Offset);
+  }
 
   if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
     return DWARFLocationExpressionsVector{
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -101,6 +101,13 @@
 
   Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef);
   Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB);
+  Descriptions[DW_OP_regval_type] = Desc(Op::Dwarf5, Op::SizeLEB, Op::SizeLEB);
+  Descriptions[DW_OP_GNU_regval_type] =
+      Desc(Op::Dwarf5, Op::SizeLEB, Op::SizeLEB);
+  Descriptions[DW_OP_implicit_pointer] =
+      Desc(Op::Dwarf5, Op::SizeRefAddr, Op::SignedSizeLEB);
+  Descriptions[DW_OP_GNU_implicit_pointer] =
+      Desc(Op::Dwarf5, Op::SizeRefAddr, Op::SignedSizeLEB);
 
   return Descriptions;
 }
diff --git a/llvm/lib/Support/FormattedStream.cpp b/llvm/lib/Support/FormattedStream.cpp
--- a/llvm/lib/Support/FormattedStream.cpp
+++ b/llvm/lib/Support/FormattedStream.cpp
@@ -26,8 +26,37 @@
 
   // Keep track of the current column and line by scanning the string for
   // special characters
+  bool MultiByte = false;
   for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+    // If this is the lead byte of a multi-byte UTF-8 sequence, skip its
+    // continuation bytes, and don't check for special whitespace characters.
+    // The buffer can end part way through a sequence, so never skip further
+    // than its last byte: stepping over End would make the loop condition
+    // (Ptr != End) miss the end and read past the buffer.
+    const unsigned char Lead = static_cast<unsigned char>(*Ptr);
+    size_t ExtraBytes = 0;
+    if ((Lead & 0b11100000) == 0b11000000)
+      ExtraBytes = 1;
+    else if ((Lead & 0b11110000) == 0b11100000)
+      ExtraBytes = 2;
+    else if ((Lead & 0b11111000) == 0b11110000)
+      ExtraBytes = 3;
+
+    if (ExtraBytes != 0) {
+      const size_t Remaining = End - Ptr - 1;
+      Ptr += ExtraBytes < Remaining ? ExtraBytes : Remaining;
+      MultiByte = true;
+    }
+
     ++Column;
+
+    // If this is the final byte of a multi-byte sequence, it can't be any of
+    // the special whitespace characters below.
+ if (MultiByte) { + MultiByte = false; + continue; + } + switch (*Ptr) { case '\n': Line += 1; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -16,7 +16,10 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" @@ -24,6 +27,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/FaultMaps.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCAsmInfo.h" @@ -66,10 +70,13 @@ #include #include #include +#include #include #include #include +#define DEBUG_TYPE "objdump" + using namespace llvm::object; namespace llvm { @@ -334,6 +341,9 @@ cl::cat(ObjdumpCat)); static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide)); +static cl::opt DbgVariables("debug-vars", cl::init(false)); +static cl::opt DbgIndent("debug-vars-indent", cl::init(50)); + static cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); @@ -566,7 +576,7 @@ Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts)); } virtual ~SourcePrinter() = default; - virtual void printSourceLine(raw_ostream &OS, + virtual void printSourceLine(formatted_raw_ostream &OS, object::SectionedAddress Address, StringRef ObjectFilename, StringRef Delimiter = "; "); @@ -602,7 +612,7 @@ return true; } -void SourcePrinter::printSourceLine(raw_ostream &OS, +void SourcePrinter::printSourceLine(formatted_raw_ostream &OS, object::SectionedAddress Address, StringRef ObjectFilename, StringRef Delimiter) { @@ -670,15 
+680,19 @@
   return isArmElf(Obj) || isAArch64Elf(Obj);
 }
 
-static void printRelocation(StringRef FileName, const RelocationRef &Rel,
-                            uint64_t Address, bool Is64Bits) {
+// Print the address, type name and value of the relocation Rel to OS. No
+// newline is written: the caller ends the line, so that it can print more
+// output (such as live variable ranges) to the right of the relocation.
+static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
+                            const RelocationRef &Rel, uint64_t Address,
+                            bool Is64Bits) {
   StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";
   SmallString<16> Name;
   SmallString<32> Val;
   Rel.getTypeName(Name);
   if (Error E = getRelocationValueString(Rel, Val))
     reportError(std::move(E), FileName);
-  outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n";
+  OS << format(Fmt.data(), Address) << Name << "\t" << Val;
 }
 
 class PrettyPrinter {
@@ -686,7 +697,7 @@
   virtual ~PrettyPrinter() = default;
   virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
                          ArrayRef<uint8_t> Bytes,
-                         object::SectionedAddress Address, raw_ostream &OS,
+                         object::SectionedAddress Address, formatted_raw_ostream &OS,
                          StringRef Annot, MCSubtargetInfo const &STI,
                          SourcePrinter *SP, StringRef ObjectFilename,
                          std::vector<RelocationRef> *Rels = nullptr) {
@@ -718,7 +729,7 @@
 class HexagonPrettyPrinter : public PrettyPrinter {
 public:
   void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
-                 raw_ostream &OS) {
+                 formatted_raw_ostream &OS) {
     uint32_t opcode =
       (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
     if (!NoLeadingAddr)
@@ -730,7 +741,7 @@
     }
   }
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
-                 object::SectionedAddress Address, raw_ostream &OS,
+                 object::SectionedAddress Address, formatted_raw_ostream &OS,
                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
                  StringRef ObjectFilename,
                  std::vector<RelocationRef> *Rels) override {
@@ -761,7 +772,7 @@
     auto PrintReloc = [&]() -> void {
       while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
         if (RelCur->getOffset() == Address.Address) {
-          printRelocation(ObjectFilename, *RelCur, Address.Address, false);
+          printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
           return;
         }
++RelCur; @@ -800,7 +811,7 @@ class AMDGCNPrettyPrinter : public PrettyPrinter { public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, - object::SectionedAddress Address, raw_ostream &OS, + object::SectionedAddress Address, formatted_raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, StringRef ObjectFilename, std::vector *Rels) override { @@ -852,7 +863,7 @@ class BPFPrettyPrinter : public PrettyPrinter { public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, - object::SectionedAddress Address, raw_ostream &OS, + object::SectionedAddress Address, formatted_raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, StringRef ObjectFilename, std::vector *Rels) override { @@ -1110,6 +1121,518 @@ } } +struct PrintedExpr { + enum ExprKind { + Address, + Value, + }; + ExprKind Kind; + SmallString<20> String; + + PrintedExpr(ExprKind K = Address) : Kind(K) {} +}; + +static void PrettyPrintDWARFOps(raw_ostream &OS, DWARFExpression::iterator I, + const DWARFExpression::iterator E, + const MCRegisterInfo *MRI, + SmallString<20> FrameBase, + bool ForFrameBase = false) { + SmallVector Stack; + unsigned PieceStart = 0; + + while (I != E) { + auto &Op = *I; + unsigned Opcode = Op.getCode(); + if (Opcode == dwarf::DW_OP_piece) { + // DW_OP_piece - record piece of larger object. 
+ assert(Stack.size() <= 1 && "too many values on stack"); + uint64_t PieceSize = Op.getRawOperand(0); + if (PieceStart != 0) + OS << " "; + OS.changeColor(raw_ostream::YELLOW); + OS << "[" << PieceStart << "," << (PieceStart + PieceSize) << "): "; + OS.changeColor(raw_ostream::CYAN); + if (Stack.empty()) + OS << "???"; + else if (Stack.front().Kind == PrintedExpr::Address && !ForFrameBase) + OS << "[" << Stack.front().String << "]"; + else + OS << Stack.front().String; + Stack.clear(); + PieceStart += PieceSize; + } else if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) { + // Register contents + int DwarfRegNum = Opcode - dwarf::DW_OP_reg0; + int LLVMRegNum = *MRI->getLLVMRegNum(DwarfRegNum, false); + raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << MRI->getName(LLVMRegNum); + } else if (Opcode >= dwarf::DW_OP_breg0 && Opcode <= dwarf::DW_OP_breg31) { + int DwarfRegNum = Opcode - dwarf::DW_OP_breg0; + int LLVMRegNum = *MRI->getLLVMRegNum(DwarfRegNum, false); + int64_t Offset = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << MRI->getName(LLVMRegNum); + if (Offset) + S << format("%+" PRId64, Offset); + } else if (Opcode == dwarf::DW_OP_fbreg) { + int64_t Offset = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << FrameBase; + if (Offset) + S << format("%+" PRId64, Offset); + } else if (Opcode >= dwarf::DW_OP_lit0 && Opcode <= dwarf::DW_OP_lit31) { + unsigned Val = Opcode - dwarf::DW_OP_lit0; + raw_svector_ostream S(Stack.emplace_back().String); + S << Val; + } else if (Opcode == dwarf::DW_OP_consts || + Opcode == dwarf::DW_OP_const1s || + Opcode == dwarf::DW_OP_const2s || + Opcode == dwarf::DW_OP_const4s || + Opcode == dwarf::DW_OP_const8s) { + // Signed constant + int64_t Val = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << Val; + } else if (Opcode == dwarf::DW_OP_constu || + Opcode == dwarf::DW_OP_const1u || + Opcode == 
dwarf::DW_OP_const2u || + Opcode == dwarf::DW_OP_const4u || + Opcode == dwarf::DW_OP_const8u) { + // Unsigned constant + uint64_t Val = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << Val; + } else if (Opcode == dwarf::DW_OP_implicit_value) { + uint64_t Len = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << ""; + } else if (Opcode == dwarf::DW_OP_stack_value) { + assert(!Stack.empty()); + Stack.back().Kind = PrintedExpr::Value; + } else if (Opcode == dwarf::DW_OP_GNU_entry_value) { + int64_t SubExprLength = Op.getRawOperand(0); + DWARFExpression::iterator SubExprEnd = I.skip_bytes(SubExprLength); + ++I; + raw_svector_ostream S(Stack.emplace_back().String); + S << "entry("; + PrettyPrintDWARFOps(S, I, SubExprEnd, MRI, FrameBase); + S << ")"; + I = SubExprEnd; + continue; + } else if (Opcode == dwarf::DW_OP_deref || + Opcode == dwarf::DW_OP_deref_size) { + // TODO: The current expression syntax doesn't include the size of memory + // accesses, but probably should. 
+ PrintedExpr PoppedVal = Stack.pop_back_val(); + raw_svector_ostream S(Stack.emplace_back().String); + S << "[" << PoppedVal.String << "]"; + } else if (Opcode == dwarf::DW_OP_abs || Opcode == dwarf::DW_OP_neg || + Opcode == dwarf::DW_OP_not) { + // Unary operations + PrintedExpr ExprOp = Stack.pop_back_val(); + raw_svector_ostream S(Stack.emplace_back().String); + switch (Opcode) { + case dwarf::DW_OP_abs: S << "abs(" << ExprOp.String << ")"; break; + case dwarf::DW_OP_neg: S << "-" << ExprOp.String; break; + case dwarf::DW_OP_not: S << "~" << ExprOp.String; break; + default: llvm_unreachable("unexpected opcode"); + } + } else if (Opcode == dwarf::DW_OP_plus_uconst) { + // Unary operations with immediate on RHS + PrintedExpr LHS = Stack.pop_back_val(); + uint64_t RHS = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << LHS.String << "+" << RHS; + } else if (Opcode == dwarf::DW_OP_and || Opcode == dwarf::DW_OP_div || + Opcode == dwarf::DW_OP_minus || Opcode == dwarf::DW_OP_mod || + Opcode == dwarf::DW_OP_mul || Opcode == dwarf::DW_OP_or || + Opcode == dwarf::DW_OP_plus || Opcode == dwarf::DW_OP_shl || + Opcode == dwarf::DW_OP_shr || Opcode == dwarf::DW_OP_shra) { + // Binary operations + PrintedExpr LHS = Stack.pop_back_val(); + PrintedExpr RHS = Stack.pop_back_val(); + raw_svector_ostream S(Stack.emplace_back().String); + S << LHS.String; + // FIXME: Define operator precedence, put parens in when needed. 
+ switch (Opcode) { + case dwarf::DW_OP_and: S << "&"; break; + case dwarf::DW_OP_div: S << "/"; break; + case dwarf::DW_OP_minus: S << "-"; break; + case dwarf::DW_OP_mod: S << "%"; break; + case dwarf::DW_OP_mul: S << "*"; break; + case dwarf::DW_OP_or: S << "|"; break; + case dwarf::DW_OP_plus: S << "+"; break; + case dwarf::DW_OP_shl : S << "<<"; break; + // FIXME: different syntax for shr and shra + case dwarf::DW_OP_shr: S << ">>"; break; + case dwarf::DW_OP_shra: S << ">>"; break; + default: llvm_unreachable("unexpected opcode"); + } + S << RHS.String; + } else if (Opcode == dwarf::DW_OP_call_frame_cfa) { + raw_svector_ostream S(Stack.emplace_back().String); + S << "CFA"; + } else if (Opcode == dwarf::DW_OP_regval_type || + Opcode == dwarf::DW_OP_GNU_regval_type) { + // Register contents with type (we ignore the type for now) + int LLVMRegNum = *MRI->getLLVMRegNum(Op.getRawOperand(0), false); + raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << MRI->getName(LLVMRegNum); + } else if (Opcode == dwarf::DW_OP_addr) { + // Address, probably needs to care about relocations? + uint64_t Addr = Op.getRawOperand(0); + raw_svector_ostream S(Stack.emplace_back().String); + S << format("%#x", Addr); + } else if (Opcode == dwarf::DW_OP_implicit_pointer || + Opcode == dwarf::DW_OP_GNU_implicit_pointer) { + // Implicit pointer to thing described by debug info. 
+ raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << ""; + } else { + dbgs() << ""; + llvm_unreachable("unknown op"); + } + ++I; + } + + assert((PieceStart != 0 && Stack.size() == 0) || + (PieceStart == 0 && Stack.size() == 1) && + "expected pieces and an empty stack, or one value on stack"); + if (!Stack.empty()) { + if (Stack.front().Kind == PrintedExpr::Address && !ForFrameBase) + OS << "[" << Stack.front().String << "]"; + else + OS << Stack.front().String; + } +} + +static void PrettyPrintDWARFExpr(raw_ostream &OS, const DWARFExpression *Expr, + const MCRegisterInfo *MRI, + const DWARFDie FuncDie) { + SmallString<20> FrameBase; + if (FuncDie) { + auto FrameBaseAttr = FuncDie.find(dwarf::DW_AT_frame_base); + if (FrameBaseAttr) { + StringRef FrameBaseStr((const char *)FrameBaseAttr->getAsBlock()->data(), + FrameBaseAttr->getAsBlock()->size()); + DataExtractor FrameBaseData(FrameBaseStr, true /* TODO endianness */, 0); + DWARFExpression FrameBaseExpr( + FrameBaseData, FuncDie.getDwarfUnit()->getVersion(), + FuncDie.getDwarfUnit()->getAddressByteSize()); + raw_svector_ostream S(FrameBase); + PrettyPrintDWARFOps(S, FrameBaseExpr.begin(), FrameBaseExpr.end(), MRI, + FrameBase, true); + } + } + + PrettyPrintDWARFOps(OS, Expr->begin(), Expr->end(), MRI, FrameBase); +} + +struct LiveVariable { + DWARFLocationExpression LocExpr; + const char *VarName; + DWARFUnit *Unit; + const DWARFDie FuncDie; + + LiveVariable(DWARFLocationExpression &LocExpr, const char *VarName, + DWARFUnit *Unit, const DWARFDie FuncDie) + : LocExpr(LocExpr), VarName(VarName), Unit(Unit), FuncDie(FuncDie) {} + + bool liveAtAddress(object::SectionedAddress Addr) { + if (LocExpr.Range == None) + return false; + return LocExpr.Range->SectionIndex == Addr.SectionIndex && + LocExpr.Range->LowPC <= Addr.Address && + LocExpr.Range->HighPC > Addr.Address; + } + + void print(raw_ostream &OS, const MCRegisterInfo *MRI) const { + DataExtractor Data({LocExpr.Expr.data(), 
LocExpr.Expr.size()}, + true /* TODO endianness */, 0); + DWARFExpression Expression(Data, Unit->getVersion(), + Unit->getAddressByteSize()); + PrettyPrintDWARFExpr(OS, &Expression, MRI, FuncDie); + } +}; + +struct LiveVariablePrinter { + struct Column { + unsigned VarIdx; + bool LiveIn; + bool LiveOut; + bool New; + + bool isActive() const { return VarIdx != ~0U; } + }; + + std::vector LiveVariables; + IndexedMap ActiveCols; + const MCRegisterInfo *MRI; + + LiveVariablePrinter(const MCRegisterInfo *MRI) + : LiveVariables(), ActiveCols({~0U, 0, 0, 0}), MRI(MRI) {} + + void dump() const { + for (const LiveVariable &LV : LiveVariables) { + dbgs() << LV.VarName << " @ " << LV.LocExpr.Range << ": "; + LV.print(dbgs(), MRI); + dbgs() << "\n"; + } + } + + void AddVariable(DWARFDie FuncDie, DWARFDie VarDie) { + uint64_t FuncLowPC, FuncHighPC, SectionIndex; + FuncDie.getLowAndHighPC(FuncLowPC, FuncHighPC, SectionIndex); + const char *VarName = VarDie.getName(DINameKind::ShortName); + DWARFUnit *U = VarDie.getDwarfUnit(); + + auto Locs = VarDie.getLocations(dwarf::DW_AT_location); + if (Locs) { + for (auto &LocExpr : *Locs) { + if (LocExpr.Range) { + // FIXME: getLocations seems to get the section index wrong for + // objects built with -ffunction-sections, for now we just fix it up + // here. + DWARFLocationExpression FixedExpr{ + DWARFAddressRange(LocExpr.Range->LowPC, LocExpr.Range->HighPC, + SectionIndex), + LocExpr.Expr}; + LiveVariables.emplace_back(FixedExpr, VarName, U, FuncDie); + } else { + // If the LocExpr does not have an associated range, it is valid for + // the whole of the function. TODO technically it is not valid for + // any range covered by another LocExpr, does that happen in reality? 
+ DWARFLocationExpression WholeFuncExpr{ + DWARFAddressRange(FuncLowPC, FuncHighPC, SectionIndex), + LocExpr.Expr}; + LiveVariables.emplace_back(WholeFuncExpr, VarName, U, FuncDie); + } + } + } else { + consumeError(Locs.takeError()); + } + } + + void AddFunction(DWARFDie D) { + for (auto Child : D.children()) { + if (Child.getTag() == dwarf::DW_TAG_variable || + Child.getTag() == dwarf::DW_TAG_formal_parameter) { + AddVariable(D, Child); + } else { + AddFunction(Child); + } + } + } + + void AddCompileUnit(DWARFDie D) { + if (D.getTag() == dwarf::DW_TAG_subprogram) { + AddFunction(D); + } else { + for (auto Child : D.children()) + AddFunction(Child); + } + } + + // A live range starting at address A is actually live going into the + // instruction at A, so it's live at the gap between instructions at A. It + // might not be live out of the previous (address-ordered) instruction + // though, if it's in a different BB, how do we know the difference? That + // only occurs if there is no fall-through into this block, how can we + // force that for testing? + + unsigned findFreeColumn() { + for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx) { + if (!ActiveCols[ColIdx].isActive()) + return ColIdx; + } + // TODO growth factor? + ActiveCols.grow(ActiveCols.size() + 1); + return ActiveCols.size() - 1; + } + + // Update to match the state of the instruction between ThisAddr and + // NextAddr. In the common case, any live range active at ThisAddr is + // live-in to the instruction, and any live range active at NextAddr is + // live-out of the instruction. If EndFunction is true, then ThisAddr and + // NextAddr are in different functions, so live ranges starting at NextAddr + // will be ignored, because they belong to the next function. + void update(object::SectionedAddress ThisAddr, + object::SectionedAddress NextAddr, bool EndFunction) { + // Any variables which were new aren't any more. 
+ for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx) + ActiveCols[ColIdx].New = false; + + // First, check variables which have already been assigned a column, so + // that we don't change their order. + SmallSet CheckedVarIdxs; + for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx) { + if (!ActiveCols[ColIdx].isActive()) + continue; + CheckedVarIdxs.insert(ActiveCols[ColIdx].VarIdx); + LiveVariable &LV = LiveVariables[ActiveCols[ColIdx].VarIdx]; + ActiveCols[ColIdx].LiveIn = LV.liveAtAddress(ThisAddr); + ActiveCols[ColIdx].LiveOut = LV.liveAtAddress(NextAddr); + LLVM_DEBUG(dbgs() << "pass 1, " << ThisAddr.Address << "-" + << NextAddr.Address << ", " << LV.VarName << ", Col " + << ColIdx << ": LiveIn=" << ActiveCols[ColIdx].LiveIn + << ", LiveOut=" << ActiveCols[ColIdx].LiveOut << "\n"); + // If the variable range is neither live-in nor live-out, deallocate + // it's column. + if (!ActiveCols[ColIdx].LiveIn && !ActiveCols[ColIdx].LiveOut) + ActiveCols[ColIdx].VarIdx = ~0U; + } + + // Next, look for variables which don't already have a column, but which + // are now live. + if (!EndFunction) { + for (unsigned VarIdx = 0; VarIdx < LiveVariables.size(); ++VarIdx) { + if (CheckedVarIdxs.count(VarIdx)) + continue; + LiveVariable &LV = LiveVariables[VarIdx]; + bool LiveIn = LV.liveAtAddress(ThisAddr); + bool LiveOut = LV.liveAtAddress(NextAddr); + if (!LiveIn && !LiveOut) + continue; + + unsigned ColIdx = findFreeColumn(); + LLVM_DEBUG(dbgs() << "pass 2, " << ThisAddr.Address << "-" + << NextAddr.Address << ", " << LV.VarName << ", Col " + << ColIdx << ": LiveIn=" << LiveIn + << ", LiveOut=" << LiveOut << "\n"); + ActiveCols[ColIdx] = {VarIdx, LiveIn, LiveOut, true}; + } + } + } + + // Print live ranges to the right of an existing line. This assumes the + // line is not an instruction, so doesn't start or end any live ranges, so + // we only need to print active ranges or empty columns. 
If AfterInst is + // true, this is being printed after the last instruction fed to update(), + // otherwise this is being printed before it. + void printAfterOtherLine(formatted_raw_ostream &OS, bool AfterInst) { + if (ActiveCols.size()) { + unsigned FirstUnprintedColumn = MoveToFirstVarColumn(OS); + for (size_t ColIdx = FirstUnprintedColumn; ColIdx < ActiveCols.size(); + ++ColIdx) { + if (ActiveCols[ColIdx].isActive() && + (AfterInst ? ActiveCols[ColIdx].LiveOut + : ActiveCols[ColIdx].LiveIn)) + OS << "┃ "; + else + OS << " "; + } + } + OS << "\n"; + } + + // Indent to the first live-range column to the right of the currently + // printed line, and return the index of that column. + // TODO: formatted_raw_ostream uses "column" to mean a number of characters + // since the last \n, and we use it to mean the number of slots in which we + // put live variable lines. Pick a less overloaded word. + unsigned MoveToFirstVarColumn(formatted_raw_ostream &OS) { + unsigned FirstUnprintedColumn = + std::max((int)(OS.getColumn() - getIndentLevel() + 1) / 2, 0); + if ((getIndentLevel() + FirstUnprintedColumn * 2) > OS.getColumn()) + OS.PadToColumn(getIndentLevel() + FirstUnprintedColumn * 2); + return FirstUnprintedColumn; + } + + // Print any live variable range info needed to the right of a + // non-instruction line of disassembly. This is where we print the variable + // names and expressions, with thin line-drawing characters connecting them + // to the live range which starts at the next instruction. If MustPrint is + // true, we have to print at least one line (with the continuation of any + // already-active live ranges) because something has already been printed + // earlier on this line. 
+ void printBetweenInsts(formatted_raw_ostream &OS, bool MustPrint) { + bool PrintedSomething = false; + for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx) { + if (!ActiveCols[ColIdx].isActive()) + continue; + if (ActiveCols[ColIdx].New) { + // New live variable range for the next instruction, so print the + // variable name and location. + + // First we need to print the live range markers for any active + // columns to the left of this one. + OS.PadToColumn(getIndentLevel()); + for (unsigned ColIdx2 = 0; ColIdx2 < ColIdx; ++ColIdx2) { + if (ActiveCols[ColIdx2].isActive()) + OS << (ActiveCols[ColIdx2].LiveIn ? "┃ " : "│ "); + else + OS << " "; + } + + // Then print the variable name and location of the new live range, + // with box drawing characters joining it to the live range line. + OS << (ActiveCols[ColIdx].LiveIn ? "┠─ " : "┌─ "); + WithColor(OS, raw_ostream::GREEN) + << LiveVariables[ActiveCols[ColIdx].VarIdx].VarName; + OS << " = "; + { + WithColor ExprColor(OS, raw_ostream::CYAN); + LiveVariables[ActiveCols[ColIdx].VarIdx].print(OS, MRI); + } + + // If there are any columns to the right of the expression we just + // printed, then continue their live range lines. + unsigned FirstUnprintedColumn = MoveToFirstVarColumn(OS); + for (unsigned ColIdx2 = FirstUnprintedColumn; + ColIdx2 < ActiveCols.size(); ++ColIdx2) { + if (ActiveCols[ColIdx2].isActive() && ActiveCols[ColIdx2].LiveIn) + OS << "┃ "; + else + OS << " "; + } + + OS << "\n"; + PrintedSomething = true; + } + } + + // If we must print something (because we printed a line/column number), + // but don't have any new variables to print, then print a line which + // just continues any existing live ranges. + if (MustPrint && !PrintedSomething) { + printAfterOtherLine(OS, false); + } + } + + // Print the live variable ranges to the right of a disassembled + // instruction. 
+ void printAfterInst(formatted_raw_ostream &OS) { + if (!ActiveCols.size()) + return; + // If the instruction was particularly long (e.g. it contained a long + // symbol name), then it might overrun the live variable lines. This + // isn't great, but it's not actually ambiguous until multiple + // consecutive instructions do this. + unsigned FirstUnprintedColumn = MoveToFirstVarColumn(OS); + for (unsigned ColIdx = FirstUnprintedColumn; ColIdx < ActiveCols.size(); + ++ColIdx) { + if (ActiveCols[ColIdx].isActive()) { + if (ActiveCols[ColIdx].LiveIn && ActiveCols[ColIdx].LiveOut) + OS << "┃ "; + else if (!ActiveCols[ColIdx].LiveIn && ActiveCols[ColIdx].LiveOut) + OS << "╈ "; + else if (ActiveCols[ColIdx].LiveIn && !ActiveCols[ColIdx].LiveOut) + OS << "┻ "; + else + llvm_unreachable("var must be live in or out!"); + } else { + OS << " "; + } + } + } + + // Get the column number (in characters) at which the first column of live + // variables should be printed. + unsigned getIndentLevel() const { + return DbgIndent + (NoShowRawInsn ? 
16 : 40); + } +}; + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1201,6 +1724,26 @@ array_pod_sort(SecSyms.second.begin(), SecSyms.second.end()); array_pod_sort(AbsoluteSymbols.begin(), AbsoluteSymbols.end()); + std::unique_ptr DICtx; + LiveVariablePrinter LVP(Ctx.getRegisterInfo()); + std::map StmtAddresses; + + if (DbgVariables) { + DICtx = DWARFContext::create(*Obj); + for (const auto &CU : DICtx->compile_units()) { + LVP.AddCompileUnit(CU->getUnitDIE(false)); + + auto LineTable = DICtx->getLineTableForUnit(CU.get()); + if (LineTable) { + for (auto Row : LineTable->Rows) + if (Row.IsStmt) + StmtAddresses.insert(std::make_pair(Row.Address, Row)); + } + } + } + + LLVM_DEBUG(LVP.dump()); + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { if (FilterSections.empty() && !DisassembleAll && (!Section.isText() || Section.isVirtual())) @@ -1418,11 +1961,23 @@ if (Size == 0) Size = 1; + LVP.update({Index, Section.getIndex()}, + {Index + Size, Section.getIndex()}, Index + Size == End); + + formatted_raw_ostream FOS(outs()); + auto LineTableRow = StmtAddresses.find({Index, Section.getIndex()}); + bool ShowLineNum = (LineTableRow != StmtAddresses.end()) && DbgVariables; + if (ShowLineNum) { + WithColor(FOS, raw_ostream::YELLOW) + << " line " << LineTableRow->second.Line << ", column " + << LineTableRow->second.Column; + } + LVP.printBetweenInsts(FOS, ShowLineNum); PIP.printInst(*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, - outs(), "", *STI, &SP, Obj->getFileName(), &Rels); - outs() << CommentStream.str(); + FOS, "", *STI, &SP, Obj->getFileName(), &Rels); + FOS << CommentStream.str(); Comments.clear(); // Try to resolve the target of a call, tail call, etc. 
to a specific @@ -1473,15 +2028,17 @@ --TargetSym; uint64_t TargetAddress = std::get<0>(*TargetSym); StringRef TargetName = std::get<1>(*TargetSym); - outs() << " <" << TargetName; + FOS << " <" << TargetName; uint64_t Disp = Target - TargetAddress; if (Disp) - outs() << "+0x" << Twine::utohexstr(Disp); - outs() << '>'; + FOS << "+0x" << Twine::utohexstr(Disp); + FOS << '>'; } } } - outs() << "\n"; + + LVP.printAfterInst(FOS); + FOS << "\n"; // Hexagon does this in pretty printer if (Obj->getArch() != Triple::hexagon) { @@ -1507,8 +2064,9 @@ Offset += AdjustVMA; } - printRelocation(Obj->getFileName(), *RelCur, SectionAddr + Offset, - Is64Bits); + printRelocation(FOS, Obj->getFileName(), *RelCur, + SectionAddr + Offset, Is64Bits); + LVP.printAfterOtherLine(FOS, true); ++RelCur; } }