Index: include/llvm/DebugInfo/GSYM/Breakpad.h =================================================================== --- include/llvm/DebugInfo/GSYM/Breakpad.h +++ include/llvm/DebugInfo/GSYM/Breakpad.h @@ -0,0 +1,28 @@ +//===- Breakpad.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_BREAKPAD_H +#define LLVM_DEBUGINFO_GSYM_BREAKPAD_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { +namespace gsym { +class GsymCreator; + +/// Entry point that takes a Breakpad input path and a GSYM output path and +/// reports the outcome as a std::error_code. +/// NOTE(review): presumably reads BreakpadPath and writes GSYMPath; only the +/// declaration is visible here, confirm against the implementation. +std::error_code convertBreakpadFileToGSYM(StringRef BreakpadPath, + StringRef GSYMPath); + +/// Entry point that takes Breakpad data in \p Lines and a GsymCreator to +/// operate on, and reports the outcome as a std::error_code. +/// NOTE(review): presumably parses Lines and adds the results to Gsym; confirm +/// against the implementation. +std::error_code convertBreakpadDataToGSYM(StringRef Lines, GsymCreator &Gsym); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_BREAKPAD_H Index: include/llvm/DebugInfo/GSYM/DwarfTransformer.h =================================================================== --- include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -0,0 +1,72 @@ +//===- DwarfTransformer.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H +#define LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H + +#include +#include + +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +namespace llvm { +namespace gsym { + +struct CUInfo; + +/// Owns a GsymCreator together with a log stream reference and a thread count, +/// and exposes loaders that accept either an ObjectFile or a file name. +class DwarfTransformer { +public: + /// \param OS stream stored as the log output (held by reference). + /// \param N requested thread count; zero is replaced by + /// std::thread::hardware_concurrency(), but never less than one. + DwarfTransformer(raw_ostream &OS, uint32_t N = 0) : Log(OS), NumThreads(N) { + if (NumThreads == 0) + NumThreads = std::max(std::thread::hardware_concurrency(), 1); + } + + std::error_code loadDwarf(const object::ObjectFile &Obj); + /// Convenience overload: resolves \p filename through getObjectFile() and + /// forwards to the ObjectFile overload. Returns invalid_argument when + /// getObjectFile() yields no value. + std::error_code loadDwarf(StringRef filename) { + if (auto Binary = getObjectFile(filename)) + return loadDwarf(*Binary.getValue().getBinary()); + return std::make_error_code(std::errc::invalid_argument); + } + + std::error_code loadSymbolTable(const object::ObjectFile &Obj); + /// Convenience overload: resolves \p filename through getObjectFile() and + /// forwards to the ObjectFile overload. Returns invalid_argument when + /// getObjectFile() yields no value. + std::error_code loadSymbolTable(StringRef filename) { + if (auto Binary = getObjectFile(filename)) + return loadSymbolTable(*Binary.getValue().getBinary()); + return std::make_error_code(std::errc::invalid_argument); + } + + /// Mutable access to the GsymCreator owned by this object. + GsymCreator &getData() { return Gsym; } + +private: + void handleDie(raw_ostream &Strm, CUInfo &CUI, DWARFDie Die); + + bool parseLineTable(raw_ostream &Strm, CUInfo &CUI, DWARFDie Die, + FunctionInfo &FI); + + Optional> + getObjectFile(StringRef filename) const; + + void initDataFromObj(const object::ObjectFile &Obj); + + bool Initialized = false; + GsymCreator Gsym; + raw_ostream &Log; + uint32_t NumThreads; + + friend class DwarfTransformerTest; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H Index: include/llvm/DebugInfo/GSYM/FileEntry.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileEntry.h +++ include/llvm/DebugInfo/GSYM/FileEntry.h @@ -0,0 +1,54 @@ 
+//===- FileEntry.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H +#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H + +#include +#include +#include + +#include "llvm/ADT/DenseMapInfo.h" + +namespace llvm { +namespace gsym { + +/// A file described by two string table offsets. Two entries are equal when +/// both offsets match. +struct FileEntry { + uint32_t Dir = 0; // String table offset of the directory (per field name) + uint32_t Base = 0; // String table offset of the base name (per field name) + + FileEntry() = default; + FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {} + + // implement operator== so that FileEntry can be used as key in + // unordered containers + bool operator==(const FileEntry &other) const { + return Dir == other.Dir && Base == other.Base; + } +}; + +} // namespace gsym + +/// DenseMap key traits for gsym::FileEntry. The empty and tombstone keys use +/// the two largest uint32_t values for both fields. +template <> struct DenseMapInfo { + static inline gsym::FileEntry getEmptyKey() { + return gsym::FileEntry(UINT32_MAX, UINT32_MAX); + } + static inline gsym::FileEntry getTombstoneKey() { + return gsym::FileEntry(UINT32_MAX - 1, UINT32_MAX - 1); + } + static unsigned getHashValue(const gsym::FileEntry &Val) { + // Weight the two fields differently. Dir * 37 + Base * 37 is just + // 37 * (Dir + Base), which collides for every pair of entries with the + // same sum, including (A, B) versus (B, A). + return (Val.Dir * 37U + Val.Base) * 37U; + } + static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) { + return LHS == RHS; + } +}; + +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H Index: include/llvm/DebugInfo/GSYM/FileTableCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileTableCreator.h +++ include/llvm/DebugInfo/GSYM/FileTableCreator.h @@ -0,0 +1,48 @@ +//===- FileTableCreator.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H +#define LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H + +#include +#include +#include + +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +namespace llvm { +namespace gsym { +class FileWriter; + +/// Collects FileEntry records next to a StringTableCreator that is supplied by +/// the caller and held by reference. +/// NOTE(review): EntryToIndex presumably maps each entry to its position in +/// FileEntries; the insert() bodies are not visible here. +class FileTableCreator { + DenseMap EntryToIndex; + std::vector FileEntries; + llvm::gsym::StringTableCreator &StringTable; + +public: + /// Binds the table to \p StrTab and inserts FileEntry(0, 0) as the first + /// entry. + explicit FileTableCreator(StringTableCreator &StrTab) : StringTable(StrTab) { + insert(FileEntry(0, 0)); + } + + /// Unchecked access to the entry stored at \p Index. + const gsym::FileEntry &operator[](size_t Index) const { + return FileEntries[Index]; + } + + /// The string table this file table was constructed with. + const StringTableCreator &getStringTable() const { return StringTable; } + + uint32_t insert(StringRef S); + uint32_t insert(FileEntry S); + void write(FileWriter &Out) const; +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H Index: include/llvm/DebugInfo/GSYM/FileWriter.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileWriter.h +++ include/llvm/DebugInfo/GSYM/FileWriter.h @@ -0,0 +1,46 @@ +//===- FileWriter.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H +#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H + +#include +#include +#include +#include + +namespace llvm { +namespace gsym { + +/// Writer bound to a caller-provided std::ostream, which is held by reference. +/// Copy construction and copy assignment are deleted. +/// NOTE(review): the Write*/Fixup32/AlignTo members are only declared here; +/// their bool results presumably signal success, and Seek/Tell presumably +/// report stream positions. Confirm against FileWriter.cpp. +class FileWriter { + std::ostream &OS; + +public: + FileWriter(std::ostream &S) : OS(S) {} + ~FileWriter(); + bool WriteSLEB(int64_t Value); + bool WriteULEB(uint64_t Value); + bool WriteU8(uint8_t Value); + bool WriteU32(uint32_t Value); + bool Fixup32(uint32_t Value, off_t Offset); + bool WriteUnsigned(uint64_t Value, size_t N); + bool Write(const void *Src, size_t SrcLen); + bool AlignTo(size_t Align); + off_t Seek(off_t Offset); + off_t Tell(); + +private: + // Non-copyable: the writer only refers to a stream it does not own. + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H Index: include/llvm/DebugInfo/GSYM/FunctionInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -0,0 +1,92 @@ +//===- FunctionInfo.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H +#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H + +#include +#include + +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" + +namespace llvm { +class raw_ostream; +namespace gsym { +class GsymReader; + +/// Describes one function: its address range, a Name value (zero marks the +/// entry invalid, see isValid()), its line entries and its inline information. +struct FunctionInfo { + AddressRange Range; + uint32_t Name; + std::vector Lines; + InlineInfo Inline; + + /// Builds the range [Addr, Addr + Size) and stores \p N as the name. + FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) + : Range(Addr, Addr + Size), Name(N) {} + + bool hasRichInfo() const { + // Returns whether we have something else than range and name + return !Lines.empty() || Inline.isValid(); + } + + bool isValid() const { + // Address and size can be zero and there can be no line entries for a + // symbol so the only indication this entry is valid is if the name is + // not zero. 
+ return Name != 0; + } + + // Thin forwarders to the AddressRange member. + uint64_t startAddress() const { return Range.startAddress(); } + uint64_t endAddress() const { return Range.endAddress(); } + uint64_t size() const { return Range.size(); } + void setStartAddress(uint64_t Addr) { Range.setStartAddress(Addr); } + void setEndAddress(uint64_t Addr) { Range.setEndAddress(Addr); } + void setSize(uint64_t Size) { Range.setSize(Size); } + + /// Resets every member; the entry is invalid afterwards because Name is 0. + void clear() { + Range.clear(); + Name = 0; + Lines.clear(); + Inline.clear(); + } + + void dump(llvm::raw_ostream &OS, GsymReader &GSYM) const; +}; + +/// Member-wise equality over Range, Name, Lines and Inline. +inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return LHS.Range == RHS.Range && LHS.Name == RHS.Name && + LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline; +} +inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return !(LHS == RHS); +} +/// Ordering: address range first, then entries without valid inline info +/// before entries with it, then the line tables (element-wise when the sizes +/// match, otherwise fewer entries first). +/// NOTE(review): Name takes no part in this ordering although operator== +/// compares it; confirm that is intended. +inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { + // First sort by address range + if (LHS.Range != RHS.Range) + return LHS.Range < RHS.Range; + + // Then sort by inline + if (LHS.Inline.isValid() != RHS.Inline.isValid()) + return RHS.Inline.isValid(); + + // If the number of lines is the same, then compare line table entries + if (LHS.Lines.size() == RHS.Lines.size()) + return LHS.Lines < RHS.Lines; + // Then sort by number of line table entries (more is better) + return LHS.Lines.size() < RHS.Lines.size(); +} + +raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H Index: include/llvm/DebugInfo/GSYM/GsymCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymCreator.h +++ include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,81 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include +#include +#include <utility> + +#include "FileTableCreator.h" +#include "FunctionInfo.h" +#include "InlineInfo.h" +#include "Range.h" +#include "StringTableCreator.h" + +namespace llvm { +class AsmPrinter; + +namespace gsym { + +/// Accumulates function infos, strings and file entries. The private members +/// are guarded by Mutex, and every inline accessor below takes that lock for +/// the duration of the call. +class GsymCreator { + // Private member variables require Mutex protections + mutable std::mutex Mutex; + std::vector Funcs; + StringTableCreator StrTab; + FileTableCreator FileTab; + +public: + // Public member variables don't require Mutex protections + AddressRanges TextRanges; + std::vector UUID; + + /// The file table is bound to this object's own string table. + GsymCreator() : FileTab(StrTab) {} + + std::error_code save(std::ostream &stream) const; + std::error_code save(StringRef path) const; + /// Inserts \p S into the string table and returns the value reported by + /// StringTableCreator::insert(). + uint32_t insertString(StringRef S) { + std::lock_guard Guard(Mutex); + // insert() takes a StringRef, so no temporary std::string is needed. + return StrTab.insert(S); + } + /// Inserts \p Path into the file table and returns the value reported by + /// FileTableCreator::insert(). + uint32_t insertFile(StringRef Path) { + std::lock_guard Guard(Mutex); + return FileTab.insert(Path); + } + /// Number of function infos added so far. + size_t getFunctionInfoSize() const { + std::lock_guard Guard(Mutex); + return Funcs.size(); + } + /// Takes ownership of \p FI and appends it to the function list. + void addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + // FI is a named rvalue reference, i.e. an lvalue here; without std::move + // the line table and inline info would be deep-copied. + Funcs.emplace_back(std::move(FI)); + } + /// Calls \p Callback for each function info in order and stops as soon as + /// it returns false. The lock is held for the whole iteration, so the + /// callback must not call back into the locking members of this object. + void forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FuncInfo : Funcs) { + if (!Callback(FuncInfo)) + break; + } + } + void optimize(llvm::raw_ostream &OS); + /// Sorts the function infos with FunctionInfo's operator<. + void sortFunctionInfos() { + std::lock_guard Guard(Mutex); + std::sort(Funcs.begin(), Funcs.end()); + } +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H Index: include/llvm/DebugInfo/GSYM/GsymReader.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymReader.h +++ include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,142 @@ +//===- 
GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/ErrorOr.h" + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_VERSION = 1; +constexpr size_t GSYM_MAX_UUID_SIZE = 20; +class FileTableCreator; +struct LookupResult; +class StringTableCreator; +class GSYMInfo; + +enum class InfoType { EndOfList = 0u, LineTableInfo = 1u, InlineInfo = 2u }; + +/// GSYM header record. getByteSize() reports sizeof(Header), so the member +/// layout below is the size the reader works with. +struct Header { + uint32_t Magic; + uint16_t Version; + uint8_t AddrOffSize; // Size of addr_off_t + uint8_t UUIDSize; + uint64_t BaseAddress; + uint32_t NumAddresses; + uint32_t StrtabOffset; + uint32_t StrtabSize; + uint8_t UUID[GSYM_MAX_UUID_SIZE]; + static size_t getByteSize() { return sizeof(Header); } + bool isValid() const; + void dump(llvm::raw_ostream &OS) const; +}; + +/// Reader over GSYM data supplied either through openFile() or copyBuffer(). +class GsymReader { +public: + GsymReader(); + ~GsymReader(); + + std::error_code openFile(StringRef Filename); + void copyBuffer(StringRef Bytes); + + /// The current header pointer; nullptr until one has been set. + const Header *getHeader() const { return GSYMHeader; } + void dump(llvm::raw_ostream &OS, bool Verbose) const; + // Dump any address info with matching name + bool dumpAddressInfos(llvm::raw_ostream &OS, StringRef Name) const; + bool lookup(uint64_t Addr, LookupResult &LR) 
const; + bool getFunctionInfo(uint64_t Addr, FunctionInfo &FuncInfo) const; + // Accessor functions that allow iteration across all addresses in the GSYM + // file. + size_t getNumAddresses() const; + uint64_t getAddress(size_t Index) const; + /// Returns the file entry at \p Index, or a default-constructed FileEntry + /// when \p Index is out of range. + FileEntry getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return FileEntry(); + } + /// Returns the string stored at \p Offset in the string table. + StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } + + void clear(); + +protected: + void init(StringRef Data); + void unmap(); + struct AddressInfo { + uint32_t Size; + uint32_t Name; + }; + /// Result of an address table search. AddrInfoIndex == SIZE_MAX means that + /// no entry has been selected. + struct LookupInfo { + uint64_t MatchAddrOffset; + size_t AddrInfoIndex; + const AddressInfo *AddrInfo; + LookupInfo() + : MatchAddrOffset(0), AddrInfoIndex(SIZE_MAX), AddrInfo(nullptr) {} + void clear() { + MatchAddrOffset = 0; + AddrInfoIndex = SIZE_MAX; + AddrInfo = nullptr; + } + }; + // Address offsets are encoded as 1, 2, 4 or 8 bytes. This function will + // correctly lookup AddrOffset using a binary search through the address + // table. 
+ // + // On return Info selects the last entry whose offset is <= AddrOffset. When + // no such entry exists (the table is empty or AddrOffset lies below the + // first entry) Info is cleared, leaving AddrInfoIndex == SIZE_MAX. + template + void findAddressOffsetInfo(const uint64_t AddrOffset, + LookupInfo &Info) const { + const T *First = reinterpret_cast(AddrOffsets.data()); + const T *Last = First + GSYMHeader->NumAddresses; + const T *Iter = std::lower_bound(First, Last, AddrOffset); + if (Iter == Last || AddrOffset < *Iter) { + // There is nothing to step back to at the front of the table; stepping + // anyway would read one element before the address table. + if (Iter == First) { + Info.clear(); + return; + } + --Iter; + } + Info.AddrInfoIndex = std::distance(First, Iter); + Info.MatchAddrOffset = *Iter; + } + + bool findAddressInfo(uint64_t Addr, LookupInfo &Info) const; + void dumpAddressInfo(llvm::raw_ostream &OS, size_t AddrInfoIndex) const; + void dumpFileTable(llvm::raw_ostream &OS) const; + uint64_t getAddressOffset(size_t Index) const; + uint64_t getAddressInfoOffset(size_t Index) const; + DataExtractor getAddressInfoPayload(size_t Index) const; + void dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr, + DataExtractor &LineData, uint32_t Depth) const; + + std::unique_ptr MemBuffer; + StringRef GsymBytes; + const Header *GSYMHeader = nullptr; + ArrayRef AddrOffsets; + ArrayRef AddrInfoOffsets; + ArrayRef Files; + StringTable StrTab; + std::string ErrorStr; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H Index: include/llvm/DebugInfo/GSYM/GsymStreamer.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymStreamer.h +++ include/llvm/DebugInfo/GSYM/GsymStreamer.h @@ -0,0 +1,69 @@ +//===- GSYMStreamer.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H +#define LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace llvm { +namespace gsym { + +/// The Dwarf streaming logic. +/// +/// All interactions with the MC layer that is used to build the debug +/// information binary representation are handled in this class. +class GSYMStreamer { +public: + GSYMStreamer(raw_fd_ostream &OutFile) : OutFile(OutFile) {} + + bool init(Triple TheTriple); + + /// Dereferences Asm unconditionally, so it must already have been set. + /// NOTE(review): presumably init() sets it; confirm in the implementation. + AsmPrinter &getAsmPrinter() const { return *Asm; } + +private: + /// \defgroup MCObjects MC layer objects constructed by the streamer + /// @{ + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr MOFI; + std::unique_ptr MC; + // The non-owning pointers start out null so that a streamer on which + // init() has not run never exposes indeterminate pointer values. + MCAsmBackend *MAB = nullptr; // Owned by MCStreamer + std::unique_ptr MII; + std::unique_ptr MSTI; + MCCodeEmitter *MCE = nullptr; // Owned by MCStreamer + MCStreamer *MS = nullptr; // Owned by AsmPrinter + std::unique_ptr TM; + std::unique_ptr Asm; + /// @} + + /// The file we stream the linked Dwarf to. 
+ raw_fd_ostream &OutFile; +}; + +} // end namespace gsym +} // end namespace llvm + +#endif // LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H Index: include/llvm/DebugInfo/GSYM/InlineInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/InlineInfo.h +++ include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -0,0 +1,107 @@ +//===- InlineInfo.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H +#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H + +#include +#include + +#include "Range.h" + +namespace llvm { +class DataExtractor; +class raw_ostream; + +namespace gsym { +class GsymReader; +struct FileEntry; +class FileWriter; +struct StringTable; + +/// One node of a tree of inline information: a name, a call file and line, +/// the address ranges it covers and its child nodes. A node is valid when it +/// has at least one range (see isValid()). +struct InlineInfo { + uint32_t Name; + uint32_t CallFile; + uint32_t CallLine; + AddressRanges Ranges; + std::vector Children; + InlineInfo() : Name(0), CallFile(0), CallLine(0) {} + void write(FileWriter &Out, uint64_t BaseAddr) const; + + /// Decode all InlineInfo from address info data. + /// + /// Use this function when you want to get all of the inline information + /// for dumping or caching for subsequent lookups. + /// + /// \param Data the data extractor that contains only the bytes for the + /// inline info starting at offset zero, which is sized to contain only + /// the data for the InlineInfo. + /// \param BaseAddr the base address that all InlineInfo range offsets are + /// relative to. + /// + /// \returns true if successful, false if InlineInfo is empty + bool decode(DataExtractor &Data, uint64_t BaseAddr); + + /// Decode only the InlineInfo that matches the lookup address. + /// + /// Only ranges and child InlineInfo objects containing LookupAddr will be + /// stored in this object. 
This is handy to use when you are symbolicating + /// an address and don't intend to keep the InlineInfo instance around + /// after doing the lookup. This will minimize the allocations that happen + /// during lookups and speed up the lookup. + /// + /// \param Data the data extractor that contains only the bytes for the + /// inline info starting at offset zero, which is sized to contain only + /// the data for the InlineInfo. + /// \param BaseAddr the base address that all InlineInfo range offsets are + /// relative to. + /// \param LookupAddr the lookup address to use when decoding. + /// + /// \returns true if successful, false if InlineInfo is empty + bool decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t LookupAddr); + + void clear() { + Name = 0; + CallFile = 0; + CallLine = 0; + Ranges.clear(); + Children.clear(); + } + bool isValid() const { return !Ranges.empty(); } + void dump(llvm::raw_ostream &OS, const GsymReader &GSYM, + unsigned depth) const; + /// Lookup an address in the InlineInfo object + /// + /// This lookup can be performed on an instance that was decoded with + /// either InlineInfo::decode(...) call. This function is used to symbolicate + /// an inline call stack and can turn one address in the program into one or + /// more inline call stacks and have the stack trace show the original + /// call site from non-inlined code. + /// + /// \param Addr the address to lookup + /// \param InlineStack a vector of InlineInfo objects that describe the + /// inline call stack for a given address. 
+ /// + /// \returns true if successful, false otherwise + bool getInlineStack(uint64_t Addr, + std::vector &InlineStack) const; +}; + +/// Member-wise equality, including the child nodes. +inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) { + return LHS.Name == RHS.Name && LHS.CallFile == RHS.CallFile && + LHS.CallLine == RHS.CallLine && LHS.Ranges == RHS.Ranges && + LHS.Children == RHS.Children; +} + +raw_ostream &operator<<(raw_ostream &OS, const InlineInfo &FI); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H Index: include/llvm/DebugInfo/GSYM/LineEntry.h =================================================================== --- include/llvm/DebugInfo/GSYM/LineEntry.h +++ include/llvm/DebugInfo/GSYM/LineEntry.h @@ -0,0 +1,45 @@ +//===- LineEntry.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H +#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H + +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +namespace llvm { +namespace gsym { +/// One row of a line table: an address plus the file index and line number. +/// Equality compares all three fields, while operator< orders by Addr only. +struct LineEntry { + uint64_t Addr; // Start address of this line entry + uint32_t File; // 1 based index of file in FileTable + uint32_t Line; // Source line number + LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0) + : Addr(A), File(F), Line(L) {} + // File indexes are 1 based, so zero marks an entry without a file. Const so + // that it can be called on const entries, e.g. lookup results. + bool isValid() const { return File != 0; } + void dump(llvm::raw_ostream &OS) const { + OS << "addr=" << format("0x%08" PRIx64, Addr) + << ", file=" << format("%3u", File) << ", line=" << format("%3u", Line) + << '\n'; + } +}; +inline bool operator==(const LineEntry &lhs, const LineEntry &rhs) { + return lhs.Addr == rhs.Addr && lhs.File == rhs.File && lhs.Line == rhs.Line; +} +inline bool operator!=(const LineEntry &lhs, const 
LineEntry &rhs) { + return !(lhs == rhs); +} +inline bool operator<(const LineEntry &lhs, const LineEntry &rhs) { + return lhs.Addr < rhs.Addr; +} +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H Index: include/llvm/DebugInfo/GSYM/LineTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/LineTable.h +++ include/llvm/DebugInfo/GSYM/LineTable.h @@ -0,0 +1,39 @@ +//===- LineTable.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H +#define LLVM_DEBUGINFO_GSYM_LINETABLE_H + +#include +#include +#include + +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace gsym { + +struct LineEntry; +struct FunctionInfo; +class FileWriter; + +/// Wraps a DataExtractor (stored by value) holding line table data. +/// NOTE(review): write/parseAllEntries/lookup are only declared here; their +/// exact encoding and results must be confirmed against LineTable.cpp. +class LineTable { + DataExtractor Data; + +public: + LineTable(DataExtractor D) : Data(D) {} + static bool write(FileWriter &Out, const FunctionInfo &FuncInfo); + std::vector parseAllEntries(uint64_t BaseAddr); + LineEntry lookup(uint64_t BaseAddr, uint64_t Addr); +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H Index: include/llvm/DebugInfo/GSYM/LookupResult.h =================================================================== --- include/llvm/DebugInfo/GSYM/LookupResult.h +++ include/llvm/DebugInfo/GSYM/LookupResult.h @@ -0,0 +1,49 @@ +//===- LookupResult.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H +#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H + +#include "Range.h" +#include "llvm/ADT/StringRef.h" +#include +#include +#include +#include + +namespace llvm { +class raw_ostream; +namespace gsym { +struct FileEntry; + +/// A name, a directory string, a base string and a line number. All strings +/// are non-owning StringRefs. +struct SourceLocation { + StringRef Name; + StringRef Dir; + StringRef Base; + uint32_t Line = 0; +}; + +/// Output record of an address lookup: the looked-up address, the function's +/// range and name, and a list of source locations. +/// NOTE(review): FuncName and the SourceLocation strings are non-owning; they +/// presumably point into the reader's string table, so confirm the lifetime. +struct LookupResult { + uint64_t LookupAddr = 0; + AddressRange FuncRange; + StringRef FuncName; + std::vector Locations; + /// Resets every member to its default state. + void clear() { + LookupAddr = 0; + FuncRange.clear(); + FuncName = StringRef(); + Locations.clear(); + } + void dump(llvm::raw_ostream &OS) const; + std::string getSourceFile(uint32_t Index) const; +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H Index: include/llvm/DebugInfo/GSYM/Range.h =================================================================== --- include/llvm/DebugInfo/GSYM/Range.h +++ include/llvm/DebugInfo/GSYM/Range.h @@ -0,0 +1,75 @@ +//===- Range.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H +#define LLVM_DEBUGINFO_GSYM_RANGE_H + +#include +#include + +// Hex formatting helpers built on llvm::format_hex; the second argument is the +// field width passed to it. +// NOTE(review): these expand to llvm::format_hex, so the including file needs +// llvm/Support/Format.h; the includes visible here do not show it. +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +namespace llvm { +class raw_ostream; + +namespace gsym { +/// An address range with an inclusive Start and an exclusive End, as +/// contains() treats it. +struct AddressRange { + uint64_t Start; + uint64_t End; + AddressRange(uint64_t S = 0, uint64_t E = 0) : Start(S), End(E) {} + // An inverted or empty range (End <= Start) reports a size of zero. + uint64_t size() const { return Start < End ? 
End - Start : 0; } + void setStartAddress(uint64_t Addr) { Start = Addr; } + void setEndAddress(uint64_t Addr) { End = Addr; } + // Sets End relative to the current Start, so set the start address first. + void setSize(uint64_t Size) { End = Start + Size; } + uint64_t startAddress() const { return Start; } + uint64_t endAddress() const { return End; } + void clear() { + Start = 0; + End = 0; + } + bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; } + // True when the ranges overlap or merely touch end-to-start. + bool doesAdjoinOrIntersect(const AddressRange &RHS) const { + return (Start <= RHS.End) && (End >= RHS.Start); + } + // True only when the ranges share at least one address. + bool doesIntersect(const AddressRange &RHS) const { + return (Start < RHS.End) && (End > RHS.Start); + } +}; + +inline bool operator==(const AddressRange &LHS, const AddressRange &RHS) { + return LHS.Start == RHS.Start && LHS.End == RHS.End; +} +inline bool operator!=(const AddressRange &LHS, const AddressRange &RHS) { + return LHS.Start != RHS.Start || LHS.End != RHS.End; +} +// Orders by Start, then by End. +inline bool operator<(const AddressRange &LHS, const AddressRange &RHS) { + if (LHS.Start == RHS.Start) + return LHS.End < RHS.End; + return LHS.Start < RHS.Start; +} +// Mixed comparisons against a plain address only look at Start. +inline bool operator<(const AddressRange &LHS, uint64_t Addr) { + return LHS.Start < Addr; +} +inline bool operator<(uint64_t Addr, const AddressRange &RHS) { + return Addr < RHS.Start; +} + +raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); + +typedef std::vector AddressRanges; +bool contains(const AddressRanges &Ranges, uint64_t Addr); +void insert(AddressRanges &Ranges, const AddressRange &R); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_RANGE_H Index: include/llvm/DebugInfo/GSYM/StringTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTable.h +++ include/llvm/DebugInfo/GSYM/StringTable.h @@ -0,0 +1,63 @@ +//===- StringTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H +#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H + +#include +#include + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace gsym { +/// Non-owning view over a block of NUL-terminated strings that are addressed +/// by their byte offset into the block. +struct StringTable { + StringRef Data; + StringTable() : Data() {} + StringTable(StringRef D) : Data(D) {} + StringRef operator[](size_t Offset) const { return getString(Offset); } + /// Returns the string starting at \p Offset, up to but excluding the next + /// NUL byte (or up to the end of the data when there is none). Returns an + /// empty StringRef when \p Offset is out of range. + StringRef getString(uint32_t Offset) const { + if (Offset < Data.size()) { + auto End = Data.find('\0', Offset); + return Data.substr(Offset, End - Offset); + } + return StringRef(); + } + void clear() { Data = StringRef(); } + /// Prints every string with its offset, walking the table front to back. + /// HEX32 comes from Range.h. + void dump(raw_ostream &OS) const { + OS << "String table:\n"; + uint32_t Offset = 0; + const size_t Size = Data.size(); + while (Offset < Size) { + StringRef Str = getString(Offset); + OS << HEX32(Offset) << ": \"" << Str << "\"\n"; + Offset += Str.size() + 1; + } + } + /// Returns the offset of the first occurrence of \p Str that is directly + /// followed by a NUL terminator, or UINT32_MAX when there is none. The + /// empty string is always reported at offset zero. An occurrence that is + /// the tail of a longer string counts as a match. + uint32_t find(StringRef Str) const { + if (Str.empty()) + return 0; // The empty string is reported at offset zero + size_t Offset = 0; + size_t Pos; + while ((Pos = Data.find(Str, Offset)) != StringRef::npos) { + auto NullTerminator = Data.substr(Pos + Str.size(), 1); + // The match runs to the very end of the data without a terminator; no + // later match can exist either. + if (NullTerminator.empty()) + break; + if (NullTerminator[0] == '\0') + return Pos; + // Resume right after the start of the rejected match. Advancing from + // the old Offset instead would re-scan the same match and could jump + // over an overlapping one ("aa" in "aaa\0" lives at offset 1). + Offset = Pos + 1; + } + return UINT32_MAX; // Return an invalid index when the string is not found + } +}; +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H Index: include/llvm/DebugInfo/GSYM/StringTableCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTableCreator.h +++ include/llvm/DebugInfo/GSYM/StringTableCreator.h @@ -0,0 +1,59 @@ +//===- StringTableCreator.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H +#define LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H + +#include + +#include "FileWriter.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +namespace llvm { +namespace gsym { +class StringTableCreator { + StringMap Strings; + // Strings contains the backing string + DenseMap OffsetToString; + std::vector OrderedStrings; + uint32_t NextOffset; + +public: + StringTableCreator() : NextOffset(0) { insert(""); } + + uint32_t insert(StringRef Str) { + + auto Pair = Strings.try_emplace(Str, NextOffset); + // Pair.second will be true if the string was added to the string table. + if (Pair.second) { + // String was added, do some book keeping + OffsetToString[Pair.first->getValue()] = Pair.first->getKey(); + OrderedStrings.push_back(Pair.first->getKey()); + NextOffset += Str.size() + 1; + } + return Pair.first->getValue(); + } + + uint32_t getTotalSize() const { return NextOffset; } + + StringRef operator[](size_t Offset) const { + auto it = OffsetToString.find(Offset); + return it == OffsetToString.end() ? StringRef() : it->second; + } + + void write(FileWriter &Out) const { + for (auto Str : OrderedStrings) { + Out.Write(Str.data(), Str.size()); + Out.Write("\0", 1); + } + } +}; +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H Index: include/llvm/Support/DataExtractor.h =================================================================== --- include/llvm/Support/DataExtractor.h +++ include/llvm/Support/DataExtractor.h @@ -59,6 +59,37 @@ /// Set the address size for this extractor. void setAddressSize(uint8_t Size) { AddressSize = Size; } + /// Get a slice of data from this data extractor with the same endian + /// and address size. 
+ /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// + /// @param[in] length + /// The size in byte of the data to extract. If the value is set + /// to StringRef::npos, then all remaining bytes. + /// + /// @return + /// A data extractor with the same endian and address byte size + /// settings as this object with a slice of data whose offset + /// will start at zero. + DataExtractor getSlice(uint32_t *offset_ptr, + size_t length = StringRef::npos) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return DataExtractor(Slice, IsLittleEndian, AddressSize); + } + + const uint8_t *getDataPtr(uint32_t *offset_ptr, size_t length) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return reinterpret_cast(Slice.data()); + } /// Extract a C string from \a *offset_ptr. /// /// Returns a pointer to a C String from the data at the offset Index: lib/DebugInfo/CMakeLists.txt =================================================================== --- lib/DebugInfo/CMakeLists.txt +++ lib/DebugInfo/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(DWARF) +add_subdirectory(GSYM) add_subdirectory(MSF) add_subdirectory(CodeView) add_subdirectory(PDB) Index: lib/DebugInfo/GSYM/Breakpad.cpp =================================================================== --- lib/DebugInfo/GSYM/Breakpad.cpp +++ lib/DebugInfo/GSYM/Breakpad.cpp @@ -0,0 +1,229 @@ +//===- Breakpad.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/MemoryBuffer.h" + +#include "llvm/DebugInfo/GSYM/Breakpad.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +using namespace llvm; +using namespace gsym; + +enum class BreakpadLineType { + Invalid, + Module, + File, + Function, + SourceLine, + Public, + Stack +}; + +class Line { + StringRef Str; + +public: + Line(StringRef S) : Str(S) {} + + BreakpadLineType GetLineType() { + static StringRef BreakpadModule("MODULE "); + static StringRef BreakpadFile("FILE "); + static StringRef BreakpadFunc("FUNC "); + static StringRef BreakpadPublic("PUBLIC "); + static StringRef BreakpadStack("STACK "); + + if (Str.empty()) + return BreakpadLineType::Invalid; + + switch (Str[0]) { + case 'F': + if (Str.consume_front(BreakpadFunc)) + return BreakpadLineType::Function; + if (Str.consume_front(BreakpadFile)) + return BreakpadLineType::File; + break; + case 'M': + if (Str.consume_front(BreakpadModule)) + return BreakpadLineType::Module; + break; + case 'P': + if (Str.consume_front(BreakpadPublic)) + return BreakpadLineType::Public; + break; + case 'S': + if (Str.consume_front(BreakpadStack)) + return BreakpadLineType::Stack; + break; + default: + if (isHexDigit(Str[0])) + return BreakpadLineType::SourceLine; + break; + } + return BreakpadLineType::Invalid; + } + void TrimLeadingSpaces() { Str = Str.ltrim(); } + StringRef GetWord() { + // Get the next word from the line. Any leading spaces + // will be stripped. A word is considered any character + // that is not a space. 
+ TrimLeadingSpaces(); + if (Str.empty()) + return StringRef(); + const size_t EndWord = + std::min(Str.size(), Str.find_first_of(" \t\n\v\f\r")); + StringRef Word(Str.substr(0, EndWord)); + Str = Str.drop_front(EndWord); + return Word; + } + StringRef GetRestOfLineAsString() const { return Str.ltrim(); } + uint64_t GetHex() { + TrimLeadingSpaces(); + uint64_t Value = 0; + if (Str.consumeInteger(16, Value)) + return UINT64_MAX; + return Value; + } + uint32_t GetHex32() { + TrimLeadingSpaces(); + uint32_t Value = 0; + if (Str.consumeInteger(16, Value)) + return UINT32_MAX; + return Value; + } + uint64_t GetDecimal() { + TrimLeadingSpaces(); + uint64_t Value = 0; + if (Str.consumeInteger(10, Value)) + return UINT64_MAX; + return Value; + } + uint32_t GetDecimal32() { + TrimLeadingSpaces(); + uint32_t Value = 0; + if (Str.consumeInteger(10, Value)) + return UINT32_MAX; + return Value; + } +}; + +std::error_code llvm::gsym::convertBreakpadFileToGSYM(StringRef BreakpadPath, + StringRef GSYMPath) { + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(BreakpadPath); + auto EC = BuffOrErr.getError(); + if (EC) + return EC; + + std::unique_ptr Buffer = std::move(BuffOrErr.get()); + + GsymCreator Gsym; + EC = convertBreakpadDataToGSYM(StringRef(Buffer->getBuffer()), Gsym); + if (EC) + return EC; + + return Gsym.save(GSYMPath); +} + +std::error_code llvm::gsym::convertBreakpadDataToGSYM(StringRef Lines, + GsymCreator &Gsym) { + + FunctionInfo FuncInfo; + bool GotPublic = false; + while (!Lines.empty()) { + // Remove any leading spaces or newlines + Lines = Lines.ltrim(); + const size_t EndLine = std::min(Lines.find_first_of("\r\n"), Lines.size()); + Line L(Lines.substr(0, EndLine)); + Lines = Lines.drop_front(EndLine).ltrim(); + + switch (L.GetLineType()) { + case BreakpadLineType::Invalid: + break; + case BreakpadLineType::Module: { + L.GetWord(); // Skip OS string + L.GetWord(); // Skip arch string + StringRef UUID = L.GetWord(); + auto UUIDLength = UUID.size(); + uint8_t 
HighNibble = 0; + for (size_t i = 0; i < UUIDLength; ++i) { + const uint8_t Nibble = hexDigitValue(UUID[i]); + if (Nibble <= 0xf) { + if (i % 2) + Gsym.UUID.push_back(HighNibble + Nibble); + else + HighNibble = Nibble << 4; + } else { + Gsym.UUID.clear(); + break; + } + } + } break; + case BreakpadLineType::File: { +#ifndef NDEBUG + const auto BreakpadFileIndex = L.GetDecimal(); + const auto GsymFileIndex = Gsym.insertFile(L.GetRestOfLineAsString()); + assert(BreakpadFileIndex + 1 == GsymFileIndex); +#endif + break; + } + case BreakpadLineType::Function: + if (FuncInfo.isValid()) + Gsym.addFunctionInfo(std::move(FuncInfo)); + FuncInfo.Range.setStartAddress(L.GetHex()); + FuncInfo.Range.setSize(L.GetHex32()); + L.GetHex(); // Skip parameter_size + FuncInfo.Name = Gsym.insertString(L.GetRestOfLineAsString()); + FuncInfo.Lines.clear(); + break; + case BreakpadLineType::SourceLine: { + const uint64_t Addr = L.GetHex(); + L.GetHex32(); // Skip 32 bit size + const uint32_t LineNum = L.GetDecimal32(); + const uint32_t FileIndex = L.GetDecimal32() + 1; + if (!FuncInfo.Lines.empty()) { + auto &Last = FuncInfo.Lines.back(); + // Skip multiple line entries in a row that have the same file and line + if (Last.File == FileIndex && Last.Line == LineNum) + break; + } + FuncInfo.Lines.push_back(LineEntry(Addr, FileIndex, LineNum)); + } break; + case BreakpadLineType::Public: { + GotPublic = true; + const uint64_t Addr = L.GetHex(); + L.GetHex(); // Skip parameter_size + const uint32_t Name = Gsym.insertString(L.GetRestOfLineAsString()); + Gsym.addFunctionInfo(FunctionInfo(Addr, 0, Name)); + } break; + case BreakpadLineType::Stack: + break; + } + } + if (GotPublic) { + // We appended public symbol FunctionInfo objects onto the end of our + // sorted func_infos array and we need to sort it now. 
+ Gsym.sortFunctionInfos(); + } + return std::error_code(); +} Index: lib/DebugInfo/GSYM/CMakeLists.txt =================================================================== --- lib/DebugInfo/GSYM/CMakeLists.txt +++ lib/DebugInfo/GSYM/CMakeLists.txt @@ -0,0 +1,18 @@ +add_llvm_library(LLVMDebugInfoGSYM + Breakpad.cpp + DwarfTransformer.cpp + FileTableCreator.cpp + FileWriter.cpp + FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp + GsymStreamer.cpp + InlineInfo.cpp + LineTable.cpp + LookupResult.cpp + Range.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + ) Index: lib/DebugInfo/GSYM/DwarfTransformer.cpp =================================================================== --- lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,545 @@ +//===- DwarfTransformer.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +using namespace llvm; +using namespace gsym; + +constexpr uint32_t NT_GNU_BUILD_ID_TAG = 0x03; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector FileCache; + uint64_t Language = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) + FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + DWARFDie Die = CU->getUnitDIE(); + Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); + } + + uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { + if (!LineTable) + return 0; + assert(DwarfFileIdx < FileCache.size()); + uint32_t FileIdx = FileCache[DwarfFileIdx]; + if (FileIdx != UINT32_MAX) + return FileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + FileIdx = Gsym.insertFile(File); + else + FileIdx = 0; + FileCache[DwarfFileIdx] = FileIdx; + return FileIdx; + } +}; + +static std::vector getUUID(const object::ObjectFile &Obj) { + // Extract the UUID from the object file + std::vector UUID; + if (auto *MachO = dyn_cast(&Obj)) { + const 
ArrayRef MachUUID = MachO->getUuid(); + if (!MachUUID.empty()) + UUID.assign(MachUUID.data(), MachUUID.data() + MachUUID.size()); + } else if (isa(&Obj)) { + const StringRef GNUBuildID(".note.gnu.build-id"); + for (const object::SectionRef &Sect : Obj.sections()) { + StringRef SectName; + Sect.getName(SectName); + if (SectName != GNUBuildID) + continue; + StringRef BuildIDData; + Expected E = Sect.getContents(); + if (E) + BuildIDData = *E; + else { + consumeError(E.takeError()); + continue; + } + // TODO: set endian and addr byte size + DataExtractor Decoder(BuildIDData, true, 8); + uint32_t Offset = 0; + const uint32_t NameSize = Decoder.getU32(&Offset); + const uint32_t PayloadSize = Decoder.getU32(&Offset); + const uint32_t PayloadType = Decoder.getU32(&Offset); + StringRef Name((const char *)Decoder.getDataPtr(&Offset, NameSize)); + if (Name == "GNU" && PayloadType == NT_GNU_BUILD_ID_TAG) { + Offset = alignTo(Offset, 4); + const uint8_t *UUIDBytes = + (const uint8_t *)Decoder.getDataPtr(&Offset, PayloadSize); + if (UUIDBytes) + UUID.assign(UUIDBytes, UUIDBytes + PayloadSize); + } + } + } + return UUID; +} + +DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { + if (DWARFDie SpecDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) + return SpecParent; + } + if (DWARFDie AbstDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) + return AbstParent; + } + + // We never want to follow parent for inlined subroutine - that would + // give us information about where the function is inlined, not what + // function is inlined + if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) + return DWARFDie(); + + DWARFDie ParentDie = Die.getParent(); + if (!ParentDie) + return DWARFDie(); + + switch (ParentDie.getTag()) { + case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case 
dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return ParentDie; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(ParentDie); + default: + break; + } + + return DWARFDie(); +} + +std::string getQualifiedName(DWARFDie &Die, uint64_t Language) { + // If the dwarf has mangled name, use mangled name + if (auto LinkageName = + dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) + return LinkageName; + + StringRef ShortName(Die.getName(DINameKind::ShortName)); + if (ShortName.empty()) + return ""; + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(Language == dwarf::DW_LANG_C_plus_plus || + Language == dwarf::DW_LANG_C_plus_plus_03 || + Language == dwarf::DW_LANG_C_plus_plus_11 || + Language == dwarf::DW_LANG_C_plus_plus_14 || + Language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. This should hurt, so let's do it for C as well + Language == dwarf::DW_LANG_C)) + return ShortName.str(); + + // Some GCC optimizations create functions with names ending with .isra. + // or .part. and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (ShortName.startswith("_Z") && + (ShortName.contains(".isra.") || ShortName.contains(".part."))) + return ShortName.str(); + + std::string Name = ShortName.str(); + DWARFDie ParentDie = GetParentDeclContextDIE(Die); + while (ParentDie) { + StringRef ParentName(ParentDie.getName(DINameKind::ShortName)); + if (!ParentName.empty()) { + // "lambda" names are wrapped in < >. 
Replace with { } + // to be consistent with demangled names and not to confuse with + // templates + if (ParentName.front() == '<' && ParentName.back() == '>') + Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + + "::" + Name; + else + Name = ParentName.str() + "::" + Name; + } + ParentDie = GetParentDeclContextDIE(ParentDie); + } + + return Name; +} + +static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { + bool CheckChildren = true; + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. + CheckChildren = Depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (!CheckChildren) + return false; + for (DWARFDie ChildDie : Die.children()) { + if (hasInlineInfo(ChildDie, Depth + 1)) + return true; + } + return false; +} + +static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, + uint32_t Depth, FunctionInfo &FuncInfo, + InlineInfo &parent) { + if (!hasInlineInfo(Die, Depth)) + return; + + dwarf::Tag Tag = Die.getTag(); + if (Tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo II; + DWARFAddressRange FuncRange = + DWARFAddressRange(FuncInfo.startAddress(), FuncInfo.endAddress()); + Expected RangesOrError = Die.getAddressRanges(); + if (RangesOrError) { + for (const DWARFAddressRange &Range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) + II.Ranges.emplace_back(AddressRange(Range.LowPC, Range.HighPC)); + } + } + if (II.Ranges.empty()) + return; + + std::string Name = getQualifiedName(Die, CUI.Language); + if (!Name.empty()) + II.Name = Gsym.insertString(std::move(Name)); + II.CallFile = CUI.DWARFToGSYMFileIndex( + Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); + II.CallLine = 
dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FuncInfo, II); + parent.Children.emplace_back(std::move(II)); + return; + } + if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { + // skip this Die and just recurse down + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FuncInfo, parent); + } +} + +bool DwarfTransformer::parseLineTable(raw_ostream &Strm, CUInfo &CUI, + DWARFDie Die, FunctionInfo &FuncInfo) { + std::vector RowVector; + const uint64_t StartAddress = FuncInfo.startAddress(); + const uint64_t EndAddress = FuncInfo.endAddress(); + const uint64_t RangeSize = EndAddress - StartAddress; + const object::SectionedAddress SecAddress( + StartAddress, object::SectionedAddress::UndefSection); + if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) + return false; + + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; + const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); + const uint64_t RowAddress = Row.Address.Address; + + if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking the the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. 
+ if (LineEntry(RowAddress, FileIdx, Row.Line) == FuncInfo.Lines.front()) { + Strm << "warning: duplicate line table detected for DIE:\n"; + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + } else { + // Print out (ignore if os == nulls as this is expensive) + Strm << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + CUI.LineTable->Rows[RowIndex2].dump(Strm); + } + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (!FuncInfo.Lines.empty()) { + const LineEntry &last = FuncInfo.Lines.back(); + // Skip multiple line entris for the same file and line. + if (last.File == FileIdx && last.Line == Row.Line) + continue; + } + if (!Row.EndSequence) { + if (StartAddress <= RowAddress && RowAddress < EndAddress) { + FuncInfo.Lines.emplace_back(LineEntry(RowAddress, FileIdx, Row.Line)); + } else if (RowAddress < StartAddress) { + // Line tables are bad in DWARF. + Strm << "warning: invalid DWARF for DIE with range [" + << HEX64(StartAddress) << '-' << HEX64(EndAddress) + << ") that has a matching line entry with an address " + << HEX64(RowAddress) << " that is lower than the function " + << "start address:\n"; + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + // Don't put multiple bogus line entries in the line table. + // Check to see if we already fixed up the first line entry to + // be the LowPC? 
+ if (FuncInfo.Lines.size() == 1 && + FuncInfo.Lines.front().Addr == StartAddress) { + FuncInfo.Lines[0].File = FileIdx; + FuncInfo.Lines[0].Line = Row.Line; + } else { + FuncInfo.Lines.emplace_back( + LineEntry(StartAddress, FileIdx, Row.Line)); + } + } + } + PrevRow = Row; + } + return true; +} + +void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: { + Expected RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + consumeError(RangesOrError.takeError()); + break; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + std::string Name = getQualifiedName(Die, CUI.Language); + if (Name.empty()) { + OS << "error: function at " << HEX64(Die.getOffset()) + << " has no name\n "; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + const uint32_t NameIndex = Gsym.insertString(std::move(Name)); + + // Create a function_info for each range + for (const DWARFAddressRange &Range : Ranges) { + if (!contains(Gsym.TextRanges, Range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. 
+ if (Range.LowPC != 0 && Range.LowPC != UINT32_MAX && + Range.LowPC != UINT64_MAX) { + // Unexpected invalid address, emit an error + OS << "error: DIE has an address range whose start address is not " + " in an executable section and will not be processed:\n"; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (Range.LowPC >= Range.HighPC) + break; + + FunctionInfo FuncInfo; + FuncInfo.setStartAddress(Range.LowPC); + FuncInfo.setEndAddress(Range.HighPC); + FuncInfo.Name = NameIndex; + if (CUI.LineTable) { + parseLineTable(OS, CUI, Die, FuncInfo); + } + if (hasInlineInfo(Die, 0)) { + FuncInfo.Inline.Name = NameIndex; + FuncInfo.Inline.Ranges = {FuncInfo.Range}; + parseInlineInfo(Gsym, CUI, Die, 0, FuncInfo, FuncInfo.Inline); + } + Gsym.addFunctionInfo(std::move(FuncInfo)); + } + } break; + default: + break; + } + for (DWARFDie ChildDie : Die.children()) + handleDie(OS, CUI, ChildDie); +} + +void DwarfTransformer::initDataFromObj(const object::ObjectFile &Obj) { + // Read build ID + Gsym.UUID = getUUID(Obj); + + // We need to know where the valid sections are that contain instructions. + // DWARF now emits DW_TAG_subprogram tags with DW_AT_low_pc as addresses + // and DW_AT_high_pc values as sizes, and the linker will zero out the + // DW_AT_low_pc, but won't touch the DW_AT_high_pc if it isn't an address. + // This means we might have many many DW_TAG_subprogram's with a + // DW_AT_low_pc of zero and a valid size. We need to ignore these entries. 
+ for (const object::SectionRef &Sect : Obj.sections()) { + if (!Sect.isText()) + continue; + const uint64_t Size = Sect.getSize(); + if (Size == 0) + continue; + const uint64_t StartAddr = Sect.getAddress(); + insert(Gsym.TextRanges, AddressRange(StartAddr, StartAddr + Size)); + } +} + +std::error_code DwarfTransformer::loadDwarf(const object::ObjectFile &Obj) { + using namespace llvm::object; + std::unique_ptr DICtx = DWARFContext::create(Obj); + logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), Log, "dwarf2gsym"); + + if (!Initialized) + initDataFromObj(Obj); + + size_t NumBefore = Gsym.getFunctionInfoSize(); + if (NumThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + for (const auto &CU : DICtx->compile_units()) { + DWARFDie Die = CU->getUnitDIE(false); + CUInfo CUI(*DICtx, dyn_cast(CU.get())); + handleDie(Log, CUI, Die); + } + } else { + // THIS IS VERY HACKY. Without parsing DIEs for all CUs first, we might hit + // a race condition below. LLVM Dwarf parser is not completely thread-safe: + // DWARFDie keeps a pointer to an element of a vector in DWARFUnit. If we + // dont parse all the DIEs first, they might be parsed from another thread, + // causing the vector to reallocate, causing the pointer to be invalid. We + // could sequentially call CU->getUnitDIE(false) for all CUs, but that's + // also not super fast. To do this concurrently, we need to call + // getAbbreviations sequentially first so that getUnitDIE() only works with + // its local data. 
+ for (const auto &CU : DICtx->compile_units()) + CU->getAbbreviations(); + + ThreadPool pool(NumThreads); + for (const auto &CU : DICtx->compile_units()) + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + pool.wait(); + + std::mutex LogMutex; + for (const auto &CU : DICtx->compile_units()) { + DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + if (Die) { + CUInfo CUI(*DICtx, dyn_cast(CU.get())); + pool.async([this, CUI, &LogMutex, Die]() mutable { + std::string ThreadLogStorage; + raw_string_ostream ThreadOS(ThreadLogStorage); + handleDie(ThreadOS, CUI, Die); + ThreadOS.flush(); + if (!ThreadLogStorage.empty()) { + // Print ThreadLogStorage lines into an actual stream under a lock + std::lock_guard guard(LogMutex); + Log << ThreadLogStorage; + } + }); + } + } + pool.wait(); + } + size_t FunctionsAddedCount = Gsym.getFunctionInfoSize() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; + if (FunctionsAddedCount > 0) + return std::error_code(); + return std::make_error_code(std::errc::invalid_argument); +} + +std::error_code +DwarfTransformer::loadSymbolTable(const object::ObjectFile &Obj) { + using namespace llvm::object; + if (!Initialized) + initDataFromObj(Obj); + size_t NumBefore = Gsym.getFunctionInfoSize(); + + for (const object::SymbolRef &Sym : Obj.symbols()) { + Expected SymType = Sym.getType(); + const uint64_t addr = Sym.getValue(); + if (!SymType || SymType.get() != SymbolRef::Type::ST_Function || addr == 0) + continue; + // Function size for MachO files will be 0 + const uint64_t size = + isa(&Obj) ? 
ELFSymbolRef(Sym).getSize() : 0; + if (Expected Name = Sym.getName()) + Gsym.addFunctionInfo(FunctionInfo(addr, size, Gsym.insertString(*Name))); + } + size_t FunctionsAddedCount = Gsym.getFunctionInfoSize() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n"; + if (FunctionsAddedCount > 0) + return std::error_code(); + return std::make_error_code(std::errc::invalid_argument); +} + +Optional> +DwarfTransformer::getObjectFile(StringRef Path) const { + auto BuffOrErr = MemoryBuffer::getFileOrSTDIN(Path); + if (auto err = BuffOrErr.getError()) { + Log << Path << ": " << err.message() << "\n"; + return None; + } + auto Buff = std::move(BuffOrErr.get()); + auto BinOrErr = object::createBinary(*Buff); + if (auto err = errorToErrorCode(BinOrErr.takeError())) { + Log << Path << ": " << err.message() << "\n"; + return None; + } + + if (isa(BinOrErr->get())) { + // transfer ownership + auto ptr = std::unique_ptr( + dyn_cast(BinOrErr->release())); + return object::OwningBinary(std::move(ptr), + std::move(Buff)); + } else if (auto *Fat = + dyn_cast(BinOrErr->get())) { + auto Arch = object::MachOObjectFile::getHostArch().getArchName(); + if (Fat->getNumberOfObjects() == 1) { + auto MachOOrErr = Fat->begin_objects()->getAsObjectFile(); + if (auto err = BuffOrErr.getError()) { + Log << Path << ": " << err.message() << "\n"; + return None; + } + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else if (auto MachOOrErr = Fat->getObjectForArch(Arch)) { + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else { + Log << Path << ": file contains objects for multiple archs but not for " + << Arch << "\n"; + } + } + Log << Path << ": unsupported binary type\n"; + return None; +} Index: lib/DebugInfo/GSYM/FileTableCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/FileTableCreator.cpp +++ lib/DebugInfo/GSYM/FileTableCreator.cpp @@ 
-0,0 +1,46 @@ +//===- FileTableCreator.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include + +using namespace llvm; +using namespace gsym; + +uint32_t FileTableCreator::insert(StringRef S) { + auto Dir = 0, Base = 0; + auto LastSlash = S.rfind('/'); + if (LastSlash == std::string::npos || LastSlash == 0) { + Base = StringTable.insert(S); + } else { + Dir = StringTable.insert(S.substr(0, LastSlash)); + Base = StringTable.insert(S.substr(LastSlash + 1)); + } + return insert(FileEntry(Dir, Base)); +} + +uint32_t FileTableCreator::insert(FileEntry Entry) { + auto Index = FileEntries.size(); + // find in hash map and insert if not present + auto R = EntryToIndex.insert(std::make_pair(Entry, Index)); + if (R.second) + FileEntries.emplace_back(Entry); + return R.first->second; +} + +void FileTableCreator::write(FileWriter &Out) const { + // we should always have 1 blank entry + assert(!FileEntries.empty()); + assert(FileEntries[0].Dir == 0); + assert(FileEntries[0].Base == 0); + size_t NumFiles = FileEntries.size(); + Out.WriteUnsigned(NumFiles, sizeof(uint32_t)); + Out.Write(FileEntries.data(), NumFiles * sizeof(FileEntry)); +} Index: lib/DebugInfo/GSYM/FileWriter.cpp =================================================================== --- lib/DebugInfo/GSYM/FileWriter.cpp +++ lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,79 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +bool FileWriter::WriteSLEB(int64_t S) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(S, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteULEB(uint64_t U) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(U, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteU8(uint8_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::WriteU32(uint32_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::Fixup32(uint32_t U, off_t Offset) { + const off_t CurrOffset = Tell(); + if (CurrOffset == -1) + return false; + if (Seek(Offset) != Offset) + return false; + if (!WriteU32(U)) + return false; + return Seek(CurrOffset) == CurrOffset; +} + +bool FileWriter::WriteUnsigned(uint64_t U, size_t N) { + // NOTE: this only works on little endian machines + return Write(&U, N); +} +bool FileWriter::Write(const void *Src, size_t SrcLength) { + OS.write((const char *)Src, SrcLength); + return OS.good(); +} + +off_t FileWriter::Tell() { return OS.tellp(); } + +off_t FileWriter::Seek(off_t Offset) { + OS.seekp(Offset); + return OS.good() ? 
Offset : -1; +} + +bool FileWriter::AlignTo(size_t Align) { + off_t Offset = Tell(); + assert(Offset != -1); + if (Offset == -1) + return false; + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return true; + off_t PadCount = AlignedOffset - Offset; + auto Success = Write(std::string(PadCount, '\0').c_str(), PadCount); + assert(Tell() == AlignedOffset); + return Success; +} Index: lib/DebugInfo/GSYM/FunctionInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/FunctionInfo.cpp +++ lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -0,0 +1,40 @@ +//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void FunctionInfo::dump(llvm::raw_ostream &OS, GsymReader &GSYM) const { + OS << '[' << HEX64(Range.startAddress()) << '-' << HEX64(Range.endAddress()) + << ") " << GSYM.getString(Name) << '\n'; + if (!Lines.empty()) { + OS << "Lines:\n"; + for (const auto &Line : Lines) + Line.dump(OS); + } + if (Inline.isValid()) + Inline.dump(OS, GSYM, 0); +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { + OS << '[' << HEX64(FI.Range.startAddress()) << '-' + << HEX64(FI.Range.endAddress()) << "): " + << "Name=" << HEX32(FI.Name) << '\n'; + for (const auto &Line : FI.Lines) + Line.dump(OS); + OS << FI.Inline; + return OS; +} Index: lib/DebugInfo/GSYM/GsymCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymCreator.cpp +++ 
lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,203 @@ +//===- GsymCreator.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +#include +#include +#include +#include +#include + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" + +using namespace llvm; +using namespace gsym; + +std::error_code GsymCreator::save(StringRef Path) const { + std::ofstream File(Path.str().c_str(), + std::ios::binary | std::ios::out | std::ios::trunc); + std::error_code Error = save(File); + File.close(); + return Error; +} + +std::error_code GsymCreator::save(std::ostream &OS) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return std::make_error_code(std::errc::invalid_argument); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + uint8_t AddrOffSize = 8; + if (AddrDelta <= UINT8_MAX) + AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + AddrOffSize = 4; + Header Hdr = {0, 0, 0, 0, 0, 0, 0, 0, {0}}; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = AddrOffSize; + Hdr.UUIDSize = UUID.size(); + Hdr.BaseAddress = MinAddr; + assert(Funcs.size() <= UINT32_MAX); + Hdr.NumAddresses = (uint32_t)Funcs.size(); + Hdr.StrtabOffset = 0; // We will need to fix this up later. + Hdr.StrtabSize = 0; // We will need to fix this up later. 
+ if (Hdr.UUIDSize > sizeof(Hdr.UUID)) { + fprintf(stderr, "error: UUID value is too large (%u bytes)\n", + (uint32_t)UUID.size()); + return std::make_error_code(std::errc::invalid_argument); + } + if (UUID.size() > 0) { + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + } + FileWriter Out(OS); + // Write out the header + Out.Write(&Hdr, Header::getByteSize()); + Out.AlignTo(Hdr.AddrOffSize); + // Write out the address offsets + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + Out.WriteUnsigned(AddrOffset, Hdr.AddrOffSize); + } + // Write out all zeros for the AddrInfoOffsets + Out.AlignTo(sizeof(uint32_t)); + const off_t AddrInfoOffsetsOffset = Out.Tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + Out.WriteU32(0); + + // Write out the file table + Out.AlignTo(sizeof(uint32_t)); + FileTab.write(Out); + // Write out the sting table + const off_t StrtabOffset = Out.Tell(); + StrTab.write(Out); + const off_t StrtabSize = Out.Tell() - StrtabOffset; + std::vector AddrInfoOffsets; + // Write out the address infos for each address + for (const auto &FuncInfo : Funcs) { + Out.AlignTo(sizeof(uint32_t)); + AddrInfoOffsets.push_back((uint32_t)Out.Tell()); + // Write the size in bytes of this function as a uint32_t + Out.WriteU32(FuncInfo.size()); + // Write the name of this function as a uint32_t string table offset + Out.WriteU32(FuncInfo.Name); + // Write out the line table if we have one. 
+ LineTable::write(Out, FuncInfo); + + // Write out the inline function info if we have any + if (FuncInfo.Inline.isValid()) { + Out.WriteU32(static_cast(InfoType::InlineInfo)); + const auto InlineInfoSizeOffset = Out.Tell(); + Out.WriteU32(0); // We will fix this up after writing the info out + const auto InlineInfoStart = Out.Tell(); + FuncInfo.Inline.write(Out, FuncInfo.startAddress()); + const off_t inline_info_length = Out.Tell() - InlineInfoStart; + Out.Fixup32((uint32_t)inline_info_length, InlineInfoSizeOffset); + } + + // Terminate the data chunks with and end of list with zero size + Out.WriteU32(static_cast(InfoType::EndOfList)); + Out.WriteU32(0); + } + // Fixup the string table offset and size in the header + Out.Seek(offsetof(Header, StrtabOffset)); + Out.WriteU32((uint32_t)StrtabOffset); + Out.WriteU32((uint32_t)StrtabSize); + + // Fixup all address info offsets + Out.Seek(AddrInfoOffsetsOffset); + Out.Write(AddrInfoOffsets.data(), AddrInfoOffsets.size() * sizeof(uint32_t)); + return std::error_code(); +} + +void GsymCreator::optimize(llvm::raw_ostream &OS) { + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. 
+ // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + sortFunctionInfos(); + std::lock_guard Guard(Mutex); + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.doesIntersect(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.startAddress())) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; +} Index: lib/DebugInfo/GSYM/GsymReader.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymReader.cpp +++ lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,543 @@ +//===- GsymReader.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +static int unsigned_width(uint64_t u) { + if (u < 10) + return 1; + return 1 + unsigned_width(u / 10); +} + +void Header::dump(llvm::raw_ostream &OS) const { + OS << "Header:\n"; + OS << " magic = " << HEX32(Magic) << "\n"; + OS << " version = " << HEX16(Version) << '\n'; + OS << " addr_off_size = " << HEX8(AddrOffSize) << '\n'; + OS << " uuid_size = " << HEX8(UUIDSize) << '\n'; + OS << " base_address = " << HEX64(BaseAddress) << '\n'; + OS << " num_addrs = " << HEX32(NumAddresses) << '\n'; + OS << " strtab_offset = " << HEX32(StrtabOffset) << '\n'; + OS << " strtab_size = " << HEX32(StrtabSize) << '\n'; + OS << " uuid = "; + for (uint8_t I = 0; I < UUIDSize; ++I) { + OS << format_hex_no_prefix(UUID[I], 2); + } + OS << '\n'; +} + +bool Header::isValid() const { + // TODO: support swapped GSYM files + if (Magic != GSYM_MAGIC) + return false; + if (Version != 1) + return false; + return true; +} + +GsymReader::GsymReader() {} + +std::error_code GsymReader::openFile(StringRef Filename) { + // Open the input file + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Error = BuffOrErr.getError(); + if (!Error) { + MemBuffer = std::move(BuffOrErr.get()); + init(MemBuffer->getBuffer()); + } + return Error; +} + +void GsymReader::copyBuffer(StringRef Bytes) { + clear(); + MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + 
init(MemBuffer->getBuffer()); +} + +void GsymReader::clear() { + MemBuffer.reset(); + GsymBytes = StringRef(); + GSYMHeader = nullptr; + AddrOffsets = ArrayRef(); + AddrInfoOffsets = ArrayRef(); + Files = ArrayRef(); + StrTab.clear(); + ErrorStr.clear(); +} + +void GsymReader::init(StringRef FileBytes) { + BinaryStreamReader FileData(FileBytes, support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + if (FileData.readObject(GSYMHeader)) + return; + + if (GSYMHeader->Magic == GSYM_MAGIC) { + // This file is a GSYM file, the file data is the GSYM data + GsymBytes = FileBytes; + } else { + // TODO: Load this file as an ObjectFile and extract GSYM info from a + // ".gsym" section (ELF) or "__gsym" section (mach-o) in the file. + return; + } + + if (!GSYMHeader->isValid()) + return; + + if (FileData.padToAlignment(GSYMHeader->AddrOffSize)) + return; + // Calculate the start of the address offset table. The address offset table + // contains a sorted list of the all all addresses contained in this GSYM + // file. + if (FileData.readArray(AddrOffsets, + GSYMHeader->NumAddresses * GSYMHeader->AddrOffSize)) + return; + // Calculate the start of the address info offset table. Each address offset + // in the address offset table has a offset to the file data for the address + // in the address info offsets table. + if (FileData.padToAlignment(sizeof(uint32_t))) + return; + if (FileData.readArray(AddrInfoOffsets, GSYMHeader->NumAddresses)) + return; + + uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles)) + return; + // Calculate the start of the file table. + if (FileData.readArray(Files, NumFiles)) + return; + + // Set the string table based off information in the GSYM header and read it + // from the FileData, not from GsymBytes. 
+ FileData.setOffset(GSYMHeader->StrtabOffset); + + if (FileData.readFixedString(StrTab.Data, GSYMHeader->StrtabSize)) + return; +} + +uint64_t GsymReader::getAddressOffset(size_t Index) const { + if (GSYMHeader && !AddrOffsets.empty() && Index < GSYMHeader->NumAddresses) { + switch (GSYMHeader->AddrOffSize) { + case 1: + return reinterpret_cast(AddrOffsets.data())[Index]; + case 2: + return reinterpret_cast(AddrOffsets.data())[Index]; + case 4: + return reinterpret_cast(AddrOffsets.data())[Index]; + case 8: + return reinterpret_cast(AddrOffsets.data())[Index]; + } + } + return UINT64_MAX; +} + +size_t GsymReader::getNumAddresses() const { + if (GSYMHeader) + return GSYMHeader->NumAddresses; + return 0; +} + +uint64_t GsymReader::getAddress(size_t Index) const { + if (GSYMHeader) { + auto AddrOffset = getAddressOffset(Index); + if (AddrOffset != UINT64_MAX) + return GSYMHeader->BaseAddress + AddrOffset; + } + return UINT64_MAX; +} + +uint64_t GsymReader::getAddressInfoOffset(size_t Index) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (Index < NumAddrInfoOffsets) + return AddrInfoOffsets[Index]; + return UINT64_MAX; +} + +DataExtractor GsymReader::getAddressInfoPayload(size_t Index) const { + const uint64_t Offset = getAddressInfoOffset(Index); + if (Offset == UINT64_MAX) + return DataExtractor(StringRef(), true, 8); + + BinaryStreamReader GSYMData(GsymBytes, support::endian::system_endianness()); + GSYMData.setOffset(Offset); + if (!GSYMData.skip(sizeof(AddressInfo))) { + uint64_t EndOffset = getAddressInfoOffset(Index + 1); + if (EndOffset == UINT64_MAX) + EndOffset = GsymBytes.size(); + if (EndOffset > Offset + sizeof(sizeof(AddressInfo))) { + const uint64_t Size = EndOffset - (Offset + sizeof(AddressInfo)); + StringRef AddrInfoData; + if (!GSYMData.readFixedString(AddrInfoData, Size)) + return DataExtractor(AddrInfoData, true, 8); + } + } + return DataExtractor(StringRef(), true, 8); +} + +static StringRef getInfoTypeAsString(InfoType 
IT) { + switch (IT) { + case InfoType::EndOfList: + return "EndOfList"; + case InfoType::LineTableInfo: + return "LineTable"; + case InfoType::InlineInfo: + return "InlineInfo"; + } + return "???"; +} + +void GsymReader::dump(llvm::raw_ostream &OS, bool Verbose) const { + // If GSYMHeader is not NULL, then the header has been validated. + if (!GSYMHeader) { + OS << "invalid gsym file\n"; + return; + } + if (Verbose) { + // When verbose dumping, dump the header, address offsets, address info + // offsets, file table and string table. + GSYMHeader->dump(OS); + OS << "Address Offsets:\n"; + const size_t AddrOffsetHexWidth = GSYMHeader->AddrOffSize * 2 + 2; + const size_t AddressHexWidth = 16 + 2; + const size_t IndexWidth = unsigned_width(GSYMHeader->NumAddresses); + for (uint32_t I = 0; I < GSYMHeader->NumAddresses; ++I) { + auto AddrOffset = getAddressOffset(I); + OS << " [" << format_decimal(I, IndexWidth) << "] " + << format_hex(AddrOffset, AddrOffsetHexWidth) << " (" + << format_hex(AddrOffset + GSYMHeader->BaseAddress, AddressHexWidth) + << ")\n"; + } + OS << "Address Info Offsets:\n"; + for (uint32_t I = 0; I < GSYMHeader->NumAddresses; ++I) + OS << " [" << format_decimal(I, IndexWidth) << "] " + << HEX32(getAddressInfoOffset(I)) << '\n'; + dumpFileTable(OS); + StrTab.dump(OS); + } + + // Dump contents of all address info objects. 
+ OS << "Address Infos:\n"; + for (uint32_t I = 0; I < GSYMHeader->NumAddresses; ++I) + dumpAddressInfo(OS, I); +} + +void GsymReader::dumpAddressInfo(llvm::raw_ostream &OS, + size_t AddrInfoIndex) const { + const auto AddrInfoOffset = getAddressInfoOffset(AddrInfoIndex); + if (AddrInfoOffset == UINT64_MAX) { + OS << "error: invalid address info index " << AddrInfoIndex; + return; + } + OS << HEX32(AddrInfoOffset) << ": "; + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (GSYMData.readObject(AddrInfo)) { + OS << "error: corrupt GSYM file doesn't contain address info\n"; + return; + } + const uint64_t AddrOffset = getAddressOffset(AddrInfoIndex); + const uint64_t StartAddr = GSYMHeader->BaseAddress + AddrOffset; + const uint64_t EndAddr = StartAddr + AddrInfo->Size; + OS << '[' << HEX64(StartAddr) << " - " << HEX64(EndAddr) + << "): " << StrTab[AddrInfo->Name] << '\n'; + DataExtractor Data = getAddressInfoPayload(AddrInfoIndex); + + bool Done = false; + uint32_t Offset = 0; + while (!Done) { + auto IT = static_cast(Data.getU32(&Offset)); + uint32_t InfoLength = Data.getU32(&Offset); + OS << " " << HEX32(AddrInfoOffset + 8 + Offset) << ": <" + << HEX32(InfoLength) << "> " << getInfoTypeAsString(IT) << '\n'; + DataExtractor InfoData = Data.getSlice(&Offset, InfoLength); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + dumpLineTable(OS, StartAddr, InfoData, 4); + break; + + case InfoType::InlineInfo: { + InlineInfo Inline; + if (Inline.decode(InfoData, StartAddr)) + Inline.dump(OS, *this, 4); + else + OS << "error: failed to decode inline info\n"; + } break; + } + } +} + +bool GsymReader::dumpAddressInfos(llvm::raw_ostream &OS, + StringRef NameCstr) const { + uint32_t NameIdx = StrTab.find(NameCstr); + if (NameIdx == 0) + return false; + bool Success = false; + for (uint32_t I = 0; I < 
GSYMHeader->NumAddresses; ++I) { + const auto AddrInfoOffset = getAddressInfoOffset(I); + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (!GSYMData.readObject(AddrInfo) && AddrInfo->Name == NameIdx) { + dumpAddressInfo(OS, I); + Success = true; + } + } + return Success; +} + +void GsymReader::dumpFileTable(llvm::raw_ostream &OS) const { + const auto NumFiles = Files.size(); + OS << "Files:\n"; + const size_t IndexWidth = unsigned_width(NumFiles); + for (uint32_t I = 0; I < NumFiles; ++I) { + OS << "files[" << format_decimal(I, IndexWidth) << "] " + << HEX32(Files[I].Dir) << ", " << HEX32(Files[I].Base); + StringRef Dir(StrTab[Files[I].Dir]); + StringRef Base(StrTab[Files[I].Base]); + if (Dir.empty()) { + if (!Base.empty()) + OS << " (\"" << Base << "\")"; + } else { + assert(!Base.empty()); + OS << " (\"" << Dir << '/' << Base << "\")"; + } + OS << '\n'; + } +} + +void GsymReader::dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr, + DataExtractor &LineData, uint32_t Depth) const { + LineTable parser(LineData); + std::vector LineEntries = parser.parseAllEntries(BaseAddr); + for (const auto &Line : LineEntries) { + FileEntry File = getFile(Line.File); + auto Dir = StrTab[File.Dir]; + auto Base = StrTab[File.Base]; + OS.indent(Depth); + OS << HEX64(Line.Addr) << ": " << Dir << '/' << Base << ':' << Line.Line + << '\n'; + } +} + +bool GsymReader::findAddressInfo(uint64_t Addr, LookupInfo &Info) const { + if (Addr < GSYMHeader->BaseAddress || GSYMHeader->NumAddresses == 0) + return false; + const uint64_t AddrOffset = Addr - GSYMHeader->BaseAddress; + Info.clear(); + + switch (GSYMHeader->AddrOffSize) { + case 1: + findAddressOffsetInfo(AddrOffset, Info); + break; + case 2: + findAddressOffsetInfo(AddrOffset, Info); + break; + case 4: + findAddressOffsetInfo(AddrOffset, Info); + break; + case 8: + findAddressOffsetInfo(AddrOffset, Info); + 
break; + default: + break; + } + + if (Info.AddrInfoIndex < GSYMHeader->NumAddresses) { + auto AddrInfoOffset = AddrInfoOffsets[Info.AddrInfoIndex]; + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (!GSYMData.readObject(AddrInfo)) { + // Make sure the address is within the bounds of the address info's size + auto FuncOffset = AddrOffset - Info.MatchAddrOffset; + // If an entry has zero size, then we will match it regardless of the + // size. These are typically symbols in the symbol table. + if (AddrInfo->Size == 0 || FuncOffset < AddrInfo->Size) { + Info.AddrInfo = AddrInfo; + return true; + } + } + } + return false; +} + +bool GsymReader::lookup(uint64_t Addr, LookupResult &Result) const { + Result.clear(); + LookupInfo Info; + if (!findAddressInfo(Addr, Info)) + return false; + + Result.LookupAddr = Addr; + Result.FuncRange.setStartAddress(GSYMHeader->BaseAddress + + Info.MatchAddrOffset); + Result.FuncRange.setSize(Info.AddrInfo->Size); + Result.FuncName = getString(Info.AddrInfo->Name); + + LineEntry Line; + InlineInfo Inline; + DataExtractor Data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + uint32_t IT; + while ((IT = Data.getU32(&Offset))) { + uint32_t InfoLength = Data.getU32(&Offset); + DataExtractor InfoData = Data.getSlice(&Offset, InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: + Line = LineTable(InfoData).lookup(Result.FuncRange.startAddress(), Addr); + break; + case InfoType::InlineInfo: + Inline.decode(InfoData, Result.FuncRange.startAddress(), Addr); + break; + default: + break; + } + } + if (Line.isValid()) { + FileEntry File = getFile(Line.File); + std::vector InlineStack; + + Inline.getInlineStack(Addr, InlineStack); + SourceLocation SrcLoc; + + if (!InlineStack.empty()) { + + // Rest of inlined functions. 
Note that we don't have to add last + // (non-inlined) function explicitly because the root InlineInfo node + // has the same name as the function + + auto Begin = InlineStack.begin(); + const InlineInfo *Prev = nullptr; + for (auto Iter = Begin; Iter != InlineStack.end(); Iter++) { + const InlineInfo *Curr = *Iter; + if (Prev == nullptr) { + // First entry in inline callstack. The file and line come from + // the "Line", and the name comes from the inline info. + SrcLoc.Name = StrTab[Curr->Name]; + FileEntry File = getFile(Line.File); + SrcLoc.Dir = StrTab[File.Dir]; + SrcLoc.Base = StrTab[File.Base]; + SrcLoc.Line = Line.Line; + Result.Locations.emplace_back(std::move(SrcLoc)); + } else { + SrcLoc.Name = StrTab[Curr->Name]; + FileEntry CallFile = getFile(Prev->CallFile); + SrcLoc.Dir = StrTab[CallFile.Dir]; + SrcLoc.Base = StrTab[CallFile.Base]; + SrcLoc.Line = Prev->CallLine; + Result.Locations.push_back(SrcLoc); + } + Prev = Curr; + } + if (Prev) { + SrcLoc.Name = StrTab[Info.AddrInfo->Name]; + FileEntry CallFile = getFile(Prev->CallFile); + SrcLoc.Dir = StrTab[CallFile.Dir]; + SrcLoc.Base = StrTab[CallFile.Base]; + SrcLoc.Line = Prev->CallLine; + Result.Locations.push_back(SrcLoc); + } + } else { + SrcLoc.Name = StrTab[Info.AddrInfo->Name]; + SrcLoc.Dir = StrTab[File.Dir]; + SrcLoc.Base = StrTab[File.Base]; + SrcLoc.Line = Line.Line; + Result.Locations.push_back(SrcLoc); + } + } else { + SourceLocation SrcLoc; + SrcLoc.Name = StrTab[Info.AddrInfo->Name]; + Result.Locations.push_back(SrcLoc); + } + return true; +} + +bool GsymReader::getFunctionInfo(uint64_t Addr, FunctionInfo &FuncInfo) const { + LookupInfo Info; + if (!findAddressInfo(Addr, Info)) + return false; + FuncInfo.clear(); + FuncInfo.setStartAddress(getAddress(Info.AddrInfoIndex)); + FuncInfo.setSize(Info.AddrInfo->Size); + if (Info.AddrInfo->Size == 0) { + // The byte size is zero, see if there is another address after this. 
+ // Set the size accordingly if there is one using the delta between the + // matching address and the next address. + const uint64_t NextAddress = getAddress(Info.AddrInfoIndex + 1); + if (NextAddress != UINT64_MAX && NextAddress > FuncInfo.startAddress()) { + FuncInfo.setSize(NextAddress - FuncInfo.startAddress()); + } + } + FuncInfo.Name = Info.AddrInfo->Name; + + DataExtractor Data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + while (uint32_t IT = Data.getU32(&Offset)) { + uint32_t InfoLength = Data.getU32(&Offset); + DataExtractor InfoData = Data.getSlice(&Offset, InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: { + LineTable parser(InfoData); + FuncInfo.Lines = parser.parseAllEntries(FuncInfo.startAddress()); + } break; + case InfoType::InlineInfo: { + FuncInfo.Inline.decode(InfoData, FuncInfo.startAddress()); + break; + } + default: + break; + } + } + return true; +} + +void GsymReader::unmap() { + MemBuffer.reset(); + GsymBytes = StringRef(); + GSYMHeader = nullptr; + AddrOffsets = ArrayRef(); + AddrInfoOffsets = ArrayRef(); + Files = ArrayRef(); + StrTab.clear(); +} + +GsymReader::~GsymReader() { unmap(); } Index: lib/DebugInfo/GSYM/GsymStreamer.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymStreamer.cpp +++ lib/DebugInfo/GSYM/GsymStreamer.cpp @@ -0,0 +1,92 @@ +//===- GSYMStreamer.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/WithColor.h" + +namespace llvm { +namespace gsym { + +static inline bool error(Twine Error, Twine Context = {}) { + WithColor::error() << Error + "\n"; + if (!Context.isTriviallyEmpty()) + WithColor::note() << Twine("while processing ") + Context + "\n"; + return false; +} + +bool GSYMStreamer::init(Triple TheTriple) { + std::string ErrorStr; + std::string TripleName; + StringRef Context = "gsym streamer init"; + + // Get the target. + const Target *TheTarget = + TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr); + if (!TheTarget) + return error(ErrorStr, Context); + TripleName = TheTriple.getTriple(); + + // Create all the MC Objects. + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) + return error(Twine("no register info for target ") + TripleName, Context); + + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) + return error("no asm info for target " + TripleName, Context); + + MOFI.reset(new MCObjectFileInfo); + MC.reset(new MCContext(MAI.get(), MRI.get(), MOFI.get())); + MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, *MC); + + MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + if (!MSTI) + return error("no subtarget info for target " + TripleName, Context); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); + if (!MAB) + return error("no asm backend for target " + TripleName, Context); + + MII.reset(TheTarget->createMCInstrInfo()); + if (!MII) + return error("no instr info info for target " + TripleName, Context); + + MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC); + if (!MCE) + return error("no 
code emitter for target " + TripleName, Context); + + MS = TheTarget->createMCObjectStreamer( + TheTriple, *MC, std::unique_ptr(MAB), + MAB->createObjectWriter(OutFile), std::unique_ptr(MCE), + *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false); + + if (!MS) + return error("no object streamer for target " + TripleName, Context); + + // Finally create the AsmPrinter we'll use to emit the DIEs. + TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(), + None)); + if (!TM) + return error("no target machine for target " + TripleName, Context); + + Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr(MS))); + if (!Asm) + return error("no asm printer for target " + TripleName, Context); + + return true; +} + +} // namespace gsym +} // namespace llvm Index: lib/DebugInfo/GSYM/InlineInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/InlineInfo.cpp +++ lib/DebugInfo/GSYM/InlineInfo.cpp @@ -0,0 +1,196 @@ +//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void InlineInfo::write(FileWriter &O, uint64_t BaseAddr) const { + O.WriteULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) { + auto Offset = Range.Start - BaseAddr; + auto Size = Range.End - Range.Start; + O.WriteULEB(Offset); + O.WriteULEB(Size); + } + bool HasChildren = !Children.empty(); + O.WriteU8(HasChildren); + O.WriteU32(Name); + O.WriteULEB(CallFile); + O.WriteULEB(CallLine); + if (HasChildren) { + for (const auto &Child : Children) + Child.write(O, Ranges.front().Start); + O.WriteULEB(0); // Terminate child sibling chain + } +} + +static bool decodeAll(InlineInfo &Inline, DataExtractor &Data, uint32_t &Offset, + uint64_t BaseAddr) { + uint64_t NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return false; + Inline.Ranges.reserve(NumRanges); + for (size_t I = 0; I < NumRanges; ++I) { + const uint64_t Off = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + Off; + Inline.Ranges.emplace_back(AddressRange(StartAddr, StartAddr + Size)); + } + bool HasChildren = Data.getU8(&Offset) != 0; + Inline.Name = Data.getU32(&Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + InlineInfo Child; + while (decodeAll(Child, Data, Offset, Inline.Ranges.front().Start)) { + Inline.Children.emplace_back(std::move(Child)); + Child.clear(); + } + } + return true; +} + +bool 
InlineInfo::decode(DataExtractor &Data, uint64_t BaseAddr) { + uint32_t Offset = 0; + return decodeAll(*this, Data, Offset, BaseAddr); +} + +static bool decodeLookup(InlineInfo &Inline, DataExtractor &Data, + uint32_t &Offset, uint64_t BaseAddr, + Optional LookupAddr) { + auto NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return false; + + uint64_t ChildBaseAddr = 0; + if (LookupAddr) { + for (size_t I = 0; I < NumRanges; ++I) { + uint64_t Off = Data.getULEB128(&Offset); + uint64_t Size = Data.getULEB128(&Offset); + uint64_t StartAddr = BaseAddr + Off; + uint64_t EndAddr = StartAddr + Size; + if (I == 0) + ChildBaseAddr = StartAddr; + if (StartAddr <= *LookupAddr && *LookupAddr < EndAddr) + Inline.Ranges.emplace_back(AddressRange(StartAddr, EndAddr)); + } + } else { + for (size_t I = 0; I < NumRanges; ++I) { + Data.getULEB128(&Offset); // Off + Data.getULEB128(&Offset); // Size + } + } + + bool HasChildren = Data.getU8(&Offset) != 0; + Inline.Name = Data.getU32(&Offset); + if (LookupAddr) { + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + } else { + Data.getULEB128(&Offset); // CallFile + Data.getULEB128(&Offset); // CallLine + } + if (HasChildren) { + InlineInfo Child; + if (Inline.Ranges.empty()) { + // This inlined function does not contain lookup_offset, no need to decode + // Ranges, just skip + while (decodeLookup(Child, Data, Offset, ChildBaseAddr, llvm::None)) + /* Do nothing */; + } else { + while (decodeLookup(Child, Data, Offset, ChildBaseAddr, LookupAddr)) { + if (!Child.Ranges.empty()) + Inline.Children.emplace_back(std::move(Child)); + Child.clear(); + } + } + } + return true; +} + +bool InlineInfo::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t LookupAddr) { + uint32_t Offset = 0; + return decodeLookup(*this, Data, Offset, BaseAddr, LookupAddr); +} + +void InlineInfo::dump(llvm::raw_ostream &OS, const GsymReader &GSYM, + unsigned Depth) const { + if 
(!isValid()) + return; + // Indent with spaces based on Depth + OS.indent(Depth); + bool First = true; + for (auto Range : Ranges) { + if (First) + First = false; + else + OS << ' '; + OS << Range; + } + if (Name) + OS << ' ' << GSYM.getString(Name); + if (CallFile) { + auto FE = GSYM.getFile(CallFile); + OS << " called from " << GSYM.getString(FE.Dir) << '/' + << GSYM.getString(FE.Base) << ":" << CallLine; + } + OS << '\n'; + for (const auto &Child : Children) + Child.dump(OS, GSYM, Depth + 1); +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const InlineInfo &II) { + if (!II.isValid()) + return OS; + bool First = true; + for (auto Range : II.Ranges) { + if (First) + First = false; + else + OS << ' '; + OS << Range; + } + OS << " Name = " << HEX32(II.Name) << ", CallFile = " << II.CallFile + << ", CallLine = " << II.CallFile << '\n'; + for (const auto &Child : II.Children) + OS << Child; + return OS; +} + +bool InlineInfo::getInlineStack( + uint64_t Addr, std::vector &InlineStack) const { + for (const auto &Range : Ranges) { + if (Range.contains(Addr)) { + if (Name > 0) + InlineStack.insert(InlineStack.begin(), this); + for (const auto &Child : Children) { + if (Child.getInlineStack(Addr, InlineStack)) + break; + } + return true; + } + } + return false; +} Index: lib/DebugInfo/GSYM/LineTable.cpp =================================================================== --- lib/DebugInfo/GSYM/LineTable.cpp +++ lib/DebugInfo/GSYM/LineTable.cpp @@ -0,0 +1,253 @@ +//===- LineTable.cpp --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace gsym; + +enum LineTableOpCode { + LTOC_EndSequence = 0x00, ///< End of the line table + LTOC_SetFile = 0x01, ///< Set LineTableRow.file_idx, don't push a row + LTOC_AdvancePC = 0x02, ///< Increment LineTableRow.address, and push a row + LTOC_AdvanceLine = 0x03, ///< Set LineTableRow.file_line, don't push a row + LTOC_FirstSpecial = 0x04, ///< All special opcodes push a row +}; + +struct DeltaInfo { + int64_t Delta; + uint32_t Count; + DeltaInfo(int64_t D, uint32_t C) : Delta(D), Count(C) {} +}; + +inline bool operator<(const DeltaInfo &LHS, int64_t Delta) { + return LHS.Delta < Delta; +} + +static bool encode_special(int64_t MinLineDelta, int64_t MaxLineDelta, + int64_t LineDelta, uint64_t AddrDelta, + uint8_t &SpecialOp) { + if (LineDelta < MinLineDelta) + return false; + if (LineDelta > MaxLineDelta) + return false; + int64_t LineRange = MaxLineDelta - MinLineDelta + 1; + int64_t AdjustedOp = ((LineDelta - MinLineDelta) + AddrDelta * LineRange); + int64_t Op = AdjustedOp + LTOC_FirstSpecial; + if (Op < 0) + return false; + if (Op > 255) + return false; + SpecialOp = (uint8_t)Op; + return true; +} + +static void parse(DataExtractor &Data, uint64_t BaseAddr, + std::function const &Callback) { + uint32_t Offset = 0; + int64_t MinDelta = Data.getSLEB128(&Offset); + int64_t MaxDelta = Data.getSLEB128(&Offset); + int64_t LineRange = MaxDelta - MinDelta + 1; + const uint32_t FirstLine = (uint32_t)Data.getULEB128(&Offset); + LineEntry Row(BaseAddr, 1, FirstLine); + bool Done = false; + while (!Done) { + uint8_t Op = Data.getU8(&Offset); + 
switch (Op) { + case LTOC_EndSequence: + Done = true; + break; + case LTOC_SetFile: + Row.File = (uint32_t)Data.getULEB128(&Offset); + break; + case LTOC_AdvancePC: + Row.Addr += Data.getULEB128(&Offset); + // If the function callback returns false, we stop parsing + if (Callback(Row) == false) + return; + break; + case LTOC_AdvanceLine: + Row.Line += Data.getSLEB128(&Offset); + break; + default: { + // A byte that contains both address and line increment + uint8_t AdjustedOp = Op - LTOC_FirstSpecial; + int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); + uint64_t AddrDelta = (AdjustedOp / LineRange); + Row.Line += LineDelta; + Row.Addr += AddrDelta; + // If the function callback returns false, we stop parsing + if (Callback(Row) == false) + return; + break; + } + } + } +} + +bool LineTable::write(FileWriter &Out, const FunctionInfo &FuncInfo) { + if (FuncInfo.Lines.empty()) + return false; + // Write out the unsigned InfoType::LineTableInfo enum + Out.WriteU32(static_cast(InfoType::LineTableInfo)); + // Write out a zero byte size of this chunk for now, we will fixup later + const off_t LineTableLengthOffset = Out.Tell(); + Out.WriteU32(0); + const off_t LineTableStart = LineTableLengthOffset + 4; + int64_t MinLineDelta = INT64_MAX; + int64_t MaxLineDelta = INT64_MIN; + std::vector DeltaInfos; + if (FuncInfo.Lines.size() == 1) { + MinLineDelta = 0; + MaxLineDelta = 0; + } else { + int64_t PrevLine = 1; + bool First = true; + for (const auto &line_entry : FuncInfo.Lines) { + if (First) + First = false; + else { + int64_t LineDelta = (int64_t)line_entry.Line - PrevLine; + auto End = DeltaInfos.end(); + auto Pos = std::lower_bound(DeltaInfos.begin(), End, LineDelta); + if (Pos != End && Pos->Delta == LineDelta) + ++Pos->Count; + else + DeltaInfos.insert(Pos, DeltaInfo(LineDelta, 1)); + if (LineDelta < MinLineDelta) + MinLineDelta = LineDelta; + if (LineDelta > MaxLineDelta) + MaxLineDelta = LineDelta; + } + PrevLine = (int64_t)line_entry.Line; + } + 
assert(MinLineDelta <= MaxLineDelta); + } + // Set the min and max line delta intelligently based on the counts of + // the line deltas. if our range is too large. + const int64_t MaxLineRange = 14; + if (MaxLineDelta - MinLineDelta > MaxLineRange) { + uint32_t BestIndex = 0; + uint32_t BestEndIndex = 0; + uint32_t BestCount = 0; + const size_t NumDeltaInfos = DeltaInfos.size(); + for (uint32_t I = 0; I < NumDeltaInfos; ++I) { + const int64_t FirstDelta = DeltaInfos[I].Delta; + uint32_t CurrCount = 0; + uint32_t J; + for (J = I; J < NumDeltaInfos; ++J) { + auto LineRange = DeltaInfos[J].Delta - FirstDelta; + if (LineRange > MaxLineRange) + break; + CurrCount += DeltaInfos[J].Count; + } + if (CurrCount > BestCount) { + BestIndex = I; + BestEndIndex = J - 1; + BestCount = CurrCount; + } + } + MinLineDelta = DeltaInfos[BestIndex].Delta; + MaxLineDelta = DeltaInfos[BestEndIndex].Delta; + } + if (MinLineDelta == MaxLineDelta && MinLineDelta > 0 && + MinLineDelta < MaxLineRange) + MinLineDelta = 0; + assert(MinLineDelta <= MaxLineDelta); + + // Initialize the line entry state as a starting point. All line entries + // will be deltas from this. + LineEntry Prev(FuncInfo.Range.startAddress(), 1, FuncInfo.Lines.front().Line); + + // Write out the min and max line delta as signed LEB128 + Out.WriteSLEB(MinLineDelta); + Out.WriteSLEB(MaxLineDelta); + // Write out the starting line number as a unsigned LEB128 + Out.WriteULEB(Prev.Line); + + for (const auto &Curr : FuncInfo.Lines) { + assert(Curr.Addr >= Prev.Addr); + const uint64_t AddrDelta = Curr.Addr - Prev.Addr; + int64_t LineDelta = 0; + if (Curr.Line > Prev.Line) + LineDelta = Curr.Line - Prev.Line; + else if (Prev.Line > Curr.Line) + LineDelta = -((int32_t)(Prev.Line - Curr.Line)); + + // Set the file if it doesn't match the current one. 
+ if (Curr.File != Prev.File) { + Out.WriteU8(LTOC_SetFile); + Out.WriteULEB(Curr.File); + } + + uint8_t SpecialOp; + if (encode_special(MinLineDelta, MaxLineDelta, LineDelta, AddrDelta, + SpecialOp)) { + // Advance the PC and line and push a row + Out.WriteU8(SpecialOp); + } else { + // We can't encode the address delta and line delta into + // a single special opcode, we must do them separately + + // Advance the line + if (LineDelta != 0) { + Out.WriteU8(LTOC_AdvanceLine); + Out.WriteSLEB(LineDelta); + } + + // Advance the PC and push a row + Out.WriteU8(LTOC_AdvancePC); + Out.WriteULEB(AddrDelta); + } + Prev = Curr; + } + Out.WriteU8(LTOC_EndSequence); + + // Fixup the line table byte size + const off_t LineTableLength = Out.Tell() - LineTableStart; + Out.Fixup32((uint32_t)LineTableLength, LineTableLengthOffset); + return true; +} + +// Parse all line table entries into the "LineTable" vector. We can +// cache the results of this if needed, or we can call LineTable::lookup() +// below. +std::vector LineTable::parseAllEntries(uint64_t BaseAddr) { + std::vector LineTable; + parse(Data, BaseAddr, [&LineTable](const LineEntry &Row) -> bool { + LineTable.push_back(Row); + return true; // Keep parsing by returning true + }); + return LineTable; +} +// Parse the line table on the fly and find the row we are looking for. +// We will need to determine if we need to cache the line table by calling +// LineTable::parseAllEntries(...) or just call this function each time. +// There is a CPU vs memory tradeoff we will need to determine. +LineEntry LineTable::lookup(uint64_t BaseAddr, uint64_t Addr) { + LineEntry Result; + parse(Data, BaseAddr, [Addr, &Result](const LineEntry &Row) -> bool { + if (Addr < Row.Addr) + return false; // Stop parsing, result contains the line table row! + Result = Row; + if (Addr == Row.Addr) { + // Stop parsing, this is the row we are looking for since the address + // matches. 
+ return false; + } + return true; // Keep parsing till we find the right row + }); + return Result; +} Index: lib/DebugInfo/GSYM/LookupResult.cpp =================================================================== --- lib/DebugInfo/GSYM/LookupResult.cpp +++ lib/DebugInfo/GSYM/LookupResult.cpp @@ -0,0 +1,58 @@ +//===- LookupResult.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LookupResult.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +std::string LookupResult::getSourceFile(uint32_t Index) const { + std::string Fullpath; + if (Index < Locations.size()) { + if (!Locations[Index].Dir.empty()) { + if (Locations[Index].Base.empty()) { + Fullpath = Locations[Index].Dir; + } else { + llvm::SmallString<64> Storage; + llvm::sys::path::append(Storage, Locations[Index].Dir, + Locations[Index].Base); + Fullpath.assign(Storage.begin(), Storage.end()); + } + } else if (!Locations[Index].Base.empty()) + Fullpath = Locations[Index].Base; + } + return Fullpath; +} + +void LookupResult::dump(llvm::raw_ostream &OS) const { + OS << HEX64(LookupAddr) << ": "; + auto NumLocations = Locations.size(); + for (size_t I = 0; I < NumLocations; ++I) { + if (I > 0) { + OS << '\n'; + OS.indent(20); + } + const bool IsInlined = I + 1 != NumLocations; + OS << Locations[I].Name; + if (!Locations[I].Base.empty()) { + OS << " @ "; + if (!Locations[I].Dir.empty()) + OS << Locations[I].Dir << '/'; + OS << Locations[I].Base << ':' << Locations[I].Line; + } + + if (IsInlined) + OS << " [inlined]"; + } + OS << '\n'; +} Index: lib/DebugInfo/GSYM/README.md 
=================================================================== --- lib/DebugInfo/GSYM/README.md +++ lib/DebugInfo/GSYM/README.md @@ -0,0 +1,281 @@ +# GSYM Introduction + +GSYM is a symbolication file format designed to be the best format to use for symbolicating addresses into function name + source file + line information. It is a binary file format designed to be mapped into one or more processes. GSYM information can be created by converting DWARF debug information, or Breakpad files. GSYM information can exist as a stand alone file, or be contained in ELF or mach-o files in a section. When embedded into ELF or mach-o files, GSYM sections can share a string table that already exists within a file. + +## Why use GSYM? +GSYM files are up to 7x smaller than DWARF files and up to 3x smaller than Breakpad files. The file format is designed to touch as few pages of the file as possible while doing address lookups. GSYM files can be mmap'ed into a process as shared memory allowing multiple processes on a symbolication server to share loaded GSYM pages. The file format includes inline call stack information and can help turn a single address lookup into multiple stack frames that walk the inlined call stack back to the concrete function that invoked these functions. + +## Converting DWARF Files to GSYM +`llvm-gsymutil` is available in the `llvm/tools/gsym` directory and has options to convert DWARF into GSYM files. `llvm-gsymutil` has a `-dwarf` option that specifies a DWARF file to convert into a GSYM file. The output file can be specified with the `-out-file` option. +``` +$ llvm-gsymutil -dwarf /tmp/a.out -out-file /tmp/a.out.gsym +``` + + This command will convert a DWARF file into the GSYM file format. This allows clients that are currently symbolicating with DWARF to switch to using the GSYM file format. 
This tool could be used in a symbolication workflow where symbolication servers convert DWARF to GSYM and cache the results on the fly, or could be used at build time to always produce a GSYM file. DWARF debug information is rich enough to support encoding the inline call stack information for richer and more useful symbolication backtraces. + +## Converting Breakpad Files to GSYM + +`llvm-gsymutil` has a `-breakpad` option that specifies a Breakpad file to convert into a GSYM file. The output file can be specified with the `-out-file` option. +``` +$ llvm-gsymutil -breakpad /tmp/foo.sym -out-file /tmp/foo.gsym +``` +This allows clients currently using breakpad to switch over to use GSYM files. This tool could be used in a symbolication workflow where symbolication servers convert breakpad to GSYM format on the fly only when needed. Breakpad files do not contain inline call stack information, so it is advisable to use `llvm-gsymutil -dwarf` when possible to avoid losing this vital information. + +## File Format Overview +The GSYM file consists of a header, address table, address info offset table and address info data for each address. + +The GSYM file format when in a stand alone file is ordered as shown: +- Header +- Address Table +- Address Data Offsets Table +- File Table +- String Table +- Address Data + + +### Header +``` +#define GSYM_MAGIC 0x4753594d +#define GSYM_VERSION 1 +struct Header { + uint32_t magic; + uint16_t version; + uint8_t addr_off_size; + uint8_t uuid_size; + uint64_t base_address; + uint32_t num_addrs; + uint32_t strtab_offset; + uint32_t strtab_size; + uint8_t uuid[20]; +}; +``` + +The magic value is set to `GSYM_MAGIC` and allows quick and easy detection of this file +format when it is loaded. Addresses in the address table are stored as offsets from a 64 +bit address found in `Header.base_address`. This allows the address table to contain 64, +32, 16 or 8 bit offsets, instead of a table of full sized addresses. 
The file size is +smaller and causes fewer pages to be touched during address lookups when the address +table is smaller. The size of the address offsets in the address table is specified in +the header in `Header.addr_off_size`. The header contains a UUID to ensure the GSYM file +can be properly matched to the object ELF or mach-o file that created the stack trace. +The header specifies the location of the string table for all strings contained in the +GSYM file, or can point to an existing string table within a ELF or mach-o file. + +### Address Table +The address table immediately follows the header in the file and consists of +`Header.num_addrs` address offsets. These offsets are sorted and can be binary searched +for efficient lookups. Address offsets are encoded as offsets that are +`Header.addr_off_size` bytes in size. During address lookup, the index of the matching +address offset will be the index into the address data offsets table. + +### Address Data Offsets Table +The address data offsets table immediately follows the address table and +consists of `Header.num_addrs` 32 bit offsets: one for each address in the +address table. The offsets in this table are relative to the first byte of the +GSYM header. The offsets point to the address data for each address in the +address table. Keeping this data separate from the address table helps to +reduce the number of pages that are touched when address lookups occur. + +### File Table +The file table immediately follows the address data offsets table. The format of the `FileTable` is: + +``` +struct FileTable { + uint32_t count; + FileInfo files[]; +}; +``` +The file table starts with a 32 bit count of the number of files that are used in all of the address data, followed by that number of `FileInfo` structures. 
+ +Each file in the file table is represented with a `FileInfo` structure: + +``` +struct FileInfo { + uint32_t directory; + uint32_t filename; +}; +``` + +The FileInfo structure has the file path split into a string for the directory and a string for the filename. The directory and filename are specified as offsets into the string table. Splitting paths into directory and file base name allows GSYM to use the same string table entry for common directories. + +### String Table +The string table follows the file table in stand alone GSYM files and contains +all strings for everything contained in the GSYM file. Any string data should +be added to the string table and any references to strings inside GSYM +information must be stored as 32 bit string table offsets into this string +table. Strings are stored as NULL terminated UTF8 strings. The format of the +string table starts with an empty string at offset zero followed by zero or +more strings. The format is the same as the DWARF .debug_str format with an +additional restriction of being required to start with empty string. The string +table is specified in the GSYM header with the `Header.strtab_offset` and +`Header.strtab_size` fields. The `Header.strtab_offset` is an absolute offset in +the file for the string table. This allows the string table to share other +string table sections that might exist in the file when the GSYM data is a +section within an object file. + +### Address Data +The address data is the payload that contains information about the address that is being looked up. The structure that represents this data is: +``` +struct AddressInfo { + uint32_t size; + uint32_t name; + AddressData data[]; +}; +``` +It starts with a 32 bit size for the address range of the function and is followed by the 32 bit string table offset for the name of the function. 
The size of the address range is important to encode as it stops address lookups from matching if the address is between two functions in some padding. This is followed by an array of address data information: +``` +struct AddressData { + uint32_t type; + uint32_t length; + uint8_t data[length]; +}; +``` +The address data starts with a 32 bit type, followed by a 32 bit length, followed by an array of bytes that encode each specific kind of data. +The `AddressData.type` is an enumeration value: +``` +enum class InfoType { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; +``` +The `AddressInfo.data[]` is encoded as a vector of `AddressData` structs that is terminated by an `AddressData` struct whose type is set to `InfoType::EndOfList`. This allows the GSYM file format to contain arbitrary data for any address range and allows us to expand the GSYM capabilities as we find more uses for it. + +`InfoType::EndOfList` is always the last `AddressData` in the `AddressInfo`. + +#### InfoType::LineTableInfo + +`InfoType::LineTableInfo` is a modified version of the DWARF line tables that +efficiently stores line table information for each function. DWARF stores line +table information for an entire source file and includes all functions. Having +each function's line table encoded separately allows fewer pages to be touched +when looking up the line entry for a specific address. The information is +optional and can be omitted for address data that is from a symbol or label +where no line table information is available. + +##### Format + +Line tables are encoded using a format similar to DWARF line tables. 
The line +table is produced using a state machine in a `LineEntry` structure: + +``` +struct LineEntry { + uint64_t Addr; // Start address of this line entry + uint32_t File; // 1 based index of file in FileTable + uint32_t Line; // Source line number +}; +``` +The initial `LineEntry` for the state machine is initialized with `LineEntry.Addr` +being set to the start address of the function from the Address Table. The `LineEntry.File` +is initialized to a value of 1. And the `LineEntry.Line` is initialized with a +value from the bytes in the line table header described below. As opcodes are +parsed, some modify `LineEntry`, and some opcodes cause `LineEntry` to be appended +to an array of `LineEntry` structs that will be the resulting line table for the +`AddressInfo`. + +#### Opcodes +The main part of the line table is encoded as a byte stream that contains opcodes. +The opcodes are defined in the table below. + +| Opcode | Byte | Add Row | Operand | Description | +|------------------|:----------|:--------|:--------|:----------------------------| +| LTOC_EndSequence | 0x00 | No | None | End of line table encoding | +| LTOC_SetFile | 0x01 | No | ULEB128 | `LineEntry.File` = operand | +| LTOC_AdvancePC | 0x02 | Yes | ULEB128 | `LineEntry.Addr` += operand | +| LTOC_AdvanceLine | 0x03 | No | SLEB128 | `LineEntry.Line` += operand | +| Special | 0x04-0xff | Yes | None | See below. | + +Special opcodes encode both an increment to the `LineEntry.Addr` and a signed offset +to apply to `LineEntry.Line` and will push a row. This allows a single byte in the +line table byte stream to adjust both the address and the line and push a row. +The header of the line table encodes exactly how `Special` opcode bytes divide +up what part of this opcode is the address increment and what part is the line +table offset by tailoring it to each line table to allow the maximum amount of +Special opcodes, and thus the most efficient encoding. 
+##### File Format +The line table is encoded in two main parts: +- LineHeader +- Data + +`LineHeader` looks like: +``` +struct LineHeader { + uint32_t Type; + uint32_t Length; + int64_t MinLineDelta; + int64_t MaxLineDelta; + uint32_t InitialLine; +}; +``` + +##### File Format +`LineHeader` is encoded as: +1. UINT32 enumeration value of `InfoType::LineTableInfo` (`LineHeader.Type`) +2. UINT32 length in bytes of entire line table (`LineHeader.Length`) +3. SLEB128 minimum line table delta (`LineHeader.MinLineDelta`) +4. SLEB128 maximum line table delta (`LineHeader.MaxLineDelta`) +5. ULEB128 initial source line (`LineHeader.InitialLine`) + +A `LineEntry` instance is then initialized prior to decoding the Data: +``` +LineEntry Row; +Row.Addr = FunctionStartAddress; +Row.File = 1 +Row.Line = LineHeader.InitialLine; +``` + +Then a series of line table opcodes follow and cause the line table to be decoded. + +The Special opcodes calculate the address increment and the source line offset as +follows: + +``` +uint8_t AdjustedOp = opcode - 0x04; +int64_t LineRange = LineHeader.MaxLineDelta - LineHeader.MinLineDelta + 1; +int64_t LineDelta = LineHeader.MinLineDelta + (AdjustedOp % LineRange); +uint64_t AddrIncrement = (AdjustedOp / LineRange); +Row.Line += LineDelta; +Row.Addr += AddrIncrement; +``` + + +#### InfoType::InlineInfo + +`InfoType::InlineInfo` is a format that encodes inline call stacks. This information is optional and doesn't need to be included for each address. If the function has no inlined functions this data should not be included. + +The inline info is encoded with the address ranges being relative to the parent +scope's `BaseAddress`. For the top level inlined info, this is the start address +of the AddressInfo itself. For all `InlineInfo` structures contained as children of a +parent `InlineInfo`, the base address is the first address in the ranges in the parent. 
+This ensures that addresses are always small deltas from the parent scope and that no +relocations are required on any inline information. Relocating `InlineInfo` is as simple +as changing the address in the Address Table. + +##### File Format +The inline information is encoded in two main parts: +- Header +- Data + +The Header is encoded as: +1. UINT32 enumeration value of `InfoType::InlineInfo` +2. UINT32 length in bytes of the entire inline info data + +The Data is encoded as: +1. ULEB128 - Address range count (referred to from here out as `NumRanges`). If + `NumRanges` is zero, this is an end of child `InlineInfo` entry and parsing stops. + No top level InlineInfo should have zero ranges as that indicates there is no inline + info. If `NumRanges` is greater than zero, then proceed to step 2 below. +2. `NumRanges` address ranges encoded as 2 ULEB128 values that are the offset of + the range start from the parent base address and the size of the range in + bytes. These address ranges are sorted from lowest range to highest ranges + prior to emitting them. +3. UINT8 - Byte indicating if the current `InlineInfo` has children +4. UINT32 - Name of the inlined function as a 32 bit string table offset +5. ULEB128 - Call file encoded as an index into the File Table +6. ULEB128 - Call line encoded as a 1 based source line number +7. If the `InlineInfo` has children write out each child `InlineInfo` in sequence + using the first start address in the `InlineInfo.Ranges` emitted in step 2 as the + base address for encoding all range offsets. This allows the child address range + offsets to be as small as possible when encoded as ULEB128 values. After all child + `InlineInfo` objects have been encoded, a single ULEB128 zero byte to indicate an + invalid `InlineInfo` with zero ranges must be emitted to terminate the child InlineInfo + chain. 
Index: lib/DebugInfo/GSYM/Range.cpp =================================================================== --- lib/DebugInfo/GSYM/Range.cpp +++ lib/DebugInfo/GSYM/Range.cpp @@ -0,0 +1,47 @@ +//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void llvm::gsym::insert(AddressRanges &Ranges, const AddressRange &Range) { + Ranges.insert(std::upper_bound(Ranges.begin(), Ranges.end(), Range), Range); +} + +bool llvm::gsym::contains(const AddressRanges &Ranges, uint64_t Addr) { + if (Ranges.empty()) + return false; + if (Addr < Ranges.front().Start) + return false; + if (Addr >= Ranges.back().End) + return false; + auto begin = Ranges.begin(); + auto EndPos = Ranges.end(); + auto Pos = std::upper_bound(begin, EndPos, Addr); + if (Pos == EndPos) + return Ranges.back().contains(Addr); + if (Pos != begin) { + --Pos; + return Pos->contains(Addr); + } + return false; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { + return OS << '[' << HEX64(R.startAddress()) << " - " << HEX64(R.endAddress()) + << ")"; +} Index: tools/gsym/CMakeLists.txt =================================================================== --- tools/gsym/CMakeLists.txt +++ tools/gsym/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + DebugInfoDWARF + DebugInfoGSYM + AsmPrinter + AllTargetsDescs + AllTargetsInfos + MC + Object + Support + Target + ) + +add_llvm_tool(llvm-gsymutil + llvm-gsymutil.cpp + + DEPENDS + intrinsics_gen + ) Index: tools/gsym/llvm-gsymutil.cpp 
=================================================================== --- tools/gsym/llvm-gsymutil.cpp +++ tools/gsym/llvm-gsymutil.cpp @@ -0,0 +1,457 @@ +//===-- llvm-gsymutil.cpp - GSYM dumping and creation utility for llvm ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/Breakpad.h" +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" + +using namespace llvm; +using namespace gsym; +using namespace object; + +/// @} +/// Command line options. 
+/// @{ + +namespace { +using namespace cl; + +OptionCategory SpecificOptions("Specific Options"); +OptionCategory DWARFOptions("DWARF Conversion Options"); +OptionCategory BreakpadOptions("Breakpad Conversion Options"); +OptionCategory GSYMOptions("GSYM Options"); + +static opt Help("h", desc("Alias for -help"), Hidden, + cat(SpecificOptions)); +// TODO: fix to inly accept one file as input +static list InputFilenames(Positional, desc(""), + ZeroOrMore, cat(SpecificOptions)); + +static opt + DWARFFilename("dwarf", cl::init(""), + cl::desc("Convert the specified DWARF file to the GSYM " + "format."), + cl::value_desc("filename"), cat(DWARFOptions)); + +static opt + BreakpadFilename("breakpad", cl::init(""), + cl::desc("Convert the specified Breakpad file to the GSYM " + "format."), + cl::value_desc("filename"), cat(BreakpadOptions)); + +static list + ArchFilters("arch", + desc("Process debug information for the specified CPU " + "architecture only. Architectures may be specified by " + "name or by number. This option can be specified " + "multiple times, once for each desired architecture."), + cat(DWARFOptions)); + +static opt + OutputFilename("out-file", cl::init(""), + cl::desc("Save converted output to the specified file. 
" + "Used in conjunction with the -dwarf or -breakpad " + "options."), + cl::value_desc("filename"), cat(SpecificOptions)); +static alias OutputFilenameAlias("o", desc("Alias for -out-file."), + aliasopt(OutputFilename), cat(DWARFOptions)); +static opt + SegmentSize("segment-size", + desc("Split output file into segments of roughly this size"), + cat(DWARFOptions)); + +static opt + NumThreads("threads", + desc("Number of threads to use; defaults is number of cores"), + cat(DWARFOptions)); + +static opt LookupAddress("address", + desc("Lookup an address in a GSYM file"), + cat(GSYMOptions), cl::value_desc("address"), + cl::init(UINT64_MAX)); + +static opt LookupName("name", desc("Lookup a name in a GSYM file"), + cat(GSYMOptions), cl::value_desc("name"), + cl::init("")); + +static opt Verbose("verbose", + desc("Print more low-level encoding details."), + cat(SpecificOptions)); + +static opt Verify("verify", + desc("Verify the generated GSYM file against the DWARF " + "by looking up all addresses."), + cat(SpecificOptions)); + +} // namespace +/// @} +//===----------------------------------------------------------------------===// + +static void error(StringRef Prefix, std::error_code EC) { + if (!EC) + return; + errs() << Prefix << ": " << EC.message() << "\n"; + exit(1); +} + +/// If the input path is a .dSYM bundle (as created by the dsymutil tool), +/// replace it with individual entries for each of the object files inside the +/// bundle otherwise return the input path. +static std::vector expandBundle(const std::string &InputPath) { + std::vector BundlePaths; + SmallString<256> BundlePath(InputPath); + // Manually open up the bundle to avoid introducing additional dependencies. 
+  if (sys::fs::is_directory(BundlePath) &&
+      sys::path::extension(BundlePath) == ".dSYM") {
+    std::error_code EC;
+    sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
+    for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
+         Dir != DirEnd && !EC; Dir.increment(EC)) {
+      const std::string &Path = Dir->path();
+      sys::fs::file_status Status;
+      EC = sys::fs::status(Path, Status);
+      error(Path, EC);
+      switch (Status.type()) {
+      case sys::fs::file_type::regular_file:
+      case sys::fs::file_type::symlink_file:
+      case sys::fs::file_type::type_unknown:
+        BundlePaths.push_back(Path);
+        break;
+      default: /*ignore*/;
+      }
+    }
+    error(BundlePath, EC);
+  }
+  // Not a bundle (or nothing usable inside it): hand back the input path.
+  if (BundlePaths.empty())
+    BundlePaths.push_back(InputPath);
+  return BundlePaths;
+}
+
+/// Cross-check the GSYM file at \p OutFile against the DWARF in \p Obj.
+///
+/// For every function address stored in the GSYM file, each address in the
+/// function's range is looked up in both GSYM and DWARF. Mismatches in inline
+/// frame count, function name, source path or line are written to \p OS as
+/// "error: ..." lines. Nothing is returned; problems are only logged.
+static void verify(ObjectFile &Obj, raw_ostream &OS,
+                   const std::string &OutFile) {
+  OS << "Verifying GSYM file \"" << OutFile << "\":\n";
+  auto DICtx = DWARFContext::create(Obj);
+
+  gsym::GsymReader gsymFile;
+  // Named OpenErr so it does not shadow the file-level error() helper.
+  auto OpenErr = gsymFile.openFile(OutFile);
+  if (OpenErr) {
+    OS << "error: error opening \"" << OutFile << "\"\n";
+    return;
+  }
+
+  auto NumAddrs = gsymFile.getNumAddresses();
+  DILineInfoSpecifier DLIS(
+      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+      DILineInfoSpecifier::FunctionNameKind::LinkageName);
+  LookupResult gsymLookup;
+  std::string gsymFilename;
+  for (uint32_t i = 0; i < NumAddrs; ++i) {
+    auto FuncAddr = gsymFile.getAddress(i);
+    FunctionInfo FuncInfo;
+    if (gsymFile.getFunctionInfo(FuncAddr, FuncInfo)) {
+      for (auto addr = FuncAddr; addr < FuncAddr + FuncInfo.size(); ++addr) {
+        const object::SectionedAddress secaddr(
+            addr, object::SectionedAddress::UndefSection);
+        if (gsymFile.lookup(addr, gsymLookup)) {
+          auto dwarfInlineInfos =
+              DICtx->getInliningInfoForAddress(secaddr, DLIS);
+          uint32_t numDwarfInlineInfos = dwarfInlineInfos.getNumberOfFrames();
+          // NOTE(review): numDwarfInlineInfos is not refreshed after
+          // addFrame(), so the fallback frame is never compared below.
+          // Confirm whether that is intended before changing it.
+          if (numDwarfInlineInfos == 0) {
+            dwarfInlineInfos.addFrame(
+                DICtx->getLineInfoForAddress(secaddr, DLIS));
+          }
+
+          // Check for 1 entry that has no file and line info
+          if (numDwarfInlineInfos == 1 &&
+              dwarfInlineInfos.getFrame(0).FileName == "") {
+            dwarfInlineInfos = DIInliningInfo();
+            numDwarfInlineInfos = 0;
+          }
+          if (numDwarfInlineInfos > 0 &&
+              numDwarfInlineInfos != gsymLookup.Locations.size()) {
+            // Frame counts disagree: dump both stacks, then move on to the
+            // next address without a per-frame comparison.
+            OS << "error: address " << HEX64(addr) << " has "
+               << numDwarfInlineInfos << " DWARF inline frames and GSYM has "
+               << gsymLookup.Locations.size() << "\n";
+            OS << " " << numDwarfInlineInfos << " DWARF frames:\n";
+            for (size_t idx = 0; idx < numDwarfInlineInfos; ++idx) {
+              const auto &dii = dwarfInlineInfos.getFrame(idx);
+              OS << " [" << idx << "]: " << dii.FunctionName << " @ "
+                 << dii.FileName << ':' << dii.Line << '\n';
+            }
+            OS << " " << gsymLookup.Locations.size() << " GSYM frames:\n";
+            for (size_t idx = 0, count = gsymLookup.Locations.size();
+                 idx < count; ++idx) {
+              const auto &gii = gsymLookup.Locations[idx];
+              OS << " [" << idx << "]: " << gii.Name << " @ " << gii.Dir
+                 << '/' << gii.Base << ':' << gii.Line << '\n';
+            }
+            // Dump to the caller's stream so the report stays in one place.
+            FuncInfo.dump(OS, gsymFile);
+            continue;
+          }
+
+          for (size_t idx = 0, count = gsymLookup.Locations.size(); idx < count;
+               ++idx) {
+            const auto &gii = gsymLookup.Locations[idx];
+            if (idx < numDwarfInlineInfos) {
+              const auto &dii = dwarfInlineInfos.getFrame(idx);
+              gsymFilename = gsymLookup.getSourceFile(idx);
+              // Verify function name: the DWARF name must start with the
+              // GSYM name.
+              if (dii.FunctionName.find(gii.Name) != 0)
+                OS << "error: address " << HEX64(addr) << " DWARF function \""
+                   << dii.FunctionName.c_str()
+                   << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
+              // Verify source file path
+              if (dii.FileName != gsymFilename)
+                OS << "error: address " << HEX64(addr) << " DWARF path \""
+                   << dii.FileName.c_str() << "\" doesn't match GSYM path \""
+                   << gsymFilename.c_str() << "\"\n";
+              // Verify source file line
+              if (dii.Line != gii.Line)
+                OS << "error: address " << HEX64(addr) << " DWARF line "
+                   << dii.Line << " != GSYM line " << gii.Line << "\n";
+            }
+          }
+        }
+      }
+    } else {
+      OS << "error: address lookup failed for address[" << i << "] "
+         << HEX64(FuncAddr) << "\n";
+    }
+  }
+}
+
+/// Return the cputype field of the Mach-O header, using the 64-bit or the
+/// 32-bit header as appropriate.
+static uint32_t getCPUType(MachOObjectFile &MachO) {
+  if (MachO.is64Bit())
+    return MachO.getHeader64().cputype;
+  else
+    return MachO.getHeader().cputype;
+}
+
+/// Return true if the object file has not been filtered by an --arch option.
+///
+/// With no --arch options everything passes. Otherwise only Mach-O objects
+/// can match, either by architecture name or by numeric CPU type.
+static bool filterArch(ObjectFile &Obj) {
+  if (ArchFilters.empty())
+    return true;
+
+  if (auto *MachO = dyn_cast<MachOObjectFile>(&Obj)) {
+    std::string ObjArch =
+        Triple::getArchTypeName(MachO->getArchTriple().getArch());
+
+    for (const auto &Arch : ArchFilters) {
+      // Match name.
+      if (Arch == ObjArch)
+        return true;
+
+      // Match architecture number.
+      unsigned Value;
+      if (!StringRef(Arch).getAsInteger(0, Value))
+        if (Value == getCPUType(*MachO))
+          return true;
+    }
+  }
+  return false;
+}
+
+/// Convert the DWARF and symbol table of \p Obj to GSYM and save it to
+/// \p OutFile, optionally verifying the result when -verify is set.
+///
+/// \returns the first error from loading or saving, or a clear error code on
+/// success. \p Filename is currently unused.
+static std::error_code handleDWARFObjectFile(StringRef Filename,
+                                             ObjectFile &Obj,
+                                             const std::string &OutFile) {
+  auto ThreadCount =
+      NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
+  auto &OS = outs();
+  DwarfTransformer Transformer(OS, ThreadCount);
+
+  std::error_code Error;
+  Error = Transformer.loadDwarf(Obj);
+  if (Error)
+    return Error;
+  Error = Transformer.loadSymbolTable(Obj);
+  if (Error)
+    return Error;
+  Transformer.getData().optimize(OS);
+  Error = Transformer.getData().save(OutFile.c_str());
+  if (Error)
+    return Error;
+  if (Verify)
+    verify(Obj, OS, OutFile);
+
+  return Error;
+}
+
+/// Convert the binary held in \p Buffer to GSYM.
+///
+/// A plain object file is written to \p OutFile. For a universal (fat)
+/// binary, each slice that passes filterArch() is written to
+/// "<OutFile>.<arch>"; slices that fail to open are skipped. Exits through
+/// error() if the buffer is not a recognizable binary.
+static std::error_code handleDWARFBuffer(StringRef Filename,
+                                         MemoryBufferRef Buffer,
+                                         const std::string &OutFile) {
+  Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
+  error(Filename, errorToErrorCode(BinOrErr.takeError()));
+
+  std::error_code Error;
+  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
+    Error = handleDWARFObjectFile(Filename, *Obj, OutFile);
+  } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
+    for (auto &ObjForArch : Fat->objects()) {
+      if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
+        auto &Obj = **MachOOrErr;
+        if (filterArch(Obj)) {
+          auto archName = ObjForArch.getArchFlagName();
+          std::string ArchOutFile(OutFile);
+          ArchOutFile.append(1, '.');
+          ArchOutFile.append(archName);
+          // Use outs() like the rest of the tool so progress output stays
+          // ordered with the other messages.
+          outs() << "Handling " << archName << " architecture...\n";
+          Error = handleDWARFObjectFile(Filename, Obj, ArchOutFile);
+          if (Error)
+            return Error;
+        }
+      } else {
+        consumeError(MachOOrErr.takeError());
+      }
+    }
+  }
+  return Error;
+}
+
+/// Read \p Filename (or stdin) into memory and convert it to GSYM at
+/// \p OutFile. Exits through error() if the file cannot be read.
+static std::error_code handleDWARFFile(StringRef Filename,
+                                       const std::string &OutFile) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  error(Filename, BuffOrErr.getError());
+  std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
+  return handleDWARFBuffer(Filename, *Buffer, OutFile);
+}
+
+static std::error_code convertDWARFToGSYM(raw_ostream &OS) {
+  // Expand any .dSYM bundles to the individual object files contained therein.
+  std::string OutFile = OutputFilename;
+  if (OutFile.empty()) {
+    // Default output path: "<input>.gsym".
+    OutFile = DWARFFilename;
+    OutFile += ".gsym";
+  }
+
+  OS << "Converting DWARF from \"" << DWARFFilename << "\" to GSYM in \""
+     << OutFile << "\"\n";
+
+  // expandBundle() yields the bundle's object files, or just the input path.
+  const auto Objects = expandBundle(DWARFFilename);
+
+  // Stop at the first object file that fails to convert.
+  std::error_code Error;
+  for (const auto &Object : Objects) {
+    Error = handleDWARFFile(Object, OutFile);
+    if (Error)
+      return Error;
+  }
+  return Error;
+}
+
+/// Tool entry point.
+///
+/// With -dwarf, converts the given DWARF file to GSYM and exits. With
+/// -breakpad, converts the given Breakpad file. Any positional GSYM paths are
+/// then opened and either looked up by -address, looked up by -name, or
+/// dumped.
+int main(int argc, char const *argv[]) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+
+  const char *Overview =
+      "A tool that displays information in GSYM files, or "
+      "converts other formats to GSYM.\n\nSpecify one or more GSYM "
+      "paths as arguments to dump or lookup information.\nUse the -dwarf "
+      "option to specify a DWARF file to convert to GSYM.\nUse the "
+      "-breakpad option to specify a breakpad file to convert to GSYM.\n";
+  HideUnrelatedOptions(
+      {&SpecificOptions, &DWARFOptions, &BreakpadOptions, &GSYMOptions});
+  cl::ParseCommandLineOptions(argc, argv, Overview);
+
+  if (Help) {
+    PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
+    return 0;
+  }
+
+  raw_ostream &OS = outs();
+
+  if (!DWARFFilename.empty()) {
+    // Convert DWARF to GSYM
+    if (!InputFilenames.empty()) {
+      OS << "error: no input files can be specified when using the -dwarf "
+            "option \n";
+      return 1;
+    }
+    if (std::error_code EC = convertDWARFToGSYM(OS)) {
+      // Report why the conversion failed instead of exiting silently.
+      errs() << "error: " << DWARFFilename << ": " << EC.message() << "\n";
+      return 1;
+    }
+    return EXIT_SUCCESS;
+  } else if (!BreakpadFilename.empty()) {
+    std::string OutFile = OutputFilename;
+    if (OutFile.empty()) {
+      OutFile = BreakpadFilename;
+      OutFile += ".gsym";
+    }
+    error(BreakpadFilename,
+          convertBreakpadFileToGSYM(BreakpadFilename, OutFile));
+  }
+
+  // Dump or access data inside GSYM files
+  for (const auto &GSYMPath : InputFilenames) {
+    gsym::GsymReader gsymFile;
+    auto Error = gsymFile.openFile(GSYMPath);
+    if (Error) {
+      OS << "error: error opening \"" << GSYMPath << "\"\n";
+      return 1;
+    }
+    if (LookupAddress != UINT64_MAX) {
+      // Lookup an address in a GSYM file and print any matches.
+      OS << "Looking up " << HEX64(LookupAddress) << " in GSYM file \""
+         << GSYMPath << "\":\n";
+      LookupResult Result;
+      if (gsymFile.lookup(LookupAddress, Result)) {
+        Result.dump(OS);
+      } else {
+        OS << "error: address not found\n";
+      }
+    } else if (!LookupName.empty()) {
+      // Lookup all address infos with matching names
+      OS << "Looking up \"" << LookupName << "\" in GSYM file \"" << GSYMPath
+         << "\":\n";
+      gsymFile.dumpAddressInfos(OS, LookupName);
+    } else {
+      OS << "Dumping GSYM file \"" << GSYMPath << "\":\n";
+      gsymFile.dump(OS, Verbose);
+    }
+  }
+  return EXIT_SUCCESS;
+}
Index: unittests/DebugInfo/CMakeLists.txt
===================================================================
--- unittests/DebugInfo/CMakeLists.txt
+++ unittests/DebugInfo/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(CodeView)
 add_subdirectory(DWARF)
+add_subdirectory(GSYM)
 add_subdirectory(MSF)
 add_subdirectory(PDB)
Index: unittests/DebugInfo/GSYM/CMakeLists.txt
===================================================================
--- unittests/DebugInfo/GSYM/CMakeLists.txt
+++ unittests/DebugInfo/GSYM/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  AsmPrinter
+  DebugInfoGSYM
+  MC
+  Object
+  ObjectYAML
+  Support
+  )
+
+add_llvm_unittest(DebugInfoGSYMTests
+  GSYMTest.cpp
+  )
+
+target_link_libraries(DebugInfoGSYMTests PRIVATE LLVMTestingSupport)
Index: unittests/DebugInfo/GSYM/GSYMTest.cpp
===================================================================
--- unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -0,0 +1,528 @@
+//===- llvm/unittest/DebugInfo/GSYMTest.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/Breakpad.h"
+#include "llvm/DebugInfo/GSYM/FileTableCreator.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/DebugInfo/GSYM/StringTableCreator.h"
+#include "llvm/Testing/Support/Error.h"
+
+#include "gtest/gtest.h"
+#include
+#include
+
+using namespace llvm;
+using namespace gsym;
+
+TEST(GSYMTest, TestStringTables) {
+  // Verify the llvm::gsym::StringTableCreator and the llvm::gsym::StringTable
+  // works as expected.
+  StringTableCreator StrTabCreator;
+  // Use a local buffer that changes when inserting strings into the string
+  // table to ensure "StrTabCreator" keeps its own copy of each string rather
+  // than pointing into the caller's buffer.
+  char Str[32];
+
+  auto HelloIndex = StrTabCreator.insert(strcpy(Str, "hello"));
+  auto WorldIndex = StrTabCreator.insert(strcpy(Str, "world"));
+  // Verify that if we add another hello from a different string we get the
+  // same index
+  char HelloCStr[] = "hello";
+  auto HelloIndex2 = StrTabCreator.insert(HelloCStr);
+  EXPECT_EQ(HelloIndex2, HelloIndex);
+  // Make sure a common substring gets its own string table entry
+  auto HellIndex = StrTabCreator.insert(strcpy(Str, "hell"));
+  EXPECT_NE(HellIndex, HelloIndex);
+
+  // Verify the operator[] works in the StringTableCreator class
+  EXPECT_EQ(StrTabCreator[0], StringRef());
+  EXPECT_EQ(StrTabCreator[HelloIndex], StringRef("hello"));
+  EXPECT_EQ(StrTabCreator[WorldIndex], StringRef("world"));
+  EXPECT_EQ(StrTabCreator[HellIndex], StringRef("hell"));
+  // Verify the operator[] returns an empty string for an index that does not
+  // start a string
+  EXPECT_EQ(StrTabCreator[HelloIndex + 1], StringRef());
+  // Now write the string table out to a buffer so we can load it back in
+  // and do tests with the read only version
+  std::stringstream OutStrm;
+  FileWriter OutWriter(OutStrm);
+  StrTabCreator.write(OutWriter);
+  std::string OutData(OutStrm.str());
+  // Now use the string table we just created by decoding it and using it
+  // with the StringTable class. StringTable is the class we used to decode the
+  // string table when we read a GSYM file from a file on disk.
+  StringTable StrTabReader(OutData);
+  EXPECT_EQ(StrTabReader[0], StringRef());
+  EXPECT_EQ(StrTabReader[HelloIndex], StringRef("hello"));
+  EXPECT_EQ(StrTabReader[WorldIndex], StringRef("world"));
+  EXPECT_EQ(StrTabReader[HellIndex], StringRef("hell"));
+  // Verify that an offset into the string table that doesn't sit at the
+  // actual start of a string still returns a partial string
+  EXPECT_EQ(StrTabReader[HellIndex + 1], StringRef("ell"));
+  // Verify the first invalid offset in the string table just past the end
+  // returns an empty string
+  EXPECT_EQ(StrTabReader[OutData.size()], StringRef());
+  // Test that we can find a string within StringTable instances. This is used
+  // to do name lookups within a GSYM file.
+  EXPECT_EQ(StrTabReader.find(""), 0U);
+  EXPECT_EQ(StrTabReader.find("hello"), HelloIndex);
+  EXPECT_EQ(StrTabReader.find("world"), WorldIndex);
+  EXPECT_EQ(StrTabReader.find("hell"), HellIndex);
+  // Verify we can find a partial string as long as it is NULL terminated
+  EXPECT_EQ(StrTabReader.find("ell"), HellIndex + 1);
+  // Verify that searching for a string that is not in the table returns
+  // UINT32_MAX
+  EXPECT_EQ(StrTabReader.find("carp"), UINT32_MAX);
+}
+
+// Serialize the contents of a GsymCreator into an in-memory string.
+static std::string createGSYM(const GsymCreator &Gsym) {
+  std::stringstream OutStrm;
+  Gsym.save(OutStrm);
+  return OutStrm.str();
+}
+
+// 16-byte UUID assigned to the GsymCreator in the tests below.
+static const uint8_t TestUUID[] = {0, 1, 2, 3, 4, 5, 6, 7,
+                                   8, 9, 10, 11, 12, 13, 14, 15};
+
+// Check that the header's UUID has the same size and bytes as TestUUID.
+static void verifyUUID(const Header *Hdr) {
+  EXPECT_EQ(Hdr->UUIDSize, (uint8_t)sizeof(TestUUID));
+  if (Hdr->UUIDSize == sizeof(TestUUID)) {
+    for (size_t I = 0; I < Hdr->UUIDSize; ++I) {
+      EXPECT_EQ(TestUUID[I], Hdr->UUID[I]);
+    }
+  }
+}
+// Serialize GC, load the bytes into Gsym and check that the reader sees the
+// same UUID, function count, base address, function infos and the smallest
+// address offset size that fits. Optionally dumps the reader to outs() and
+// saves GC to SavePath.
+static void createAndVerifyGsym(const GsymCreator &GC, GsymReader &Gsym,
+                                bool Dump = false,
+                                StringRef SavePath = StringRef()) {
+
+  Gsym.copyBuffer(createGSYM(GC));
+
+  if (Dump)
+    Gsym.dump(outs(), true);
+
+  if (!SavePath.empty())
+    GC.save(SavePath);
+
+  auto Hdr = Gsym.getHeader();
+  ASSERT_TRUE(Hdr != nullptr);
+  // Verify UUID size and bytes
+  verifyUUID(Hdr);
+
+  // Make sure we have the same number of functions in both the creator and
+  // the reader
+  size_t NumFuncs = GC.getFunctionInfoSize();
+  EXPECT_EQ(NumFuncs, Gsym.getNumAddresses());
+  bool First = true;
+  uint64_t LoAddress = 0;
+  uint64_t HiAddress = 0;
+
+  GC.forEachFunctionInfo([&](const FunctionInfo &GCFuncInfo) -> bool {
+    // Make sure we can fetch a matching function info from the GsymReader for
+    // each function info in the GsymCreator.
+    FunctionInfo FuncInfo;
+    const uint64_t OrigStartAddr = GCFuncInfo.startAddress();
+    EXPECT_EQ(Gsym.getFunctionInfo(OrigStartAddr, FuncInfo), true);
+    if (First) {
+      // Verify that our header base address is the same as the first function
+      // info address. Since all address offsets are in increasing order we
+      // know that the first one is the base address.
+      First = false;
+      EXPECT_EQ(Hdr->BaseAddress, OrigStartAddr);
+      LoAddress = OrigStartAddr;
+    }
+    HiAddress = OrigStartAddr;
+    EXPECT_EQ(FuncInfo, GCFuncInfo);
+    return true; // Keep iterating
+  });
+  // Make sure the address offset size is efficiently encoded: it must be the
+  // smallest of 1, 2, 4 or 8 bytes that can hold the largest offset.
+  const uint64_t MaxOffset = HiAddress - LoAddress;
+  if (MaxOffset <= UINT8_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 1u);
+  else if (MaxOffset <= UINT16_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 2u);
+  else if (MaxOffset <= UINT32_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 4u);
+  else
+    EXPECT_EQ(Hdr->AddrOffSize, 8u);
+}
+
+TEST(GSYMTest, TestGsymCreatorSingleFunction) {
+  // Test creating a GSYM file with a single function and verify
+  // we can create a valid GSYM
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, GC.insertString("main")));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreator8BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 1 byte in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1010, 0x20, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x1030, 0x50, GC.insertString("bar")));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreator16BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 2 bytes in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1100, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x1300, 0x500, GC.insertString("bar")));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreator32BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 4 bytes in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x10000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x20000, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x40000, 0x500, GC.insertString("bar")));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreator64BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 8 bytes in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x100000000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x200000000, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x400000000, 0x500, GC.insertString("bar")));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreatorLineTables) {
+  // Test creating a GSYM file with a function that has a line table. Verify
+  // the line table is able to be encoded by GsymCreator by encoding the data
+  // and decoding it with GsymReader and making sure all FunctionInfo
+  // objects are the same.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  FunctionInfo FuncInfo(0x1000, 0x100, GC.insertString("main"));
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h");
+  FuncInfo.Lines.emplace_back(LineEntry(0x1000, MainFileIndex, 5));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1010, MainFileIndex, 6));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1020, FooFileIndex, 23));
+  GC.addFunctionInfo(std::move(FuncInfo));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreatorInlineInfo) {
+  // Test creating a GSYM file with a function that has inline information.
+  // Verify the inline info is able to be encoded by GsymCreator by encoding
+  // the data and decoding it with GsymReader and making sure all FunctionInfo
+  // objects are the same.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  FunctionInfo FuncInfo(0x1000, 0x100, GC.insertString("main"));
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h");
+  // Line table: main.c:5 at 0x1000, foo.h lines 23/25/24 from 0x1010, and
+  // back to main.c:8 at 0x1020.
+  FuncInfo.Lines.emplace_back(LineEntry(0x1000, MainFileIndex, 5));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1010, FooFileIndex, 23));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1012, FooFileIndex, 25));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1018, FooFileIndex, 24));
+  FuncInfo.Lines.emplace_back(LineEntry(0x1020, MainFileIndex, 8));
+  // Top-level inline entry "inline1": call site main.c:6, range
+  // 0x1010-0x1020.
+  FuncInfo.Inline.Name = GC.insertString("inline1");
+  FuncInfo.Inline.CallFile = MainFileIndex;
+  FuncInfo.Inline.CallLine = 6;
+  FuncInfo.Inline.Ranges.emplace_back(AddressRange(0x1010, 0x1020));
+  // Child entry "inline2" nested inside "inline1": call site foo.h:33, range
+  // 0x1012-0x1018.
+  InlineInfo Inline;
+  Inline.Name = GC.insertString("inline2");
+  Inline.CallFile = FooFileIndex;
+  Inline.CallLine = 33;
+  Inline.Ranges.emplace_back(AddressRange(0x1012, 0x1018));
+  FuncInfo.Inline.Children.emplace_back(Inline);
+  GC.addFunctionInfo(std::move(FuncInfo));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+  LookupResult LR;
+  // Verify inline info is correct when doing lookups.
+  // 0x1000: before any inlined range, so only "main" at main.c:5.
+  EXPECT_EQ(Gsym.lookup(0x1000, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 1u);
+  EXPECT_EQ(LR.Locations[0].Name, "main");
+  EXPECT_EQ(LR.Locations[0].Line, 5u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "main.c");
+
+  // 0x100F: last address before "inline1" starts; still only "main".
+  EXPECT_EQ(Gsym.lookup(0x100F, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 1u);
+  EXPECT_EQ(LR.Locations[0].Name, "main");
+  EXPECT_EQ(LR.Locations[0].Line, 5u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "main.c");
+
+  // 0x1010: inside "inline1" only; innermost frame first, then the caller
+  // at the call site line.
+  EXPECT_EQ(Gsym.lookup(0x1010, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 2u);
+  EXPECT_EQ(LR.Locations[0].Name, "inline1");
+  EXPECT_EQ(LR.Locations[0].Line, 23u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR.Locations[1].Name, "main");
+  EXPECT_EQ(LR.Locations[1].Line, 6u);
+  EXPECT_EQ(LR.Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[1].Base, "main.c");
+
+  // 0x1012: inside "inline2", which is nested in "inline1"; three frames.
+  EXPECT_EQ(Gsym.lookup(0x1012, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 3u);
+  EXPECT_EQ(LR.Locations[0].Name, "inline2");
+  EXPECT_EQ(LR.Locations[0].Line, 25u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR.Locations[1].Name, "inline1");
+  EXPECT_EQ(LR.Locations[1].Line, 33u);
+  EXPECT_EQ(LR.Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[1].Base, "foo.h");
+  EXPECT_EQ(LR.Locations[2].Name, "main");
+  EXPECT_EQ(LR.Locations[2].Line, 6u);
+  EXPECT_EQ(LR.Locations[2].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[2].Base, "main.c");
+
+  // 0x1018: past "inline2" but still inside "inline1"; two frames.
+  EXPECT_EQ(Gsym.lookup(0x1018, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 2u);
+  EXPECT_EQ(LR.Locations[0].Name, "inline1");
+  EXPECT_EQ(LR.Locations[0].Line, 24u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR.Locations[1].Name, "main");
+  EXPECT_EQ(LR.Locations[1].Line, 6u);
+  EXPECT_EQ(LR.Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[1].Base, "main.c");
+
+  // 0x1020: past the inlined ranges; back to "main" alone at main.c:8.
+  EXPECT_EQ(Gsym.lookup(0x1020, LR), true);
+  EXPECT_EQ(LR.Locations.size(), 1u);
+  EXPECT_EQ(LR.Locations[0].Name, "main");
+  EXPECT_EQ(LR.Locations[0].Line, 8u);
+  EXPECT_EQ(LR.Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR.Locations[0].Base, "main.c");
+}
+
+TEST(GSYMTest, TestGsymCreatorLookups) {
+  // Test lookups of various information in a GSYM file
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1020, 0x20, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x1050, 0x20, GC.insertString("symbol")));
+  // Make a symbol with no size. This is what might happen if we make a
+  // function info from a symbol table symbol that has no size (mach-o)
+  GC.addFunctionInfo(FunctionInfo(0x1070, 0, GC.insertString("nosize")));
+  GsymReader Gsym;
+  FunctionInfo FuncInfo;
+  createAndVerifyGsym(GC, Gsym);
+
+  // Make sure lookups fail for addresses before the first address
+  EXPECT_EQ(Gsym.getFunctionInfo(0, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000 - 1, FuncInfo), false);
+  // Make sure lookups succeed for the first and last address of "main"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1000u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1010u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "main");
+
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000 + 0x10 - 1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1000u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1010u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "main");
+  // Make sure lookups fail for addresses between the end of "main" and start
+  // of "foo"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1010, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020 - 1, FuncInfo), false);
+
+  // Make sure lookups succeed for the first and last address of "foo"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1020u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1040u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo");
+
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020u + 0x20 - 1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1020u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1040u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo");
+
+  // Make sure lookups fail for addresses between the end of "foo" and start
+  // of "symbol"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1040, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1050 - 1, FuncInfo), false);
+
+  // "symbol" was added at 0x1050 with a size of 0x20, so it should end at
+  // 0x1070 and be found for both its first and its last address.
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1050, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1050u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "symbol");
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1070 - 1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1050u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "symbol");
+
+  // The last symbol will match any address that is >= to the last symbol if
+  // that symbol has a byte size of zero.
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1070, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize");
+  EXPECT_EQ(Gsym.getFunctionInfo(UINT64_MAX, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize");
+}
+
+TEST(GSYMTest, TestGsymCreatorDuplicateFunctionInfo) {
+  // Add the same function info twice and verify optimize() warns about the
+  // duplicate and prunes one of the two entries.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  std::string S;
+  raw_string_ostream OS(S);
+  // Optimize the information in the GC
+  GC.optimize(OS);
+  OS.flush();
+  EXPECT_NE(
+      std::string::npos,
+      S.find("warning: duplicate function info entries, removing duplicate:"));
+  EXPECT_NE(std::string::npos,
+            S.find("Pruned 1 functions, ended with 1 total"));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreatorOverlappingFunctions) {
+  // Add two functions whose ranges overlap ("foo" starts inside "main") and
+  // verify optimize() warns about the overlap but keeps both entries.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1008, 0x10, GC.insertString("foo")));
+  std::string S;
+  raw_string_ostream OS(S);
+  // Optimize the information in the GC
+  GC.optimize(OS);
+  OS.flush();
+  EXPECT_NE(std::string::npos, S.find("warning: function ranges overlap:"));
+  EXPECT_NE(std::string::npos,
+            S.find("Pruned 0 functions, ended with 2 total"));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreatorSameRangeOneWithDebugInfo) {
+  // Add two "main" entries for the same range, only one of which has a line
+  // table, and verify optimize() prunes one of them.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  FunctionInfo MainFI(0x1000, 0x10, GC.insertString("main"));
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  MainFI.Lines.emplace_back(LineEntry(0x1000, MainFileIndex, 5));
+  GC.addFunctionInfo(std::move(MainFI));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  std::string S;
+  raw_string_ostream OS(S);
+  // Optimize the information in the GC
+  GC.optimize(OS);
+  OS.flush();
+  EXPECT_NE(std::string::npos,
+            S.find("Pruned 1 functions, ended with 1 total"));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestGsymCreatorSameRangeDifferentDebugInfo) {
+  // Add two differently named functions with line tables for the same range
+  // and verify optimize() warns about the conflict and prunes one of them.
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  FunctionInfo MainFI(0x1000, 0x10, GC.insertString("main"));
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  MainFI.Lines.emplace_back(LineEntry(0x1000, MainFileIndex, 5));
+  GC.addFunctionInfo(std::move(MainFI));
+  FunctionInfo FooFI(0x1000, 0x10, GC.insertString("foo"));
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.c");
+  FooFI.Lines.emplace_back(LineEntry(0x1000, FooFileIndex, 5));
+  GC.addFunctionInfo(std::move(FooFI));
+  std::string S;
+  raw_string_ostream OS(S);
+  // Optimize the information in the GC
+  GC.optimize(OS);
+  OS.flush();
+  EXPECT_NE(
+      std::string::npos,
+      S.find("warning: same address range contains different debug info."));
+  EXPECT_NE(std::string::npos,
+            S.find("Pruned 1 functions, ended with 1 total"));
+  GsymReader Gsym;
+  createAndVerifyGsym(GC, Gsym);
+}
+
+TEST(GSYMTest, TestBreakpadToGSYM) {
+  // Test converting a breakpad file to GSYM
+  static StringRef BreakpadSource(
+      R"(MODULE mac x86_64 000102030405060708090A0B0C0D0E0F dump_syms
+FILE 0 /tmp/foo.c
+FILE 1 /tmp/foo.h
+FILE 2 /tmp/bar.c
+FUNC 100 50 0 foo(int, int)
+100 10 5 0
+110 25 6 0
+120 30 23 1
+FUNC 200 50 0 a::bar(int)
+200 20 5 2
+220 25 6 2
+PUBLIC 300 0 baz
+PUBLIC 350 0 ding
+)");
+  GsymCreator GC;
+  std::error_code Error = convertBreakpadDataToGSYM(BreakpadSource, GC);
+  ASSERT_TRUE(!Error);
+  GsymReader Gsym;
+  Gsym.copyBuffer(createGSYM(GC));
+  // Gsym.dump(outs(), true);
+  auto Hdr = Gsym.getHeader();
+  ASSERT_TRUE(Hdr != nullptr);
+  verifyUUID(Hdr);
+  FunctionInfo FuncInfo;
+  // Nothing is defined below 0x100, so this lookup must fail
+  EXPECT_EQ(Gsym.getFunctionInfo(0xff, FuncInfo), false);
+  // Verify we can find our "foo(int, int)" function
+  EXPECT_EQ(Gsym.getFunctionInfo(0x100, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x100u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x150u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo(int, int)");
+  // Verify its three line entries; each expected file index is one greater
+  // than the file number used in the Breakpad text above.
+  ASSERT_TRUE(FuncInfo.Lines.size() == 3u);
+  EXPECT_EQ(FuncInfo.Lines[0].Addr, 0x100u);
+  EXPECT_EQ(FuncInfo.Lines[0].File, 1u);
+  EXPECT_EQ(FuncInfo.Lines[0].Line, 5u);
+
+  EXPECT_EQ(FuncInfo.Lines[1].Addr, 0x110u);
+  EXPECT_EQ(FuncInfo.Lines[1].File, 1u);
+  EXPECT_EQ(FuncInfo.Lines[1].Line, 6u);
+
+  EXPECT_EQ(FuncInfo.Lines[2].Addr, 0x120u);
+  EXPECT_EQ(FuncInfo.Lines[2].File, 2u);
+  EXPECT_EQ(FuncInfo.Lines[2].Line, 23u);
+
+  // Verify we can find our "baz" public symbol
+  EXPECT_EQ(Gsym.getFunctionInfo(0x300, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x300u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "baz");
+  // Verify we can find our "ding" public symbol
+  EXPECT_EQ(Gsym.getFunctionInfo(0x350, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x350u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "ding");
+}