Index: include/llvm/DebugInfo/GSYM/Breakpad.h
===================================================================
--- include/llvm/DebugInfo/GSYM/Breakpad.h
+++ include/llvm/DebugInfo/GSYM/Breakpad.h
@@ -0,0 +1,29 @@
+//===- Breakpad.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_BREAKPAD_H
+#define LLVM_DEBUGINFO_GSYM_BREAKPAD_H
+
+// NOTE(review): the header name on the bare #include below is missing from
+// this copy of the patch; std::error_code needs <system_error>.
+#include
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+namespace gsym {
+class GsymCreator;
+
+/// Converts the Breakpad symbol file named by \p BreakpadPath into a GSYM
+/// file named by \p GSYMPath.
+/// NOTE(review): described from the declaration only; the error codes
+/// returned are defined by the implementation, which is not visible here.
+std::error_code
+convertBreakpadFileToGSYM(StringRef BreakpadPath, StringRef GSYMPath);
+
+/// Converts Breakpad text held in \p Lines and feeds the result into
+/// \p Gsym.
+/// NOTE(review): presumably \p Lines is the full contents of a Breakpad
+/// symbol file; confirm against the implementation.
+std::error_code
+convertBreakpadDataToGSYM(StringRef Lines, GsymCreator &Gsym);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_BREAKPAD_H
Index: include/llvm/DebugInfo/GSYM/DwarfTransformer.h
===================================================================
--- include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -0,0 +1,75 @@
+//===- DwarfTransformer.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H +#define LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H + +#include +#include + +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +namespace llvm { +namespace gsym { + +struct CUInfo; + +class DwarfTransformer { +public: + DwarfTransformer(raw_ostream &OS, uint32_t N = 0) : Log(OS), NumThreads(N) { + if (NumThreads == 0) + NumThreads = std::max(std::thread::hardware_concurrency(), 1); + } + + std::error_code loadDwarf(const object::ObjectFile &Obj); + std::error_code loadDwarf(StringRef filename) { + if (auto binary = getObjectFile(filename)) { + return loadDwarf(*binary.getValue().getBinary()); + } + return std::make_error_code(std::errc::invalid_argument); +; + } + + std::error_code loadSymbolTable(const object::ObjectFile &Obj); + std::error_code loadSymbolTable(StringRef filename) { + if (auto binary = getObjectFile(filename)) { + return loadSymbolTable(*binary.getValue().getBinary()); + } + return std::make_error_code(std::errc::invalid_argument); + } + + GsymCreator &getData() { return Gsym; } + +private: + void handleDie(raw_ostream &Strm, CUInfo &CUI, DWARFDie Die); + + bool parseLineTable(raw_ostream &Strm, CUInfo &CUI, DWARFDie Die, + FunctionInfo &FI); + + Optional> + getObjectFile(StringRef filename) const; + + void initDataFromObj(const object::ObjectFile &Obj); + + bool Initialized = false; + GsymCreator Gsym; + raw_ostream &Log; + uint32_t NumThreads; + + friend class DwarfTransformerTest; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H Index: include/llvm/DebugInfo/GSYM/FileEntry.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileEntry.h +++ include/llvm/DebugInfo/GSYM/FileEntry.h @@ -0,0 
+1,55 @@
+//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+
+#include <functional>
+#include <stdint.h>
+#include <utility>
+
+#include "llvm/ADT/DenseMapInfo.h"
+
+namespace llvm {
+namespace gsym {
+
+/// A file reference stored as two string table offsets.
+struct FileEntry {
+  uint32_t Dir = 0;  // String table offset (presumably the directory part)
+  uint32_t Base = 0; // String table offset (presumably the file basename)
+
+  FileEntry() = default;
+  FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {}
+
+  // implement operator== so that FileEntry can be used as key in
+  // unordered containers
+  bool operator==(const FileEntry &other) const {
+    return Dir == other.Dir && Base == other.Base;
+  }
+};
+
+
+} // namespace gsym
+
+/// Lets gsym::FileEntry be used as a DenseMap key. The empty and tombstone
+/// keys use offsets near UINT32_MAX, so those two values must never be
+/// inserted as real entries.
+template<> struct DenseMapInfo<gsym::FileEntry> {
+  static inline gsym::FileEntry getEmptyKey() {
+    return gsym::FileEntry(UINT32_MAX, UINT32_MAX);
+  }
+  static inline gsym::FileEntry getTombstoneKey() {
+    return gsym::FileEntry(UINT32_MAX-1, UINT32_MAX-1);
+  }
+  static unsigned getHashValue(const gsym::FileEntry& Val) {
+    // Hash the two offsets as an ordered pair. The previous
+    // `Dir * 37U + Base * 37U` equals 37 * (Dir + Base), so every pair of
+    // offsets with the same sum landed in the same bucket.
+    return DenseMapInfo<std::pair<uint32_t, uint32_t>>::getHashValue(
+        std::make_pair(Val.Dir, Val.Base));
+  }
+  static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
Index: include/llvm/DebugInfo/GSYM/FileTableCreator.h
===================================================================
--- include/llvm/DebugInfo/GSYM/FileTableCreator.h
+++ include/llvm/DebugInfo/GSYM/FileTableCreator.h
@@ -0,0 +1,50 @@
+//===- FileTableCreator.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H
+#define LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H
+
+// NOTE(review): header names on the bare #include lines and the template
+// arguments of the containers below are missing from this copy of the patch.
+#include
+#include
+#include
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/StringTableCreator.h"
+
+namespace llvm {
+namespace gsym {
+class FileWriter;
+
+/// Builds a table of FileEntry records that share an external string table.
+/// Entries are addressed by index through operator[].
+class FileTableCreator {
+  //std::unordered_map EntryToIndex;
+  DenseMap EntryToIndex;
+  std::vector FileEntries;
+  llvm::gsym::StringTableCreator& StringTable;
+
+public:
+  /// Binds the table to \p StrTab (held by reference, so it must outlive
+  /// this object) and inserts the FileEntry(0, 0) entry first.
+  explicit FileTableCreator(StringTableCreator &StrTab)
+      : StringTable(StrTab) {
+    insert(FileEntry(0, 0));
+  }
+
+  /// Unchecked access to the entry at \p idx.
+  const gsym::FileEntry &operator[](size_t idx) const {
+    return FileEntries[idx];
+  }
+
+  const StringTableCreator &getStringTable() const { return StringTable; }
+
+  // NOTE(review): both overloads presumably return the index of the entry;
+  // confirm against the implementation file.
+  uint32_t insert(StringRef S);
+  uint32_t insert(FileEntry S);
+  void write(FileWriter &Out) const;
+};
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILETABLECREATOR_H
Index: include/llvm/DebugInfo/GSYM/FileWriter.h
===================================================================
--- include/llvm/DebugInfo/GSYM/FileWriter.h
+++ include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -0,0 +1,46 @@
+//===- FileWriter.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H +#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H + +#include +#include +#include +#include + +namespace llvm { +namespace gsym { + +class FileWriter { + std::ostream &OS; + +public: + FileWriter(std::ostream &stream) : OS(stream) {} + ~FileWriter(); + bool WriteSLEB(int64_t value); + bool WriteULEB(uint64_t value); + bool WriteU8(uint8_t value); + bool WriteU32(uint32_t value); + bool Fixup32(uint32_t value, off_t offset); + bool WriteUnsigned(uint64_t value, size_t n); + bool Write(const void *src, size_t src_len); + bool AlignTo(size_t align); + off_t Seek(off_t pos); + off_t Tell(); + +private: + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H Index: include/llvm/DebugInfo/GSYM/FunctionInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -0,0 +1,99 @@ +//===- FunctionInfo.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H +#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H + +#include +#include + +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" + +namespace llvm { +class raw_ostream; +namespace gsym { +class GsymReader; + +struct FunctionInfo { + AddressRange Range; + uint32_t Name; + std::vector Lines; + InlineInfo InlineInfo; + + FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) + : Range(Addr, Addr + Size), Name(N) {} + + bool hasRichInfo() const { + // Returns whether we have something else than range and name + return !Lines.empty() || InlineInfo.isValid(); + } + + bool isValid() const { + // Address and size can be zero and there can be no line entries for a + // symbol so the only indication this entry is valid is if the name is + // not zero. 
+ return Name != 0; + } + + uint64_t startAddress() const { + return Range.startAddress(); + } + uint64_t endAddress() const { + return Range.endAddress(); + } + uint64_t size() const { + return Range.size(); + } + void setStartAddress(uint64_t Addr) { + Range.setStartAddress(Addr); + } + void setEndAddress(uint64_t Addr) { + Range.setEndAddress(Addr); + } + void setSize(uint64_t Size) { + Range.setSize(Size); + } + + void clear() { + Range.clear(); + Name = 0; + Lines.clear(); + InlineInfo.clear(); + } + + void dump(llvm::raw_ostream &OS, GsymReader &GSYM) const; +}; + +inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return LHS.Range == RHS.Range && LHS.Name == RHS.Name && + LHS.Lines == RHS.Lines && LHS.InlineInfo == RHS.InlineInfo; +} +inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return !(LHS == RHS); +} +inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { + // First sort by address + if (LHS.Range.Start != RHS.Range.Start) + return LHS.Range.Start < RHS.Range.Start; + + // If address is the same, functions with more info go first + if (LHS.hasRichInfo() ^ RHS.hasRichInfo()) + return LHS.hasRichInfo(); + + // Lastly, longer functions go first + return LHS.Range.size() > RHS.Range.size(); +} + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H Index: include/llvm/DebugInfo/GSYM/GsymCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymCreator.h +++ include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,80 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include +#include + +#include "FileTableCreator.h" +#include "FunctionInfo.h" +#include "InlineInfo.h" +#include "Range.h" +#include "StringTableCreator.h" + +namespace llvm { +class AsmPrinter; + +namespace gsym { + +class GsymCreator { + // Private member variables require Mutex protections + mutable std::mutex Mutex; + std::vector Funcs; + StringTableCreator StrTab; + FileTableCreator FileTab; +public: + // Public member variables don't require Mutex protections + AddressRanges TextRanges; + std::vector UUID; + + GsymCreator() : FileTab(StrTab) {} + + std::error_code save(std::ostream &stream) const; + std::error_code save(StringRef path) const; + uint32_t insertString(StringRef S) { + std::lock_guard guard(Mutex); + return StrTab.insert(S.str()); + } + uint32_t insertFile(StringRef Path) { + std::lock_guard guard(Mutex); + return FileTab.insert(Path); + } + size_t getFunctionInfoSize() const { + std::lock_guard guard(Mutex); + return Funcs.size(); + } + void addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard guard(Mutex); + Funcs.emplace_back(FI); + } + void forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard guard(Mutex); + for (const auto &FuncInfo: Funcs) { + if (!Callback(FuncInfo)) + break; + } + } + void optimize(llvm::raw_ostream &OS); + void sortFunctionInfos() { + std::lock_guard guard(Mutex); + std::sort(Funcs.begin(), Funcs.end()); + } +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H Index: include/llvm/DebugInfo/GSYM/GsymReader.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymReader.h +++ include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,131 @@ +//===- GsymReader.h 
---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
+#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
+
+// NOTE(review): header names on the bare #include lines and the template
+// arguments of the unique_ptr/ArrayRef members below are missing from this
+// copy of the patch.
+#include
+#include
+#include
+#include
+#include
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+
+
+namespace llvm {
+class MemoryBuffer;
+class raw_ostream;
+
+namespace gsym {
+
+constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
+constexpr uint32_t GSYM_VERSION = 1;
+constexpr size_t GSYM_MAX_UUID_SIZE = 20;
+class FileTableCreator;
+struct LookupResult;
+class StringTableCreator;
+class GSYMInfo;
+
+// Tags for the per-address info records; EndOfList (0) terminates a list.
+enum class InfoType { EndOfList = 0u, LineTableInfo = 1u, InlineInfo = 2u };
+
+/// Fixed-size header of a GSYM file. GsymReader exposes it as a pointer
+/// (getHeader()), so the field order and sizes here are the layout that gets
+/// read. NOTE(review): Version is 16 bits while GSYM_VERSION is declared as
+/// uint32_t; confirm that is intended.
+struct Header {
+  uint32_t Magic;
+  uint16_t Version;
+  uint8_t AddrOffSize; // Size of addr_off_t
+  uint8_t UUIDSize;
+  uint64_t BaseAddress;
+  uint32_t NumAddresses;
+  uint32_t StrtabOffset;
+  uint32_t StrtabSize;
+  uint8_t UUID[GSYM_MAX_UUID_SIZE];
+  static size_t getByteSize() { return sizeof(Header); }
+  std::string getError() const;
+  void dump(llvm::raw_ostream &OS) const;
+};
+
+/// Reads a GSYM file, either opened by name (openFile) or supplied as bytes
+/// (init), and answers address lookups against it.
+class GsymReader {
+public:
+  GsymReader();
+  ~GsymReader();
+
+  std::error_code openFile(StringRef Filename);
+  void init(StringRef Data);
+
+  /// May be null: GSYMHeader starts out as nullptr.
+  const Header *getHeader() const { return GSYMHeader; }
+  void dump(llvm::raw_ostream &OS, bool Verbose) const;
+  // Dump any address info with matching name
+  bool dumpAddressInfos(llvm::raw_ostream &OS, StringRef Name) const;
+  bool lookup(uint64_t addr, LookupResult &LR) const;
+  bool getFunctionInfo(uint64_t addr, FunctionInfo &FuncInfo) const;
+  // Accessor functions that allow iteration across all addresses in the GSYM
+  // file.
+  size_t getNumAddresses() const;
+  uint64_t getAddress(size_t Index) const;
+  /// Returns the file entry at \p Index, or a default-constructed FileEntry
+  /// when \p Index is out of range.
+  FileEntry getFile(uint32_t Index) const {
+    if (Index < Files.size())
+      return Files[Index];
+    return FileEntry();
+  }
+  /// Returns the string stored at \p Offset in the string table.
+  StringRef getString(uint32_t Offset) const {
+    return StrTab[Offset];
+  }
+
+protected:
+  void unmap();
+  struct AddressInfo {
+    uint32_t Size;
+    uint32_t Name;
+  };
+  /// Result of findAddressInfo(). AddrInfoIndex == SIZE_MAX and a null
+  /// AddrInfo are the values set by the constructor and by clear().
+  struct LookupInfo {
+    uint64_t MatchAddrOffset;
+    size_t AddrInfoIndex;
+    const AddressInfo *AddrInfo;
+    LookupInfo()
+        : MatchAddrOffset(0), AddrInfoIndex(SIZE_MAX), AddrInfo(nullptr) {}
+    void clear() {
+      MatchAddrOffset = 0;
+      AddrInfoIndex = SIZE_MAX;
+      AddrInfo = nullptr;
+    }
+  };
+
+  bool findAddressInfo(uint64_t addr, LookupInfo &lookup_info) const;
+  void dumpAddressInfo(llvm::raw_ostream &OS, size_t AddrInfoIndex) const;
+  void dumpFileTable(llvm::raw_ostream &OS) const;
+  uint64_t getAddressOffset(size_t idx) const;
+  uint64_t getAddressInfoOffset(size_t idx) const;
+  DataExtractor getAddressInfoPayload(size_t idx) const;
+  void dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr,
+                     DataExtractor &line_data, uint32_t Depth) const;
+
+  static const char *getInfoTypeAsString(InfoType info_type);
+
+
+  std::unique_ptr MemBuffer;
+  StringRef GsymBytes;
+  const Header *GSYMHeader = nullptr;
+  ArrayRef AddrOffsets;
+  ArrayRef AddrInfoOffsets;
+  ArrayRef Files;
+  StringTable StrTab;
+  std::string ErrorStr;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
Index: include/llvm/DebugInfo/GSYM/GsymStreamer.h
===================================================================
--- include/llvm/DebugInfo/GSYM/GsymStreamer.h
+++ include/llvm/DebugInfo/GSYM/GsymStreamer.h
@@ -0,0 +1,70 @@
+//===- GSYMStreamer.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler
Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE(review): this guard names a tools/gsymutil path while the header
+// lives under include/llvm/DebugInfo/GSYM; confirm which is intended.
+#ifndef LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H
+#define LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+namespace llvm {
+namespace gsym {
+
+/// The Dwarf streaming logic.
+///
+/// All interactions with the MC layer that is used to build the debug
+/// information binary representation are handled in this class.
+class GSYMStreamer {
+public:
+  GSYMStreamer(raw_fd_ostream &OutFile)
+      : OutFile(OutFile) {}
+
+  bool init(Triple TheTriple);
+
+  /// Dereferences Asm unconditionally.
+  /// NOTE(review): presumably only valid after init() has populated Asm.
+  AsmPrinter &getAsmPrinter() const { return *Asm; }
+
+private:
+  /// \defgroup MCObjects MC layer objects constructed by the streamer
+  /// @{
+  // NOTE(review): the unique_ptr template arguments are missing from this
+  // copy of the patch, and the three raw pointers have no initializer.
+  std::unique_ptr MRI;
+  std::unique_ptr MAI;
+  std::unique_ptr MOFI;
+  std::unique_ptr MC;
+  MCAsmBackend *MAB; // Owned by MCStreamer
+  std::unique_ptr MII;
+  std::unique_ptr MSTI;
+  MCCodeEmitter *MCE; // Owned by MCStreamer
+  MCStreamer *MS;     // Owned by AsmPrinter
+  std::unique_ptr TM;
+  std::unique_ptr Asm;
+  /// @}
+
+  /// The file we stream the linked Dwarf to.
+  raw_fd_ostream &OutFile;
+};
+
+} // end namespace gsym
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H
Index: include/llvm/DebugInfo/GSYM/InlineInfo.h
===================================================================
--- include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -0,0 +1,69 @@
+//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+
+// NOTE(review): header names on the bare #include lines and the element
+// types of the std::vector uses below are missing from this copy of the
+// patch.
+#include
+#include
+
+#include "Range.h"
+
+namespace llvm {
+class DataExtractor;
+class raw_ostream;
+
+namespace gsym {
+class GsymReader;
+struct FileEntry;
+class FileWriter;
+struct StringTable;
+
+/// One node of a tree of inlined calls: a name, a call-site file and line,
+/// the address ranges it covers, and its child nodes. A node with no ranges
+/// is treated as invalid (see isValid()).
+struct InlineInfo {
+  uint32_t Name;
+  uint32_t CallFile;
+  uint32_t CallLine;
+  AddressRanges Ranges;
+  std::vector Children;
+  InlineInfo() : Name(0), CallFile(0), CallLine(0) {}
+  void write(FileWriter &out, uint64_t BaseAddr) const;
+
+  // Decode InlineInfo from data file. In the second variant, only ranges and
+  // children containing LookupAddr will be stored. For LookupAddr < 0 we will
+  // just skip the data in the data file. Returns true if successful, false if
+  // InlineInfo is empty (meaning end of list).
+  // NOTE(review): LookupAddr is declared uint64_t, so "LookupAddr < 0" can
+  // never hold as written; confirm which sentinel value is meant.
+  bool decode(DataExtractor &Data, uint32_t &Offset, uint64_t BaseAddr);
+  bool decode(DataExtractor &Data, uint32_t &Offset, uint64_t BaseAddr,
+              uint64_t LookupAddr);
+
+  /// Resets all fields and drops ranges and children; isValid() is false
+  /// afterwards.
+  void clear() {
+    Name = 0;
+    CallFile = 0;
+    CallLine = 0;
+    Ranges.clear();
+    Children.clear();
+  }
+  bool isValid() const { return !Ranges.empty(); }
+  void dump(llvm::raw_ostream &OS, const GsymReader &GSYM,
+            unsigned depth) const;
+  bool getInlineStack(uint64_t Addr,
+                      std::vector &inline_stack) const;
+
+};
+
+inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
+  return LHS.Name == RHS.Name && LHS.CallFile == RHS.CallFile &&
+         LHS.CallLine == RHS.CallLine && LHS.Ranges == RHS.Ranges &&
+         LHS.Children == RHS.Children;
+}
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
Index: include/llvm/DebugInfo/GSYM/LineEntry.h
===================================================================
--- include/llvm/DebugInfo/GSYM/LineEntry.h
+++ include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -0,0 +1,41 @@
+//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H +#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H + +#include +#include +#include +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace gsym { +struct LineEntry { + uint64_t Addr; // Start address of this line entry + uint32_t File; // 1 based index of file in FileTable + uint32_t Line; // Source line number + LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0) + : Addr(A), File(F), Line(L) {} + bool isValid() { return File != 0; } + void dump(llvm::raw_ostream &OS) const { + OS << "addr=" << format("0x%08" PRIx64, Addr) << ", file=" << + format("%3u", File) << ", line=" << format("%3u", Line) << '\n'; + } +}; +inline bool operator==(const LineEntry &lhs, const LineEntry &rhs) { + return lhs.Addr == rhs.Addr && lhs.File == rhs.File && lhs.Line == rhs.Line; +} +inline bool operator!=(const LineEntry &lhs, const LineEntry &rhs) { + return !(lhs == rhs); +} +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H Index: include/llvm/DebugInfo/GSYM/LineTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/LineTable.h +++ include/llvm/DebugInfo/GSYM/LineTable.h @@ -0,0 +1,47 @@ +//===- LineTable.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H +#define LLVM_DEBUGINFO_GSYM_LINETABLE_H + +#include +#include +#include + +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace gsym { + +struct LineEntry; +struct FunctionInfo; +class FileWriter; + +enum LineTableOpCode { + LTOC_EndSequence = 0x00, // End of the line table + LTOC_SetFile = 0x01, // Set LineTableRow.file_idx, don't push a row + LTOC_AdvancePC = 0x02, // Increment LineTableRow.address, and push a row + LTOC_AdvanceLine = 0x03, // Set LineTableRow.file_line, don't push a row + LTOC_FirstSpecial = 0x04, // All special opcodes push a row +}; + +class LineTable { + DataExtractor Data; + +public: + LineTable(DataExtractor D) : Data(D) {} + static bool write(FileWriter &out, const FunctionInfo &func_info); + std::vector parseAllEntries(uint64_t base_addr); + LineEntry lookup(uint64_t base_addr, uint64_t addr); +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H Index: include/llvm/DebugInfo/GSYM/LookupResult.h =================================================================== --- include/llvm/DebugInfo/GSYM/LookupResult.h +++ include/llvm/DebugInfo/GSYM/LookupResult.h @@ -0,0 +1,48 @@ +//===- LookupResult.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H +#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H + +#include +#include +#include +#include +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class raw_ostream; +namespace gsym { +struct FileEntry; + +struct SourceLocation { + StringRef Name; + StringRef Dir; + StringRef Base; + uint32_t Line = 0; +}; + +struct LookupResult { + uint64_t Addr; + uint64_t EndAddr; + StringRef Name; + std::vector Locations; + LookupResult() : Addr(0), EndAddr(0) {} + void clear() { + Addr = 0; + EndAddr = 0; + Locations.clear(); + } + void dump(llvm::raw_ostream &OS) const; + std::string getSourceFile(uint32_t idx) const; +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H Index: include/llvm/DebugInfo/GSYM/Range.h =================================================================== --- include/llvm/DebugInfo/GSYM/Range.h +++ include/llvm/DebugInfo/GSYM/Range.h @@ -0,0 +1,63 @@ +//===- AddressRange.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H +#define LLVM_DEBUGINFO_GSYM_RANGE_H + +#include +#include + +namespace llvm { +class raw_ostream; + +namespace gsym { +struct AddressRange { + uint64_t Start; + uint64_t End; + AddressRange(uint64_t S = 0, uint64_t E = 0) : Start(S), End(E) {} + uint64_t size() const { + return Start < End ? 
End - Start : 0; + } + void setStartAddress(uint64_t Addr) { Start = Addr; } + void setEndAddress(uint64_t Addr) { End = Addr; } + void setSize(uint64_t Size) { End = Start + Size; } + uint64_t startAddress() const { return Start; } + uint64_t endAddress() const { return End; } + void clear() { Start = 0; End = 0; } + void dump(llvm::raw_ostream &OS) const; + bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; } + bool doesAdjoinOrIntersect(const AddressRange &RHS) const { + return (Start <= RHS.End) && (End >= RHS.Start); + } + bool doesIntersect(const AddressRange &RHS) const { + return (Start < RHS.End) && (End > RHS.Start); + } +}; + +inline bool operator==(const AddressRange &LHS, const AddressRange &RHS) { + return LHS.Start == RHS.Start && LHS.End == RHS.End; +} +inline bool operator<(const AddressRange &LHS, const AddressRange &RHS) { + return LHS.Start < RHS.Start; +} +inline bool operator<(const AddressRange &LHS, uint64_t Addr) { + return LHS.Start < Addr; +} +inline bool operator<(uint64_t Addr, const AddressRange &RHS) { + return Addr < RHS.Start; +} + +typedef std::vector AddressRanges; +bool contains(const AddressRanges &Ranges, uint64_t Addr); +void insert(AddressRanges &Ranges, const AddressRange &R); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_RANGE_H Index: include/llvm/DebugInfo/GSYM/StringTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTable.h +++ include/llvm/DebugInfo/GSYM/StringTable.h @@ -0,0 +1,65 @@ +//===- StringTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H +#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H + +#include +#include + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace gsym { +struct StringTable { + StringRef Data; + StringTable() : Data() {} + StringTable(StringRef D) : Data(D) {} + StringRef operator[](size_t Offset) const { + return getString(Offset); + } + StringRef getString(uint32_t Offset) const { + if (Offset < Data.size()) { + auto End = Data.find('\0', Offset); + return Data.substr(Offset, End - Offset); + } + return StringRef(); + } + void clear() { Data = StringRef(); } + void dump(raw_ostream &OS) const { + OS << "String table:\n"; + uint32_t Offset = 0; + const size_t Size = Data.size(); + while (Offset < Size) { + StringRef Str = getString(Offset); + OS << format_hex(Offset, 10) << ": \"" << Str << "\"\n"; + Offset += Str.size() + 1; + } + } + uint32_t find(StringRef Str) const { + if (Str.empty()) + return 0; // Return th + size_t Offset = 0; + size_t Pos; + while ((Pos = Data.find(Str, Offset)) != StringRef::npos) { + auto NullTerminator = Data.substr(Pos + Str.size(), 1); + if (NullTerminator.empty()) + break; + if (NullTerminator[0] == '\0') + return Pos; + Offset += Str.size() + 1; + } + return UINT32_MAX; // Return an invalid index when the string is not found + } +}; +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H Index: include/llvm/DebugInfo/GSYM/StringTableCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTableCreator.h +++ include/llvm/DebugInfo/GSYM/StringTableCreator.h @@ -0,0 +1,59 @@ +//===- StringTableCreator.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H
+#define LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H
+
+// NOTE(review): the header name on the bare #include and the template
+// arguments of the three containers below are missing from this copy of the
+// patch.
+#include
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "FileWriter.h"
+namespace llvm {
+namespace gsym {
+/// Builds a string table: each distinct string gets a byte offset, and
+/// write() emits the strings in insertion order, each followed by a NUL.
+class StringTableCreator {
+  StringMap Strings;
+  // Strings contains the backing string
+  DenseMap OffsetToString;
+  std::vector OrderedStrings;
+  uint32_t NextOffset;
+
+public:
+  /// Inserts the empty string first, so it always sits at offset 0.
+  StringTableCreator() : NextOffset(0) { insert(""); }
+
+  /// Returns the offset of \p Str, adding it to the table if it is new.
+  /// Inserting a string that is already present returns its existing
+  /// offset.
+  uint32_t insert(StringRef Str) {
+
+    auto Pair = Strings.try_emplace(Str, NextOffset);
+    // Pair.second will be true if the string was added to the string table.
+    if (Pair.second) {
+      // String was added, do some book keeping
+      OffsetToString[Pair.first->getValue()] = Pair.first->getKey();
+      OrderedStrings.push_back(Pair.first->getKey());
+      // Reserve room for the characters plus the NUL terminator.
+      NextOffset += Str.size() + 1;
+    }
+    return Pair.first->getValue();
+  }
+
+  /// Size in bytes of the table write() would emit.
+  uint32_t getTotalSize() const { return NextOffset; }
+
+  /// Returns the string that starts at \p offset, or an empty StringRef when
+  /// no inserted string starts exactly there.
+  StringRef operator[](size_t offset) const {
+    auto it = OffsetToString.find(offset);
+    return it == OffsetToString.end() ? StringRef() : it->second;
+  }
+
+  /// Emits every string in insertion order, each followed by one NUL byte.
+  /// The results of out.Write are not checked.
+  void write(FileWriter &out) const {
+    for (auto Str : OrderedStrings) {
+      out.Write(Str.data(), Str.size());
+      out.Write("\0", 1);
+    }
+  }
+};
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLECREATOR_H
Index: include/llvm/Support/DataExtractor.h
===================================================================
--- include/llvm/Support/DataExtractor.h
+++ include/llvm/Support/DataExtractor.h
@@ -60,6 +60,37 @@
   /// Set the address size for this extractor.
   void setAddressSize(uint8_t Size) { AddressSize = Size; }
 
+  /// Get a slice of data from this data extractor with the same endian
+  /// and address size.
+ /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// + /// @param[in] length + /// The size in byte of the data to extract. If the value is set + /// to StringRef::npos, then all remaining bytes. + /// + /// @return + /// A data extractor with the same endian and address byte size + /// settings as this object with a slice of data whose offset + /// will start at zero. + DataExtractor getSlice(uint32_t *offset_ptr, + size_t length = StringRef::npos) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return DataExtractor(Slice, IsLittleEndian, AddressSize); + } + + const uint8_t *getDataPtr(uint32_t *offset_ptr, size_t length) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return reinterpret_cast(Slice.data()); + } /// Extract a C string from \a *offset_ptr. /// /// Returns a pointer to a C String from the data at the offset Index: lib/DebugInfo/CMakeLists.txt =================================================================== --- lib/DebugInfo/CMakeLists.txt +++ lib/DebugInfo/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(DWARF) +add_subdirectory(GSYM) add_subdirectory(MSF) add_subdirectory(CodeView) add_subdirectory(PDB) Index: lib/DebugInfo/GSYM/Breakpad.cpp =================================================================== --- lib/DebugInfo/GSYM/Breakpad.cpp +++ lib/DebugInfo/GSYM/Breakpad.cpp @@ -0,0 +1,232 @@ +//===- Breakpad.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
/// The kind of a single line in a Breakpad symbol file, as classified by
/// Line::GetLineType() from the text at the start of the line.
enum class BreakpadLineType {
  Invalid,    // Empty line or a line with no recognized prefix.
  Module,     // Line starting with "MODULE ".
  File,       // Line starting with "FILE ".
  Function,   // Line starting with "FUNC ".
  SourceLine, // Line whose first character is a hex digit.
  Public,     // Line starting with "PUBLIC ".
  Stack       // Line starting with "STACK ".
};
+ TrimLeadingSpaces(); + if (Str.empty()) + return StringRef(); + const size_t EndWord = std::min(Str.size(), + Str.find_first_of(" \t\n\v\f\r")); + StringRef Word(Str.substr(0, EndWord)); + Str = Str.drop_front(EndWord); + return Word; + } + StringRef GetRestOfLineAsString() const { + return Str.ltrim(); + } + uint64_t GetHex() { + TrimLeadingSpaces(); + uint64_t Value = 0; + if (Str.consumeInteger(16, Value)) + return UINT64_MAX; + return Value; + } + uint32_t GetHex32() { + TrimLeadingSpaces(); + uint32_t Value = 0; + if (Str.consumeInteger(16, Value)) + return UINT32_MAX; + return Value; + } + uint64_t GetDecimal() { + TrimLeadingSpaces(); + uint64_t Value = 0; + if (Str.consumeInteger(10, Value)) + return UINT64_MAX; + return Value; + } + uint32_t GetDecimal32() { + TrimLeadingSpaces(); + uint32_t Value = 0; + if (Str.consumeInteger(10, Value)) + return UINT32_MAX; + return Value; + } +}; + +std::error_code +llvm::gsym::convertBreakpadFileToGSYM(StringRef BreakpadPath, + StringRef GSYMPath) { + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(BreakpadPath); + auto EC = BuffOrErr.getError(); + if (EC) + return EC; + + std::unique_ptr Buffer = std::move(BuffOrErr.get()); + + GsymCreator Gsym; + EC = convertBreakpadDataToGSYM(StringRef(Buffer->getBuffer()), Gsym); + if (EC) + return EC; + + return Gsym.save(GSYMPath); +} + +std::error_code +llvm::gsym::convertBreakpadDataToGSYM(StringRef Lines, GsymCreator &Gsym) { + + FunctionInfo FuncInfo; + bool GotPublic = false; + while (!Lines.empty()) { + // Remove any leading spaces or newlines + Lines = Lines.ltrim(); + const size_t EndLine = std::min(Lines.find_first_of("\r\n"), Lines.size()); + Line L(Lines.substr(0, EndLine)); + Lines = Lines.drop_front(EndLine).ltrim(); + + switch (L.GetLineType()) { + case BreakpadLineType::Invalid: + break; + case BreakpadLineType::Module: { + StringRef OpSys = L.GetWord(); // Skip OS string + StringRef Arch = L.GetWord(); // Skip arch string + StringRef UUID = L.GetWord(); + 
auto UUIDLength = UUID.size(); + uint8_t HighNibble = 0; + for (size_t i = 0; i < UUIDLength; ++i) { + const uint8_t Nibble = hexDigitValue(UUID[i]); + if (Nibble <= 0xf) { + if (i % 2) + Gsym.UUID.push_back(HighNibble + Nibble); + else + HighNibble = Nibble << 4; + } else { + Gsym.UUID.clear(); + break; + } + } + } break; + case BreakpadLineType::File: { + const auto BreakpadFileIndex = L.GetDecimal(); + const auto GsymFileIndex = Gsym.insertFile(L.GetRestOfLineAsString()); + assert(BreakpadFileIndex + 1 == GsymFileIndex); + break; + } + case BreakpadLineType::Function: + if (FuncInfo.isValid()) + Gsym.addFunctionInfo(std::move(FuncInfo)); + FuncInfo.Range.setStartAddress(L.GetHex()); + FuncInfo.Range.setSize(L.GetHex32()); + L.GetHex(); // Skip parameter_size + FuncInfo.Name = Gsym.insertString(L.GetRestOfLineAsString()); + FuncInfo.Lines.clear(); + break; + case BreakpadLineType::SourceLine: { + uint64_t addr = L.GetHex(); + L.GetHex32(); // Skip 32 bit size + uint32_t line_num = L.GetDecimal32(); + uint32_t file_idx = L.GetDecimal32() + 1; + if (!FuncInfo.Lines.empty()) { + auto &last = FuncInfo.Lines.back(); + // Skip multiple line entries in a row that have the same file and line + if (last.File == file_idx && last.Line == line_num) + break; + } + FuncInfo.Lines.push_back(LineEntry(addr, file_idx, line_num)); + } break; + case BreakpadLineType::Public: { + GotPublic = true; + uint64_t addr = L.GetHex(); + L.GetHex(); // Skip parameter_size + uint32_t name = Gsym.insertString(L.GetRestOfLineAsString()); + Gsym.addFunctionInfo(FunctionInfo(addr, 0, name)); + } break; + case BreakpadLineType::Stack: + break; + } + } + if (GotPublic) { + // We appended public symbol FunctionInfo objects onto the end of our + // sorted func_infos array and we need to sort it now. 
+ Gsym.sortFunctionInfos(); + } + return std::error_code(); +} Index: lib/DebugInfo/GSYM/CMakeLists.txt =================================================================== --- lib/DebugInfo/GSYM/CMakeLists.txt +++ lib/DebugInfo/GSYM/CMakeLists.txt @@ -0,0 +1,18 @@ +add_llvm_library(LLVMDebugInfoGSYM + Breakpad.cpp + DwarfTransformer.cpp + FileTableCreator.cpp + FileWriter.cpp + FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp + GsymStreamer.cpp + InlineInfo.cpp + LineTable.cpp + LookupResult.cpp + Range.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + ) Index: lib/DebugInfo/GSYM/DwarfTransformer.cpp =================================================================== --- lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,556 @@ +//===- DwarfTransformer.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +using namespace llvm; +using namespace gsym; + +#define HEX32(v) format("0x%8.8" PRIx32, (uint32_t)v) +#define HEX64(v) format("0x%8.8" PRIx64, (uint64_t)v) + +constexpr uint32_t NT_GNU_BUILD_ID_TAG = 0x03; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector FileCache; + uint64_t language = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) { + FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + } + DWARFDie Die = CU->getUnitDIE(); + language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); + } + + uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, + uint32_t DwarfFileIdx) { + if (!LineTable) + return 0; + assert(DwarfFileIdx < FileCache.size()); + uint32_t FileIdx = FileCache[DwarfFileIdx]; + if (FileIdx != UINT32_MAX) + return FileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + FileIdx = Gsym.insertFile(File); + else + FileIdx = 0; + FileCache[DwarfFileIdx] = FileIdx; + return FileIdx; + } +}; + +static std::vector getUUID(const object::ObjectFile &Obj) { + // Extract the UUID 
from the object file + std::vector UUID; + if (auto *MachO = dyn_cast(&Obj)) { + const ArrayRef mach_uuid = MachO->getUuid(); + if (!mach_uuid.empty()) { + UUID.assign(mach_uuid.data(), mach_uuid.data() + mach_uuid.size()); + } + } else if (auto *Elf = dyn_cast(&Obj)) { + const StringRef gnu_build_id(".note.gnu.build-id"); + for (const object::SectionRef §Ref : Obj.sections()) { + StringRef sectName; + sectRef.getName(sectName); + if (sectName == gnu_build_id) { + StringRef build_id_data; + if (!sectRef.getContents(build_id_data)) { + DataExtractor decoder(build_id_data, true, 8); // TODO: set endian and addr byte size + uint32_t Offset = 0; + const uint32_t name_size = decoder.getU32(&Offset); + const uint32_t payload_size = decoder.getU32(&Offset); + const uint32_t payload_type = decoder.getU32(&Offset); + const char *name = (const char *)decoder.getDataPtr(&Offset, + name_size); + if ((strncmp(name, "GNU", name_size) == 0) && + payload_type == NT_GNU_BUILD_ID_TAG) { + Offset = alignTo(Offset, 4); + const uint8_t *uuid_data = + (const uint8_t *)decoder.getDataPtr(&Offset, payload_size); + if (uuid_data) + UUID.assign(uuid_data, uuid_data + payload_size); + } + } + } + } + } + return UUID; +} + +DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { + if (DWARFDie spec = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (DWARFDie spec_parent = GetParentDeclContextDIE(spec)) { + return spec_parent; + } + } + if (DWARFDie abst = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (DWARFDie abst_parent = GetParentDeclContextDIE(abst)) { + return abst_parent; + } + } + + if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) { + // We never want to follow parent for inlined subroutine - that would + // give us information about where the function is inlined, not what + // function is inlined + return DWARFDie(); + } + + DWARFDie parent = Die.getParent(); + if (!parent) { + return DWARFDie(); + } + switch (parent.getTag()) { 
+ case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return parent; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(parent); + default: + break; + } + + return DWARFDie(); +} + +std::string getQualifiedName(DWARFDie &Die, uint64_t Language) { + // If the dwarf has mangled name, use mangled name + if (Optional LinkageName = + dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) { + return LinkageName.getValue(); + } + + StringRef ShortName(Die.getName(DINameKind::ShortName)); + if (ShortName.empty()) { + return ""; + } + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(Language == dwarf::DW_LANG_C_plus_plus || + Language == dwarf::DW_LANG_C_plus_plus_03 || + Language == dwarf::DW_LANG_C_plus_plus_11 || + Language == dwarf::DW_LANG_C_plus_plus_14 || + Language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. This should hurt, so let's do it for C as well + Language == dwarf::DW_LANG_C)) { + return ShortName.str(); + } + // Some GCC optimizations create functions with names ending with .isra. + // or .part. and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (ShortName.startswith("_Z") && + (ShortName.contains(".isra.") || ShortName.contains(".part."))) { + return ShortName.str(); + } + + std::string name = ShortName.str(); + DWARFDie parent_die = GetParentDeclContextDIE(Die); + while (parent_die) { + StringRef ParentName(parent_die.getName(DINameKind::ShortName)); + if (!ParentName.empty()) { + // "lambda" names are wrapped in < >. 
Replace with { } + // to be consistent with demangled names and not to confuse with + // templates + if (ParentName.front() == '<' && ParentName.back() == '>') + name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + + "::" + name; + else + name = ParentName.str() + "::" + name; + } + parent_die = GetParentDeclContextDIE(parent_die); + } + + return name; +} + +static bool hasInlineInfo(DWARFDie Die, uint32_t depth) { + bool checkChildren = true; + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. + checkChildren = depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (checkChildren) { + for (DWARFDie child : Die.children()) { + if (hasInlineInfo(child, depth + 1)) + return true; + } + } + return false; +} + +static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, + uint32_t depth, FunctionInfo &FuncInfo, + InlineInfo &parent) { + if (!hasInlineInfo(Die, depth)) + return; + + dwarf::Tag tag = Die.getTag(); + if (tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo ii; + DWARFAddressRange funcRange = DWARFAddressRange(FuncInfo.startAddress(), + FuncInfo.endAddress()); + Expected RangesOrError = Die.getAddressRanges(); + if (RangesOrError) { + for (const DWARFAddressRange &Range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (funcRange.contains(Range)) + ii.Ranges.emplace_back(AddressRange(Range.LowPC, Range.HighPC)); + } + } + if (ii.Ranges.empty()) { + return; + } + + std::string name = getQualifiedName(Die, CUI.language); + if (!name.empty()) { + ii.Name = Gsym.insertString(std::move(name)); + } + ii.CallFile = CUI.DWARFToGSYMFileIndex( + Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); + ii.CallLine = 
dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (DWARFDie child : Die.children()) { + parseInlineInfo(Gsym, CUI, child, depth + 1, FuncInfo, ii); + } + parent.Children.emplace_back(std::move(ii)); + } else if (tag == dwarf::DW_TAG_subprogram || + tag == dwarf::DW_TAG_lexical_block) { + // skip this Die and just recurse down + for (DWARFDie child : Die.children()) { + parseInlineInfo(Gsym, CUI, child, depth + 1, FuncInfo, parent); + } + } +} + +bool DwarfTransformer::parseLineTable(raw_ostream &Strm, CUInfo &CUI, + DWARFDie Die, FunctionInfo &FuncInfo) { + std::vector RowVector; + const uint64_t StartAddress = FuncInfo.startAddress(); + const uint64_t EndAddress = FuncInfo.endAddress(); + const uint64_t RangeSize = EndAddress - StartAddress; + if (!CUI.LineTable->lookupAddressRange(StartAddress, RangeSize, RowVector)) { + return false; + } + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; + uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); + + if (RowIndex != RowVector[0] && PrevRow.Address > Row.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking the the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. 
+ if (LineEntry(Row.Address, FileIdx, Row.Line) == FuncInfo.Lines.front()) { + Strm << "warning: duplicate line table detected for DIE:\n"; + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + } else { + // Print out (ignore if os == nulls as this is expensive) + Strm << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + CUI.LineTable->Rows[RowIndex2].dump(Strm); + } + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (!FuncInfo.Lines.empty()) { + const LineEntry &last = FuncInfo.Lines.back(); + // Skip multiple line entris for the same file and line. + if (last.File == FileIdx && last.Line == Row.Line) + continue; + } + if (!Row.EndSequence) { + if (StartAddress <= Row.Address && Row.Address < EndAddress) { + FuncInfo.Lines.emplace_back(LineEntry(Row.Address, FileIdx, Row.Line)); + } else if (Row.Address < StartAddress) { + // Line tables are bad in DWARF. + Strm << "warning: invalid DWARF for DIE with range [" + << HEX64(StartAddress) << '-' << HEX64(EndAddress) + << ") that has a matching line entry with an address " + << HEX64(Row.Address) << " that is lower than the function " + << "start address:\n"; + Die.dump(Strm, 0, DIDumpOptions::getForSingleDIE()); + // Don't put multiple bogus line entries in the line table. + // Check to see if we already fixed up the first line entry to + // be the LowPC? 
+ if (FuncInfo.Lines.size() == 1 && FuncInfo.Lines.front().Addr == StartAddress) { + FuncInfo.Lines[0].File = FileIdx; + FuncInfo.Lines[0].Line = Row.Line; + } else { + FuncInfo.Lines.emplace_back(LineEntry(StartAddress, FileIdx, Row.Line)); + } + } + } + PrevRow = Row; + } + return true; +} + +void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: { + Expected RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + consumeError(RangesOrError.takeError()); + break; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + std::string name = getQualifiedName(Die, CUI.language); + if (name.empty()) { + OS << "error: function at " << HEX64(Die.getOffset()) + << " has no name\n "; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + const uint32_t nameIdx = Gsym.insertString(std::move(name)); + + // Create a function_info for each range + for (const DWARFAddressRange &range : Ranges) { + if (!contains(Gsym.TextRanges, range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. 
+ if (range.LowPC != 0 && range.LowPC != UINT32_MAX && + range.LowPC != UINT64_MAX) { + // Unexpected invalid address, emit an error + OS << "error: DIE has an address range whose start address is not " + " in an executable section and will not be processed:\n"; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (range.LowPC >= range.HighPC) + break; + + FunctionInfo FuncInfo; + FuncInfo.setStartAddress(range.LowPC); + FuncInfo.setEndAddress(range.HighPC); + FuncInfo.Name = nameIdx; + if (CUI.LineTable) { + parseLineTable(OS, CUI, Die, FuncInfo); + } + if (hasInlineInfo(Die, 0)) { + FuncInfo.InlineInfo.Name = nameIdx; + FuncInfo.InlineInfo.Ranges = { FuncInfo.Range }; + parseInlineInfo(Gsym, CUI, Die, 0, FuncInfo, + FuncInfo.InlineInfo); + } + Gsym.addFunctionInfo(std::move(FuncInfo)); + } + } break; + default: + break; + } + for (DWARFDie child : Die.children()) { + handleDie(OS, CUI, child); + } +} + +void DwarfTransformer::initDataFromObj(const object::ObjectFile &Obj) { + // Read build ID + Gsym.UUID = getUUID(Obj); + + // We need to know where the valid sections are that contain instructions. + // DWARF now emits DW_TAG_subprogram tags with DW_AT_low_pc as addresses + // and DW_AT_high_pc values as sizes, and the linker will zero out the + // DW_AT_low_pc, but won't touch the DW_AT_high_pc if it isn't an address. + // This means we might have many many DW_TAG_subprogram's with a + // DW_AT_low_pc of zero and a valid size. We need to ignore these entries. 
+ for (const object::SectionRef §Ref : Obj.sections()) { + if (sectRef.isText()) { + const uint64_t Size = sectRef.getSize(); + if (Size > 0) { + const uint64_t StartAddr = sectRef.getAddress(); + insert(Gsym.TextRanges, AddressRange(StartAddr, StartAddr + Size)); + } + } + } +} + +std::error_code DwarfTransformer::loadDwarf(const object::ObjectFile &Obj) { + using namespace llvm::object; + std::unique_ptr DICtx = DWARFContext::create(Obj); + logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), Log, "dwarf2gsym"); + + if (!Initialized) { + initDataFromObj(Obj); + } + size_t NumBefore = Gsym.getFunctionInfoSize(); + if (NumThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + for (const auto &CU : DICtx->compile_units()) { + DWARFDie Die = CU->getUnitDIE(false); + CUInfo CUI(*DICtx, dyn_cast(CU.get())); + handleDie(Log, CUI, Die); + } + } else { + // THIS IS VERY HACKY. Without parsing DIEs for all CUs first, we might hit + // a race condition below. LLVM Dwarf parser is not completely thread-safe: + // DWARFDie keeps a pointer to an element of a vector in DWARFUnit. If we + // dont parse all the DIEs first, they might be parsed from another thread, + // causing the vector to reallocate, causing the pointer to be invalid. We + // could sequentially call CU->getUnitDIE(false) for all CUs, but that's + // also not super fast. To do this concurrently, we need to call + // getAbbreviations sequentially first so that getUnitDIE() only works with + // its local data. 
+ for (const auto &CU : DICtx->compile_units()) { + CU->getAbbreviations(); + } + ThreadPool pool(NumThreads); + for (const auto &CU : DICtx->compile_units()) { + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + } + pool.wait(); + + std::mutex Mutex; // needed to insert into output.Funcs + for (const auto &CU : DICtx->compile_units()) { + DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + if (Die) { + CUInfo CUI(*DICtx, dyn_cast(CU.get())); + pool.async([this, CUI, &Mutex, Die]() mutable { + std::string ThreadLogStorage; + raw_string_ostream ThreadOS(ThreadLogStorage); + handleDie(ThreadOS, CUI, Die); + ThreadOS.flush(); + if (!ThreadLogStorage.empty()) { + // Print ThreadLogStorage lines into an actual stream under a lock + std::lock_guard guard(Mutex); + Log << ThreadLogStorage; + } + }); + } + } + pool.wait(); + } + size_t FunctionsAddedCount = Gsym.getFunctionInfoSize() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; + if (FunctionsAddedCount > 0) + return std::error_code(); + return std::make_error_code(std::errc::invalid_argument); +} + +std::error_code DwarfTransformer::loadSymbolTable(const object::ObjectFile &Obj) { + using namespace llvm::object; + if (!Initialized) { + initDataFromObj(Obj); + } + size_t NumBefore = Gsym.getFunctionInfoSize(); + + + for (const object::SymbolRef &s : Obj.symbols()) { + Expected SymType = s.getType(); + const uint64_t addr = s.getValue(); + if (!SymType || SymType.get() != SymbolRef::Type::ST_Function || addr == 0) + continue; + // Function size for MachO files will be 0 + const uint64_t size = isa(&Obj) ? 
+ ELFSymbolRef(s).getSize() : 0; + if (Expected name = s.getName()) { + Gsym.addFunctionInfo(FunctionInfo(addr, size, + Gsym.insertString(*name))); + } + } + size_t FunctionsAddedCount = Gsym.getFunctionInfoSize() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n"; + if (FunctionsAddedCount > 0) + return std::error_code(); + return std::make_error_code(std::errc::invalid_argument); +} + +Optional> +DwarfTransformer::getObjectFile(StringRef filename) const { + auto BuffOrErr = MemoryBuffer::getFileOrSTDIN(filename); + if (auto err = BuffOrErr.getError()) { + Log << filename << ": " << err.message(); + return None; + } + auto Buff = std::move(BuffOrErr.get()); + auto BinOrErr = object::createBinary(*Buff); + if (auto err = errorToErrorCode(BinOrErr.takeError())) { + Log << filename << ": " << err.message(); + return None; + } + + if (auto *Obj = dyn_cast(BinOrErr->get())) { + // transfer ownership + auto ptr = std::unique_ptr( + dyn_cast(BinOrErr->release())); + return object::OwningBinary(std::move(ptr), + std::move(Buff)); + } else if (auto *Fat = + dyn_cast(BinOrErr->get())) { + auto arch = object::MachOObjectFile::getHostArch().getArchName(); + if (Fat->getNumberOfObjects() == 1) { + auto MachOOrErr = Fat->begin_objects()->getAsObjectFile(); + if (auto err = BuffOrErr.getError()) { + Log << filename << ": " << err.message(); + return None; + } + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else if (auto MachOOrErr = Fat->getObjectForArch(arch)) { + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else { + Log << filename << ": file contains objects for " + << "multiple archs but not for " << arch; + } + } + Log << filename << ": unsupported binary type"; + return None; +} Index: lib/DebugInfo/GSYM/FileTableCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/FileTableCreator.cpp +++ 
lib/DebugInfo/GSYM/FileTableCreator.cpp @@ -0,0 +1,47 @@ +//===- FileTableCreator.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include + +using namespace llvm; +using namespace gsym; + +uint32_t FileTableCreator::insert(StringRef S) { + auto Dir = 0, Base = 0; + auto LastSlash = S.rfind('/'); + if (LastSlash == std::string::npos || LastSlash == 0) { + Base = StringTable.insert(S); + } else { + Dir = StringTable.insert(S.substr(0, LastSlash)); + Base = StringTable.insert(S.substr(LastSlash + 1)); + } + return insert(FileEntry(Dir, Base)); +} + +uint32_t FileTableCreator::insert(FileEntry Entry) { + auto Index = FileEntries.size(); + // find in hash map and insert if not present + auto R = EntryToIndex.insert(std::make_pair(Entry, Index)); + if (R.second) { // if newly inserted + FileEntries.emplace_back(Entry); + } + return R.first->second; +} + +void FileTableCreator::write(FileWriter &Out) const { + // we should always have 1 blank entry + assert(!FileEntries.empty()); + assert(FileEntries[0].Dir == 0); + assert(FileEntries[0].Base == 0); + size_t NumFiles = FileEntries.size(); + Out.WriteUnsigned(NumFiles, sizeof(uint32_t)); + Out.Write(FileEntries.data(), NumFiles * sizeof(FileEntry)); +} Index: lib/DebugInfo/GSYM/FileWriter.cpp =================================================================== --- lib/DebugInfo/GSYM/FileWriter.cpp +++ lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,80 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +bool FileWriter::WriteSLEB(int64_t value) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(value, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteULEB(uint64_t value) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(value, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteU8(uint8_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::WriteU32(uint32_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::Fixup32(uint32_t Value, off_t Offset) { + const off_t CurrOffset = Tell(); + if (CurrOffset == -1) + return false; + if (Seek(Offset) != Offset) + return false; + if (!WriteU32(Value)) + return false; + return Seek(CurrOffset) == CurrOffset; +} + +bool FileWriter::WriteUnsigned(uint64_t U, size_t N) { + // NOTE: this only works on little endian machines + return Write(&U, N); +} +bool FileWriter::Write(const void *Src, size_t SrcLength) { + OS.write((const char *)Src, SrcLength); + return OS.good(); +} + +off_t FileWriter::Tell() { return OS.tellp(); } + +off_t FileWriter::Seek(off_t Offset) { + OS.seekp(Offset); + return OS.good() ? 
Offset : -1; +} + +bool FileWriter::AlignTo(size_t Align) { + off_t Offset = Tell(); + assert(Offset != -1); + if (Offset == -1) + return false; + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return true; + off_t PadCount = AlignedOffset - Offset; + auto Success = + Write(std::string(PadCount, '\0').c_str(), PadCount); + assert(Tell() == AlignedOffset); + return Success; +} Index: lib/DebugInfo/GSYM/FunctionInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/FunctionInfo.cpp +++ lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -0,0 +1,32 @@ +//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void FunctionInfo::dump(llvm::raw_ostream &OS, GsymReader &GSYM) const { + OS << '[' << format_hex(Range.startAddress(), 18) << '-' + << format_hex(Range.endAddress(), 18) << ") " << GSYM.getString(Name) + << '\n'; + if (!Lines.empty()) { + OS << "Lines:\n"; + for (const auto &Line : Lines) { + Line.dump(OS); + } + } + if (InlineInfo.isValid()) + InlineInfo.dump(OS, GSYM, 0); +} Index: lib/DebugInfo/GSYM/GsymCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymCreator.cpp +++ lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,197 @@ +//===- GsymCreator.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +#include +#include +#include +#include +#include + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" + +using namespace llvm; +using namespace gsym; + +std::error_code GsymCreator::save(StringRef path) const { + std::ofstream File(path.str().c_str(), + std::ios::binary | std::ios::out | std::ios::trunc); + std::error_code Error = save(File); + File.close(); + return Error; +} + +std::error_code GsymCreator::save(std::ostream &stream) const { + std::lock_guard guard(Mutex); + if (Funcs.empty()) + return std::make_error_code(std::errc::invalid_argument); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + uint8_t AddrOffSize = 8; + if (AddrDelta <= UINT8_MAX) + AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + AddrOffSize = 4; + Header header = {0, 0, 0, 0, 0, 0, 0, 0, {0}}; + header.Magic = GSYM_MAGIC; + header.Version = GSYM_VERSION; + header.AddrOffSize = AddrOffSize; + header.UUIDSize = UUID.size(); + header.BaseAddress = MinAddr; + assert(Funcs.size() <= UINT32_MAX); + header.NumAddresses = (uint32_t)Funcs.size(); + header.StrtabOffset = 0; // We will need to fix this up later. + header.StrtabSize = 0; // We will need to fix this up later. 
+ if (header.UUIDSize > sizeof(header.UUID)) { + fprintf(stderr, "error: UUID value is too large (%u bytes)\n", + (uint32_t)UUID.size()); + return std::make_error_code(std::errc::invalid_argument); + } + if (UUID.size() > 0) { + memcpy(header.UUID, UUID.data(), UUID.size()); + } + FileWriter out(stream); + // Write out the header + out.Write(&header, Header::getByteSize()); + out.AlignTo(header.AddrOffSize); + // Write out the address offsets + for (const auto &FuncInfo : Funcs) { + uint64_t addr_offset = FuncInfo.startAddress() - header.BaseAddress; + out.WriteUnsigned(addr_offset, header.AddrOffSize); + } + // Write out all zeros for the addr_info_offsets; + out.AlignTo(sizeof(uint32_t)); + const off_t addr_info_offsets_offset = out.Tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + out.WriteU32(0); + + // Write out the file table + out.AlignTo(sizeof(uint32_t)); + FileTab.write(out); + // Write out the sting table + const off_t StrtabOffset = out.Tell(); + StrTab.write(out); + const off_t StrtabSize = out.Tell() - StrtabOffset; + std::vector addr_info_offsets; + // Write out the address infos for each address + for (const auto &FuncInfo : Funcs) { + out.AlignTo(sizeof(uint32_t)); + addr_info_offsets.push_back((uint32_t)out.Tell()); + // Write the size in bytes of this function as a uint32_t + out.WriteU32(FuncInfo.size()); + // Write the name of this function as a uint32_t string table offset + out.WriteU32(FuncInfo.Name); + // Write out the line table if we have one. 
+ LineTable::write(out, FuncInfo); + + // Write out the inline function info if we have any + if (FuncInfo.InlineInfo.isValid()) { + out.WriteU32(static_cast(InfoType::InlineInfo)); + const auto inline_info_size_offset = out.Tell(); + out.WriteU32(0); // We will fix this up after writing the info out + const auto inline_info_start = out.Tell(); + FuncInfo.InlineInfo.write(out, FuncInfo.startAddress()); + const off_t inline_info_length = out.Tell() - inline_info_start; + out.Fixup32((uint32_t)inline_info_length, inline_info_size_offset); + } + + // Terminate the data chunks with and end of list with zero size + out.WriteU32(static_cast(InfoType::EndOfList)); + out.WriteU32(0); + } + // Fixup the string table offset and size in the header + out.Seek(offsetof(Header, StrtabOffset)); + out.WriteU32((uint32_t)StrtabOffset); + out.WriteU32((uint32_t)StrtabSize); + + // Fixup all address info offsets + out.Seek(addr_info_offsets_offset); + out.Write(addr_info_offsets.data(), + addr_info_offsets.size() * sizeof(uint32_t)); + return std::error_code(); +} + +void GsymCreator::optimize(llvm::raw_ostream &OS) { + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. 
+ // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + sortFunctionInfos(); + std::lock_guard guard(Mutex); + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + if (Curr->size() == 0) { + // Skip function infos with no size + Curr = Funcs.erase(Curr); + continue; + } + #if 0 + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + const auto PrevHi = Prev->endAddress(); + const auto CurrLo = Curr->startAddress(); + if (CurrLo < PrevHi) { + const auto CurrHi = Curr->endAddress(); + const auto PrevLo = Prev->startAddress(); + if (CurrLo == PrevLo && CurrHi == PrevHi) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info + } + auto &last = Funcs.back(); + if (last.Name != Curr->Name) { + // For symbol table entries (no rich info), prefer shorter name + // (not that functions with rich info are before functions without) + // if (!Curr->hasRichInfo() && !last.hasRichInfo() && + // strlen(Curr->name()) < strlen(last.name())) + // Funcs.back() = f; + // if both have rich info, print a warning + // if (Curr->hasRichInfo() && last.hasRichInfo()) + // OS << "Warning: functions with different names and " + // << "same address range: \n\t" << f << "\n\t" << last << "\n"; + } + } else { + // print warnings about overlaps + assert(PrevHi > CurrHi || (PrevHi == CurrHi && CurrLo > PrevLo)); + assert(!Funcs.empty()); + // OS << "Warning: function \t" << f << "\n\toverlaps with\t" + // << Funcs.back() << "\n"; + } +#endif + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() + << " functions, ended with " << Funcs.size() << " total\n"; +} Index: lib/DebugInfo/GSYM/GsymReader.cpp 
=================================================================== --- lib/DebugInfo/GSYM/GsymReader.cpp +++ lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,581 @@ +//===- GsymReader.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" + +using namespace llvm; +using namespace gsym; + +static int unsigned_width(uint64_t u) { + if (u < 10) + return 1; + return 1 + unsigned_width(u/10); +} + +void Header::dump(llvm::raw_ostream &OS) const { + OS << "Header:\n"; + OS << " magic = " << format_hex(Magic, 10) << "\n"; + OS << " version = " << format_hex(Version, 6) << '\n'; + OS << " addr_off_size = " << format_hex(AddrOffSize, 4) << '\n'; + OS << " uuid_size = " << format_hex(UUIDSize, 4) << '\n'; + OS << " base_address = " << format_hex(BaseAddress, 18) << '\n'; + OS << " num_addrs = " << format_hex(NumAddresses, 10) << '\n'; + OS << " strtab_offset = " << format_hex(StrtabOffset, 10) << '\n'; + OS << " strtab_size = " << format_hex(StrtabSize, 10) << '\n'; + OS << " uuid = "; + for (uint8_t i = 0; i < UUIDSize; ++i) { + OS << format_hex_no_prefix(UUID[i], 2); + } + OS << '\n'; +} + +std::string Header::getError() const { + // TODO: support swapped GSYM files + if (Magic != GSYM_MAGIC) + return "invalid magic"; + if (Version != 1) + return "invalid 
version"; + return ""; +} + +GsymReader::GsymReader() { +} + +std::error_code +GsymReader::openFile(StringRef Filename) { + // Open the input file + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto error = BuffOrErr.getError(); + if (!error) { + MemBuffer = std::move(BuffOrErr.get()); + init(MemBuffer->getBuffer()); + } + return error; +} + +void GsymReader::init(StringRef FileBytes) { + BinaryStreamReader FileData(FileBytes, support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + if (FileData.readObject(GSYMHeader)) + return; + + if (GSYMHeader->Magic == GSYM_MAGIC) { + // This file is a GSYM file, the file data is the GSYM data + GsymBytes = FileBytes; + } else { + // TODO: Load this file as an ObjectFile and extract GSYM info from a + // ".gsym" section (ELF) or "__gsym" section (mach-o) in the file. + return; + } + + ErrorStr = GSYMHeader->getError(); + if (!ErrorStr.empty()) + return; + + if (FileData.padToAlignment(GSYMHeader->AddrOffSize)) + return; + // Calculate the start of the address offset table. The address offset table + // contains a sorted list of the all all addresses contained in this GSYM + // file. + if (FileData.readArray(AddrOffsets, + GSYMHeader->NumAddresses * GSYMHeader->AddrOffSize)) + return; + // Calculate the start of the address info offset table. Each address offset + // in the address offset table has a offset to the file data for the address + // in the address info offsets table. + if (FileData.padToAlignment(sizeof(uint32_t))) + return; + if (FileData.readArray(AddrInfoOffsets, + GSYMHeader->NumAddresses)) + return; + + uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles)) + return; + // Calculate the start of the file table. 
+ if (FileData.readArray(Files, NumFiles)) + return; + + // Set the string table based off information in the GSYM header and read it + // from the FileData, not from GsymBytes. + FileData.setOffset(GSYMHeader->StrtabOffset); + + if (FileData.readFixedString(StrTab.Data, GSYMHeader->StrtabSize)) + return; +} + +uint64_t GsymReader::getAddressOffset(size_t idx) const { + if (GSYMHeader && !AddrOffsets.empty() && idx < GSYMHeader->NumAddresses) { + switch (GSYMHeader->AddrOffSize) { + case 1: + return reinterpret_cast(AddrOffsets.data())[idx]; + case 2: + return reinterpret_cast(AddrOffsets.data())[idx]; + case 4: + return reinterpret_cast(AddrOffsets.data())[idx]; + case 8: + return reinterpret_cast(AddrOffsets.data())[idx]; + } + } + return UINT64_MAX; +} + +size_t GsymReader::getNumAddresses() const { + if (GSYMHeader) + return GSYMHeader->NumAddresses; + return 0; +} + +uint64_t GsymReader::getAddress(size_t idx) const { + if (GSYMHeader) { + auto AddrOffset = getAddressOffset(idx); + if (AddrOffset != UINT64_MAX) + return GSYMHeader->BaseAddress + AddrOffset; + } + return UINT64_MAX; +} + +uint64_t GsymReader::getAddressInfoOffset(size_t idx) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (idx < NumAddrInfoOffsets) + return AddrInfoOffsets[idx]; + if (idx > 0 && idx == NumAddrInfoOffsets) { + // We can figure out what the last address info offset is by figuring out + // now many bytes are left in the GsymBytes. 
+ const auto PrevAddrInfoOffset = getAddressInfoOffset(idx - 1); + if (PrevAddrInfoOffset != UINT64_MAX) { + const auto Length = GsymBytes.size(); + if (Length > PrevAddrInfoOffset) + return Length - PrevAddrInfoOffset; + } + } + return UINT64_MAX; +} + +DataExtractor GsymReader::getAddressInfoPayload(size_t idx) const { + const uint64_t Offset = getAddressInfoOffset(idx); + if (Offset != UINT64_MAX) { + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(Offset); + if (!GSYMData.skip(sizeof(AddressInfo))) { + const uint64_t EndOffset = getAddressInfoOffset(idx + 1); + if (EndOffset > Offset + sizeof(sizeof(AddressInfo))) { + const uint64_t Size = EndOffset - (Offset + sizeof(AddressInfo)); + StringRef AddrInfoData; + if (!GSYMData.readFixedString(AddrInfoData,Size)) + return DataExtractor(AddrInfoData, true, 8); + + } + } + } + return DataExtractor(StringRef(), true, 8); +} + +const char *GsymReader::getInfoTypeAsString(InfoType IT) { + switch (IT) { + case InfoType::EndOfList: + return "EndOfList"; + case InfoType::LineTableInfo: + return "LineTable"; + case InfoType::InlineInfo: + return "InlineInfo"; + } + return "???"; +} + +// void GsymReader::FileTable::dump(llvm::raw_ostream &OS, +// const StringTable &StrTab) const { +// OS << "Files:\n"; +// const size_t index_width = unsigned_width(NumFiles); +// for (uint32_t i = 0; i < NumFiles; ++i) { +// OS << "files[" << format_decimal(i, index_width) << "] " << +// format_hex(Files[i].Dir, 10) << ", " << format_hex(Files[i].Base, 10); +// StringRef Dir(StrTab[Files[i].Dir]); +// StringRef Base(StrTab[Files[i].Base]); +// if (Dir.empty()) { +// if (!Base.empty()) +// OS << " (\"" << Base << "\")"; +// } else { +// assert(!Base.empty()); +// OS << " (\"" << Dir << '/' << Base << "\")"; +// } +// OS << '\n'; +// } +// } + +void GsymReader::dump(llvm::raw_ostream &OS, bool Verbose) const { + // If GSYMHeader is not NULL, then the header has been validated. 
+ if (!GSYMHeader) { + OS << "invalid gsym file\n"; + return; + } + if (Verbose) { + // When verbose dumping, dump the header, address offsets, address info + // offsets, file table and string table. + GSYMHeader->dump(OS); + OS << "Address Offsets:\n"; + const size_t addr_off_hex_width = GSYMHeader->AddrOffSize * 2 + 2; + const size_t addr_hex_width = 16 + 2; + const size_t index_width = unsigned_width(GSYMHeader->NumAddresses); + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + auto AddrOffset = getAddressOffset(i); + OS << " [" << format_decimal(i, index_width) << "] " << + format_hex(AddrOffset, addr_off_hex_width) << " (" << + format_hex(AddrOffset + GSYMHeader->BaseAddress, addr_hex_width) << + ")\n"; + } + OS << "Address Info Offsets:\n"; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) + OS << " [" << format_decimal(i, index_width) << "] " << + format_hex(getAddressInfoOffset(i), 10) << '\n'; + dumpFileTable(OS); + StrTab.dump(OS); + } + + // Dump contents of all address info objects. 
+ OS << "Address Infos:\n"; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + dumpAddressInfo(OS, i); + } +} + +void GsymReader::dumpAddressInfo(llvm::raw_ostream &OS, + size_t AddrInfoIndex) const { + const auto AddrInfoOffset = getAddressInfoOffset(AddrInfoIndex); + if (AddrInfoOffset == UINT64_MAX) { + OS << "error: invalid address info index " << AddrInfoIndex; + return; + } + OS << format_hex(AddrInfoOffset, 10) << ": "; + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (GSYMData.readObject(AddrInfo)) { + OS << "error: corrupt GSYM file doesn't contain address info\n"; + return; + } + const uint64_t AddrOffset = getAddressOffset(AddrInfoIndex); + const uint64_t StartAddr = GSYMHeader->BaseAddress + AddrOffset; + const uint64_t EndAddr = StartAddr + AddrInfo->Size; + OS << '[' << format_hex(StartAddr, 18) << " - " << format_hex(EndAddr, 18) + << "): " << StrTab[AddrInfo->Name] << '\n'; + DataExtractor data = getAddressInfoPayload(AddrInfoIndex); + + bool done = false; + uint32_t Offset = 0; + while (!done) { + auto IT = static_cast(data.getU32(&Offset)); + uint32_t InfoLength = data.getU32(&Offset); + OS << " " << format_hex(AddrInfoOffset + 8 + Offset, 10) << ": <" << + format_hex(InfoLength, 10) << "> " << getInfoTypeAsString(IT) + << '\n'; + DataExtractor InfoData = data.getSlice(&Offset, InfoLength); + switch (IT) { + case InfoType::EndOfList: + done = true; + break; + + case InfoType::LineTableInfo: { + std::vector line_table; + LineTable line_parser(InfoData); + dumpLineTable(OS, StartAddr, InfoData, 4); + } break; + + case InfoType::InlineInfo: { + InlineInfo InlineInfo; + uint32_t InlineOffset = 0; + if (InlineInfo.decode(InfoData, InlineOffset, StartAddr)) + InlineInfo.dump(OS, *this, 4); + else + OS << "error: failed to decode inline info\n"; + } break; + } + } +} + +bool GsymReader::dumpAddressInfos(llvm::raw_ostream 
&OS, + StringRef NameCstr) const { + uint32_t NameIdx = StrTab.find(NameCstr); + if (NameIdx == 0) + return false; + bool success = false; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + const auto AddrInfoOffset = getAddressInfoOffset(i); + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (!GSYMData.readObject(AddrInfo) && AddrInfo->Name == NameIdx) { + dumpAddressInfo(OS, i); + success = true; + } + } + return success; +} + +void GsymReader::dumpFileTable(llvm::raw_ostream &OS) const { + const auto NumFiles = Files.size(); + OS << "Files:\n"; + const size_t index_width = unsigned_width(NumFiles); + for (uint32_t i = 0; i < NumFiles; ++i) { + OS << "files[" << format_decimal(i, index_width) << "] " << + format_hex(Files[i].Dir, 10) << ", " << format_hex(Files[i].Base, 10); + StringRef Dir(StrTab[Files[i].Dir]); + StringRef Base(StrTab[Files[i].Base]); + if (Dir.empty()) { + if (!Base.empty()) + OS << " (\"" << Base << "\")"; + } else { + assert(!Base.empty()); + OS << " (\"" << Dir << '/' << Base << "\")"; + } + OS << '\n'; + } +} + +void GsymReader::dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr, + DataExtractor &LineData, uint32_t Depth) const { + LineTable parser(LineData); + std::vector line_table = parser.parseAllEntries(BaseAddr); + for (const auto &Line : line_table) { + auto File = getFile(Line.File); + auto Dir = StrTab[File.Dir]; + auto Base = StrTab[File.Base]; + OS.indent(Depth); + OS << format_hex(Line.Addr, 18) << ": " << Dir << '/' << + Base << ':' << Line.Line << '\n'; + } +} + +bool GsymReader::findAddressInfo(uint64_t Addr, LookupInfo &Info) const { + if (Addr < GSYMHeader->BaseAddress || GSYMHeader->NumAddresses == 0) + return false; + const uint64_t AddrOffset = Addr - GSYMHeader->BaseAddress; + Info.clear(); + + switch (GSYMHeader->AddrOffSize) { + case 1: { + auto First = AddrOffsets.data(); + auto 
Last = First + GSYMHeader->NumAddresses; + auto Iter = std::lower_bound(First, Last, AddrOffset); + if (Iter == Last || AddrOffset < *Iter) + --Iter; + Info.AddrInfoIndex = std::distance(First, Iter); + Info.MatchAddrOffset = *Iter; + break; + } + case 2: { + auto First = reinterpret_cast(AddrOffsets.data()); + auto Last = First + GSYMHeader->NumAddresses; + auto Iter = std::lower_bound(First, Last, AddrOffset); + if (Iter == Last || AddrOffset < *Iter) + --Iter; + Info.AddrInfoIndex = std::distance(First, Iter); + Info.MatchAddrOffset = *Iter; + break; + } + case 4: { + auto First = reinterpret_cast(AddrOffsets.data()); + auto Last = First + GSYMHeader->NumAddresses; + auto Iter = std::lower_bound(First, Last, AddrOffset); + if (Iter == Last || AddrOffset < *Iter) + --Iter; + Info.AddrInfoIndex = std::distance(First, Iter); + Info.MatchAddrOffset = *Iter; + break; + } + case 8: { + auto First = reinterpret_cast(AddrOffsets.data()); + auto Last = First + GSYMHeader->NumAddresses; + auto Iter = std::lower_bound(First, Last, AddrOffset); + if (Iter == Last || AddrOffset < *Iter) + --Iter; + Info.AddrInfoIndex = std::distance(First, Iter); + Info.MatchAddrOffset = *Iter; + break; + } + default: + break; + } + + if (Info.AddrInfoIndex < GSYMHeader->NumAddresses) { + auto AddrInfoOffset = AddrInfoOffsets[Info.AddrInfoIndex]; + const struct AddressInfo *AddrInfo = nullptr; + BinaryStreamReader GSYMData(GsymBytes, + support::endian::system_endianness()); + GSYMData.setOffset(AddrInfoOffset); + if (!GSYMData.readObject(AddrInfo)) { + // Make sure the address is within the bounds of the address info's size + auto FuncOffset = AddrOffset - Info.MatchAddrOffset; + // If an entry has zero size, then we will match it regardless of the + // size. These are typically symbols in the symbol table. 
+ if (AddrInfo->Size == 0 || FuncOffset < AddrInfo->Size) { + Info.AddrInfo = AddrInfo; + return true; + } + } + } + return false; +} + +bool GsymReader::lookup(uint64_t Addr, LookupResult &result) const { + result.clear(); + LookupInfo Info; + if (!findAddressInfo(Addr, Info)) + return false; + + result.Addr = GSYMHeader->BaseAddress + Info.MatchAddrOffset; + result.EndAddr = result.Addr + Info.AddrInfo->Size; + + LineEntry Line; + InlineInfo InlineInfo; + DataExtractor data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + uint32_t IT; + while ((IT = data.getU32(&Offset))) { + uint32_t InfoLength = data.getU32(&Offset); + DataExtractor InfoData = data.getSlice(&Offset, InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: { + std::vector line_table; + LineTable line_parser(InfoData); + Line = line_parser.lookup(result.Addr, Addr); + } break; + case InfoType::InlineInfo: { + uint32_t InfoOffset = 0; + InlineInfo.decode(InfoData, InfoOffset, result.Addr, Addr); + break; + } + default: + break; + } + } + if (Line.isValid()) { + auto File = getFile(Line.File); + std::vector InlineStack; + SourceLocation SrcLoc; + InlineInfo.getInlineStack(Addr, InlineStack); + + if (!InlineStack.empty()) { + auto prev = InlineStack.front(); + // First entry in inline callstack. The file and line come from + // the "Line", and the name comes from the inline info. + SrcLoc.Name = StrTab[prev->Name]; + SrcLoc.Dir = StrTab[File.Dir]; + SrcLoc.Base = StrTab[File.Base]; + SrcLoc.Line = Line.Line; + result.Locations.emplace_back(std::move(SrcLoc)); + + // Rest of inlined functions. 
Note that we don't have to add last + // (non-inlined) function explicitly because the root InlineInfo node + // has the same name as the function + for (auto it = InlineStack.begin() + 1; it != InlineStack.end(); it++) { + auto ii = *it; + SrcLoc.Name = StrTab[ii->Name]; + auto CallFile = getFile(prev->CallFile); + SrcLoc.Dir = StrTab[CallFile.Dir]; + SrcLoc.Base = StrTab[CallFile.Base]; + SrcLoc.Line = prev->CallLine; + result.Locations.push_back(SrcLoc); + prev = ii; + } + } else { + SrcLoc.Name = StrTab[Info.AddrInfo->Name]; + SrcLoc.Dir = StrTab[File.Dir]; + SrcLoc.Base = StrTab[File.Base]; + SrcLoc.Line = Line.Line; + result.Locations.push_back(SrcLoc); + } + } else { + SourceLocation SrcLoc; + SrcLoc.Name = StrTab[Info.AddrInfo->Name]; + result.Locations.push_back(SrcLoc); + } + return true; +} + +bool GsymReader::getFunctionInfo(uint64_t Addr, FunctionInfo &FuncInfo) const { + LookupInfo Info; + if (!findAddressInfo(Addr, Info)) + return false; + FuncInfo.clear(); + FuncInfo.setStartAddress(getAddress(Info.AddrInfoIndex)); + FuncInfo.setSize(Info.AddrInfo->Size); + if (Info.AddrInfo->Size == 0) { + // The byte size is zero, see if there is another address after this. + // Set the size accordingly if there is one using the delta between the + // matching address and the next address. 
+ const uint64_t NextAddress = getAddress(Info.AddrInfoIndex+1); + if (NextAddress != UINT64_MAX && NextAddress > FuncInfo.startAddress()) { + FuncInfo.setSize(NextAddress - FuncInfo.startAddress()); + } + } + FuncInfo.Name = Info.AddrInfo->Name; + + DataExtractor data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + while (uint32_t IT = data.getU32(&Offset)) { + uint32_t InfoLength = data.getU32(&Offset); + DataExtractor InfoData = data.getSlice(&Offset, InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: { + LineTable parser(InfoData); + FuncInfo.Lines = parser.parseAllEntries(FuncInfo.startAddress()); + } break; + case InfoType::InlineInfo: { + uint32_t InlineOffset = 0; + FuncInfo.InlineInfo.decode(InfoData, InlineOffset, + FuncInfo.startAddress()); + break; + } + default: + break; + } + } + return true; +} + +void GsymReader::unmap() { + MemBuffer.reset(); + GsymBytes = StringRef(); + GSYMHeader = nullptr; + AddrOffsets = ArrayRef(); + AddrInfoOffsets = ArrayRef(); + Files = ArrayRef(); + StrTab.clear(); +} + +GsymReader::~GsymReader() { unmap(); } Index: lib/DebugInfo/GSYM/GsymStreamer.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymStreamer.cpp +++ lib/DebugInfo/GSYM/GsymStreamer.cpp @@ -0,0 +1,92 @@ +//===- GSYMStreamer.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/WithColor.h" + +namespace llvm { +namespace gsym { + +static inline bool error(Twine Error, Twine Context = {}) { + WithColor::error() << Error + "\n"; + if (!Context.isTriviallyEmpty()) + WithColor::note() << Twine("while processing ") + Context + "\n"; + return false; +} + +bool GSYMStreamer::init(Triple TheTriple) { + std::string ErrorStr; + std::string TripleName; + StringRef Context = "gsym streamer init"; + + // Get the target. + const Target *TheTarget = + TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr); + if (!TheTarget) + return error(ErrorStr, Context); + TripleName = TheTriple.getTriple(); + + // Create all the MC Objects. + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) + return error(Twine("no register info for target ") + TripleName, Context); + + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) + return error("no asm info for target " + TripleName, Context); + + MOFI.reset(new MCObjectFileInfo); + MC.reset(new MCContext(MAI.get(), MRI.get(), MOFI.get())); + MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, *MC); + + MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + if (!MSTI) + return error("no subtarget info for target " + TripleName, Context); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); + if (!MAB) + return error("no asm backend for target " + TripleName, Context); + + MII.reset(TheTarget->createMCInstrInfo()); + if (!MII) + return error("no instr info info for target " + TripleName, Context); + + MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC); + if (!MCE) + return error("no 
code emitter for target " + TripleName, Context); + + MS = TheTarget->createMCObjectStreamer( + TheTriple, *MC, std::unique_ptr(MAB), + MAB->createObjectWriter(OutFile), std::unique_ptr(MCE), + *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false); + + if (!MS) + return error("no object streamer for target " + TripleName, Context); + + // Finally create the AsmPrinter we'll use to emit the DIEs. + TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(), + None)); + if (!TM) + return error("no target machine for target " + TripleName, Context); + + Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr(MS))); + if (!Asm) + return error("no asm printer for target " + TripleName, Context); + + return true; +} + +} // namespace gsym +} // namespace llvm Index: lib/DebugInfo/GSYM/InlineInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/InlineInfo.cpp +++ lib/DebugInfo/GSYM/InlineInfo.cpp @@ -0,0 +1,164 @@ +//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE(review): the two system header names below were lost when this patch
+// was extracted; they are restored from what the file uses - confirm.
+#include <algorithm>
+
+#include <inttypes.h>
+
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Serialize this inline entry and, recursively, its children to \p O.
+///
+/// Layout: a ULEB range count, then one (offset, size) ULEB pair per range
+/// with the offset taken relative to \p BaseAddr, a one byte "has children"
+/// flag, the 32 bit Name, ULEB CallFile and CallLine, and finally the children
+/// followed by a zero ULEB terminator. An entry without ranges is written as
+/// a lone zero count, which is also what terminates a sibling chain.
+void InlineInfo::write(FileWriter &O, uint64_t BaseAddr) const {
+  O.WriteULEB(Ranges.size());
+  if (Ranges.empty())
+    return;
+  for (const auto &Range : Ranges) {
+    auto Offset = Range.Start - BaseAddr;
+    auto Size = Range.End - Range.Start;
+    O.WriteULEB(Offset);
+    O.WriteULEB(Size);
+  }
+  bool HasChildren = !Children.empty();
+  O.WriteU8(HasChildren);
+  O.WriteU32(Name);
+  O.WriteULEB(CallFile);
+  O.WriteULEB(CallLine);
+  if (HasChildren) {
+    // Children are encoded relative to the start of our first range.
+    for (const auto &child : Children)
+      child.write(O, Ranges.front().Start);
+    O.WriteULEB(0); // Terminate child sibling chain
+  }
+}
+
+/// Decode a complete inline tree starting at \p Offset in \p Data.
+///
+/// \p Offset is advanced past everything that is read. Range offsets are
+/// rebased onto \p BaseAddr.
+///
+/// \returns false when the entry at \p Offset is a terminator (zero ranges),
+/// true when an entry was decoded into this object.
+bool InlineInfo::decode(DataExtractor &Data, uint32_t &Offset,
+                        uint64_t BaseAddr) {
+  auto NumRanges = Data.getULEB128(&Offset);
+  if (NumRanges == 0)
+    return false;
+  Ranges.reserve(NumRanges);
+  for (size_t I = 0; I < NumRanges; ++I) {
+    auto Off = Data.getULEB128(&Offset);
+    auto Size = Data.getULEB128(&Offset);
+    Ranges.emplace_back(AddressRange(BaseAddr + Off, BaseAddr + Off + Size));
+  }
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Name = Data.getU32(&Offset);
+  CallFile = (uint32_t)Data.getULEB128(&Offset);
+  CallLine = (uint32_t)Data.getULEB128(&Offset);
+  if (HasChildren) {
+    InlineInfo child;
+    // Siblings follow each other until the zero range count terminator.
+    while (child.decode(Data, Offset, Ranges.front().Start)) {
+      Children.emplace_back(std::move(child));
+      child.clear();
+    }
+  }
+  return true;
+}
+
+/// Decode only the parts of an inline tree that contain \p LookupAddr.
+///
+/// Ranges that do not contain \p LookupAddr are read but not stored, and
+/// children are only kept when they ended up with at least one range. Passing
+/// UINT64_MAX as \p LookupAddr requests a pure skip: the entry and all of its
+/// children are consumed from \p Data without storing ranges or call site
+/// information.
+///
+/// \returns false when the entry at \p Offset is a terminator (zero ranges),
+/// true otherwise, even if no range matched.
+bool InlineInfo::decode(DataExtractor &Data, uint32_t &Offset,
+                        uint64_t BaseAddr, uint64_t LookupAddr) {
+  const auto NumRanges = Data.getULEB128(&Offset);
+  if (NumRanges == 0)
+    return false;
+
+  // LookupAddr is unsigned, so the "skip only" request (which the recursive
+  // calls used to spell -1) arrives as UINT64_MAX and can never be negative.
+  const bool SkipOnly = LookupAddr == UINT64_MAX;
+
+  uint64_t ChildBaseAddr = 0;
+  for (uint64_t I = 0; I < NumRanges; ++I) {
+    const auto Off = Data.getULEB128(&Offset);
+    const auto Size = Data.getULEB128(&Offset);
+    const auto Start = BaseAddr + Off;
+    // Children are encoded relative to the start of the first range.
+    if (I == 0)
+      ChildBaseAddr = Start;
+    if (SkipOnly)
+      continue;
+    const auto End = Start + Size;
+    if (Start <= LookupAddr && End > LookupAddr)
+      Ranges.emplace_back(AddressRange(Start, End));
+  }
+
+  const bool HasChildren = Data.getU8(&Offset) != 0;
+  Name = Data.getU32(&Offset);
+  // The call site is always consumed, but only stored for a real lookup.
+  const auto File = Data.getULEB128(&Offset);
+  const auto Line = Data.getULEB128(&Offset);
+  if (!SkipOnly) {
+    CallFile = (uint32_t)File;
+    CallLine = (uint32_t)Line;
+  }
+  if (HasChildren) {
+    InlineInfo Child;
+    // If this inlined function does not contain LookupAddr none of its
+    // children can, so they are skipped instead of decoded.
+    const uint64_t ChildLookupAddr = Ranges.empty() ? UINT64_MAX : LookupAddr;
+    while (Child.decode(Data, Offset, ChildBaseAddr, ChildLookupAddr)) {
+      if (!Child.Ranges.empty())
+        Children.emplace_back(std::move(Child));
+      Child.clear();
+    }
+  }
+  return true;
+}
+
+/// Print this entry on one line, indented by \p Depth spaces, followed by its
+/// children at Depth + 1. Strings and the call site file are resolved through
+/// \p GSYM.
+void InlineInfo::dump(llvm::raw_ostream &OS, const GsymReader &GSYM,
+                      unsigned Depth) const {
+  // Indent with spaces based on Depth
+  OS.indent(Depth);
+  bool First = true;
+  for (const auto &Range : Ranges) {
+    if (First)
+      First = false;
+    else
+      OS << ' ';
+    Range.dump(OS);
+  }
+  if (Name)
+    OS << ' ' << GSYM.getString(Name);
+  if (CallFile) {
+    auto file_entry = GSYM.getFile(CallFile);
+    OS << " called from " << GSYM.getString(file_entry.Dir) << '/' <<
+        GSYM.getString(file_entry.Base) << ":" << CallLine;
+  }
+  OS << '\n';
+  for (const auto &child : Children)
+    child.dump(OS, GSYM, Depth + 1);
+}
+
+/// Collect the chain of inline entries that contain \p Addr.
+///
+/// Every matching entry with a non zero Name is inserted at the front of
+/// \p InlineStack, so the deepest entry ends up first.
+///
+/// \returns true if one of this entry's ranges contains \p Addr.
+bool InlineInfo::getInlineStack(
+    uint64_t Addr, std::vector<const InlineInfo *> &InlineStack) const {
+  for (const auto &Range : Ranges) {
+    if (Range.contains(Addr)) {
+      if (Name > 0)
+        InlineStack.insert(InlineStack.begin(), this);
+      // At most one child is followed: stop at the first one that matches.
+      for (const auto &child : Children) {
+        if (child.getInlineStack(Addr, InlineStack))
+          break;
+      }
+      return true;
+    }
+  }
+  return false;
+}
Index: lib/DebugInfo/GSYM/LineTable.cpp
===================================================================
--- lib/DebugInfo/GSYM/LineTable.cpp
+++ lib/DebugInfo/GSYM/LineTable.cpp
@@ -0,0 +1,250 @@
+//===- LineTable.cpp --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+
+using namespace llvm;
+using namespace gsym;
+
+// type summary add -s "delta=${var.delta}, count=${var.count}" "(anonymous
+// namespace)::DeltaInfo"
+/// One distinct line delta seen in a function together with the number of
+/// times it occurred. LineTable::write() keeps these sorted by delta.
+struct DeltaInfo {
+  int64_t delta;
+  uint32_t count;
+  DeltaInfo(int64_t d, uint32_t c) : delta(d), count(c) {}
+};
+
+/// Ordering used by std::lower_bound to find a delta in a sorted DeltaInfo
+/// list.
+inline bool operator<(const DeltaInfo &lhs, int64_t delta) {
+  return lhs.delta < delta;
+}
+
+/// Try to fold a line delta and an address delta into one special opcode.
+///
+/// \returns true and sets \p special_opcode when \p line_delta lies within
+/// [\p min_line_delta, \p max_line_delta] and the resulting opcode fits into
+/// a byte; false otherwise, in which case \p special_opcode is untouched.
+static bool encode_special(int64_t min_line_delta, int64_t max_line_delta,
+                           int64_t line_delta, uint64_t addr_delta,
+                           uint8_t &special_opcode) {
+  if (line_delta < min_line_delta)
+    return false;
+  if (line_delta > max_line_delta)
+    return false;
+  // A special opcode is a single byte, so an address delta above 255 can
+  // never fit. Rejecting it here also keeps the unsigned multiplication below
+  // from wrapping around into a value that looks like a valid opcode.
+  if (addr_delta > 255)
+    return false;
+  int64_t line_range = max_line_delta - min_line_delta + 1;
+  int64_t adjusted_opcode =
+      ((line_delta - min_line_delta) + addr_delta * line_range);
+  int64_t opcode = adjusted_opcode + LTOC_FirstSpecial;
+  if (opcode < 0)
+    return false;
+  if (opcode > 255)
+    return false;
+  special_opcode = (uint8_t)opcode;
+  return true;
+}
+
+/// Run the line table program in \p Data and report each row.
+///
+/// The program starts with the minimum and maximum line delta (SLEB128) and
+/// the first line (ULEB128), followed by opcodes. \p row_callback is invoked
+/// for every row that is pushed; parsing stops as soon as it returns false or
+/// when LTOC_EndSequence is reached.
+static void parse(DataExtractor &Data, uint64_t BaseAddr,
+                  std::function<bool(const LineEntry &Row)> const &row_callback) {
+  uint32_t offset = 0;
+  int64_t min_delta = Data.getSLEB128(&offset);
+  int64_t max_delta = Data.getSLEB128(&offset);
+  int64_t line_range = max_delta - min_delta + 1;
+  // Special opcodes are split with "% line_range" and "/ line_range" below;
+  // a corrupt header must not be allowed to turn that into a division by
+  // zero.
+  if (line_range <= 0)
+    return;
+  uint32_t first_line = (uint32_t)Data.getULEB128(&offset);
+  LineEntry row(BaseAddr, 1, first_line);
+  bool done = false;
+  while (!done) {
+    uint8_t opcode = Data.getU8(&offset);
+    switch (opcode) {
+    case LTOC_EndSequence:
+      done = true;
+      break;
+    case LTOC_SetFile:
+      row.File = (uint32_t)Data.getULEB128(&offset);
+      break;
+    case LTOC_AdvancePC: {
+      auto delta = Data.getULEB128(&offset);
+      row.Addr += delta;
+      // If the function callback returns false, we stop parsing
+      if (row_callback(row) == false)
+        return;
+    } break;
+    case LTOC_AdvanceLine: {
+      auto delta = Data.getSLEB128(&offset);
+      row.Line += delta;
+    } break;
+    default: {
+      // A byte that contains both address and line increment
+      uint8_t adjusted_opcode = opcode - LTOC_FirstSpecial;
+      int64_t line_delta = min_delta + (adjusted_opcode % line_range);
+      uint64_t addr_delta = (adjusted_opcode / line_range);
+      row.Line += line_delta;
+      row.Addr += addr_delta;
+      // If the function callback returns false, we stop parsing
+      if (row_callback(row) == false)
+        return;
+      break;
+    }
+    }
+  }
+}
+
+/// Encode the line entries of \p func_info as a LineTableInfo chunk in
+/// \p out.
+///
+/// The chunk consists of the 32 bit InfoType, a 32 bit byte length that is
+/// patched in at the end, the chosen minimum and maximum line delta, the
+/// first line number and one opcode sequence per line entry, terminated by
+/// LTOC_EndSequence.
+///
+/// \returns false, writing nothing, when \p func_info has no line entries.
+bool LineTable::write(FileWriter &out, const FunctionInfo &func_info) {
+  if (func_info.Lines.empty())
+    return false;
+  // Write out the unsigned InfoType::LineTableInfo enum
+  out.WriteU32(static_cast<uint32_t>(InfoType::LineTableInfo));
+  // Write out a zero byte size of this chunk for now, we will fixup later
+  const off_t line_table_length_offset = out.Tell();
+  out.WriteU32(0);
+  const off_t line_table_start = line_table_length_offset + 4;
+  int64_t min_line_delta = INT64_MAX;
+  int64_t max_line_delta = INT64_MIN;
+  // Distinct line deltas, kept sorted by delta, with their frequencies.
+  std::vector<DeltaInfo> delta_infos;
+  if (func_info.Lines.size() == 1) {
+    min_line_delta = 0;
+    max_line_delta = 0;
+  } else {
+    int64_t prev_line = 1;
+    bool first = true;
+    for (const auto &line_entry : func_info.Lines) {
+      if (first)
+        first = false;
+      else {
+        int64_t line_delta = (int64_t)line_entry.Line - prev_line;
+        auto end = delta_infos.end();
+        auto pos = std::lower_bound(delta_infos.begin(), end, line_delta);
+        if (pos != end && pos->delta == line_delta)
+          ++pos->count;
+        else
+          delta_infos.insert(pos, DeltaInfo(line_delta, 1));
+        if (line_delta < min_line_delta)
+          min_line_delta = line_delta;
+        if (line_delta > max_line_delta)
+          max_line_delta = line_delta;
+      }
+      prev_line = (int64_t)line_entry.Line;
+    }
+    assert(min_line_delta <= max_line_delta);
+  }
+  // Set the min and max line delta intelligently based on the counts of
+  // the line deltas if our range is too large: pick the window of deltas no
+  // wider than max_line_range that covers the most line entries.
+  const int64_t max_line_range = 14;
+  if (max_line_delta - min_line_delta > max_line_range) {
+    uint32_t best_index = 0;
+    uint32_t best_end_index = 0;
+    uint32_t best_count = 0;
+    const size_t num_delta_infos = delta_infos.size();
+    for (uint32_t i = 0; i < num_delta_infos; ++i) {
+      const int64_t first_delta = delta_infos[i].delta;
+      uint32_t curr_count = 0;
+      uint32_t j;
+      for (j = i; j < num_delta_infos; ++j) {
+        auto line_range = delta_infos[j].delta - first_delta;
+        if (line_range > max_line_range)
+          break;
+        curr_count += delta_infos[j].count;
+      }
+      if (curr_count > best_count) {
+        best_index = i;
+        best_end_index = j - 1;
+        best_count = curr_count;
+      }
+    }
+    min_line_delta = delta_infos[best_index].delta;
+    max_line_delta = delta_infos[best_end_index].delta;
+  }
+  if (min_line_delta == max_line_delta && min_line_delta > 0 &&
+      min_line_delta < max_line_range)
+    min_line_delta = 0;
+  assert(min_line_delta <= max_line_delta);
+
+  // Initialize the line entry state as a starting point. All line entries
+  // will be deltas from this.
+  LineEntry prev(func_info.Range.startAddress(), 1, func_info.Lines.front().Line);
+
+  // Write out the min and max line delta as signed LEB128
+  out.WriteSLEB(min_line_delta);
+  out.WriteSLEB(max_line_delta);
+  // Write out the starting line number as a unsigned LEB128
+  out.WriteULEB(prev.Line);
+
+  for (const auto &curr : func_info.Lines) {
+    assert(curr.Addr >= prev.Addr);
+    uint64_t addr_delta = curr.Addr - prev.Addr;
+    int64_t line_delta = 0;
+    if (curr.Line > prev.Line)
+      line_delta = curr.Line - prev.Line;
+    else if (prev.Line > curr.Line)
+      line_delta = -((int32_t)(prev.Line - curr.Line));
+
+    // Set the file if it doesn't match the current one.
+    if (curr.File != prev.File) {
+      out.WriteU8(LTOC_SetFile);
+      out.WriteULEB(curr.File);
+    }
+
+    uint8_t special_op;
+    if (encode_special(min_line_delta, max_line_delta, line_delta, addr_delta,
+                       special_op)) {
+      // Advance the PC and line and push a row
+      out.WriteU8(special_op);
+    } else {
+      // We can't encode the address delta and line delta into
+      // a single special opcode, we must do them separately
+
+      // Advance the line
+      if (line_delta != 0) {
+        out.WriteU8(LTOC_AdvanceLine);
+        out.WriteSLEB(line_delta);
+      }
+
+      // Advance the PC and push a row
+      out.WriteU8(LTOC_AdvancePC);
+      out.WriteULEB(addr_delta);
+    }
+    prev = curr;
+  }
+  out.WriteU8(LTOC_EndSequence);
+
+  // Fixup the line table byte size
+  const off_t line_table_length = out.Tell() - line_table_start;
+  out.Fixup32((uint32_t)line_table_length, line_table_length_offset);
+  return true;
+}
+
+// Parse all line table entries into the "line_table" vector. We can
+// cache the results of this if needed, or we can call LineTable::lookup()
+// below.
+std::vector<LineEntry> LineTable::parseAllEntries(uint64_t BaseAddr) {
+  std::vector<LineEntry> line_table;
+  parse(Data, BaseAddr, [&line_table](const LineEntry &row) -> bool {
+    line_table.push_back(row);
+    return true; // Keep parsing by returning true
+  });
+  return line_table;
+}
+// Parse the line table on the fly and find the row we are looking for.
+// We will need to determine if we need to cache the line table by calling
+// LineTable::parseAllEntries(...) or just call this function each time.
+// There is a CPU vs memory tradeoff we will need to determine.
+//
+// The result is the last row whose address is not greater than Addr; if the
+// very first row already lies above Addr a default constructed LineEntry is
+// returned.
+LineEntry LineTable::lookup(uint64_t BaseAddr, uint64_t Addr) {
+  LineEntry result;
+  parse(Data, BaseAddr, [Addr, &result](const LineEntry &row) -> bool {
+    if (Addr < row.Addr)
+      return false; // Stop parsing, result contains the line table row!
+    result = row;
+    if (Addr == row.Addr) {
+      // Stop parsing, this is the row we are looking for since the address
+      // matches.
+      return false;
+    }
+    return true; // Keep parsing till we find the right row
+  });
+  return result;
+}
Index: lib/DebugInfo/GSYM/LookupResult.cpp
===================================================================
--- lib/DebugInfo/GSYM/LookupResult.cpp
+++ lib/DebugInfo/GSYM/LookupResult.cpp
@@ -0,0 +1,57 @@
+//===- LookupResult.cpp -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Build the source path of the location at index \p idx.
+///
+/// Directory and base name are joined with sys::path::append when both are
+/// present; otherwise whichever one is set is returned. The result is empty
+/// when \p idx is out of range or the location has neither part.
+std::string LookupResult::getSourceFile(uint32_t idx) const {
+  std::string FullPath;
+  if (idx >= Locations.size())
+    return FullPath;
+
+  const auto &Loc = Locations[idx];
+  if (Loc.Dir.empty()) {
+    if (!Loc.Base.empty())
+      FullPath = Loc.Base;
+  } else if (Loc.Base.empty()) {
+    FullPath = Loc.Dir;
+  } else {
+    llvm::SmallString<64> Storage;
+    llvm::sys::path::append(Storage, Loc.Dir, Loc.Base);
+    FullPath.assign(Storage.begin(), Storage.end());
+  }
+  return FullPath;
+}
+
+/// Print the looked up address range followed by one line per location.
+///
+/// Locations after the first are indented so they line up below it, and
+/// every location except the last one is tagged as inlined.
+void LookupResult::dump(llvm::raw_ostream &OS) const {
+  OS << '[' << format_hex(Addr, 18) << " - "<< format_hex(EndAddr, 18)
+     << "): ";
+  const size_t Count = Locations.size();
+  for (size_t Idx = 0; Idx != Count; ++Idx) {
+    const auto &Loc = Locations[Idx];
+    if (Idx != 0)
+      OS.indent(43);
+    OS << Loc.Name;
+    if (!Loc.Base.empty()) {
+      OS << " @ ";
+      if (!Loc.Dir.empty())
+        OS << Loc.Dir << '/';
+      OS << Loc.Base << ':' << Loc.Line;
+    }
+
+    // Only the last location is the concrete, non inlined function.
+    if (Idx + 1 != Count)
+      OS << " [inlined]";
+    OS << '\n';
+  }
+}
Index: lib/DebugInfo/GSYM/README.md
===================================================================
--- lib/DebugInfo/GSYM/README.md
+++ lib/DebugInfo/GSYM/README.md
@@ -0,0 +1,135 @@
+# GSYM Introduction
+
+GSYM is a symbolication file format that is designed to be the best format to use for symbolicating addresses into function name + source file + line information. It is a binary file format designed to be mapped into one or more processes. GSYM information can be created by converting DWARF debug information, or Breakpad files.
GSYM information can exist as a stand alone file, or be contained in ELF or mach-o files in a section. When embedded into ELF or mach-o files, GSYM sections can share a string table that already exists within a file. + +## Why use GSYM? +GSYM files are up to 7x smaller than DWARF files and up to 3x smaller than Breakpad files. The file format is designed to touch as few pages of the file as possible while doing address lookups. GSYM files can be mmap'ed into a process as shared memory allowing multiple processes on a symbolication server to share loaded GSYM pages. The file format includes inline call stack information and can help turn a single address lookup into multiple stack frames that walk the inlined call stack back to the concrete function that invoked these functions. + +## Converting DWARF Files to GSYM +`llvm-gsymutil` is available in the `llvm/tools/gsym` directory and has options to convert DWARF into GSYM files. `llvm-gsymutil` has a `-dwarf` option that specifies a DWARF file to convert into a GSYM file. The output file can be specified with the `-out-file` option. +``` +$ llvm-gsymutil -dwarf /tmp/a.out -out-file /tmp/a.out.gsym +``` + + This command will convert a DWARF file into the GSYM file format. This allows clients that are currently symbolicating with DWARF to switch to using the GSYM file format. This tool could be used in a symbolication workflow where symbolication servers convert DWARF to GSYM and cache the results on the fly, or could be used at build time to always produce a GSYM file. DWARF debug information is rich enough to support encoding the inline call stack information for richer and more useful symbolication backtraces. + + ## Converting Breakpad Files to GSYM + +`llvm-gsymutil` has a `-breakpad` option that specifies a Breakpad file to convert into a GSYM file. The output file can be specified with the `-out-file` option. 
+``` +$ llvm-gsymutil -breakpad /tmp/foo.sym -out-file /tmp/foo.gsym +``` +This allows clients currently using breakpad to switch over to use GSYM files. This tool could be used in a symbolication workflow where symbolication servers convert breakpad to GSYM format on the fly only when needed. Breakpad files do not contain inline call stack information, so it is advisable to use `llvm-gsymutil -dwarf` when possible to avoid losing this vital information. + +## File Format Overview +The GSYM file consists of a header, address table, address info offset table and address info data for each address. + +The GSYM file format when in a stand alone file is ordered as shown: +- Header +- Address Table +- Address Data Offsets Table +- File Table +- String Table +- Address Data + + +### Header +``` +#define GSYM_MAGIC 0x4753594d +#define GSYM_VERSION 1 +struct Header { + uint32_t magic; + uint16_t version; + uint8_t addr_off_size; + uint8_t uuid_size; + uint64_t base_address; + uint32_t num_addrs; + uint32_t strtab_offset; + uint32_t strtab_size; + uint8_t uuid[20]; +}; +``` + +The magic value is set to `GSYM_MAGIC` and allows quick and easy detection of this file format when it is loaded. Addresses in the address table are stored as offsets from a 64 bit address found in `Header.base_address`. This allows the address table to contain 32, 16 or 8 bit offsets, instead of a table of full sized addresses. The file size is smaller and causes fewer pages to be touched during address lookups when the address table is smaller. The size of the address offsets in the address table is specified in the header in `Header.addr_off_size`. The header contains a UUID to ensure the GSYM file can be properly matched to the object ELF or mach-o file that created the stack trace. The header specifies the location of the string table for all strings contained in the GSYM file, or can point to an existing string table within an ELF or mach-o file. 
+ +### Address Table +The address table immediately follows the header in the file and consists of `Header.num_addrs` address offsets. These offsets are sorted and can be binary searched for efficient lookups. Address offsets are encoded as offsets that are `Header.addr_off_size` bytes in size. During address lookup, the index of the matching address offset will be the index into the address data offsets table. + +### Address Data Offsets Table +The address data offsets table immediately follows the address table and +consists of `Header.num_addrs` 32 bit offsets: one for each address in the +address table. The offsets in this table are relative to the first byte of the +GSYM header. The offsets point to the address data for each address in the +address table. Keeping this data separate from the address table helps to +reduce the number of pages that are touched when address lookups occur. + +### File Table +The file table immediately follows the address data offsets table. The format of the `FileTable` is: + +``` +struct FileTable { + uint32_t count; + FileInfo files[]; +}; +``` +The file table starts with a 32 bit count of the number of files that are used in all of the address data, followed by that number of `FileInfo` structures. + +Each file in the file table is represented with a `FileInfo` structure: + +``` +struct FileInfo { + uint32_t directory; + uint32_t filename; +}; +``` + +The FileInfo structure has the file path split into a string for the directory and a string for the filename. The directory and filename are specified as offsets into the string table. Splitting paths into directory and file base name allows GSYM to use the same string table entry for common directories. + +### String Table +The string table follows the file table in stand alone GSYM files and contains +all strings for everything contained in the GSYM file. 
Any string data should +be added to the string table and any references to strings inside GSYM +information must be stored as 32 bit string table offsets into this string +table. Strings are stored as NULL terminated UTF8 strings. The format of the +string table starts with an empty string at offset zero followed by zero or +more strings. The format is the same as the DWARF .debug_str format with an +additional restriction of being required to start with an empty string. The string +table is specified in the GSYM header with the `Header.strtab_offset` and +`Header.strtab_size` fields. The `Header.strtab_offset` is an absolute offset in +the file for the string table. This allows the string table to share other +string table sections that might exist in the file when the GSYM data is a +section within an object file. + +### Address Data +The address data is the payload that contains information about the address that is being looked up. The structure that represents this data is: +``` +struct AddressInfo { + uint32_t size; + uint32_t name; + AddressData data[]; +}; +``` +It starts with a 32 bit size for the address range of the function and is followed by the 32 bit string table offset for the name of the function. The size of the address range is important to encode as it stops address lookups from matching if the address is between two functions in some padding. This is followed by an array of address data information: +``` +struct AddressData { + uint32_t type; + uint32_t length; + uint8_t data[length]; +}; +``` +The address data starts with a 32 bit type, followed by a 32 bit length, followed by an array of bytes that encode each specific kind of data. +The `AddressData.type` is an enumeration value: +``` +enum class InfoType { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; +``` +The `AddressInfo.data[]` is encoded as a vector of AddressData structs that is terminated by an `AddressData` struct whose type is set to `InfoType.EndOfList`. 
This allows the GSYM file format to contain arbitrary data for any address range and allows us to expand the GSYM capabilities as we find more uses for it. + +`InfoType::EndOfList` is always the last `AddressData` in the `AddressInfo`. + +`InfoType::LineTableInfo` is a modified version of the DWARF line tables that efficiently stores line table information for each function. DWARF stores line table information for an entire source file and includes all functions. Having each function's line table encoded separately allows fewer pages to be touched when looking up the line entry for a specific address. The information is optional and can be omitted for address data that is from a symbol or label where no line table information is available. + +`InfoType::InlineInfo` is a format that encodes inline call stacks. This information is optional and doesn't need to be included for each address. If the function has no inlined functions this data should not be included. Index: lib/DebugInfo/GSYM/Range.cpp =================================================================== --- lib/DebugInfo/GSYM/Range.cpp +++ lib/DebugInfo/GSYM/Range.cpp @@ -0,0 +1,46 @@ +//===- Range.cpp ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE(review): the two system header names below were lost when this patch
+// was extracted; they are restored from what the file uses - confirm.
+#include <algorithm>
+
+#include <inttypes.h>
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Print the range as "[start - end)" with both addresses in hex.
+void AddressRange::dump(llvm::raw_ostream &OS) const {
+  OS << '[' << format_hex(Start, 18) << " - " << format_hex(End, 18) << ")";
+}
+
+/// Insert \p Range into \p Ranges at the position std::upper_bound selects,
+/// which keeps an already sorted list sorted. Ranges are not merged.
+void llvm::gsym::insert(AddressRanges &Ranges, const AddressRange &Range) {
+  Ranges.insert(std::upper_bound(Ranges.begin(), Ranges.end(), Range), Range);
+}
+
+/// Return true if any range in the sorted list \p Ranges contains \p Addr.
+bool llvm::gsym::contains(const AddressRanges &Ranges, uint64_t Addr) {
+  // Cheap rejection of addresses outside of everything the list covers.
+  if (Ranges.empty())
+    return false;
+  if (Addr < Ranges.front().Start)
+    return false;
+  if (Addr >= Ranges.back().End)
+    return false;
+  // The only candidate is the range right before the upper bound of Addr.
+  // When the upper bound is the end of the list that is the last range, so
+  // no separate case is needed for it.
+  auto Begin = Ranges.begin();
+  auto Pos = std::upper_bound(Begin, Ranges.end(), Addr);
+  if (Pos == Begin)
+    return false;
+  --Pos;
+  return Pos->contains(Addr);
+}
Index: tools/gsym/CMakeLists.txt
===================================================================
--- tools/gsym/CMakeLists.txt
+++ tools/gsym/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  DebugInfoDWARF
+  DebugInfoGSYM
+  AsmPrinter
+  AllTargetsDescs
+  AllTargetsInfos
+  MC
+  Object
+  Support
+  Target
+  )
+
+add_llvm_tool(llvm-gsymutil
+  llvm-gsymutil.cpp
+
+  DEPENDS
+  intrinsics_gen
+  )
Index: tools/gsym/llvm-gsymutil.cpp
===================================================================
--- tools/gsym/llvm-gsymutil.cpp
+++ tools/gsym/llvm-gsymutil.cpp
@@ -0,0 +1,461 @@
+//===-- llvm-gsymutil.cpp - GSYM dumping and creation utility for llvm ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/RelocVisitor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+// NOTE(review): the seven system includes below carry no header name in this
+// copy of the patch; the names appear to have been lost in extraction and
+// must be recovered from the original review before this can build.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "llvm/DebugInfo/GSYM/Breakpad.h"
+#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+
+using namespace llvm;
+using namespace gsym;
+using namespace object;
+
+// Format a value as "0x" followed by at least eight zero padded hex digits.
+// HEX32 truncates its argument to 32 bits first; HEX64 prints the full 64 bit
+// value and therefore grows beyond eight digits for values above 0xffffffff.
+#define HEX32(v) format("0x%8.8" PRIx32, (uint32_t)v)
+#define HEX64(v) format("0x%8.8" PRIx64, (uint64_t)v)
+/// @}
+/// Command line options.
+/// @{
+
+namespace {
+using namespace cl;
+
+OptionCategory SpecificOptions("Specific Options");
+OptionCategory DWARFOptions("DWARF Conversion Options");
+OptionCategory BreakpadOptions("Breakpad Conversion Options");
+OptionCategory GSYMOptions("GSYM Options");
+
+static opt<bool> Help("h", desc("Alias for -help"), Hidden,
+                      cat(SpecificOptions));
+// TODO: fix to only accept one file as input
+// NOTE(review): the description below is empty in this copy of the patch; it
+// may have been a "<...>" placeholder that was lost in extraction - confirm.
+static list<std::string>
+    InputFilenames(Positional, desc(""),
+                   ZeroOrMore, cat(SpecificOptions));
+
+static opt<std::string>
+    DWARFFilename("dwarf", cl::init(""),
+                  cl::desc("Convert the specified DWARF file to the GSYM "
+                           "format."),
+                  cl::value_desc("filename"), cat(DWARFOptions));
+
+static opt<std::string>
+    BreakpadFilename("breakpad", cl::init(""),
+                     cl::desc("Convert the specified Breakpad file to the GSYM "
+                              "format."),
+                     cl::value_desc("filename"), cat(BreakpadOptions));
+
+static list<std::string>
+    ArchFilters("arch",
+                desc("Process debug information for the specified CPU "
+                     "architecture only. Architectures may be specified by "
+                     "name or by number. This option can be specified "
+                     "multiple times, once for each desired architecture."),
+                cat(DWARFOptions));
+
+static opt<std::string>
+    OutputFilename("out-file", cl::init(""),
+                   cl::desc("Save converted output to the specified file. "
+                            "Used in conjunction with the -dwarf or -breakpad "
+                            "options."),
+                   cl::value_desc("filename"), cat(SpecificOptions));
+static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
+                                 aliasopt(OutputFilename),
+                                 cat(DWARFOptions));
+// NOTE(review): this option is not read anywhere in the visible code, so its
+// value type (lost in extraction) is assumed to be uint64_t - confirm.
+static opt<uint64_t>
+    SegmentSize("segment-size",
+                desc("Split output file into segments of roughly this size"),
+                cat(DWARFOptions));
+
+static opt<unsigned>
+    NumThreads("threads",
+               desc("Number of threads to use; default is number of cores"),
+               cat(DWARFOptions));
+
+static opt<uint64_t> LookupAddress("address",
+                                   desc("Lookup an address in a GSYM file"),
+                                   cat(GSYMOptions), cl::value_desc("address"),
+                                   cl::init(UINT64_MAX));
+
+static opt<std::string> LookupName("name", desc("Lookup a name in a GSYM file"),
+                                   cat(GSYMOptions), cl::value_desc("name"),
+                                   cl::init(""));
+
+static opt<bool> Verbose("verbose",
+                         desc("Print more low-level encoding details."),
+                         cat(SpecificOptions));
+
+static opt<bool> Verify("verify",
+                        desc("Verify the generated GSYM file against the DWARF "
+                             "by looking up all addresses."),
+                        cat(SpecificOptions));
+
+} // namespace
+/// @}
+//===----------------------------------------------------------------------===//
+
+/// If \p EC holds an error, print "Prefix: message" to stderr and exit with
+/// status 1; otherwise do nothing.
+static void error(StringRef Prefix, std::error_code EC) {
+  if (!EC)
+    return;
+  errs() << Prefix << ": " << EC.message() << "\n";
+  exit(1);
+}
+
+/// If the input path is a .dSYM bundle (as created by the dsymutil tool),
+/// replace it with individual entries for each of the object files inside the
+/// bundle otherwise return the input path.
+static std::vector<std::string> expandBundle(const std::string &InputPath) {
+  std::vector<std::string> BundlePaths;
+  SmallString<256> BundlePath(InputPath);
+  // Manually open up the bundle to avoid introducing additional dependencies.
+  if (sys::fs::is_directory(BundlePath) &&
+      sys::path::extension(BundlePath) == ".dSYM") {
+    std::error_code EC;
+    sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
+    for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
+         Dir != DirEnd && !EC; Dir.increment(EC)) {
+      const std::string &Path = Dir->path();
+      sys::fs::file_status Status;
+      EC = sys::fs::status(Path, Status);
+      error(Path, EC);
+      switch (Status.type()) {
+      case sys::fs::file_type::regular_file:
+      case sys::fs::file_type::symlink_file:
+      case sys::fs::file_type::type_unknown:
+        BundlePaths.push_back(Path);
+        break;
+      default: /*ignore*/;
+      }
+    }
+    error(BundlePath, EC);
+  }
+  if (BundlePaths.empty())
+    BundlePaths.push_back(InputPath);
+  return BundlePaths;
+}
+
+/// Check the GSYM file \p OutFile against the DWARF in \p Obj.
+///
+/// Every address of every function in the GSYM file is looked up in both
+/// formats. Differences in the number of frames, function names, source paths
+/// or line numbers are reported to \p OS.
+static void verify(ObjectFile &Obj, raw_ostream &OS,
+                   const std::string &OutFile) {
+  OS << "Verifying GSYM file \"" << OutFile << "\":\n";
+  auto DICtx = DWARFContext::create(Obj);
+
+  gsym::GsymReader gsymFile;
+  // Not named "error" so that the error() helper above is not hidden.
+  auto OpenErr = gsymFile.openFile(OutFile);
+  if (OpenErr) {
+    OS << "error: error opening \"" << OutFile << "\"\n";
+    return;
+  }
+
+  auto NumAddrs = gsymFile.getNumAddresses();
+  DILineInfoSpecifier DLIS(
+      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+      DILineInfoSpecifier::FunctionNameKind::LinkageName);
+  LookupResult gsymLookup;
+  std::string gsymFilename;
+  for (uint32_t i = 0; i < NumAddrs; ++i) {
+    auto FuncAddr = gsymFile.getAddress(i);
+    FunctionInfo FuncInfo;
+    if (gsymFile.getFunctionInfo(FuncAddr, FuncInfo)) {
+      for (auto addr = FuncAddr; addr < FuncAddr + FuncInfo.size(); ++addr) {
+        if (gsymFile.lookup(addr, gsymLookup)) {
+          auto dwarfInlineInfos = DICtx->getInliningInfoForAddress(addr, DLIS);
+          // Without inline information fall back to the plain line table
+          // entry so there is still one frame to compare against.
+          if (dwarfInlineInfos.getNumberOfFrames() == 0)
+            dwarfInlineInfos.addFrame(DICtx->getLineInfoForAddress(addr, DLIS));
+          // Read the count only now: reading it before the fallback frame was
+          // added left it at zero and the fallback frame was never checked.
+          uint32_t numDwarfInlineInfos = dwarfInlineInfos.getNumberOfFrames();
+
+          // Check for 1 entry that has no file and line info
+          // NOTE(review): the literal was empty in this copy of the patch;
+          // "<invalid>" is the placeholder DILineInfo uses for an unknown
+          // file and is assumed to be what was lost in extraction - confirm.
+          if (numDwarfInlineInfos == 1 &&
+              dwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
+            dwarfInlineInfos = DIInliningInfo();
+            numDwarfInlineInfos = 0;
+          }
+          if (numDwarfInlineInfos > 0 &&
+              numDwarfInlineInfos != gsymLookup.Locations.size()) {
+            OS << "error: address " << HEX64(addr) << " has "
+               << numDwarfInlineInfos << " DWARF inline frames and GSYM has "
+               << gsymLookup.Locations.size() << "\n";
+            OS << "    " << numDwarfInlineInfos << " DWARF frames:\n";
+            for (size_t idx = 0; idx < numDwarfInlineInfos; ++idx) {
+              const auto dii = dwarfInlineInfos.getFrame(idx);
+              OS << "    [" << idx << "]: " << dii.FunctionName << " @ "
+                 << dii.FileName << ':' << dii.Line << '\n';
+            }
+            OS << "    " << gsymLookup.Locations.size() << " GSYM frames:\n";
+            for (size_t idx = 0, count = gsymLookup.Locations.size();
+                 idx < count; ++idx) {
+              const auto &gii = gsymLookup.Locations[idx];
+              OS << "    [" << idx << "]: " << gii.Name << " @ " << gii.Dir
+                 << '/' << gii.Base << ':' << gii.Line << '\n';
+            }
+            FuncInfo.dump(outs(), gsymFile);
+            continue;
+          }
+
+          for (size_t idx = 0, count = gsymLookup.Locations.size(); idx < count;
+               ++idx) {
+            const auto &gii = gsymLookup.Locations[idx];
+            if (idx < numDwarfInlineInfos) {
+              const auto dii = dwarfInlineInfos.getFrame(idx);
+              gsymFilename = gsymLookup.getSourceFile(idx);
+              // Verify function name
+              if (dii.FunctionName.find(gii.Name) != 0)
+                OS << "error: address " << format_hex(addr, 10)
+                   << " DWARF function \"" << dii.FunctionName.c_str()
+                   << "\" doesn't match GSYM function \"" << gii.Name
+                   << "\"\n";
+              // Verify source file path
+              if (dii.FileName != gsymFilename)
+                OS << "error: address " << format_hex(addr, 10)
+                   << " DWARF path \"" << dii.FileName.c_str()
+                   << "\" doesn't match GSYM path \"" << gsymFilename.c_str()
+                   << "\"\n";
+              // Verify source file line
+              if (dii.Line != gii.Line)
+                OS << "error: address " << format_hex(addr, 10)
+                   << " DWARF line " << dii.Line << " != GSYM line "
+                   << gii.Line << "\n";
+            }
+          }
+        }
+      }
+    } else {
+      OS << "error: address lookup failed for address[" << i << "] "
+         << format_hex(FuncAddr, 10) << "\n";
+    }
+  }
+}
+
+/// Return the cputype field of the mach header of \p MachO.
+static uint32_t getCPUType(MachOObjectFile &MachO) {
+  if (MachO.is64Bit())
+    return MachO.getHeader64().cputype;
+  else
+    return MachO.getHeader().cputype;
+}
+
+/// Return true if the object file has not been filtered by an --arch option.
+///
+/// With no --arch option everything passes. Otherwise only Mach-O objects can
+/// pass, when their architecture matches one of the filters by name or by CPU
+/// type number.
+static bool filterArch(ObjectFile &Obj) {
+  if (ArchFilters.empty())
+    return true;
+
+  if (auto *MachO = dyn_cast<MachOObjectFile>(&Obj)) {
+    std::string ObjArch =
+        Triple::getArchTypeName(MachO->getArchTriple().getArch());
+
+    for (auto Arch : ArchFilters) {
+      // Match name.
+      if (Arch == ObjArch)
+        return true;
+
+      // Match architecture number.
+      unsigned Value;
+      if (!StringRef(Arch).getAsInteger(0, Value))
+        if (Value == getCPUType(*MachO))
+          return true;
+    }
+  }
+  return false;
+}
+
+/// Convert the DWARF and the symbol table of \p Obj into the GSYM file
+/// \p OutFile, verifying the result when -verify was given.
+static std::error_code
+handleDWARFObjectFile(StringRef Filename, ObjectFile &Obj,
+                      const std::string &OutFile) {
+  auto ThreadCount =
+      NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
+  auto &OS = outs();
+  DwarfTransformer Transformer(OS, ThreadCount);
+
+  std::error_code Error;
+  Error = Transformer.loadDwarf(Obj);
+  if (Error)
+    return Error;
+  Error = Transformer.loadSymbolTable(Obj);
+  if (Error)
+    return Error;
+  Transformer.getData().optimize(OS);
+  Error = Transformer.getData().save(OutFile.c_str());
+  if (Error)
+    return Error;
+  if (Verify)
+    verify(Obj, OS, OutFile);
+
+  return Error;
+}
+
+/// Convert the binary in \p Buffer to GSYM.
+///
+/// A plain object file is written to \p OutFile. For a universal binary every
+/// slice that passes the --arch filter is written to its own file named
+/// \p OutFile followed by '.' and the architecture name.
+static std::error_code
+handleDWARFBuffer(StringRef Filename, MemoryBufferRef Buffer,
+                  const std::string &OutFile) {
+  Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
+  error(Filename, errorToErrorCode(BinOrErr.takeError()));
+
+  std::error_code Error;
+  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
+    Error = handleDWARFObjectFile(Filename, *Obj, OutFile);
+  } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
+    for (auto &ObjForArch : Fat->objects()) {
+      if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
+        auto &Obj = **MachOOrErr;
+        if (filterArch(Obj)) {
+          auto archName = ObjForArch.getArchFlagName();
+          std::string ArchOutFile(OutFile);
+          ArchOutFile.append(1, '.');
+          ArchOutFile.append(archName);
+          // Use outs() like the rest of the tool so that the message cannot
+          // be reordered against it by a second, separately buffered stream.
+          outs() << "Handling " << archName << " architecture...\n";
+          Error = handleDWARFObjectFile(Filename, Obj, ArchOutFile);
+          if (Error)
+            return Error;
+        }
+      } else {
+        consumeError(MachOOrErr.takeError());
+      }
+    }
+  }
+  return Error;
+}
+
+/// Read \p Filename (or stdin) and convert its contents to GSYM in
+/// \p OutFile. Exits the tool if the file cannot be read.
+static std::error_code
+handleDWARFFile(StringRef Filename, const std::string &OutFile) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  error(Filename, BuffOrErr.getError());
+  std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
+  return handleDWARFBuffer(Filename, *Buffer, OutFile);
+}
+
+/// Convert the file given with -dwarf to GSYM.
+///
+/// The output goes to the -out-file path, or to the input path with ".gsym"
+/// appended when none was given. Stops at the first file that fails.
+static std::error_code convertDWARFToGSYM(raw_ostream &OS) {
+  // Expand any .dSYM bundles to the individual object files contained therein.
+  std::vector<std::string> Objects;
+  std::string OutFile = OutputFilename;
+  if (OutFile.empty()) {
+    OutFile = DWARFFilename;
+    OutFile += ".gsym";
+  }
+
+  OS << "Converting DWARF from \"" << DWARFFilename << "\" to GSYM in \"" <<
+      OutFile << "\"\n";
+
+  auto Objs = expandBundle(DWARFFilename);
+  Objects.insert(Objects.end(), Objs.begin(), Objs.end());
+
+  std::error_code Error;
+  for (const auto &Object : Objects) {
+    Error = handleDWARFFile(Object, OutFile);
+    if (Error)
+      return Error;
+  }
+  return Error;
+}
+
+/// Tool entry point: convert DWARF or Breakpad input to GSYM, then dump or
+/// look up data in any GSYM files given as positional arguments.
+int main(int argc, char const *argv[]) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+
+  const char * Overview = "A tool that displays information in GSYM files, or "
+    "converts other formats to GSYM.\n\nSpecify one or more GSYM "
+    "paths as arguments to dump or lookup information.\nUse the -dwarf "
+    "option to specify a DWARF file to convert to GSYM.\nUse the "
+    "-breakpad option to specify a breakpad file to convert to GSYM.\n";
+  HideUnrelatedOptions({&SpecificOptions, &DWARFOptions, &BreakpadOptions,
+                        &GSYMOptions});
+  cl::ParseCommandLineOptions(argc, argv, Overview);
+
+  if (Help) {
+    PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
+    return 0;
+  }
+
+  raw_ostream &OS = outs();
+
+  if (!DWARFFilename.empty()) {
+    // Convert DWARF to GSYM
+    if (!InputFilenames.empty()) {
+      OS << "error: no input files can be specified when using the -dwarf "
+            "option \n";
+      return 1;
+    }
+    return convertDWARFToGSYM(OS) ? 1 : EXIT_SUCCESS;
+  } else if (!BreakpadFilename.empty()) {
+    std::string OutFile = OutputFilename;
+    if (OutFile.empty()) {
+      OutFile = BreakpadFilename;
+      OutFile += ".gsym";
+    }
+    // error() exits on failure; on success any GSYM files given as
+    // positional arguments are still processed below.
+    error(BreakpadFilename, convertBreakpadFileToGSYM(BreakpadFilename,
+                                                      OutFile));
+  }
+
+  // Dump or access data inside GSYM files
+  for (const auto &GSYMPath : InputFilenames) {
+    gsym::GsymReader gsymFile;
+    auto Error = gsymFile.openFile(GSYMPath);
+    if (Error) {
+      OS << "error: error opening \"" << GSYMPath << "\"\n";
+      return 1;
+    }
+    if (LookupAddress != UINT64_MAX) {
+      // Lookup an address in a GSYM file and print any matches.
+      OS << "Looking up " << format_hex(LookupAddress, 18) << " in GSYM file \""
+         << GSYMPath << "\":\n";
+      LookupResult Result;
+      if (gsymFile.lookup(LookupAddress, Result)) {
+        Result.dump(OS);
+      } else {
+        OS << "error: address not found\n";
+      }
+    } else if (!LookupName.empty()) {
+      // Lookup all address infos with matching names
+      OS << "Looking up \"" << LookupName << "\" in GSYM file \""
+         << GSYMPath << "\":\n";
+      gsymFile.dumpAddressInfos(OS, LookupName);
+    } else {
+      OS << "Dumping GSYM file \"" << GSYMPath << "\":\n";
+      gsymFile.dump(OS, Verbose);
+    }
+  }
+  return EXIT_SUCCESS;
+}
Index: unittests/DebugInfo/CMakeLists.txt
===================================================================
--- unittests/DebugInfo/CMakeLists.txt
+++ unittests/DebugInfo/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(CodeView)
 add_subdirectory(DWARF)
+add_subdirectory(GSYM)
 add_subdirectory(MSF)
 add_subdirectory(PDB)
Index: unittests/DebugInfo/GSYM/CMakeLists.txt
===================================================================
--- unittests/DebugInfo/GSYM/CMakeLists.txt
+++ unittests/DebugInfo/GSYM/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  AsmPrinter
+  DebugInfoGSYM
+  MC
+  Object
+  ObjectYAML
+  Support
+  )
+
+add_llvm_unittest(DebugInfoGSYMTests
+  GSYMTest.cpp
+  )
+
+target_link_libraries(DebugInfoGSYMTests PRIVATE
LLVMTestingSupport)
Index: unittests/DebugInfo/GSYM/GSYMTest.cpp
===================================================================
--- unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -0,0 +1,335 @@
+//===- llvm/unittest/DebugInfo/GSYMTest.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/DebugInfo/GSYM/Breakpad.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FileTableCreator.h"
+#include "llvm/DebugInfo/GSYM/StringTableCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+
+#include "gtest/gtest.h"
+#include <cstring>
+#include <sstream>
+#include <string>
+
+using namespace llvm;
+using namespace gsym;
+
+TEST(GSYMTest, TestStringTables) {
+  // Verify the llvm::gsym::StringTableCreator and the llvm::gsym::StringTable
+  // works as expected.
+  StringTableCreator StrTabCreator;
+  // Use a local buffer that changes when inserting strings into the string
+  // table to ensure "StrTabCreator" is storing its own copy of the strings.
+  char Str[32];
+
+  auto HelloIndex = StrTabCreator.insert(strcpy(Str, "hello"));
+  auto WorldIndex = StrTabCreator.insert(strcpy(Str, "world"));
+  // Verify that if we add another hello from a different string we get the
+  // same index
+  char HelloCStr[] = "hello";
+  auto HelloIndex2 = StrTabCreator.insert(HelloCStr);
+  EXPECT_EQ(HelloIndex2, HelloIndex);
+  // Make sure a common substring gets its own string table entry
+  auto HellIndex = StrTabCreator.insert(strcpy(Str, "hell"));
+  EXPECT_NE(HellIndex, HelloIndex);
+
+  // Verify the operator[] works in the StringTableCreator class
+  EXPECT_EQ(StrTabCreator[0], StringRef());
+  EXPECT_EQ(StrTabCreator[HelloIndex], StringRef("hello"));
+  EXPECT_EQ(StrTabCreator[WorldIndex], StringRef("world"));
+  EXPECT_EQ(StrTabCreator[HellIndex], StringRef("hell"));
+  // Verify the operator[] fails with invalid index
+  EXPECT_EQ(StrTabCreator[HelloIndex+1], StringRef());
+  // Now write the string table out to a buffer so we can load it back in
+  // and do tests with the read only version
+  std::stringstream OutStrm;
+  FileWriter OutWriter(OutStrm);
+  StrTabCreator.write(OutWriter);
+  std::string OutData(OutStrm.str());
+  // Now use the string table we just created by decoding it and using it
+  // with the StringTable class. StringTable is the class we used to decode the
+  // string table when we read a GSYM file from a file on disk.
+  StringTable StrTabReader(OutData);
+  EXPECT_EQ(StrTabReader[0], StringRef());
+  EXPECT_EQ(StrTabReader[HelloIndex], StringRef("hello"));
+  EXPECT_EQ(StrTabReader[WorldIndex], StringRef("world"));
+  EXPECT_EQ(StrTabReader[HellIndex], StringRef("hell"));
+  // Verify that an offset into the string table that doesn't sit at the
+  // actual start of a string still returns a partial string
+  EXPECT_EQ(StrTabReader[HellIndex+1], StringRef("ell"));
+  // Verify the first invalid offset in the string table just past the end
+  // returns an empty string
+  EXPECT_EQ(StrTabReader[OutData.size()], StringRef());
+  // Test that we can find a string within StringTable instances. This is used
+  // to do name lookups within a GSYM file.
+  EXPECT_EQ(StrTabReader.find(""), 0U);
+  EXPECT_EQ(StrTabReader.find("hello"), HelloIndex);
+  EXPECT_EQ(StrTabReader.find("world"), WorldIndex);
+  EXPECT_EQ(StrTabReader.find("hell"), HellIndex);
+  // Verify we can find a partial string as long as it is NULL terminated
+  EXPECT_EQ(StrTabReader.find("ell"), HellIndex+1);
+  // Verify that a string that is not in the table is not found
+  EXPECT_EQ(StrTabReader.find("carp"), UINT32_MAX);
+}
+
+/// Save \p Gsym into an in-memory stream and return the resulting bytes.
+static std::string createGSYM(const GsymCreator &Gsym) {
+  std::stringstream OutStrm;
+  Gsym.save(OutStrm);
+  return OutStrm.str();
+}
+
+/// UUID bytes assigned to the creators below and checked by verifyUUID().
+static const uint8_t TestUUID[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+/// Expect that the UUID stored in \p Hdr equals TestUUID in size and bytes.
+static void verifyUUID(const Header *Hdr) {
+  EXPECT_EQ(Hdr->UUIDSize, (uint8_t)sizeof(TestUUID));
+  if (Hdr->UUIDSize == sizeof(TestUUID)) {
+    for (size_t I=0; I<Hdr->UUIDSize; ++I) {
+      EXPECT_EQ(TestUUID[I], Hdr->UUID[I]);
+    }
+  }
+}
+
+/// Save \p GC, read it back with a GsymReader and verify the header, every
+/// function info and the size used to encode the address offsets.
+static void createAndVerifyGsym(const GsymCreator &GC) {
+
+  std::string GsymData(createGSYM(GC));
+  GsymReader Gsym;
+  Gsym.init(GsymData);
+  //Gsym.dump(outs(), true);
+  auto Hdr = Gsym.getHeader();
+  ASSERT_TRUE(Hdr != nullptr);
+  // Verify UUID size and bytes
+  verifyUUID(Hdr);
+
+  // Make sure we have the same number of functions in both the creator and
+  // the reader
+  size_t NumFuncs = GC.getFunctionInfoSize();
+  EXPECT_EQ(NumFuncs, Gsym.getNumAddresses());
+  bool First = true;
+  uint64_t LoAddress = 0;
+  uint64_t HiAddress = 0;
+
+  GC.forEachFunctionInfo([&](const FunctionInfo &GCFuncInfo) -> bool {
+    // Make sure we can fetch a matching function info from the GsymReader for
+    // each function info in the GsymCreator.
+    FunctionInfo FuncInfo;
+    const uint64_t OrigStartAddr = GCFuncInfo.startAddress();
+    EXPECT_EQ(Gsym.getFunctionInfo(OrigStartAddr, FuncInfo), true);
+    if (First) {
+      // Verify that our header base address is the same as the first function
+      // info address. Since all address offsets are in increasing order we
+      // know that the first one is the base address.
+      First = false;
+      EXPECT_EQ(Hdr->BaseAddress, OrigStartAddr);
+      LoAddress = OrigStartAddr;
+    }
+    HiAddress = OrigStartAddr;
+    EXPECT_EQ(FuncInfo, GCFuncInfo);
+    return true; // Keep iterating
+  });
+  // Make sure the address offset size is efficiently encoded
+  const uint64_t MaxOffset = HiAddress - LoAddress;
+  if (MaxOffset <= UINT8_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 1u);
+  else if (MaxOffset <= UINT16_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 2u);
+  else if (MaxOffset <= UINT32_MAX)
+    EXPECT_EQ(Hdr->AddrOffSize, 4u);
+  else
+    EXPECT_EQ(Hdr->AddrOffSize, 8u);
+}
+
+TEST(GSYMTest, TestGsymCreatorSingleFunction) {
+  // Test creating a GSYM file with a single function and verify
+  // we can create a valid GSYM
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, GC.insertString("main")));
+  createAndVerifyGsym(GC);
+}
+
+TEST(GSYMTest, TestGsymCreator8BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 1 byte in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1010, 0x20, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x1030, 0x50, GC.insertString("bar")));
+  createAndVerifyGsym(GC);
+}
+
+TEST(GSYMTest, TestGsymCreator16BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 2 byte in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1100, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x1300, 0x500, GC.insertString("bar")));
+  createAndVerifyGsym(GC);
+}
+
+TEST(GSYMTest, TestGsymCreator32BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 4 byte in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x10000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x20000, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x40000, 0x500, GC.insertString("bar")));
+  createAndVerifyGsym(GC);
+}
+
+TEST(GSYMTest, TestGsymCreator64BitAddrOffsets) {
+  // Test creating a GSYM file with multiple functions and verify we can
+  // create a valid GSYM. Verify address info offsets are 8 byte in size
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x100000000, 0x100, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x200000000, 0x200, GC.insertString("foo")));
+  GC.addFunctionInfo(FunctionInfo(0x400000000, 0x500, GC.insertString("bar")));
+  createAndVerifyGsym(GC);
+}
+
+TEST(GSYMTest, TestGsymCreatorLookups) {
+  // Test lookups of various information in a GSYM file
+  GsymCreator GC;
+  GC.UUID.assign(TestUUID, TestUUID + sizeof(TestUUID));
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x10, GC.insertString("main")));
+  GC.addFunctionInfo(FunctionInfo(0x1020, 0x20, GC.insertString("foo")));
+  // Make a symbol with no size. This is what might happen if we make a
+  // function info from a symbol table symbol that has no size (mach-o)
+  GC.addFunctionInfo(FunctionInfo(0x1050, 0, GC.insertString("nosize")));
+  GC.addFunctionInfo(FunctionInfo(0x1070, 0, GC.insertString("nosize-last")));
+  std::string GsymData(createGSYM(GC));
+  GsymReader Gsym;
+  Gsym.init(GsymData);
+  FunctionInfo FuncInfo;
+
+  // Make sure lookups fail for addresses before the first address
+  EXPECT_EQ(Gsym.getFunctionInfo(0, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000-1, FuncInfo), false);
+  // Make sure lookups succeed for addresses for "main"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1000u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1010u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "main");
+
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1000+0x10-1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1000u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1010u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "main");
+  // Make sure lookups fail for addresses between the end of "main" and start
+  // of "foo"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1010, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020-1, FuncInfo), false);
+
+  // Make sure lookups succeed for addresses for "foo"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1020u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1040u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo");
+
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1020u+0x20-1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1020u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1040u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo");
+
+  // Make sure lookups fail for addresses between the end of "foo" and start
+  // of "nosize"
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1040, FuncInfo), false);
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1050-1, FuncInfo), false);
+
+  // A symbol with zero size should infer its size from the delta between
+  // its address and the next address in the table.
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1050, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1050u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize");
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1070-1, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1050u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize");
+
+  // The last symbol will match any address that is >= to the last symbol if
+  // that symbol has a byte size of zero.
+  EXPECT_EQ(Gsym.getFunctionInfo(0x1070, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize-last");
+  EXPECT_EQ(Gsym.getFunctionInfo(UINT64_MAX, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x1070u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "nosize-last");
+
+}
+
+TEST(GSYMTest, TestBreakpadToGSYM) {
+  // Test converting a breakpad file to GSYM
+  static StringRef BreakpadSource(
+R"(MODULE mac x86_64 000102030405060708090A0B0C0D0E0F dump_syms
+FILE 0 /tmp/foo.c
+FILE 1 /tmp/foo.h
+FILE 2 /tmp/bar.c
+FUNC 100 50 0 foo(int, int)
+100 10 5 0
+110 25 6 0
+120 30 23 1
+FUNC 200 50 0 a::bar(int)
+200 20 5 2
+220 25 6 2
+PUBLIC 300 0 baz
+PUBLIC 350 0 ding
+)");
+  GsymCreator GC;
+  std::error_code Error = convertBreakpadDataToGSYM(BreakpadSource,GC);
+  ASSERT_TRUE(!Error);
+  std::string GsymData(createGSYM(GC));
+  GsymReader Gsym;
+  Gsym.init(GsymData);
+  //Gsym.dump(outs(), true);
+  auto Hdr = Gsym.getHeader();
+  ASSERT_TRUE(Hdr != nullptr);
+  verifyUUID(Hdr);
+  FunctionInfo FuncInfo;
+  EXPECT_EQ(Gsym.getFunctionInfo(0xff, FuncInfo), false);
+  // Verify we can find our "foo(int, int)" function
+  EXPECT_EQ(Gsym.getFunctionInfo(0x100, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x100u);
+  EXPECT_EQ(FuncInfo.endAddress(), 0x150u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "foo(int, int)");
+  ASSERT_TRUE(FuncInfo.Lines.size() == 3u);
+  EXPECT_EQ(FuncInfo.Lines[0].Addr, 0x100u);
+  EXPECT_EQ(FuncInfo.Lines[0].File, 1u);
+  EXPECT_EQ(FuncInfo.Lines[0].Line, 5u);
+
+  EXPECT_EQ(FuncInfo.Lines[1].Addr, 0x110u);
+  EXPECT_EQ(FuncInfo.Lines[1].File, 1u);
+  EXPECT_EQ(FuncInfo.Lines[1].Line, 6u);
+
+  EXPECT_EQ(FuncInfo.Lines[2].Addr, 0x120u);
+  EXPECT_EQ(FuncInfo.Lines[2].File, 2u);
+  EXPECT_EQ(FuncInfo.Lines[2].Line, 23u);
+
+  // Verify we can find our "baz" function
+  EXPECT_EQ(Gsym.getFunctionInfo(0x300, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x300u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "baz");
+  // Verify we can find our "ding" function
+  EXPECT_EQ(Gsym.getFunctionInfo(0x350, FuncInfo), true);
+  EXPECT_EQ(FuncInfo.startAddress(), 0x350u);
+  EXPECT_EQ(Gsym.getString(FuncInfo.Name), "ding");
+
+}