Index: include/llvm/DebugInfo/GSYM/Breakpad.h =================================================================== --- include/llvm/DebugInfo/GSYM/Breakpad.h +++ include/llvm/DebugInfo/GSYM/Breakpad.h @@ -0,0 +1,24 @@ +//===- Breakpad.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_BREAKPAD_H +#define LLVM_DEBUGINFO_BREAKPAD_H + +#include + +namespace llvm { +namespace gsym { + +std::error_code +convertBreakpadToGSYM(const char *BreakpadPath, const char *GSYMPath); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_BREAKPAD_H Index: include/llvm/DebugInfo/GSYM/DataRef.h =================================================================== --- include/llvm/DebugInfo/GSYM/DataRef.h +++ include/llvm/DebugInfo/GSYM/DataRef.h @@ -0,0 +1,106 @@ +//===- DataRef.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DATAREF_H +#define LLVM_DEBUGINFO_DATAREF_H + +#include +#include + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +namespace gsym { +//---------------------------------------------------------------------- +// The DataRef class is designed to point to memory mapped memory and +// access structures and data in that memory without needing to parse it +// in any way. 
Structures encoded in memory can be easily accessed +// using: +// +// DataRef data(...); +// Foo *foo_ptr = data.getPointer(Offset); +// +// The "foo_ptr" will be NULL if there isn't enough data in the DataRef +// to contain a struct whose size is the size of the requested type at +// the specified byte Offset "Offset". +// +// DataRef has rudimentary functions to extract types with GetValue(). +// It is designed to get around alignment requirements when decoding +// native types. A uint64_t can only be dereferenced from a buffer of +// bytes if it is properly aligned on some architectures (ARM/Thumb), +// so the functionality used memcpy to copy the data into an aligned +// location before returning it. +// +// More complex data extraction should use the llvm::DataExtractor +// class. +//---------------------------------------------------------------------- +class DataRef { + StringRef Data; +public: + DataRef() : Data() {} + DataRef(StringRef d) : Data(d) {} + + StringRef getData() const { return Data; } + + void setData(StringRef D) { + Data = D; + } + + uint64_t bytesLeft(uint64_t Offset) const { + const auto Length = Data.size(); + return Offset < Length ? 
Length - Offset : 0; + } + + void clear() { + Data = StringRef(); + } + + bool isValid() const { return !Data.empty(); } + const uint8_t *getStart() const { return (const uint8_t *)Data.data(); } + const uint8_t *getEnd() const { + if (isValid()) + return getStart() + Data.size(); + return nullptr; + } + + DataRef getSlice(uint64_t Offset, uint64_t len = UINT64_MAX) const { + if (len == UINT64_MAX) + return DataRef(Data.substr(Offset)); + return DataRef(Data.substr(Offset, len)); + } + + const void *getData(uint64_t Offset, uint64_t len = 1) const { + if (bytesLeft(Offset) >= len) + return Data.data() + Offset; + return nullptr; + } + + template T getValue(uint64_t Offset, T fail_value = T()) const { + auto Ptr = getData(Offset, sizeof(T)); + if (Ptr) { + // Use memcpy to avoid alignment requirements + T value; + memcpy(&value, Ptr, sizeof(T)); + return value; + } + return fail_value; + } + + // Get a pointer to a natively encoded type in the byte stream. + // + // This function ensures there is enough data left in the buffer for a T + // type before returning a pointer. + template const T *getPointer(uint64_t Offset) const { + return reinterpret_cast(getData(Offset, sizeof(T))); + } +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_DATAREF_H Index: include/llvm/DebugInfo/GSYM/DwarfTransformer.h =================================================================== --- include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -0,0 +1,95 @@ +//===- DwarfTransformer.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef gsym_DwarfTransformer_h +#define gsym_DwarfTransformer_h + +#include + +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +namespace llvm { +namespace gsym { + +struct CUInfo; + +class DwarfTransformer { +public: + DwarfTransformer &setNumThreads(uint32_t num) { + m_numThreads = num; + return *this; + } + + DwarfTransformer &setErrorBanner(const std::string &banner) { + m_errorBanner = banner; + return *this; + } + + DwarfTransformer &setOutStream(llvm::raw_ostream *stream) { + m_outStream = stream; + return *this; + } + + bool loadDwarf(llvm::object::ObjectFile &obj); + bool loadDwarf(const std::string &filename) { + if (auto binary = getObjectFile(filename)) { + return loadDwarf(*binary.getValue().getBinary()); + } + return false; + } + + bool loadSymbolTable(llvm::object::ObjectFile &obj); + bool loadSymbolTable(const std::string &filename) { + if (auto binary = getObjectFile(filename)) { + return loadSymbolTable(*binary.getValue().getBinary()); + } + return false; + } + + void optimize(); + + GsymData &getData() { return m_data; } + +private: + void handleDie(std::shared_ptr strtab, + std::shared_ptr filetab, CUInfo &cuInfo, + llvm::raw_ostream *OS, llvm::DWARFDie die, + std::function insertFunc); + + bool parseLineTable(FileTableCreator &filetab, CUInfo &cuInfo, + llvm::raw_ostream *OS, llvm::DWARFDie die, + FunctionInfo &func); + + llvm::Optional> + getObjectFile(const std::string &filename); + + void initDataFromObj(llvm::object::ObjectFile &obj); + + bool loggingEnabled() const { return m_outStream != &llvm::nulls(); } + llvm::raw_ostream &log() const { return *m_outStream << m_errorBanner; } + + bool m_init = false; + GsymData m_data; + + uint32_t m_numThreads = + std::max(std::thread::hardware_concurrency(), 1); + llvm::raw_ostream 
*m_outStream = &llvm::nulls(); + std::string m_errorBanner = ""; + + friend class DwarfTransformerTest; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef gsym_DwarfTransformer_h Index: include/llvm/DebugInfo/GSYM/FileEntry.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileEntry.h +++ include/llvm/DebugInfo/GSYM/FileEntry.h @@ -0,0 +1,45 @@ +//===- FileEntry.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_FILEENTRY_H +#define LLVM_DEBUGINFO_FILEENTRY_H + +#include +#include +#include + +namespace llvm { +namespace gsym { + +struct FileEntry { + uint32_t Dir; // String table offset in the string table + uint32_t Base; // String table offset in the string table + + FileEntry(uint32_t D = 0, uint32_t B = 0) : Dir(D), Base(B) {} + + // implement operator== so that FileEntry can be used as key in + // unordered containers + bool operator==(const FileEntry &other) const { + return Dir == other.Dir && Base == other.Base; + }; +}; + +} // namespace gsym +} // namespace llvm + +namespace std { +// implement std::hash so that FileEntry can be used as key in +// unordered containers +template <> struct hash { + size_t operator()(const llvm::gsym::FileEntry &x) const { + return std::hash()(x.Dir) ^ std::hash()(x.Base); + } +}; +} // namespace std +#endif // #ifndef LLVM_DEBUGINFO_FILEENTRY_H Index: include/llvm/DebugInfo/GSYM/FileTableCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileTableCreator.h +++ include/llvm/DebugInfo/GSYM/FileTableCreator.h @@ -0,0 +1,48 @@ +//===- FileTableCreator.h ---------------------------------------*- C++ -*-===// +// +// The 
LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_FILETABLECREATOR_H +#define LLVM_DEBUGINFO_FILETABLECREATOR_H + +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +namespace llvm { +namespace gsym { +class FileWriter; + +class FileTableCreator { + std::unordered_map EntryToIndex; + std::vector FileEntries; + std::shared_ptr StringTable; + +public: + explicit FileTableCreator(std::shared_ptr &StrTab) + : StringTable(StrTab) { + insert(FileEntry(0, 0)); + } + + const gsym::FileEntry &operator[](size_t idx) const { + return FileEntries[idx]; + } + + const StringTableCreator &getStringTable() const { return *StringTable; } + + uint32_t insert(std::string S); + uint32_t insert(FileEntry S); + void write(FileWriter &Out) const; +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_FILETABLECREATOR_H Index: include/llvm/DebugInfo/GSYM/FileWriter.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileWriter.h +++ include/llvm/DebugInfo/GSYM/FileWriter.h @@ -0,0 +1,46 @@ +//===- FileWriter.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_FILEWRITER_H +#define LLVM_DEBUGINFO_FILEWRITER_H + +#include +#include +#include +#include + +namespace llvm { +namespace gsym { + +class FileWriter { + std::ostream &m_ostream; + +public: + FileWriter(std::ostream &stream) : m_ostream(stream) {} + ~FileWriter(); + bool WriteSLEB(int64_t value); + bool WriteULEB(uint64_t value); + bool WriteU8(uint8_t value); + bool WriteU32(uint32_t value); + bool Fixup32(uint32_t value, off_t offset); + bool WriteUnsigned(uint64_t value, size_t n); + bool Write(const void *src, size_t src_len); + bool AlignTo(size_t align); + off_t Seek(off_t pos); + off_t Tell(); + +private: + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_FILEWRITER_H Index: include/llvm/DebugInfo/GSYM/FunctionInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -0,0 +1,81 @@ +//===- FunctionInfo.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_FUNCTIONINFO_H +#define LLVM_DEBUGINFO_FUNCTIONINFO_H + +#include +#include + +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" + +namespace llvm { +class raw_ostream; +namespace gsym { +class GsymReader; + +struct FunctionInfo { + uint64_t Addr; + uint32_t Size; + uint32_t Name; + std::vector Lines; + InlineInfo InlineInfo; + + FunctionInfo(uint64_t a = 0, uint32_t s = 0, uint32_t n = 0) + : Addr(a), Size(s), Name(n) {} + + bool hasRichInfo() const { + // Returns whether we have something else than range and name + return !Lines.empty() || InlineInfo.isValid(); + } + + bool isValid() const { + // Address and size can be zero and there can be no line entries for a + // symbol so the only indication this entry is valid is if the name is + // not zero. + return Name != 0; + } + + void clear() { + Addr = 0; + Size = 0; + Name = 0; + Lines.clear(); + InlineInfo.clear(); + } + + void dump(llvm::raw_ostream &OS, GsymReader &GSYM) const; +}; + +inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return LHS.Addr == RHS.Addr && LHS.Size == RHS.Size && LHS.Name == RHS.Name && + LHS.Lines == RHS.Lines; +} +inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) { + return !(LHS == RHS); +} +inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { + // First sort by address + if (LHS.Addr != RHS.Addr) + return LHS.Addr < RHS.Addr; + + // If address is the same, functions with more info go first + if (LHS.hasRichInfo() ^ RHS.hasRichInfo()) + return LHS.hasRichInfo(); + + // Lastly, longer functions go first + return LHS.Size > RHS.Size; +} + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_FUNCTIONINFO_H Index: include/llvm/DebugInfo/GSYM/GsymCreator.h 
=================================================================== --- include/llvm/DebugInfo/GSYM/GsymCreator.h +++ include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,108 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "FileTableCreator.h" +#include "FunctionInfo.h" +#include "InlineInfo.h" +#include "StringTableCreator.h" + +namespace llvm { +class AsmPrinter; + +namespace gsym { + +// Helper struct to save debug info for a function. Similar to FunctionInfo, +// but with pointers to custom string/file table. +struct FunctionData { + std::shared_ptr StrTab; + std::shared_ptr FileTab; + FunctionInfo FuncInfo; + + FunctionData(std::shared_ptr ST, + std::shared_ptr FT, FunctionInfo &&FI) + : StrTab(ST), FileTab(FT), FuncInfo(FI){} + + const char *name() const { return (*StrTab)[FuncInfo.Name]; } +}; + +inline bool operator<(const FunctionData &lhs, const FunctionData &rhs) { + return lhs.FuncInfo < rhs.FuncInfo; +} + +// Struct with parsed debug info. Most importantly the vector of FunctionData. +// Note that every FunctionData can have different string/file table. 
+struct GsymData { + Ranges TextRanges; + std::vector UUID; + std::vector Funcs; +}; + +class GsymCreator { +public: + static GsymCreator createFrom(GsymData &data); + + // Note that if numThreads > 1, the callback needs to be thread-safe + static void + createSegmentsFrom(const GsymData &data, size_t fileSizeTarget, + std::function callback, + uint32_t numThreads = std::thread::hardware_concurrency()); + + bool save(std::ostream &stream) const; + bool save(llvm::AsmPrinter &ASM) const; + bool save(const char *path) const; + + uint64_t getFirstAddr() const { + return FuncInfos.empty() ? 0 : FuncInfos.front().Addr; + } + + uint64_t getFunctionCount() const { + return FuncInfos.size(); + } + +private: + uint32_t addForeignString(const StringTableCreator &OtherStrTab, + uint32_t offset); + uint32_t addForeignFile(const FileTableCreator &OtherFileTab, + uint32_t id); + void fixForeignInlineInfo(InlineInfo &ii, + const StringTableCreator &OtherStrTab, + const FileTableCreator &OtherFileTab); + void addForeignFunction(const std::shared_ptr OtherStrTab, + const std::shared_ptr OtherFileTab, + const FunctionInfo &fi); + size_t estimateFileSize(); + + static void createSegmentsFromRange( + const GsymData &data, std::vector::const_iterator begin, + std::vector::const_iterator end, size_t fileSizeTarget, + std::function callback); + + std::vector FuncInfos; + std::shared_ptr StrTab; + std::shared_ptr FileTab; + Ranges TextRanges; + std::vector UUID; + + size_t SizeEstimate = 0; + size_t NextFuncToEstimate = 0; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYMCREATOR_H Index: include/llvm/DebugInfo/GSYM/GsymReader.h =================================================================== --- include/llvm/DebugInfo/GSYM/GsymReader.h +++ include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,142 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYMREADER_H + +#include +#include +#include +#include +#include + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/DebugInfo/GSYM/DataRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" + + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_VERSION = 1; +constexpr size_t GSYM_MAX_UUID_SIZE = 20; +class FileTableCreator; +struct LookupResult; +class StringTableCreator; +class GSYMInfo; + +enum class InfoType { EndOfList = 0u, LineTableInfo = 1u, InlineInfo = 2u }; + +struct Header { + uint32_t Magic; + uint16_t Version; + uint8_t AddrOffSize; // Size of addr_off_t + uint8_t UUIDSize; + uint64_t BaseAddress; + uint32_t NumAddresses; + uint32_t StrtabOffset; + uint32_t StrtabSize; + uint8_t UUID[GSYM_MAX_UUID_SIZE]; + static size_t getByteSize() { return sizeof(Header); } + std::string getError() const; + void dump(llvm::raw_ostream &OS) const; +}; + +class GsymReader { +public: + GsymReader(); + ~GsymReader(); + + std::error_code openFile(StringRef Filename); + + void dump(llvm::raw_ostream &OS, bool Verbose) const; + // Dump any address info with matching name + bool dumpAddressInfos(llvm::raw_ostream &OS, const char *name) const; + bool lookup(uint64_t addr, LookupResult &result) const; + bool getFunctionInfo(uint64_t addr, FunctionInfo &func_info) const; + // Accessor functions that allow iteration across all addresses in the GSYM + // file. 
+ size_t getNumAddresses() const; + uint64_t getAddress(size_t idx) const; + + FileEntry getFile(uint32_t file_idx) const { + if (FileTab) + return FileTab->getFile(file_idx); + return FileEntry(); + } + + const char *getString(uint32_t stroff) const { + return StrTab.getString(stroff); + } + +protected: + void init(StringRef Data); + void unmap(); + struct AddressInfo { + uint32_t Size; + uint32_t Name; + }; + struct LookupInfo { + uint64_t MatchAddrOffset; + size_t AddrInfoIndex; + const AddressInfo *AddrInfo; + LookupInfo() + : MatchAddrOffset(0), AddrInfoIndex(SIZE_MAX), AddrInfo(nullptr) {} + void clear() { + MatchAddrOffset = 0; + AddrInfoIndex = SIZE_MAX; + AddrInfo = nullptr; + } + }; + + bool findAddressInfo(uint64_t addr, LookupInfo &lookup_info) const; + void dumpAddressInfo(llvm::raw_ostream &OS, size_t AddrInfoIndex) const; + uint64_t getAddressOffset(size_t idx) const; + uint64_t getAddressInfoOffset(size_t idx) const; + DataExtractor getAddressInfoPayload(size_t idx) const; + void dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr, + DataExtractor &line_data, uint32_t Depth) const; + + static const char *getInfoTypeAsString(InfoType info_type); + + struct FileTable { + uint32_t NumFiles; + FileEntry Files[0]; + size_t getByteSize() const { + return sizeof(uint32_t) + NumFiles * sizeof(FileEntry); + } + FileEntry getFile(uint32_t idx) const { + if (idx > NumFiles) + return FileEntry(); + return Files[idx]; + } + void dump(llvm::raw_ostream &OS, const StringTable &StrTab) const; + }; + std::unique_ptr MemBuffer; + DataRef GSYMData; + const Header *GSYMHeader = nullptr; + const void *AddrOffsets = nullptr; + const uint32_t *AddrInfoOffsets = nullptr; + const FileTable *FileTab = nullptr; + StringTable StrTab; + std::string ErrorStr; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYMREADER_H Index: include/llvm/DebugInfo/GSYM/GsymStreamer.h =================================================================== --- 
include/llvm/DebugInfo/GSYM/GsymStreamer.h +++ include/llvm/DebugInfo/GSYM/GsymStreamer.h @@ -0,0 +1,70 @@ +//===- GSYMStreamer.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H +#define LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace llvm { +namespace gsym { + +/// The Dwarf streaming logic. +/// +/// All interactions with the MC layer that is used to build the debug +/// information binary representation are handled in this class. +class GSYMStreamer { +public: + GSYMStreamer(raw_fd_ostream &OutFile) + : OutFile(OutFile) {} + + bool init(Triple TheTriple); + + AsmPrinter &getAsmPrinter() const { return *Asm; } + +private: + /// \defgroup MCObjects MC layer objects constructed by the streamer + /// @{ + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr MOFI; + std::unique_ptr MC; + MCAsmBackend *MAB; // Owned by MCStreamer + std::unique_ptr MII; + std::unique_ptr MSTI; + MCCodeEmitter *MCE; // Owned by MCStreamer + MCStreamer *MS; // Owned by AsmPrinter + std::unique_ptr TM; + std::unique_ptr Asm; + /// @} + + /// The file we stream the linked Dwarf to. 
+ raw_fd_ostream &OutFile; +}; + +} // end namespace gsym +} // end namespace llvm + +#endif // LLVM_TOOLS_GSYMUTIL_GSYMSTREAMER_H Index: include/llvm/DebugInfo/GSYM/InlineInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/InlineInfo.h +++ include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -0,0 +1,103 @@ +//===- InlineInfo.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_INLINEINFO_H +#define LLVM_DEBUGINFO_INLINEINFO_H + +#include +#include + +namespace llvm { +class DataExtractor; +class raw_ostream; + +namespace gsym { +class GsymReader; +struct FileEntry; +class FileWriter; +struct StringTable; + +struct Range { + uint64_t Start; + uint64_t End; + Range(uint64_t s, uint64_t e) : Start(s), End(e) {} + void dump(llvm::raw_ostream &OS) const; + bool contains(uint64_t addr) const { return Start <= addr && addr < End; } + bool doesAdjoinOrIntersect(const Range &rhs) const { + return (Start <= rhs.End) && (End >= rhs.Start); + } + bool doesIntersect(const Range &rhs) const { + return (Start < rhs.End) && (End > rhs.Start); + } +}; + +inline bool operator==(const Range &lhs, const Range &rhs) { + return lhs.Start == rhs.Start && lhs.End == rhs.End; +} +inline bool operator<(const Range &lhs, const Range &rhs) { + return lhs.Start < rhs.Start; +} +inline bool operator<(const Range &lhs, uint64_t addr) { + return lhs.Start < addr; +} +inline bool operator<(uint64_t addr, const Range &rhs) { + return addr < rhs.Start; +} + +class Ranges { + std::vector m_ranges; + +public: + Ranges() = default; + void insert(const Range &range); + bool contains(uint64_t addr) const; +}; + +struct InlineInfo { + uint32_t Name; + uint32_t CallFile; + uint32_t 
CallLine; + std::vector Ranges; + std::vector Children; + InlineInfo() : Name(0), CallFile(0), CallLine(0) {} + void write(FileWriter &out, uint64_t BaseAddr) const; + + // Decode InlineInfo from data file. In the second variant, only ranges and + // children containing lookup_addr will be stored. For lookup_addr < 0 we will + // just skip the data in the data file. Returns true if successful, false if + // InlineInfo is empty (meaning end of list). + bool decode(DataExtractor &Data, uint32_t &Offset, uint64_t BaseAddr); + bool decode(DataExtractor &Data, uint32_t &Offset, uint64_t BaseAddr, + uint64_t LookupAddr); + + void clear() { + Name = 0; + CallFile = 0; + CallLine = 0; + Ranges.clear(); + Children.clear(); + } + bool isValid() const { return !Ranges.empty(); } + void dump(llvm::raw_ostream &OS, const GsymReader &GSYM, + unsigned depth) const; + bool getInlineStack(uint64_t addr, + std::vector &inline_stack) const; + +}; + +inline bool operator==(const InlineInfo &lhs, const InlineInfo &rhs) { + return lhs.Name == rhs.Name && lhs.CallFile == rhs.CallFile && + lhs.CallLine == rhs.CallLine && lhs.Ranges == rhs.Ranges && + lhs.Children == rhs.Children; +} + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_INLINEINFO_H Index: include/llvm/DebugInfo/GSYM/LineEntry.h =================================================================== --- include/llvm/DebugInfo/GSYM/LineEntry.h +++ include/llvm/DebugInfo/GSYM/LineEntry.h @@ -0,0 +1,41 @@ +//===- LineEntry.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_LINEENTRY_H +#define LLVM_DEBUGINFO_LINEENTRY_H + +#include +#include +#include +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace gsym { +struct LineEntry { + uint64_t Addr; // Start address of this line entry + uint32_t File; // 1 based index of file in FileTable + uint32_t Line; // Source line number + LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0) + : Addr(A), File(F), Line(L) {} + bool isValid() { return File != 0; } + void dump(llvm::raw_ostream &OS) const { + OS << "addr=" << format("0x%08" PRIx64, Addr) << ", file=" << + format("%3u", File) << ", line=" << format("%3u", Line) << '\n'; + } +}; +inline bool operator==(const LineEntry &lhs, const LineEntry &rhs) { + return lhs.Addr == rhs.Addr && lhs.File == rhs.File && lhs.Line == rhs.Line; +} +inline bool operator!=(const LineEntry &lhs, const LineEntry &rhs) { + return !(lhs == rhs); +} +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_LINEENTRY_H Index: include/llvm/DebugInfo/GSYM/LineTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/LineTable.h +++ include/llvm/DebugInfo/GSYM/LineTable.h @@ -0,0 +1,56 @@ +//===- LineTable.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_LINETABLE_H +#define LLVM_DEBUGINFO_LINETABLE_H + +#include +#include +#include + +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace gsym { + +struct LineEntry; +struct FunctionInfo; +class FileWriter; + +class LineTable { + DataExtractor Data; + + enum Opcode { + DBG_END_SEQUENCE = 0x00, // End of the line table + DBG_SET_FILE = 0x01, // Set LineTableRow.file_idx, don't push a row + DBG_ADVANCE_PC = 0x02, // Increment LineTableRow.address, and push a row + DBG_ADVANCE_LINE = 0x03, // Set LineTableRow.file_line, don't push a row + DBG_FIRST_SPECIAL = 0x04, // All special opcodes push a row + }; + + static bool encode_special(int64_t min_line_delta, int64_t max_line_delta, + int64_t line_delta, uint64_t addr_delta, + uint8_t &special_opcode); + + void parse(uint64_t base_addr, + std::function const &row_callback); + +public: + LineTable(DataExtractor D) : Data(D) {} + + static bool write(FileWriter &out, const FunctionInfo &func_info); + + void parseAllEntries(std::vector &line_table, uint64_t base_addr); + LineEntry lookup(uint64_t base_addr, uint64_t addr); +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_LINETABLE_H Index: include/llvm/DebugInfo/GSYM/LookupResult.h =================================================================== --- include/llvm/DebugInfo/GSYM/LookupResult.h +++ include/llvm/DebugInfo/GSYM/LookupResult.h @@ -0,0 +1,84 @@ +//===- LookupResult.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_LOOKUPRESULT_H +#define LLVM_DEBUGINFO_LOOKUPRESULT_H + +#include +#include +#include +#include +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace gsym { +struct FileEntry; + +struct SourceLocation { + const char *Name; + const char *Dir; + const char *Base; + uint32_t Line; + SourceLocation(const char *N = nullptr, const char *D = nullptr, + const char *B = nullptr, uint32_t L = 0) + : Name(N), Dir(D), Base(B), Line(L) {} +}; +struct LookupResult { + uint64_t Addr; + uint64_t EndAddr; + std::vector Locations; + std::string getSourceFile(uint32_t idx) const { + std::string fullpath; + if (idx < Locations.size()) { + if (Locations[idx].Dir) { + fullpath = Locations[idx].Dir; + if (Locations[idx].Base) { + fullpath.append(1, '/'); + fullpath.append(Locations[idx].Base); + } + } else if (Locations[idx].Base) + fullpath = Locations[idx].Base; + } + return fullpath; + } + + LookupResult() : Addr(0), EndAddr(0) {} + void clear() { + Addr = 0; + EndAddr = 0; + Locations.clear(); + } + + void dump(llvm::raw_ostream &OS) { + OS << '[' << format_hex(Addr, 18) << " - "<< format_hex(EndAddr, 18) + << "): "; + auto NumLocations = Locations.size(); + for (size_t i = 0; i < NumLocations; ++i) { + if (i > 0) + OS.indent(43); + const bool is_inlined = i + 1 != NumLocations; + OS << Locations[i].Name; + if (Locations[i].Base) { + OS << " @ "; + if (Locations[i].Dir) + OS << Locations[i].Dir << '/'; + OS << Locations[i].Base << ':' << Locations[i].Line; + } + + if (is_inlined) + OS << " [inlined]"; + OS << '\n'; + } + } +}; +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_LOOKUPRESULT_H Index: include/llvm/DebugInfo/GSYM/StringTable.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTable.h +++ include/llvm/DebugInfo/GSYM/StringTable.h 
@@ -0,0 +1,49 @@ +//===- StringTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_STRINGTABLE_H +#define LLVM_DEBUGINFO_STRINGTABLE_H + +#include +#include + +#include "llvm/DebugInfo/GSYM/DataRef.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace gsym { +struct StringTable { + DataRef Data; + StringTable() : Data() {} + StringTable(DataRef D) : Data(D) {} + const char *getString(uint32_t Offset) const { + return (const char *)Data.getData(Offset); + } + void clear() { Data.clear(); } + void dump(raw_ostream &OS) const { + OS << "String table:\n"; + uint32_t Offset = 0; + while (auto cstr = getString(Offset)) { + OS << format_hex(Offset, 10) << ": \"" << cstr << "\"\n"; + Offset += strlen(cstr) + 1; + } + } + uint32_t find(const char *str) const { + std::string s(str); + s.append(1, 0); // Add a null terminator + auto pos = Data.getData().find(StringRef(s.data(), s.size())); + if (pos != StringRef::npos) + return pos; + return 0; // Return the first string (empty string) when not found + } +}; +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_STRINGTABLE_H Index: include/llvm/DebugInfo/GSYM/StringTableCreator.h =================================================================== --- include/llvm/DebugInfo/GSYM/StringTableCreator.h +++ include/llvm/DebugInfo/GSYM/StringTableCreator.h @@ -0,0 +1,57 @@ +//===- StringTableCreator.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_STRINGTABLECREATOR_H +#define LLVM_DEBUGINFO_STRINGTABLECREATOR_H + +#include +#include +#include +#include + +#include "FileWriter.h" +namespace llvm { +namespace gsym { +class StringTableCreator { + std::unordered_map Strings; + // Strings contains the backing string + std::unordered_map OffsetToString; + std::vector OrderedStrings; + uint32_t NextOffset; + +public: + StringTableCreator() : NextOffset(0) { insert(""); } + + uint32_t insert(std::string s) { + auto pos = Strings.find(s); + if (pos != Strings.end()) + return pos->second; + uint32_t offset = NextOffset; + NextOffset += s.size() + 1; + auto it = Strings.emplace(s, offset).first; + OffsetToString[offset] = it->first.c_str(); + OrderedStrings.push_back(it->first.c_str()); + return offset; + } + + uint32_t getTotalSize() const { return NextOffset; } + + const char *operator[](size_t offset) const { + auto it = OffsetToString.find(offset); + return it == OffsetToString.end() ? nullptr : it->second; + } + + void write(FileWriter &out) const { + for (auto cstr : OrderedStrings) + out.Write(cstr, strlen(cstr) + 1); + } +}; +} // namespace gsym +} // namespace llvm +#endif // #ifndef LLVM_DEBUGINFO_STRINGTABLECREATOR_H Index: include/llvm/Support/DataExtractor.h =================================================================== --- include/llvm/Support/DataExtractor.h +++ include/llvm/Support/DataExtractor.h @@ -60,6 +60,37 @@ /// Set the address size for this extractor. void setAddressSize(uint8_t Size) { AddressSize = Size; } + /// Get a slice of data from this data extractor with the same endian + /// and address size. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. 
If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// + /// @param[in] length + /// The size in byte of the data to extract. If the value is set + /// to StringRef::npos, then all remaining bytes. + /// + /// @return + /// A data extractor with the same endian and address byte size + /// settings as this object with a slice of data whose offset + /// will start at zero. + DataExtractor getSlice(uint32_t *offset_ptr, + size_t length = StringRef::npos) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return DataExtractor(Slice, IsLittleEndian, AddressSize); + } + + const uint8_t *getDataPtr(uint32_t *offset_ptr, size_t length) const { + auto Slice = Data.substr(*offset_ptr, length); + *offset_ptr += Slice.size(); + return reinterpret_cast(Slice.data()); + } /// Extract a C string from \a *offset_ptr. /// /// Returns a pointer to a C String from the data at the offset Index: lib/DebugInfo/CMakeLists.txt =================================================================== --- lib/DebugInfo/CMakeLists.txt +++ lib/DebugInfo/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(DWARF) +add_subdirectory(GSYM) add_subdirectory(MSF) add_subdirectory(CodeView) add_subdirectory(PDB) Index: lib/DebugInfo/GSYM/Breakpad.cpp =================================================================== --- lib/DebugInfo/GSYM/Breakpad.cpp +++ lib/DebugInfo/GSYM/Breakpad.cpp @@ -0,0 +1,263 @@ +//===- Breakpad.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/Support/MemoryBuffer.h" + +#include "llvm/DebugInfo/GSYM/Breakpad.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +using namespace llvm; +using namespace gsym; + +bool starts_with(const char *line, std::string s) { + return strncmp(line, s.c_str(), s.size()) == 0; +} + +enum BreakpadLineType { + Invalid, + Module, + File, + Function, + SourceLine, + Public, + Stack +}; + +static std::string BPAD_MODULE("MODULE "); +static std::string BPAD_FILE("FILE "); +static std::string BPAD_FUNC("FUNC "); +static std::string BPAD_PUBLIC("PUBLIC "); +static std::string BPAD_STACK("STACK "); + +inline uint8_t char_to_nibble(char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return 10 + c - 'a'; + if ('A' <= c && c <= 'F') + return 10 + c - 'A'; + return UINT8_MAX; +} + +class Line { + const char *m_end; + const char *m_pos; + +public: + Line(const char *s, const char *e) : m_end(e), m_pos(s) {} + + BreakpadLineType GetLineType() { + if (m_pos < m_end) { + switch (m_pos[0]) { + case 'F': + if (starts_with(m_pos, BPAD_FUNC)) { + m_pos += BPAD_FUNC.size(); + return BreakpadLineType::Function; + } + if (starts_with(m_pos, BPAD_FILE)) { + m_pos += BPAD_FILE.size(); + return BreakpadLineType::File; + } + break; + case 'M': + if (starts_with(m_pos, BPAD_MODULE)) { + m_pos += BPAD_MODULE.size(); + return BreakpadLineType::Module; + } + break; + case 'P': + if (starts_with(m_pos, BPAD_PUBLIC)) { + m_pos += BPAD_PUBLIC.size(); + return BreakpadLineType::Public; + } + break; + case 'S': + if (starts_with(m_pos, BPAD_STACK)) { + m_pos += BPAD_STACK.size(); + return 
BreakpadLineType::Stack; + } + break; + default: + if (isxdigit(m_pos[0])) + return BreakpadLineType::SourceLine; + break; + } + } + return BreakpadLineType::Invalid; + } + std::string GetWord() { + // Get the next word from the line. Any leading spaces + // will be stripped. A word is considered any character + // that is not a space. + if (m_pos < m_end) { + // Skip leading spaces + while (m_pos < m_end && isspace(*m_pos)) { + ++m_pos; + } + const auto start = m_pos; + while (m_pos < m_end && !isspace(*m_pos)) { + ++m_pos; + } + if (start < m_pos) + return std::string(start, m_pos - start); + } + return std::string(); + } + std::string GetRestOfLineAsString() { + if (m_pos < m_end - 1) + return std::string(m_pos, m_end - 1 - m_pos); + return std::string(); + } + uint32_t GetHex32() { + auto u = GetUnsigned(16); + assert(u < UINT32_MAX); + return (uint32_t)u; + } + uint64_t GetHex() { return GetUnsigned(16); } + uint64_t GetDecimal() { return GetUnsigned(10); } + uint32_t GetDecimal32() { + auto u = GetUnsigned(10); + assert(u < UINT32_MAX); + return (uint32_t)u; + } + uint64_t GetUnsigned(int base) { + if (m_pos < m_end) { + char *end = (char *)m_pos; + auto value = strtoull(m_pos, &end, base); + if (value != ULLONG_MAX) { + m_pos = end; + while (m_pos < m_end && *m_pos == ' ') + ++m_pos; + return value; + } + } + return UINT64_MAX; + } +}; + +std::error_code +llvm::gsym::convertBreakpadToGSYM(const char *BreakpadPath, + const char *GSYMPath) { + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(BreakpadPath); + auto EC = BuffOrErr.getError(); + if (EC) + return EC; + + std::unique_ptr Buffer = std::move(BuffOrErr.get()); + + FunctionInfo function_info; + GsymData data; + // all entries in data will use the same string/file tables + auto strtab = std::make_shared(); + auto filetab = std::make_shared(strtab); + std::string error; + DataRef bpad_data(Buffer->getBuffer()); + const char *bpad_start = (const char *)bpad_data.getStart(); + const char *bpad_end = 
(const char *)bpad_data.getEnd(); + std::vector line_ends; + for (const char *p = bpad_start + 1; p < bpad_end; ++p) { + if (*p == '\n') { + line_ends.push_back(p + 1); + } + } + if (line_ends.back() != bpad_end) + line_ends.push_back(bpad_end); + + bool got_public = false; + const char *line_start = bpad_start; + for (auto line_end : line_ends) { + Line line(line_start, line_end); + + switch (line.GetLineType()) { + case BreakpadLineType::Invalid: + break; + case BreakpadLineType::Module: { + line.GetWord(); // Skip OS string + line.GetWord(); // Skip arch string + std::string identifier = line.GetWord(); + auto identifier_len = identifier.size(); + uint8_t high_nibble = 0; + for (size_t i = 0; i < identifier_len; ++i) { + const uint8_t nibble = char_to_nibble(identifier[i]); + if (nibble <= 0xf) { + if (i % 2) { + data.UUID.push_back(high_nibble + nibble); + } else { + high_nibble = nibble << 4; + } + } else { + data.UUID.clear(); + break; + } + } + } break; + case BreakpadLineType::File: + line.GetDecimal(); // Ignore the file index + filetab->insert(line.GetRestOfLineAsString()); + break; + case BreakpadLineType::Function: + if (function_info.isValid()) + data.Funcs.emplace_back( + FunctionData(strtab, filetab, std::move(function_info))); + function_info.Addr = line.GetHex(); + function_info.Size = line.GetHex32(); + line.GetHex(); // Skip parameter_size + function_info.Name = strtab->insert(line.GetRestOfLineAsString()); + function_info.Lines.clear(); + break; + case BreakpadLineType::SourceLine: { + uint64_t addr = line.GetHex(); + line.GetHex32(); // Skip 32 bit size + uint32_t line_num = line.GetDecimal32(); + uint32_t file_idx = line.GetDecimal32() + 1; + if (!function_info.Lines.empty()) { + auto &last = function_info.Lines.back(); + // Skip multiple line entries in a row that have the same file and line + if (last.File == file_idx && last.Line == line_num) + break; + } + function_info.Lines.push_back(LineEntry(addr, file_idx, line_num)); + } break; + 
case BreakpadLineType::Public: { + got_public = true; + uint64_t addr = line.GetHex(); + line.GetHex(); // Skip parameter_size + uint32_t name = strtab->insert(line.GetRestOfLineAsString()); + data.Funcs.emplace_back( + FunctionData(strtab, filetab, FunctionInfo(addr, 0, name))); + } break; + case BreakpadLineType::Stack: + break; + } + line_start = line_end; + } + if (got_public) { + // We appended public symbol FunctionInfo objects onto the end of our + // sorted func_infos array and we need to sort it now. + std::sort(data.Funcs.begin(), data.Funcs.end()); + } + GsymCreator::createFrom(data).save(GSYMPath); + + return std::error_code(); +} Index: lib/DebugInfo/GSYM/CMakeLists.txt =================================================================== --- lib/DebugInfo/GSYM/CMakeLists.txt +++ lib/DebugInfo/GSYM/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_library(LLVMDebugInfoGSYM + Breakpad.cpp + DwarfTransformer.cpp + FileTableCreator.cpp + FileWriter.cpp + FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp + GsymStreamer.cpp + InlineInfo.cpp + LineTable.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + ) Index: lib/DebugInfo/GSYM/DwarfTransformer.cpp =================================================================== --- lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,652 @@ +//===- DwarfTransformer.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/StringTableCreator.h" + +using namespace llvm; +using namespace gsym; + +#define HEX32(v) format("0x%8.8" PRIx32, (uint32_t)v) +#define HEX64(v) format("0x%8.8" PRIx64, (uint64_t)v) + +llvm::raw_ostream &operator<<(llvm::raw_ostream &os, + const gsym::FunctionData &f) { + os << "[" << HEX64(f.FuncInfo.Addr) << '-' << HEX64(f.FuncInfo.Addr + f.FuncInfo.Size) + << "): " << f.name(); + return os; +} + +constexpr uint32_t NT_GNU_BUILD_ID_TAG = 0x03; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector FileCache; + uint64_t language = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) { + FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + } + auto die = CU->getUnitDIE(); + language = dwarf::toUnsigned(die.find(dwarf::DW_AT_language), 0); + } + + uint32_t DWARFToGSYMFileIndex(FileTableCreator &filetab, + uint32_t DwarfFileIdx) { + if (!LineTable) + return 0; + assert(DwarfFileIdx < FileCache.size()); + auto FileIdx = FileCache[DwarfFileIdx]; + if (FileIdx != UINT32_MAX) + return FileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + 
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + FileIdx = filetab.insert(File); + else + FileIdx = 0; + FileCache[DwarfFileIdx] = FileIdx; + return FileIdx; + } +}; + +static std::vector getUUID(object::ObjectFile &Obj) { + // Extract the UUID from the object file + std::vector UUID; + if (auto *MachO = dyn_cast(&Obj)) { + const auto mach_uuid = MachO->getUuid(); + if (!mach_uuid.empty()) { + UUID.assign(mach_uuid.data(), mach_uuid.data() + mach_uuid.size()); + } + } else if (auto *Elf = dyn_cast(&Obj)) { + const StringRef gnu_build_id(".note.gnu.build-id"); + for (const auto §Ref : Obj.sections()) { + StringRef sectName; + sectRef.getName(sectName); + if (sectName == gnu_build_id) { + StringRef build_id_data; + if (!sectRef.getContents(build_id_data)) { + DataExtractor decoder(build_id_data, true, 8); // TODO: set endian and addr byte size + uint32_t Offset = 0; + const uint32_t name_size = decoder.getU32(&Offset); + const uint32_t payload_size = decoder.getU32(&Offset); + const uint32_t payload_type = decoder.getU32(&Offset); + const char *name = (const char *)decoder.getDataPtr(&Offset, + name_size); + if ((strncmp(name, "GNU", name_size) == 0) && + payload_type == NT_GNU_BUILD_ID_TAG) { + Offset = alignTo(Offset, 4); + const uint8_t *uuid_data = + (const uint8_t *)decoder.getDataPtr(&Offset, payload_size); + if (uuid_data) + UUID.assign(uuid_data, uuid_data + payload_size); + } + } + } + } + } + return UUID; +} + +DWARFDie GetParentDeclContextDIE(DWARFDie &die) { + if (auto spec = + die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (auto spec_parent = GetParentDeclContextDIE(spec)) { + return spec_parent; + } + } + if (auto abst = + die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (auto abst_parent = GetParentDeclContextDIE(abst)) { + return abst_parent; + } + } + + if (die.getTag() == dwarf::DW_TAG_inlined_subroutine) { + // We never want to follow parent for inlined subroutine - that would + 
// give us information about where the function is inlined, not what + // function is inlined + return DWARFDie(); + } + + auto parent = die.getParent(); + if (!parent) { + return DWARFDie(); + } + switch (parent.getTag()) { + case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return parent; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(parent); + default: + break; + } + + return DWARFDie(); +} + +std::string getQualifiedName(DWARFDie &die, uint64_t language) { + // If the dwarf has mangled name, use mangled name + if (auto name = + dwarf::toString(die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) { + return name; + } + + const char *name_cstr = die.getName(DINameKind::ShortName); + if (!name_cstr || !name_cstr[0]) { + return ""; + } + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(language == dwarf::DW_LANG_C_plus_plus || + language == dwarf::DW_LANG_C_plus_plus_03 || + language == dwarf::DW_LANG_C_plus_plus_11 || + language == dwarf::DW_LANG_C_plus_plus_14 || + language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. This should hurt, so let's do it for C as well + language == dwarf::DW_LANG_C)) { + return name_cstr; + } + // Some GCC optimizations create functions with names ending with .isra. + // or .part. 
and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (strncmp(name_cstr, "_Z", 2) == 0 && + (strstr(name_cstr, ".isra.") != nullptr || + strstr(name_cstr, ".part.") != nullptr)) { + return name_cstr; + } + + std::string name = name_cstr; + auto parent_die = GetParentDeclContextDIE(die); + while (parent_die) { + if (auto parent_name_cstr = parent_die.getName(DINameKind::ShortName)) { + std::string parent_name = parent_name_cstr; + // "lambda" names are wrapped in < >. Replace with { } + // to be consistent with demangled names and not to confuse with templates + if (!parent_name.empty() && parent_name.front() == '<' && + parent_name.back() == '>') { + parent_name.front() = '{'; + parent_name.back() = '}'; + } + name = parent_name + "::" + name; + } + parent_die = GetParentDeclContextDIE(parent_die); + } + + return name; +} + +static bool hasInlineInfo(DWARFDie die, uint32_t depth) { + bool checkChildren = true; + switch (die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. 
+ checkChildren = depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (checkChildren) { + for (auto child : die.children()) { + if (hasInlineInfo(child, depth + 1)) + return true; + } + } + return false; +} + +static void parseInlineInfo(StringTableCreator &strtab, + FileTableCreator &filetab, CUInfo &cuInfo, + DWARFDie die, uint32_t depth, FunctionInfo &FuncInfo, + InlineInfo &parent) { + if (!hasInlineInfo(die, depth)) + return; + + auto tag = die.getTag(); + if (tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo ii; + auto funcRange = DWARFAddressRange(FuncInfo.Addr, FuncInfo.Addr + FuncInfo.Size); + if (auto RangesOrError = die.getAddressRanges()) { + for (const auto &range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (funcRange.contains(range)) + ii.Ranges.emplace_back(Range(range.LowPC, range.HighPC)); + } + } + if (ii.Ranges.empty()) { + return; + } + + auto name = getQualifiedName(die, cuInfo.language); + if (!name.empty()) { + ii.Name = strtab.insert(std::move(name)); + } + ii.CallFile = cuInfo.DWARFToGSYMFileIndex( + filetab, dwarf::toUnsigned(die.find(dwarf::DW_AT_call_file), 0)); + ii.CallLine = dwarf::toUnsigned(die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (auto child : die.children()) { + parseInlineInfo(strtab, filetab, cuInfo, child, depth + 1, FuncInfo, ii); + } + parent.Children.emplace_back(std::move(ii)); + } else if (tag == dwarf::DW_TAG_subprogram || + tag == dwarf::DW_TAG_lexical_block) { + // skip this die and just recurse down + for (auto child : die.children()) { + parseInlineInfo(strtab, filetab, cuInfo, child, depth + 1, FuncInfo, parent); + } + } +} + +bool DwarfTransformer::parseLineTable(FileTableCreator &filetab, CUInfo &cuInfo, + llvm::raw_ostream *OS, DWARFDie die, 
+ FunctionInfo &FuncInfo) { + std::vector RowVector; + if (!cuInfo.LineTable->lookupAddressRange(FuncInfo.Addr, FuncInfo.Size, RowVector)) { + return false; + } + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = cuInfo.LineTable->Rows[RowIndex]; + auto FileIdx = cuInfo.DWARFToGSYMFileIndex(filetab, Row.File); + + if (RowIndex != RowVector[0] && PrevRow.Address > Row.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking the the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. + if (LineEntry(Row.Address, FileIdx, Row.Line) == FuncInfo.Lines.front() && + OS) { + *OS << "warning: duplicate line table detected for DIE:\n"; + die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); + } else if (OS) { + // Print out (ignore if os == nulls as this is expensive) + *OS << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + cuInfo.LineTable->Rows[RowIndex2].dump(*OS); + } + die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (!FuncInfo.Lines.empty()) { + const auto &last = FuncInfo.Lines.back(); + // Skip multiple line entris for the same file and line. + if (last.File == FileIdx && last.Line == Row.Line) + continue; + } + if (!Row.EndSequence) { + if (FuncInfo.Addr <= Row.Address && Row.Address < FuncInfo.Addr + FuncInfo.Size) { + FuncInfo.Lines.emplace_back(LineEntry(Row.Address, FileIdx, Row.Line)); + } else if (Row.Address < FuncInfo.Addr) { + if (OS) { + // Line tables are bad in DWARF. 
+ *OS << "warning: invalid DWARF for DIE with range [" + << HEX64(FuncInfo.Addr) << '-' << HEX64(FuncInfo.Addr + FuncInfo.Size) + << ") that has a matching line entry with an address " + << HEX64(Row.Address) << " that is lower than the function " + << "start address:\n"; + die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); + } + // Don't put multiple bogus line entries in the line table. + // Check to see if we already fixed up the first line entry to + // be the LowPC? + if (FuncInfo.Lines.size() == 1 && FuncInfo.Lines.front().Addr == FuncInfo.Addr) { + FuncInfo.Lines[0].File = FileIdx; + FuncInfo.Lines[0].Line = Row.Line; + } else { + FuncInfo.Lines.emplace_back(LineEntry(FuncInfo.Addr, FileIdx, Row.Line)); + } + } + } + PrevRow = Row; + } + return true; +} + +void DwarfTransformer::handleDie( + std::shared_ptr strtab, + std::shared_ptr filetab, CUInfo &cuInfo, + llvm::raw_ostream *OS, DWARFDie die, + std::function insertFunc) { + switch (die.getTag()) { + case dwarf::DW_TAG_subprogram: { + auto RangesOrError = die.getAddressRanges(); + if (!RangesOrError) { + llvm::consumeError(RangesOrError.takeError()); + break; + } + const auto &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + auto name = getQualifiedName(die, cuInfo.language); + if (name.empty() && OS) { + *OS << "error: function at " << HEX64(die.getOffset()) + << " has no name\n "; + die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + auto nameIdx = strtab->insert(std::move(name)); + + // Create a function_info for each range + for (auto &range : Ranges) { + if (!m_data.TextRanges.contains(range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. 
+ if (range.LowPC != 0 && range.LowPC != UINT32_MAX && + range.LowPC != UINT64_MAX && OS) { + // Unexpected invalid address, emit an error + *OS << "error: DIE has an address range whose start address is not " + " in an executable section and will not be processed:\n"; + die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + if (range.LowPC >= range.HighPC) + break; + + FunctionInfo function_info; + function_info.Addr = range.LowPC; + function_info.Size = range.HighPC - function_info.Addr; + function_info.Name = nameIdx; + if (cuInfo.LineTable) { + parseLineTable(*filetab, cuInfo, OS, die, function_info); + } + if (hasInlineInfo(die, 0)) { + function_info.InlineInfo.Name = nameIdx; + function_info.InlineInfo.Ranges = { + Range(function_info.Addr, function_info.Addr + function_info.Size)}; + parseInlineInfo(*strtab, *filetab, cuInfo, die, 0, function_info, + function_info.InlineInfo); + } + insertFunc(std::move(function_info)); + } + } break; + default: + break; + } + for (auto child : die.children()) { + handleDie(strtab, filetab, cuInfo, OS, child, insertFunc); + } +} + +void DwarfTransformer::initDataFromObj(llvm::object::ObjectFile &obj) { + // Read build ID + m_data.UUID = getUUID(obj); + + // We need to know where the valid sections are that contain instructions. + // DWARF now emits DW_TAG_subprogram tags with DW_AT_low_pc as addresses + // and DW_AT_high_pc values as sizes, and the linker will zero out the + // DW_AT_low_pc, but won't touch the DW_AT_high_pc if it isn't an address. + // This means we might have many many DW_TAG_subprogram's with a + // DW_AT_low_pc of zero and a valid size. We need to ignore these entries. 
+ for (const auto §Ref : obj.sections()) { + if (sectRef.isText()) { + auto start = sectRef.getAddress(); + auto size = sectRef.getSize(); + if (size > 0) + m_data.TextRanges.insert(gsym::Range(start, start + size)); + } + } +} + +bool DwarfTransformer::loadDwarf(object::ObjectFile &obj) { + using namespace llvm::object; + auto DICtx = DWARFContext::create(obj); + logAllUnhandledErrors(DICtx->loadRegisterInfo(obj), *m_outStream, + m_errorBanner); + + if (!m_init) { + initDataFromObj(obj); + } + auto numBefore = m_data.Funcs.size(); + + if (m_numThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + auto strtab = std::make_shared(); + auto filetab = std::make_shared(strtab); + for (const auto &CU : DICtx->compile_units()) { + auto die = CU->getUnitDIE(false); + auto cuInfo = CUInfo(*DICtx, dyn_cast(CU.get())); + handleDie(strtab, filetab, cuInfo, m_outStream, die, + [&](FunctionInfo &&f) { + m_data.Funcs.emplace_back( + FunctionData(strtab, filetab, std::move(f))); + }); + } + } else { + // THIS IS VERY HACKY. Without parsing DIEs for all CUs first, we might hit + // a race condition below. LLVM Dwarf parser is not completely thread-safe: + // DWARFDie keeps a pointer to an element of a vector in DWARFUnit. If we + // dont parse all the DIEs first, they might be parsed from another thread, + // causing the vector to reallocate, causing the pointer to be invalid. We + // could sequentially call CU->getUnitDIE(false) for all CUs, but that's + // also not super fast. To do this concurrently, we need to call + // getAbbreviations sequentially first so that getUnitDIE() only works with + // its local data. 
+ for (const auto &CU : DICtx->compile_units()) { + CU->getAbbreviations(); + } + llvm::ThreadPool pool(m_numThreads); + for (const auto &CU : DICtx->compile_units()) { + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + } + pool.wait(); + + std::mutex lock; // needed to insert into output.Funcs + for (const auto &CU : DICtx->compile_units()) { + auto die = CU->getUnitDIE(false /*CUDieOnly*/); + if (die) { + pool.async( + [this, cuInfo = CUInfo(*DICtx, dyn_cast(CU.get())), &lock, die]() mutable { + auto strtab = std::make_shared(); + auto filetab = std::make_shared(strtab); + std::string msg; + llvm::raw_string_ostream stream(msg); + handleDie(strtab, filetab, cuInfo, + loggingEnabled() ? &stream : nullptr, die, + [&](FunctionInfo &&f) { + std::lock_guard guard(lock); + m_data.Funcs.emplace_back( + FunctionData(strtab, filetab, std::move(f))); + }); + stream.flush(); + if (!msg.empty()) { + // Print msg lines into an actual stream under a lock + std::lock_guard guard(lock); + log() << msg; + } + }); + } + } + pool.wait(); + } + log() << "Loaded " << m_data.Funcs.size() - numBefore + << " functions from DWARF.\n"; + return m_data.Funcs.size() - numBefore > 0; +} + +bool DwarfTransformer::loadSymbolTable(llvm::object::ObjectFile &obj) { + using namespace llvm::object; + if (!m_init) { + initDataFromObj(obj); + } + auto numBefore = m_data.Funcs.size(); + + auto strtab = std::make_shared(); + auto filetab = std::make_shared(strtab); + + for (auto &s : obj.symbols()) { + auto type = s.getType(); + auto addr = s.getValue(); + if (!type || type.get() != SymbolRef::Type::ST_Function || addr == 0) + continue; + // Function size for MachO files will be 0 + auto size = isa(&obj) ? 
ELFSymbolRef(s).getSize() : 0; + if (auto name = s.getName()) { + m_data.Funcs.emplace_back( + FunctionData(strtab, filetab, + FunctionInfo(addr, size, strtab->insert(*name)))); + } + } + log() << "Loaded " << m_data.Funcs.size() - numBefore + << " functions from symbol table.\n"; + return m_data.Funcs.size() - numBefore > 0; +} + +void DwarfTransformer::optimize() { + // Remove duplicates in DWARF and remove functions from SymbolTable that have + // DWARF data. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. + // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto numBefore = m_data.Funcs.size(); + + std::sort(m_data.Funcs.begin(), m_data.Funcs.end()); + + auto Funcs = std::move(m_data.Funcs); + m_data.Funcs.clear(); + + uint64_t lastLo = 0, lastHi = 0; // range of the last added function + + for (auto &f : Funcs) { + if (f.FuncInfo.Size == 0) { + continue; + } + auto lo = f.FuncInfo.Addr; + auto hi = f.FuncInfo.Addr + f.FuncInfo.Size; + if (hi > lastHi) { // is this function covering any extra address range? 
+ // if (loggingEnabled() && lo < lastHi) { + // log() << "Warning: function \t" << f << "\n\toverlaps with\t" + // << m_data.Funcs.back() << "\n"; + // } + lastLo = f.FuncInfo.Addr; + lastHi = f.FuncInfo.Addr + f.FuncInfo.Size; + m_data.Funcs.emplace_back(std::move(f)); + } else if (hi == lastHi && lo == lastLo) { + auto &last = m_data.Funcs.back(); + if (strcmp(last.name(), f.name()) != 0) { + // For symbol table entries (no rich info), prefer shorter name + // (not that functions with rich info are before functions without) + if (!f.FuncInfo.hasRichInfo() && !last.FuncInfo.hasRichInfo() && + strlen(f.name()) < strlen(last.name())) + m_data.Funcs.back() = f; + // if both have rich info, print a warning + // if (loggingEnabled() && f.FuncInfo.hasRichInfo() && last.FuncInfo.hasRichInfo()) + // log() << "Warning: functions with different names and " + // << "same address range: \n\t" << f << "\n\t" << last << "\n"; + } + } else if (loggingEnabled()) { + // print warnings about overlaps + assert(lastHi > hi || (lastHi == hi && lo > lastLo)); + assert(!m_data.Funcs.empty()); + // log() << "Warning: function \t" << f << "\n\toverlaps with\t" + // << m_data.Funcs.back() << "\n"; + } + } + + log() << "Pruned " << numBefore - m_data.Funcs.size() + << " functions, ended with " << m_data.Funcs.size() << " total\n"; +} + +Optional> +DwarfTransformer::getObjectFile(const std::string &filename) { + auto BuffOrErr = MemoryBuffer::getFileOrSTDIN(filename); + if (auto err = BuffOrErr.getError()) { + log() << filename << ": " << err.message(); + return None; + } + auto Buff = std::move(BuffOrErr.get()); + auto BinOrErr = object::createBinary(*Buff); + if (auto err = errorToErrorCode(BinOrErr.takeError())) { + log() << filename << ": " << err.message(); + return None; + } + + if (auto *Obj = dyn_cast(BinOrErr->get())) { + // transfer ownership + auto ptr = std::unique_ptr( + dyn_cast(BinOrErr->release())); + return object::OwningBinary(std::move(ptr), + std::move(Buff)); + } else 
if (auto *Fat = + dyn_cast(BinOrErr->get())) { + auto arch = object::MachOObjectFile::getHostArch().getArchName(); + if (Fat->getNumberOfObjects() == 1) { + auto MachOOrErr = Fat->begin_objects()->getAsObjectFile(); + if (auto err = BuffOrErr.getError()) { + log() << filename << ": " << err.message(); + return None; + } + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else if (auto MachOOrErr = Fat->getObjectForArch(arch)) { + return object::OwningBinary( + std::move(MachOOrErr.get()), std::move(Buff)); + } else { + log() << filename << ": file contains objects for " + << "multiple archs but not for " << arch; + } + } + log() << filename << ": unsupported binary type"; + return None; +} Index: lib/DebugInfo/GSYM/FileTableCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/FileTableCreator.cpp +++ lib/DebugInfo/GSYM/FileTableCreator.cpp @@ -0,0 +1,47 @@ +//===- FileTableCreator.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include + +using namespace llvm; +using namespace gsym; + +uint32_t FileTableCreator::insert(std::string S) { + auto Dir = 0, Base = 0; + auto LastSlash = S.rfind('/'); + if (LastSlash == std::string::npos || LastSlash == 0) { + Base = StringTable->insert(S); + } else { + Dir = StringTable->insert(S.substr(0, LastSlash)); + Base = StringTable->insert(S.substr(LastSlash + 1)); + } + return insert(FileEntry(Dir, Base)); +} + +uint32_t FileTableCreator::insert(FileEntry Entry) { + auto Index = FileEntries.size(); + // find in hash map and insert if not present + auto R = EntryToIndex.emplace(Entry, Index); + if (R.second) { // if newly inserted + FileEntries.emplace_back(Entry); + } + return R.first->second; +} + +void FileTableCreator::write(FileWriter &Out) const { + // we should always have 1 blank entry + assert(!FileEntries.empty()); + assert(FileEntries[0].Dir == 0); + assert(FileEntries[0].Base == 0); + size_t NumFiles = FileEntries.size(); + Out.WriteUnsigned(NumFiles, sizeof(uint32_t)); + Out.Write(FileEntries.data(), NumFiles * sizeof(FileEntry)); +} Index: lib/DebugInfo/GSYM/FileWriter.cpp =================================================================== --- lib/DebugInfo/GSYM/FileWriter.cpp +++ lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,80 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { m_ostream.flush(); } + +bool FileWriter::WriteSLEB(int64_t value) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(value, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteULEB(uint64_t value) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(value, Bytes); + assert(Length < sizeof(Bytes)); + return Write(Bytes, Length); +} + +bool FileWriter::WriteU8(uint8_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::WriteU32(uint32_t U) { return Write(&U, sizeof(U)); } + +bool FileWriter::Fixup32(uint32_t Value, off_t Offset) { + const off_t CurrOffset = Tell(); + if (CurrOffset == -1) + return false; + if (Seek(Offset) != Offset) + return false; + if (!WriteU32(Value)) + return false; + return Seek(CurrOffset) == CurrOffset; +} + +bool FileWriter::WriteUnsigned(uint64_t U, size_t N) { + // NOTE: this only works on little endian machines + return Write(&U, N); +} +bool FileWriter::Write(const void *Src, size_t SrcLength) { + m_ostream.write((const char *)Src, SrcLength); + return m_ostream.good(); +} + +off_t FileWriter::Tell() { return m_ostream.tellp(); } + +off_t FileWriter::Seek(off_t Offset) { + m_ostream.seekp(Offset); + return m_ostream.good() ? 
Offset : -1; +} + +bool FileWriter::AlignTo(size_t Align) { + off_t Offset = Tell(); + assert(Offset != -1); + if (Offset == -1) + return false; + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return true; + off_t PadCount = AlignedOffset - Offset; + auto Success = + Write(std::string(PadCount, '\0').c_str(), PadCount); + assert(Tell() == AlignedOffset); + return Success; +} Index: lib/DebugInfo/GSYM/FunctionInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/FunctionInfo.cpp +++ lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -0,0 +1,31 @@ +//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void FunctionInfo::dump(llvm::raw_ostream &OS, GsymReader &GSYM) const { + OS << '[' << format_hex(Addr, 18) << '-' << format_hex(Addr + Size, 18) << + ") " << GSYM.getString(Name) << '\n'; + if (!Lines.empty()) { + OS << "Lines:\n"; + for (const auto &Line : Lines) { + Line.dump(OS); + } + } + if (InlineInfo.isValid()) + InlineInfo.dump(OS, GSYM, 0); +} Index: lib/DebugInfo/GSYM/GsymCreator.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymCreator.cpp +++ lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,363 @@ +//===- GsymCreator.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +#include +#include +#include +#include +#include + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" + +using namespace llvm; +using namespace gsym; + +GsymCreator GsymCreator::createFrom(GsymData &data) { + GsymCreator creator; + creator.UUID = data.UUID; + creator.TextRanges = data.TextRanges; + std::sort(data.Funcs.begin(), data.Funcs.end()); + if (data.Funcs.empty()) + return creator; + // reuse tables from the first entry + auto &firstFunc = data.Funcs.front(); + creator.StrTab = firstFunc.StrTab; + creator.FileTab = firstFunc.FileTab; + for (auto &f : data.Funcs) { + creator.addForeignFunction(f.StrTab, f.FileTab, f.FuncInfo); + } + return creator; +} + +void GsymCreator::createSegmentsFromRange( + const GsymData &data, std::vector::const_iterator begin, + std::vector::const_iterator end, size_t fileSizeTarget, + std::function callback) { + + auto it = begin; + while (it != end) { + GsymCreator segment; + segment.StrTab = std::make_shared(); + segment.FileTab = std::make_shared(segment.StrTab); + segment.FuncInfos.clear(); + segment.UUID = data.UUID; + while (it != end && segment.estimateFileSize() < fileSizeTarget) { + segment.addForeignFunction(it->StrTab, it->FileTab, it->FuncInfo); + it++; + } + callback(segment); + } +} + +void GsymCreator::createSegmentsFrom( + const GsymData &data, size_t fileSizeTarget, + std::function callback, + uint32_t numThreads) { + // compute number of functions per thread, rounding up + ssize_t funcPerThread = (data.Funcs.size() - 1) / numThreads + 1; + auto begin = data.Funcs.begin(); + std::vector> tasks; + while (begin != data.Funcs.end()) { + auto end = std::distance(begin, data.Funcs.end()) <= funcPerThread + ? 
data.Funcs.end() + : begin + funcPerThread; + // std::async is not great, but its good enough; we use it here to avoid + // external dependency on some library with better async execution + tasks.push_back(std::async( + std::launch::async, [begin, end, &data, &fileSizeTarget, &callback]() { + createSegmentsFromRange(data, begin, end, fileSizeTarget, callback); + })); + begin = end; + } + + for (const auto &task : tasks) { + task.wait(); + } +} + +size_t GsymCreator::estimateFileSize() { + for (; NextFuncToEstimate < FuncInfos.size(); NextFuncToEstimate++) { + // serialize into a stringsteam just to see how much spaces is this + // going to take + auto &fi = FuncInfos[NextFuncToEstimate]; + std::ostringstream stream; + FileWriter writer(stream); + LineTable::write(writer, fi); + fi.InlineInfo.write(writer, fi.Addr); + // 24 is a rough estimate of the size of other fields + SizeEstimate += writer.Tell() + 24; + } + return StrTab->getTotalSize() + SizeEstimate; +} + +uint32_t GsymCreator::addForeignString(const StringTableCreator &OtherStrTab, + uint32_t offset) { + return StrTab->insert(OtherStrTab[offset]); +} + +uint32_t GsymCreator::addForeignFile(const FileTableCreator &OtherFileTab, + uint32_t id) { + auto file = OtherFileTab[id]; + auto Dir = file.Dir == 0 ? 
0 : addForeignString(OtherFileTab.getStringTable(), + file.Dir); + auto Base = addForeignString(OtherFileTab.getStringTable(), file.Base); + return FileTab->insert(FileEntry(Dir, Base)); +} + +void GsymCreator::fixForeignInlineInfo(InlineInfo &ii, + const StringTableCreator &OtherStrTab, + const FileTableCreator &OtherFileTab) { + if (ii.Name != 0) + ii.Name = addForeignString(OtherStrTab, ii.Name); + if (ii.CallFile != 0) + ii.CallFile = addForeignFile(OtherFileTab, ii.CallFile); + for (auto &child : ii.Children) { + fixForeignInlineInfo(child, OtherStrTab, OtherFileTab); + } +} + +void GsymCreator::addForeignFunction( + const std::shared_ptr OtherStrTab, + const std::shared_ptr OtherFileTab, const FunctionInfo &fi) { + assert(OtherStrTab.get() != nullptr); + assert(OtherFileTab.get() != nullptr); + if (StrTab == OtherStrTab && OtherFileTab == FileTab) { + // no need to translate string/file ids + FuncInfos.emplace_back(fi); + return; + } + FunctionInfo newFi; + newFi.Addr = fi.Addr; + newFi.Size = fi.Size; + newFi.Name = addForeignString(*OtherStrTab, fi.Name); + for (auto &line : fi.Lines) { + newFi.Lines.emplace_back( + LineEntry(line.Addr, addForeignFile(*OtherFileTab, line.File), line.Line)); + } + if (fi.InlineInfo.isValid()) { + newFi.InlineInfo = fi.InlineInfo; + fixForeignInlineInfo(newFi.InlineInfo, *OtherStrTab, *OtherFileTab); + } + + FuncInfos.emplace_back(std::move(newFi)); +} + +bool GsymCreator::save(const char *path) const { + std::ofstream file(path, std::ios::binary | std::ios::out | std::ios::trunc); + auto success = save(file); + file.close(); + return success; +} + +#if 0 +// TODO: implement saving using the AsmPrinter instead of doing it manually +bool GsymCreator::save(llvm::AsmPrinter &Asm) const { + if (FuncInfos.empty()) + return false; + const uint64_t MinAddr = FuncInfos.front().Addr; + const uint64_t MaxAddr = FuncInfos.back().Addr; + const uint64_t AddrDelta = MaxAddr - MinAddr; + uint8_t AddrOffSize = 8; + if (AddrDelta <= 
UINT8_MAX) + AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + AddrOffSize = 4; + + Asm.OutStreamer->AddComment("GSYM Magic"); + Asm.emitInt32(GSYM_MAGIC); + Asm.OutStreamer->AddComment("GSYM Version"); + Asm.emitInt32(GSYM_VERSION); + Asm.OutStreamer->AddComment("GSYM Address Offset Byte Size"); + Asm.emitInt8(AddrOffSize); + Asm.OutStreamer->AddComment("GSYM UUID Byte Size"); + Asm.emitInt8(UUID.size()); + Asm.OutStreamer->AddComment("GSYM Base Address"); + Asm.emitInt64(MinAddr); + assert(FuncInfos.size() <= UINT32_MAX); + Asm.OutStreamer->AddComment("GSYM Num Addresses"); + Asm.emitInt32(FuncInfos.size()); + MCSymbol *StrtabBegin = Asm.createTempSymbol("StrtabBegin"); + MCSymbol *StrtabEnd = Asm.createTempSymbol("StrtabEnd"); + Asm.OutStreamer->AddComment("GSYM String Table Offset"); + Asm.EmitLabelPlusOffset(StrtabBegin, 0, 4); // String table offset + Asm.OutStreamer->AddComment("GSYM String Table Size"); + Asm.EmitLabelDifference(EndLabel, BeginLabel, 4); // String table size + + Asm.emitInt32(0); // StrtabOffset placeholder + Asm.emitInt32(0); // StrtabSize placeholder + if (UUID.size() > GSYM_MAX_UUID_SIZE) { + fprintf(stderr, "error: UUID value is too large (%u bytes)\n", + (uint32_t)UUID.size()); + return false; + } + for (size_t i=0; iwrite(out); + // // Write out the sting table + // const off_t StrtabOffset = out.Tell(); + // StrTab->write(out); + // const off_t StrtabSize = out.Tell() - StrtabOffset; + // std::vector addr_info_offsets; + // // Write out the address infos for each address + // for (const auto &func_info : FuncInfos) { + // out.AlignTo(sizeof(uint32_t)); + // addr_info_offsets.push_back((uint32_t)out.Tell()); + // // Write the size in bytes of this function as a uint32_t + // out.WriteU32(func_info.Size); + // // Write the name of this function as a uint32_t string table offset + // out.WriteU32(func_info.Name); + // // Write out the line table if we have one. 
+ // LineTable::write(out, func_info); + // + // // Write out the inline function info if we have any + // if (func_info.InlineInfo.isValid()) { + // out.WriteU32(static_cast(InfoType::InlineInfo)); + // const auto inline_info_size_offset = out.Tell(); + // out.WriteU32(0); // We will fix this up after writing the info out + // const auto inline_info_start = out.Tell(); + // func_info.InlineInfo.write(out, func_info.Addr); + // const off_t inline_info_length = out.Tell() - inline_info_start; + // out.Fixup32((uint32_t)inline_info_length, inline_info_size_offset); + // } + // + // // Terminate the data chunks with and end of list with zero size + // out.WriteU32(static_cast(InfoType::EndOfList)); + // out.WriteU32(0); + // } + // // Fixup the string table offset and size in the header + // out.Seek(offsetof(Header, StrtabOffset)); + // out.WriteU32((uint32_t)StrtabOffset); + // out.WriteU32((uint32_t)StrtabSize); + // + // // Fixup all address info offsets + // out.Seek(addr_info_offsets_offset); + // out.Write(addr_info_offsets.data(), + // addr_info_offsets.size() * sizeof(uint32_t)); + return true; + +} +#endif + +bool GsymCreator::save(std::ostream &stream) const { + if (FuncInfos.empty()) + return false; + const uint64_t MinAddr = FuncInfos.front().Addr; + const uint64_t MaxAddr = FuncInfos.back().Addr; + const uint64_t AddrDelta = MaxAddr - MinAddr; + uint8_t AddrOffSize = 8; + if (AddrDelta <= UINT8_MAX) + AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + AddrOffSize = 4; + Header header = {0, 0, 0, 0, 0, 0, 0, 0, {0}}; + header.Magic = GSYM_MAGIC; + header.Version = GSYM_VERSION; + header.AddrOffSize = AddrOffSize; + header.UUIDSize = UUID.size(); + header.BaseAddress = MinAddr; + assert(FuncInfos.size() <= UINT32_MAX); + header.NumAddresses = (uint32_t)FuncInfos.size(); + header.StrtabOffset = 0; // We will need to fix this up later. + header.StrtabSize = 0; // We will need to fix this up later. 
+ if (header.UUIDSize > sizeof(header.UUID)) { + fprintf(stderr, "error: UUID value is too large (%u bytes)\n", + (uint32_t)UUID.size()); + return false; + } + if (UUID.size() > 0) { + memcpy(header.UUID, UUID.data(), UUID.size()); + } + FileWriter out(stream); + // Write out the header + out.Write(&header, Header::getByteSize()); + out.AlignTo(header.AddrOffSize); + // Write out the address offsets + for (const auto &func_info : FuncInfos) { + uint64_t addr_offset = func_info.Addr - header.BaseAddress; + out.WriteUnsigned(addr_offset, header.AddrOffSize); + } + // Write out all zeros for the addr_info_offsets; + out.AlignTo(sizeof(uint32_t)); + const off_t addr_info_offsets_offset = out.Tell(); + for (size_t i = 0, n = FuncInfos.size(); i < n; ++i) + out.WriteU32(0); + + // Write out the file table + out.AlignTo(sizeof(uint32_t)); + FileTab->write(out); + // Write out the sting table + const off_t StrtabOffset = out.Tell(); + StrTab->write(out); + const off_t StrtabSize = out.Tell() - StrtabOffset; + std::vector addr_info_offsets; + // Write out the address infos for each address + for (const auto &func_info : FuncInfos) { + out.AlignTo(sizeof(uint32_t)); + addr_info_offsets.push_back((uint32_t)out.Tell()); + // Write the size in bytes of this function as a uint32_t + out.WriteU32(func_info.Size); + // Write the name of this function as a uint32_t string table offset + out.WriteU32(func_info.Name); + // Write out the line table if we have one. 
+ LineTable::write(out, func_info); + + // Write out the inline function info if we have any + if (func_info.InlineInfo.isValid()) { + out.WriteU32(static_cast(InfoType::InlineInfo)); + const auto inline_info_size_offset = out.Tell(); + out.WriteU32(0); // We will fix this up after writing the info out + const auto inline_info_start = out.Tell(); + func_info.InlineInfo.write(out, func_info.Addr); + const off_t inline_info_length = out.Tell() - inline_info_start; + out.Fixup32((uint32_t)inline_info_length, inline_info_size_offset); + } + + // Terminate the data chunks with and end of list with zero size + out.WriteU32(static_cast(InfoType::EndOfList)); + out.WriteU32(0); + } + // Fixup the string table offset and size in the header + out.Seek(offsetof(Header, StrtabOffset)); + out.WriteU32((uint32_t)StrtabOffset); + out.WriteU32((uint32_t)StrtabSize); + + // Fixup all address info offsets + out.Seek(addr_info_offsets_offset); + out.Write(addr_info_offsets.data(), + addr_info_offsets.size() * sizeof(uint32_t)); + return true; +} Index: lib/DebugInfo/GSYM/GsymReader.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymReader.cpp +++ lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,520 @@ +//===- GsymReader.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/DebugInfo/GSYM/DataRef.h" +#include "llvm/DebugInfo/GSYM/FileTableCreator.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" + +using namespace llvm; +using namespace gsym; + +static int unsigned_width(uint64_t u) { + if (u < 10) + return 1; + return 1 + unsigned_width(u/10); +} + +void Header::dump(llvm::raw_ostream &OS) const { + OS << "Header:\n"; + OS << " magic = " << format_hex(Magic, 10) << "\n"; + OS << " version = " << format_hex(Version, 6) << '\n'; + OS << " addr_off_size = " << format_hex(AddrOffSize, 4) << '\n'; + OS << " uuid_size = " << format_hex(UUIDSize, 4) << '\n'; + OS << " base_address = " << format_hex(BaseAddress, 18) << '\n'; + OS << " num_addrs = " << format_hex(NumAddresses, 10) << '\n'; + OS << " strtab_offset = " << format_hex(StrtabOffset, 10) << '\n'; + OS << " strtab_size = " << format_hex(StrtabSize, 10) << '\n'; + OS << " uuid = "; + for (uint8_t i = 0; i < UUIDSize; ++i) { + OS << format_hex_no_prefix(UUID[i], 2); + } + OS << '\n'; +} + +std::string Header::getError() const { + // TODO: support swapped GSYM files + if (Magic != GSYM_MAGIC) + return "invalid magic"; + if (Version != 1) + return "invalid version"; + return ""; +} + +GsymReader::GsymReader() { +} + +std::error_code +GsymReader::openFile(StringRef Filename) { + // Open the input file + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto error = BuffOrErr.getError(); + if (!error) { + MemBuffer = std::move(BuffOrErr.get()); + init(MemBuffer->getBuffer()); + } + return error; +} + +void GsymReader::init(StringRef 
Bytes) { + DataRef file_data(Bytes); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + auto Magic = file_data.getValue(0, 0); + if (Magic == GSYM_MAGIC) { + // This file is a GSYM file, the file data is the GSYM data + GSYMData = file_data; + } else { + // TODO: Load this file as an ObjectFile and extract GSYM info from a + // ".gsym" section (ELF) or "__gsym" section (mach-o) in the file. + return; + } + + GSYMHeader = GSYMData.getPointer
(0); + if (GSYMHeader == nullptr) + return; + if (!GSYMData.isValid()) + return; + + ErrorStr = GSYMHeader->getError(); + if (!ErrorStr.empty()) { + return; + } + const uint8_t *p = GSYMData.getStart(); + // Calculate the start of the address offset table. The address offset table + // contains a sorted list of the all all addresses contained in this GSYM + // file. + uint64_t Offset = alignTo(Header::getByteSize(), GSYMHeader->AddrOffSize); + AddrOffsets = p + Offset; + // Calculate the start of the address info offset table. Each address offset + // in the address offset table has a offset to the file data for the address + // in the address info offsets table. + Offset = alignTo(Offset + GSYMHeader->NumAddresses * GSYMHeader->AddrOffSize, + sizeof(uint32_t)); + AddrInfoOffsets = (const uint32_t *)(p + Offset); + // Calculate the start of the file table. + Offset = alignTo(Offset + GSYMHeader->NumAddresses * sizeof(uint32_t), + sizeof(uint32_t)); + FileTab = (const FileTable *)(p + Offset); + // Set the string table based off information in the GSYM header. 
+ StrTab.Data = file_data.getSlice(GSYMHeader->StrtabOffset, + GSYMHeader->StrtabSize); + +} + +uint64_t GsymReader::getAddressOffset(size_t idx) const { + if (GSYMHeader && AddrOffsets && idx < GSYMHeader->NumAddresses) { + switch (GSYMHeader->AddrOffSize) { + case 1: + return reinterpret_cast(AddrOffsets)[idx]; + case 2: + return reinterpret_cast(AddrOffsets)[idx]; + case 4: + return reinterpret_cast(AddrOffsets)[idx]; + case 8: + return reinterpret_cast(AddrOffsets)[idx]; + } + } + return UINT64_MAX; +} + +size_t GsymReader::getNumAddresses() const { + if (GSYMHeader) + return GSYMHeader->NumAddresses; + return 0; +} + +uint64_t GsymReader::getAddress(size_t idx) const { + if (GSYMHeader) { + auto addr_offset = getAddressOffset(idx); + if (addr_offset != UINT64_MAX) + return GSYMHeader->BaseAddress + addr_offset; + } + return UINT64_MAX; +} + +uint64_t GsymReader::getAddressInfoOffset(size_t idx) const { + if (GSYMHeader && AddrInfoOffsets && idx < GSYMHeader->NumAddresses) { + return AddrInfoOffsets[idx]; + } + return UINT64_MAX; +} + +DataExtractor GsymReader::getAddressInfoPayload(size_t idx) const { + uint64_t start_offset = getAddressInfoOffset(idx); + if (start_offset != UINT64_MAX) { + start_offset += 8; // Skip uint32_t size and name to get to payload data + uint64_t end_offset = getAddressInfoOffset(idx + 1); + auto data = GSYMData.getSlice(start_offset, end_offset); + if (data.isValid()) + return DataExtractor(data.getData(), true, 8); + } + return DataExtractor(StringRef(), true, 8); +} + +const char *GsymReader::getInfoTypeAsString(InfoType IT) { + switch (IT) { + case InfoType::EndOfList: + return "EndOfList"; + case InfoType::LineTableInfo: + return "LineTable"; + case InfoType::InlineInfo: + return "InlineInfo"; + } + return "???"; +} + +void GsymReader::FileTable::dump(llvm::raw_ostream &OS, + const StringTable &StrTab) const { + OS << "Files:\n"; + const size_t index_width = unsigned_width(NumFiles); + for (uint32_t i = 0; i < NumFiles; ++i) { + 
OS << "files[" << format_decimal(i, index_width) << "] " << + format_hex(Files[i].Dir, 10) << ", " << format_hex(Files[i].Base, 10); + StringRef Dir(StrTab.getString(Files[i].Dir)); + StringRef Base(StrTab.getString(Files[i].Base)); + if (Dir.empty()) { + if (!Base.empty()) + OS << " (\"" << Base << "\")"; + } else { + assert(!Base.empty()); + OS << " (\"" << Dir << '/' << Base << "\")"; + } + OS << '\n'; + } +} + +void GsymReader::dump(llvm::raw_ostream &OS, bool Verbose) const { + // If GSYMHeader is not NULL, then the header has been validated. + if (!GSYMHeader) { + OS << "invalid gsym file\n"; + return; + } + if (Verbose) { + // When verbose dumping, dump the header, address offsets, address info + // offsets, file table and string table. + GSYMHeader->dump(OS); + OS << "Address Offsets:\n"; + const size_t addr_off_hex_width = GSYMHeader->AddrOffSize * 2 + 2; + const size_t addr_hex_width = 16 + 2; + const size_t index_width = unsigned_width(GSYMHeader->NumAddresses); + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + auto addr_offset = getAddressOffset(i); + OS << " [" << format_decimal(i, index_width) << "] " << + format_hex(addr_offset, addr_off_hex_width) << " (" << + format_hex(addr_offset + GSYMHeader->BaseAddress, addr_hex_width) << + ")\n"; + } + OS << "Address Info Offsets:\n"; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) + OS << " [" << format_decimal(i, index_width) << "] " << + format_hex(getAddressInfoOffset(i), 10) << '\n'; + FileTab->dump(OS, StrTab); + StrTab.dump(OS); + } + + // Dump contents of all address info objects. 
+ OS << "Address Infos:\n"; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + dumpAddressInfo(OS, i); + } +} + +void GsymReader::dumpAddressInfo(llvm::raw_ostream &OS, + size_t AddrInfoIndex) const { + const auto addr_info_offset = getAddressInfoOffset(AddrInfoIndex); + if (addr_info_offset == UINT64_MAX) { + OS << "error: invalid address info index " << AddrInfoIndex; + return; + } + OS << format_hex(addr_info_offset, 10) << ": "; + auto AddrInfo = GSYMData.getPointer(addr_info_offset); + if (!AddrInfo) { + OS << "error: corrupt GSYM file doesn't contain address info\n"; + return; + } + const uint64_t AddrOffset = getAddressOffset(AddrInfoIndex); + const uint64_t StartAddr = GSYMHeader->BaseAddress + AddrOffset; + const uint64_t EndAddr = StartAddr + AddrInfo->Size; + OS << '[' << format_hex(StartAddr, 18) << " - " << + format_hex(EndAddr, 18) << "): " << StrTab.getString(AddrInfo->Name) + << '\n'; + DataExtractor data = getAddressInfoPayload(AddrInfoIndex); + + bool done = false; + uint32_t Offset = 0; + while (!done) { + auto IT = static_cast(data.getU32(&Offset)); + uint32_t InfoLength = data.getU32(&Offset); + OS << " " << format_hex(addr_info_offset + 8 + Offset, 10) << ": <" << + format_hex(InfoLength, 10) << "> " << getInfoTypeAsString(IT) + << '\n'; + DataExtractor InfoData = data.getSlice(&Offset, InfoLength); + switch (IT) { + case InfoType::EndOfList: + done = true; + break; + + case InfoType::LineTableInfo: { + std::vector line_table; + LineTable line_parser(InfoData); + dumpLineTable(OS, StartAddr, InfoData, 4); + } break; + + case InfoType::InlineInfo: { + InlineInfo InlineInfo; + uint32_t InlineOffset = 0; + if (InlineInfo.decode(InfoData, InlineOffset, StartAddr)) + InlineInfo.dump(OS, *this, 4); + else + OS << "error: failed to decode inline info\n"; + } break; + } + } +} + +bool GsymReader::dumpAddressInfos(llvm::raw_ostream &OS, + const char *NameCstr) const { + uint32_t NameIdx = StrTab.find(NameCstr); + if (NameIdx == 0) + return 
false; + bool success = false; + for (uint32_t i = 0; i < GSYMHeader->NumAddresses; ++i) { + const auto addr_info_offset = getAddressInfoOffset(i); + auto AddrInfo = GSYMData.getPointer(addr_info_offset); + if (AddrInfo && AddrInfo->Name == NameIdx) { + dumpAddressInfo(OS, i); + success = true; + } + } + return success; +} + +void GsymReader::dumpLineTable(llvm::raw_ostream &OS, uint64_t BaseAddr, + DataExtractor &LineData, uint32_t Depth) const { + LineTable parser(LineData); + std::vector line_table; + parser.parseAllEntries(line_table, BaseAddr); + for (const auto &line_entry : line_table) { + auto file_entry = FileTab->getFile(line_entry.File); + auto Dir = StrTab.getString(file_entry.Dir); + auto Base = StrTab.getString(file_entry.Base); + OS.indent(Depth); + OS << format_hex(line_entry.Addr, 18) << ": " << Dir << '/' << + Base << ':' << line_entry.Line << '\n'; + } +} + +bool GsymReader::findAddressInfo(uint64_t addr, LookupInfo &Info) const { + if (addr < GSYMHeader->BaseAddress || GSYMHeader->NumAddresses == 0) + return false; + const uint64_t addr_offset = addr - GSYMHeader->BaseAddress; + Info.clear(); + + switch (GSYMHeader->AddrOffSize) { + case 1: { + auto first = reinterpret_cast(AddrOffsets); + auto last = first + GSYMHeader->NumAddresses; + auto pos = std::lower_bound(first, last, addr_offset); + if (pos == last || addr_offset < *pos) + --pos; + Info.AddrInfoIndex = std::distance(first, pos); + Info.MatchAddrOffset = *pos; + break; + } + case 2: { + auto first = reinterpret_cast(AddrOffsets); + auto last = first + GSYMHeader->NumAddresses; + auto pos = std::lower_bound(first, last, addr_offset); + if (pos == last || addr_offset < *pos) + --pos; + Info.AddrInfoIndex = std::distance(first, pos); + Info.MatchAddrOffset = *pos; + break; + } + case 4: { + auto first = reinterpret_cast(AddrOffsets); + auto last = first + GSYMHeader->NumAddresses; + auto pos = std::lower_bound(first, last, addr_offset); + if (pos == last || addr_offset < *pos) + --pos; + 
Info.AddrInfoIndex = std::distance(first, pos); + Info.MatchAddrOffset = *pos; + break; + } + case 8: { + auto first = reinterpret_cast(AddrOffsets); + auto last = first + GSYMHeader->NumAddresses; + auto pos = std::lower_bound(first, last, addr_offset); + if (pos == last || addr_offset < *pos) + --pos; + Info.AddrInfoIndex = std::distance(first, pos); + Info.MatchAddrOffset = *pos; + break; + } + default: + break; + } + + if (Info.AddrInfoIndex < GSYMHeader->NumAddresses) { + auto addr_info_offset = AddrInfoOffsets[Info.AddrInfoIndex]; + auto AddrInfo = GSYMData.getPointer(addr_info_offset); + if (AddrInfo) { + // Make sure the address is within the bounds of the address info's size + auto func_offset = addr_offset - Info.MatchAddrOffset; + // If an entry has zero size, then we will match it regardless of the + // size. These are typically symbols in the symbol table. + if (AddrInfo->Size == 0 || func_offset < AddrInfo->Size) { + Info.AddrInfo = AddrInfo; + return true; + } + } + } + return false; +} + +bool GsymReader::lookup(uint64_t addr, LookupResult &result) const { + result.clear(); + LookupInfo Info; + if (!findAddressInfo(addr, Info)) + return false; + + result.Addr = GSYMHeader->BaseAddress + Info.MatchAddrOffset; + result.EndAddr = result.Addr + Info.AddrInfo->Size; + + LineEntry line_entry; + InlineInfo InlineInfo; + DataExtractor data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + uint32_t IT; + while ((IT = data.getU32(&Offset))) { + uint32_t InfoLength = data.getU32(&Offset); + DataExtractor InfoData = data.getSlice(&Offset, InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: { + std::vector line_table; + LineTable line_parser(InfoData); + line_entry = line_parser.lookup(result.Addr, addr); + } break; + case InfoType::InlineInfo: { + uint32_t InfoOffset = 0; + InlineInfo.decode(InfoData, InfoOffset, result.Addr, addr); + break; + } + default: + break; + } + } + if (line_entry.isValid()) { + auto file_entry 
= FileTab->getFile(line_entry.File); + std::vector inline_stack; + SourceLocation loc; + InlineInfo.getInlineStack(addr, inline_stack); + + if (!inline_stack.empty()) { + auto prev = inline_stack.front(); + // First entry in inline callstack. The file and line come from + // the "line_entry", and the name comes from the inline info. + loc.Name = StrTab.getString(prev->Name); + loc.Dir = StrTab.getString(file_entry.Dir); + loc.Base = StrTab.getString(file_entry.Base); + loc.Line = line_entry.Line; + result.Locations.emplace_back(std::move(loc)); + + // Rest of inlined functions. Note that we don't have to add last + // (non-inlined) function explicitly because the root InlineInfo node + // has the same name as the function + for (auto it = inline_stack.begin() + 1; it != inline_stack.end(); it++) { + auto ii = *it; + loc.Name = StrTab.getString(ii->Name); + auto CallFile = FileTab->getFile(prev->CallFile); + loc.Dir = StrTab.getString(CallFile.Dir); + loc.Base = StrTab.getString(CallFile.Base); + loc.Line = prev->CallLine; + result.Locations.push_back(loc); + prev = ii; + } + } else { + loc.Name = StrTab.getString(Info.AddrInfo->Name); + loc.Dir = StrTab.getString(file_entry.Dir); + loc.Base = StrTab.getString(file_entry.Base); + loc.Line = line_entry.Line; + result.Locations.push_back(loc); + } + } else { + SourceLocation loc; + loc.Name = StrTab.getString(Info.AddrInfo->Name); + result.Locations.push_back(loc); + } + return true; +} + +bool GsymReader::getFunctionInfo(uint64_t addr, FunctionInfo &FuncInfo) const { + LookupInfo Info; + if (!findAddressInfo(addr, Info)) + return false; + + FuncInfo.Addr = GSYMHeader->BaseAddress + Info.MatchAddrOffset; + FuncInfo.Size = Info.AddrInfo->Size; + FuncInfo.Name = Info.AddrInfo->Name; + + DataExtractor data = getAddressInfoPayload(Info.AddrInfoIndex); + uint32_t Offset = 0; + while (uint32_t IT = data.getU32(&Offset)) { + uint32_t InfoLength = data.getU32(&Offset); + DataExtractor InfoData = data.getSlice(&Offset, 
InfoLength); + switch (static_cast(IT)) { + case InfoType::LineTableInfo: { + LineTable parser(InfoData); + parser.parseAllEntries(FuncInfo.Lines, FuncInfo.Addr); + } break; + case InfoType::InlineInfo: { + uint32_t InlineOffset = 0; + FuncInfo.InlineInfo.decode(InfoData, InlineOffset, FuncInfo.Addr); + break; + } + default: + break; + } + } + return true; +} + +void GsymReader::unmap() { + MemBuffer.reset(); + GSYMData.clear(); + GSYMHeader = nullptr; + AddrOffsets = nullptr; + AddrInfoOffsets = nullptr; + FileTab = nullptr; + StrTab.clear(); +} + +GsymReader::~GsymReader() { unmap(); } Index: lib/DebugInfo/GSYM/GsymStreamer.cpp =================================================================== --- lib/DebugInfo/GSYM/GsymStreamer.cpp +++ lib/DebugInfo/GSYM/GsymStreamer.cpp @@ -0,0 +1,92 @@ +//===- GSYMStreamer.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymStreamer.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/WithColor.h" + +namespace llvm { +namespace gsym { + +static inline bool error(Twine Error, Twine Context = {}) { + WithColor::error() << Error + "\n"; + if (!Context.isTriviallyEmpty()) + WithColor::note() << Twine("while processing ") + Context + "\n"; + return false; +} + +bool GSYMStreamer::init(Triple TheTriple) { + std::string ErrorStr; + std::string TripleName; + StringRef Context = "gsym streamer init"; + + // Get the target. 
+ const Target *TheTarget = + TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr); + if (!TheTarget) + return error(ErrorStr, Context); + TripleName = TheTriple.getTriple(); + + // Create all the MC Objects. + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) + return error(Twine("no register info for target ") + TripleName, Context); + + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!MAI) + return error("no asm info for target " + TripleName, Context); + + MOFI.reset(new MCObjectFileInfo); + MC.reset(new MCContext(MAI.get(), MRI.get(), MOFI.get())); + MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, *MC); + + MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + if (!MSTI) + return error("no subtarget info for target " + TripleName, Context); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); + if (!MAB) + return error("no asm backend for target " + TripleName, Context); + + MII.reset(TheTarget->createMCInstrInfo()); + if (!MII) + return error("no instr info info for target " + TripleName, Context); + + MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC); + if (!MCE) + return error("no code emitter for target " + TripleName, Context); + + MS = TheTarget->createMCObjectStreamer( + TheTriple, *MC, std::unique_ptr(MAB), + MAB->createObjectWriter(OutFile), std::unique_ptr(MCE), + *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false); + + if (!MS) + return error("no object streamer for target " + TripleName, Context); + + // Finally create the AsmPrinter we'll use to emit the DIEs. 
+ TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(), + None)); + if (!TM) + return error("no target machine for target " + TripleName, Context); + + Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr(MS))); + if (!Asm) + return error("no asm printer for target " + TripleName, Context); + + return true; +} + +} // namespace gsym +} // namespace llvm Index: lib/DebugInfo/GSYM/InlineInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/InlineInfo.cpp +++ lib/DebugInfo/GSYM/InlineInfo.cpp @@ -0,0 +1,192 @@ +//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +void Range::dump(llvm::raw_ostream &OS) const { + OS << '[' << format_hex(Start, 18) << " - " << format_hex(End, 18) << ")"; +} + +void Ranges::insert(const Range &R) { + auto Pos = std::upper_bound(m_ranges.begin(), m_ranges.end(), R); + m_ranges.insert(Pos, R); +} + +bool Ranges::contains(uint64_t Addr) const { + if (m_ranges.empty()) + return false; + if (Addr < m_ranges.front().Start) + return false; + if (Addr >= m_ranges.back().End) + return false; + auto begin = m_ranges.begin(); + auto EndPos = m_ranges.end(); + auto Pos = std::upper_bound(begin, EndPos, Addr); + if (Pos == EndPos) + return m_ranges.back().contains(Addr); + if (Pos != begin) { + 
--Pos; + return Pos->contains(Addr); + } + return false; +} + +void InlineInfo::write(FileWriter &out, uint64_t BaseAddr) const { + out.WriteULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) { + auto Offset = Range.Start - BaseAddr; + auto Size = Range.End - Range.Start; + out.WriteULEB(Offset); + out.WriteULEB(Size); + } + bool HasChildren = !Children.empty(); + out.WriteU8(HasChildren); + out.WriteU32(Name); + out.WriteULEB(CallFile); + out.WriteULEB(CallLine); + if (HasChildren) { + for (const auto &child : Children) + child.write(out, Ranges.front().Start); + out.WriteULEB(0); // Terminate child sibling chain + } +} + +bool InlineInfo::decode(DataExtractor &Data, uint32_t &Offset, + uint64_t BaseAddr) { + auto NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return false; + Ranges.reserve(NumRanges); + for (size_t I = 0; I < NumRanges; ++I) { + auto off = Data.getULEB128(&Offset); + auto size = Data.getULEB128(&Offset); + Ranges.emplace_back(Range(BaseAddr + off, BaseAddr + off + size)); + } + bool HasChildren = Data.getU8(&Offset) != 0; + Name = Data.getU32(&Offset); + CallFile = (uint32_t)Data.getULEB128(&Offset); + CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + InlineInfo child; + while (child.decode(Data, Offset, Ranges.front().Start)) { + Children.emplace_back(std::move(child)); + child.clear(); + } + } + return true; +} + +bool InlineInfo::decode(DataExtractor &Data, uint32_t &Offset, + uint64_t BaseAddr, uint64_t LookupAddr) { + auto NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return false; + + uint64_t ChildBaseAddr = 0; + if (LookupAddr == UINT64_MAX) { // skip-only: just advance Offset + for (size_t I = 0; I < NumRanges; ++I) { + Data.getULEB128(&Offset); // off + Data.getULEB128(&Offset); // size + } + } else { + for (size_t I = 0; I < NumRanges; ++I) { + auto off = Data.getULEB128(&Offset); + auto size = Data.getULEB128(&Offset); + auto Start = BaseAddr + off; + auto End = Start + size; + if (I == 0) +
ChildBaseAddr = Start; + if (Start <= LookupAddr && End > LookupAddr) { + Ranges.emplace_back(Range(Start, End)); + } + } + } + + bool HasChildren = Data.getU8(&Offset) != 0; + Name = Data.getU32(&Offset); + if (LookupAddr == UINT64_MAX) { // skip-only: consume but do not store + Data.getULEB128(&Offset); // CallFile + Data.getULEB128(&Offset); // CallLine + } else { + CallFile = (uint32_t)Data.getULEB128(&Offset); + CallLine = (uint32_t)Data.getULEB128(&Offset); + } + if (HasChildren) { + InlineInfo Child; + if (Ranges.empty()) { + // This inlined function does not contain LookupAddr, so its children are + // only skipped (UINT64_MAX = skip-only mode) to advance Offset past them + while (Child.decode(Data, Offset, ChildBaseAddr, UINT64_MAX)) + /* Do nothing */; + } else { + while (Child.decode(Data, Offset, ChildBaseAddr, LookupAddr)) { + if (!Child.Ranges.empty()) + Children.emplace_back(std::move(Child)); + Child.clear(); + } + } + } + return true; +} + +void InlineInfo::dump(llvm::raw_ostream &OS, const GsymReader &GSYM, + unsigned Depth) const { + // Indent with spaces based on Depth + OS.indent(Depth); + bool First = true; + for (auto Range : Ranges) { + if (First) + First = false; + else + OS << ' '; + Range.dump(OS); + } + if (Name) + OS << ' ' << GSYM.getString(Name); + if (CallFile) { + auto file_entry = GSYM.getFile(CallFile); + OS << " called from " << GSYM.getString(file_entry.Dir) << '/' << + GSYM.getString(file_entry.Base) << ":" << CallLine; + } + OS << '\n'; + for (const auto &child : Children) + child.dump(OS, GSYM, Depth + 1); +} + +bool InlineInfo::getInlineStack( + uint64_t Addr, std::vector &InlineStack) const { + for (const auto &Range : Ranges) { + if (Range.contains(Addr)) { + if (Name > 0) + InlineStack.insert(InlineStack.begin(), this); + for (const auto &child : Children) { + if (child.getInlineStack(Addr, InlineStack)) + break; + } + return true; + } + } + return false; +} Index: lib/DebugInfo/GSYM/LineTable.cpp =================================================================== --- lib/DebugInfo/GSYM/LineTable.cpp +++
lib/DebugInfo/GSYM/LineTable.cpp @@ -0,0 +1,250 @@ +//===- LineTable.cpp --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +using namespace llvm; +using namespace gsym; + +// type summary add -s "delta=${var.delta}, count=${var.count}" "(anonymous +// namespace)::DeltaInfo" +struct DeltaInfo { + int64_t delta; + uint32_t count; + DeltaInfo(int64_t d, uint32_t c) : delta(d), count(c) {} +}; + +inline bool operator<(const DeltaInfo &lhs, int64_t delta) { + return lhs.delta < delta; +} + +bool LineTable::encode_special(int64_t min_line_delta, int64_t max_line_delta, + int64_t line_delta, uint64_t addr_delta, + uint8_t &special_opcode) { + if (line_delta < min_line_delta) + return false; + if (line_delta > max_line_delta) + return false; + int64_t line_range = max_line_delta - min_line_delta + 1; + int64_t adjusted_opcode = + ((line_delta - min_line_delta) + addr_delta * line_range); + int64_t opcode = adjusted_opcode + DBG_FIRST_SPECIAL; + if (opcode < 0) + return false; + if (opcode > 255) + return false; + special_opcode = (uint8_t)opcode; + return true; +} + +void LineTable::parse( + uint64_t BaseAddr, + std::function const &row_callback) { + uint32_t offset = 0; + int64_t min_delta = Data.getSLEB128(&offset); + int64_t max_delta = Data.getSLEB128(&offset); + int64_t line_range = max_delta - min_delta + 1; + uint32_t first_line = (uint32_t)Data.getULEB128(&offset); + LineEntry row(BaseAddr, 1, 
first_line); + bool done = false; + while (!done) { + uint8_t opcode = Data.getU8(&offset); + switch (opcode) { + case DBG_END_SEQUENCE: + done = true; + break; + case DBG_SET_FILE: + row.File = (uint32_t)Data.getULEB128(&offset); + break; + case DBG_ADVANCE_PC: { + auto delta = Data.getULEB128(&offset); + row.Addr += delta; + // If the function callback returns false, we stop parsing + if (row_callback(row) == false) + return; + } break; + case DBG_ADVANCE_LINE: { + auto delta = Data.getSLEB128(&offset); + row.Line += delta; + } break; + default: { + // A byte that contains both address and line increment + uint8_t adjusted_opcode = opcode - DBG_FIRST_SPECIAL; + int64_t line_delta = min_delta + (adjusted_opcode % line_range); + uint64_t addr_delta = (adjusted_opcode / line_range); + row.Line += line_delta; + row.Addr += addr_delta; + // If the function callback returns false, we stop parsing + if (row_callback(row) == false) + return; + break; + } + } + } +} + +bool LineTable::write(FileWriter &out, const FunctionInfo &func_info) { + if (func_info.Lines.empty()) + return false; + // Write out the unsigned InfoType::LineTableInfo enum + out.WriteU32(static_cast(InfoType::LineTableInfo)); + // Write out a zero byte size of this chunk for now, we will fixup later + const off_t line_table_length_offset = out.Tell(); + out.WriteU32(0); + const off_t line_table_start = line_table_length_offset + 4; + int64_t min_line_delta = INT64_MAX; + int64_t max_line_delta = INT64_MIN; + std::vector delta_infos; + if (func_info.Lines.size() == 1) { + min_line_delta = 0; + max_line_delta = 0; + } else { + int64_t prev_line = 1; + bool first = true; + for (const auto &line_entry : func_info.Lines) { + if (first) + first = false; + else { + int64_t line_delta = (int64_t)line_entry.Line - prev_line; + auto end = delta_infos.end(); + auto pos = std::lower_bound(delta_infos.begin(), end, line_delta); + if (pos != end && pos->delta == line_delta) + ++pos->count; + else + 
delta_infos.insert(pos, DeltaInfo(line_delta, 1)); + if (line_delta < min_line_delta) + min_line_delta = line_delta; + if (line_delta > max_line_delta) + max_line_delta = line_delta; + } + prev_line = (int64_t)line_entry.Line; + } + assert(min_line_delta <= max_line_delta); + } + // Set the min and max line delta intelligently based on the counts of + // the line deltas. if our range is too large. + const int64_t max_line_ranage = 14; + if (max_line_delta - min_line_delta > max_line_ranage) { + uint32_t best_index = 0; + uint32_t best_end_index = 0; + uint32_t best_count = 0; + const size_t num_delta_infos = delta_infos.size(); + for (uint32_t i = 0; i < num_delta_infos; ++i) { + const int64_t first_delta = delta_infos[i].delta; + uint32_t curr_count = 0; + uint32_t j; + for (j = i; j < num_delta_infos; ++j) { + auto line_range = delta_infos[j].delta - first_delta; + if (line_range > max_line_ranage) + break; + curr_count += delta_infos[j].count; + } + if (curr_count > best_count) { + best_index = i; + best_end_index = j - 1; + best_count = curr_count; + } + } + min_line_delta = delta_infos[best_index].delta; + max_line_delta = delta_infos[best_end_index].delta; + } + if (min_line_delta == max_line_delta && min_line_delta > 0 && + min_line_delta < max_line_ranage) + min_line_delta = 0; + assert(min_line_delta <= max_line_delta); + + // Initialize the line entry state as a starting point. All line entries + // will be deltas from this. 
+ LineEntry prev(func_info.Addr, 1, func_info.Lines.front().Line); + + // Write out the min and max line delta as signed LEB128 + out.WriteSLEB(min_line_delta); + out.WriteSLEB(max_line_delta); + // Write out the starting line number as a unsigned LEB128 + out.WriteULEB(prev.Line); + + for (const auto &curr : func_info.Lines) { + assert(curr.Addr >= prev.Addr); + uint64_t addr_delta = curr.Addr - prev.Addr; + int64_t line_delta = 0; + if (curr.Line > prev.Line) + line_delta = curr.Line - prev.Line; + else if (prev.Line > curr.Line) + line_delta = -((int32_t)(prev.Line - curr.Line)); + + // Set the file if it doesn't match the current one. + if (curr.File != prev.File) { + out.WriteU8(DBG_SET_FILE); + out.WriteULEB(curr.File); + } + + uint8_t special_op; + if (encode_special(min_line_delta, max_line_delta, line_delta, addr_delta, + special_op)) { + // Advance the PC and line and push a row + out.WriteU8(special_op); + } else { + // We can't encode the address delta and line delta into + // a single special opcode, we must do them separately + + // Advance the line + if (line_delta != 0) { + out.WriteU8(DBG_ADVANCE_LINE); + out.WriteSLEB(line_delta); + } + + // Advance the PC and push a row + out.WriteU8(DBG_ADVANCE_PC); + out.WriteULEB(addr_delta); + } + prev = curr; + } + out.WriteU8(DBG_END_SEQUENCE); + + // Fixup the line table byte size + const off_t line_table_length = out.Tell() - line_table_start; + out.Fixup32((uint32_t)line_table_length, line_table_length_offset); + return true; +} + +// Parse all line table entries into the "line_table" vector. We can +// cache the results of this if needed, or we can call LineTable::lookup() +// below. +void LineTable::parseAllEntries(std::vector &line_table, + uint64_t BaseAddr) { + parse(BaseAddr, [&line_table](const LineEntry &row) -> bool { + line_table.push_back(row); + return true; // Keep parsing by returning true + }); +} +// Parse the line table on the fly and find the row we are looking for. 
+// We will need to determine if we need to cache the line table by calling +// LineTable::parseAllEntries(...) or just call this function each time. +// There is a CPU vs memory tradeoff we will need to determine. +LineEntry LineTable::lookup(uint64_t BaseAddr, uint64_t Addr) { + LineEntry result; + parse(BaseAddr, [Addr, &result](const LineEntry &row) -> bool { + if (Addr < row.Addr) + return false; // Stop parsing, result contains the line table row! + result = row; + if (Addr == row.Addr) { + // Stop parsing, this is the row we are looking for since the address + // matches. + return false; + } + return true; // Keep parsing till we find the right row + }); + return result; +} Index: lib/DebugInfo/GSYM/README.md =================================================================== --- lib/DebugInfo/GSYM/README.md +++ lib/DebugInfo/GSYM/README.md @@ -0,0 +1,119 @@ +# GSYM Introduction + +GSYM is a symbolication file format designed to be the best format to use for symbolicating addresses into function name + source file + line information. It is a binary file format designed to be mapped into one or more processes. GSYM information can be created by converting DWARF debug information, or Breakpad files. GSYM information can exist as a stand alone file, or be contained in ELF or mach-o files in a section. When embedded into ELF or mach-o files, GSYM sections can share a string table that already exists within a file. + +## Why use GSYM? +GSYM files are up to 7x smaller than DWARF files and up to 3x smaller than Breakpad files. The file format is designed to touch as few pages of the file as possible while doing address lookups. GSYM files can be mmap'ed into a process as shared memory allowing multiple processes on a symbolication server to share loaded GSYM pages.
The file format includes inline call stack information and can help turn a single address lookup into multiple stack frames that walk the inlined call stack back to the concrete function that invoked these functions. + +## Converting DWARF Files to GSYM +`llvm-gsymutil` is available in the `llvm/tools/gsym` directory and has options to convert DWARF into GSYM files. `llvm-gsymutil` has a `-dwarf` option that specifies a DWARF file to convert into a GSYM file. The output file can be specified with the `-out-file` option. +``` +$ llvm-gsymutil -dwarf /tmp/a.out -out-file /tmp/a.out.gsym +``` + +This command will convert a DWARF file into the GSYM file format. This allows clients that are currently symbolicating with DWARF to switch to using the GSYM file format. This tool could be used in a symbolication workflow where symbolication servers convert DWARF to GSYM and cache the results on the fly, or could be used at build time to always produce a GSYM file. DWARF debug information is rich enough to support encoding the inline call stack information for richer and more useful symbolication backtraces. + +## Converting Breakpad Files to GSYM + +`llvm-gsymutil` has a `-breakpad` option that specifies a Breakpad file to convert into a GSYM file. The output file can be specified with the `-out-file` option. +``` +$ llvm-gsymutil -breakpad /tmp/foo.sym -out-file /tmp/foo.gsym +``` +This allows clients currently using breakpad to switch over to use GSYM files. This tool could be used in a symbolication workflow where symbolication servers convert breakpad to GSYM format on the fly only when needed. Breakpad files do not contain inline call stack information, so it is advisable to use `llvm-gsymutil -dwarf` when possible to avoid losing this vital information. + +## File Format Overview +The GSYM file consists of a header, an address table, an address data offsets table, a file table, a string table and address data for each address.
+ +The GSYM file format when in a stand alone file is ordered as shown: +- Header +- Address Table +- Address Data Offsets Table +- File Table +- String Table +- Address Data + + +### Header +``` +#define GSYM_MAGIC 0x4753594d +#define GSYM_VERSION 1 +struct Header { + uint32_t magic; + uint16_t version; + uint8_t addr_off_size; + uint8_t uuid_size; + uint64_t base_address; + uint32_t num_addrs; + uint32_t strtab_offset; + uint32_t strtab_size; + uint8_t uuid[20]; +}; +``` + +The magic value is set to `GSYM_MAGIC` and allows quick and easy detection of this file format when it is loaded. Addresses in the address table are stored as offsets from a 64 bit address found in `Header.base_address`. This allows the address table to contain 32, 16 or 8 bit offsets, instead of a table of full sized addresses. A smaller address table reduces the file size and causes fewer pages to be touched during address lookups. The size of the address offsets in the address table is specified in the header in `Header.addr_off_size`. The header contains a UUID to ensure the GSYM file can be properly matched to the object ELF or mach-o file that created the stack trace. The header specifies the location of the string table for all strings contained in the GSYM file, or can point to an existing string table within an ELF or mach-o file. + +### Address Table +The address table immediately follows the header in the file and consists of `Header.num_addrs` address offsets. These offsets are sorted and can be binary searched for efficient lookups. Address offsets are encoded as offsets that are `Header.addr_off_size` bytes in size. During address lookup, the index of the matching address offset will be the index into the address data offsets table. + +### Address Data Offsets Table +The address data offsets table immediately follows the address table and consists of `Header.num_addrs` 32 bit file offsets: one for each address in the address table.
The offsets in this table are the absolute file offsets to the address data for each address in the address table. Keeping this data separate from the address table helps to reduce the number of pages that are touched when address lookups occur on a GSYM file. + +### File Table +The file table immediately follows the address data offsets table. The format of the `FileTable` is: + +``` +struct FileTable { + uint32_t count; + FileInfo files[]; +}; +``` +The file table starts with a 32 bit count of the number of files that are used in all of the address data, followed by that number of `FileInfo` structures. + +Each file in the file table is represented with a `FileInfo` structure: + +``` +struct FileInfo { + uint32_t directory; + uint32_t filename; +}; +``` + +The `FileInfo` structure has the file path split into a string for the directory and a string for the filename. The directory and filename are specified as offsets into the string table. Splitting paths into directory and file base name allows GSYM to use the same string table entry for common directories. + +### String Table +The string table follows the file table in stand alone GSYM files and contains all strings for everything contained in the GSYM file. Any string data should be added to the string table and any references to strings inside GSYM information must be stored as 32 bit string table offsets into this string table. + +### Address Data +The address data is the payload that contains information about the address that is being looked up. The structure that represents this data is: +``` +struct AddressInfo { + uint32_t size; + uint32_t name; + AddressData data[]; +}; +``` +It starts with a 32 bit size for the address range of the function and is followed by the 32 bit string table offset for the name of the function. The size of the address range is important to encode as it stops address lookups from matching if the address is between two functions in some padding.
This is followed by an array of address data information: +``` +struct AddressData { + uint32_t type; + uint32_t length; + uint8_t data[length]; +}; +``` +The address data starts with a 32 bit type, followed by a 32 bit length, followed by an array of bytes that encode each specific kind of data. +The `AddressData.type` is an enumeration value: +``` +enum class InfoType { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; +``` +The `AddressInfo.data[]` is encoded as a vector of `AddressData` structs that is terminated by an `AddressData` struct whose type is set to `InfoType::EndOfList`. This allows the GSYM file format to contain arbitrary data for any address range and allows us to expand the GSYM capabilities as we find more uses for it. + +`InfoType::EndOfList` is always the last `AddressData` in the `AddressInfo`. + +`InfoType::LineTableInfo` is a modified version of the DWARF line tables that efficiently stores line table information for each function. DWARF stores line table information for an entire source file and includes all functions. Having each function's line table encoded separately allows fewer pages to be touched when looking up the line entry for a specific address. The information is optional and can be omitted for address data that is from a symbol or label where no line table information is available. + +`InfoType::InlineInfo` is a format that encodes inline call stacks. This information is optional and doesn't need to be included for each address. If the function has no inlined functions this data should not be included.
+ Index: tools/gsym/CMakeLists.txt =================================================================== --- tools/gsym/CMakeLists.txt +++ tools/gsym/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + DebugInfoDWARF + DebugInfoGSYM + AsmPrinter + AllTargetsDescs + AllTargetsInfos + MC + Object + Support + Target + ) + +add_llvm_tool(llvm-gsymutil + llvm-gsymutil.cpp + + DEPENDS + intrinsics_gen + ) Index: tools/gsym/llvm-gsymutil.cpp =================================================================== --- tools/gsym/llvm-gsymutil.cpp +++ tools/gsym/llvm-gsymutil.cpp @@ -0,0 +1,468 @@ +//===-- llvm-gsymutil.cpp - GSYM dumping and creation utility for llvm ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/RelocVisitor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/Breakpad.h" +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" 
+#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" + +using namespace llvm; +using namespace gsym; +using namespace object; + +#define HEX32(v) format("0x%8.8" PRIx32, (uint32_t)v) +#define HEX64(v) format("0x%8.8" PRIx64, (uint64_t)v) +/// @} +/// Command line options. +/// @{ + +namespace { +using namespace cl; + +OptionCategory SpecificOptions("Specific Options"); +OptionCategory DWARFOptions("DWARF Conversion Options"); +OptionCategory BreakpadOptions("Breakpad Conversion Options"); +OptionCategory GSYMOptions("GSYM Options"); + +static opt Help("h", desc("Alias for -help"), Hidden, + cat(SpecificOptions)); +// TODO: fix to inly accept one file as input +static list + InputFilenames(Positional, desc(""), + ZeroOrMore, cat(SpecificOptions)); + + static opt + DWARFFilename("dwarf", cl::init(""), + cl::desc("Convert the specified DWARF file to the GSYM " + "format."), + cl::value_desc("filename"), cat(DWARFOptions)); + +static opt + BreakpadFilename("breakpad", cl::init(""), + cl::desc("Convert the specified Breakpad file to the GSYM " + "format."), + cl::value_desc("filename"), cat(BreakpadOptions)); + +static list + ArchFilters("arch", + desc("Process debug information for the specified CPU " + "architecture only. Architectures may be specified by " + "name or by number. This option can be specified " + "multiple times, once for each desired architecture."), + cat(DWARFOptions)); + +static opt + OutputFilename("out-file", cl::init(""), + cl::desc("Save converted output to the specified file. 
" + "Used in conjunction with the -dwarf or -breakpad " + "options."), + cl::value_desc("filename"), cat(SpecificOptions)); +static alias OutputFilenameAlias("o", desc("Alias for -out-file."), + aliasopt(OutputFilename), + cat(DWARFOptions)); +static opt + SegmentSize("segment-size", + desc("Split output file into segments of roughly this size"), + cat(DWARFOptions)); + +static opt + NumThreads("threads", + desc("Number of threads to use; defaults is number of cores"), + cat(DWARFOptions)); + +static opt LookupAddress("address", + desc("Lookup an address in a GSYM file"), + cat(GSYMOptions), cl::value_desc("address"), + cl::init(UINT64_MAX)); + +static opt LookupName("name", desc("Lookup a name in a GSYM file"), + cat(GSYMOptions), cl::value_desc("name"), + cl::init("")); + +static opt Verbose("verbose", + desc("Print more low-level encoding details."), + cat(SpecificOptions)); + +static opt Verify("verify", + desc("Verify the generated GSYM file against the DWARF " + "by looking up all addresses."), + cat(SpecificOptions)); + +} // namespace +/// @} +//===----------------------------------------------------------------------===// + +static void error(StringRef Prefix, std::error_code EC) { + if (!EC) + return; + errs() << Prefix << ": " << EC.message() << "\n"; + exit(1); +} + +/// If the input path is a .dSYM bundle (as created by the dsymutil tool), +/// replace it with individual entries for each of the object files inside the +/// bundle otherwise return the input path. +static std::vector expandBundle(const std::string &InputPath) { + std::vector BundlePaths; + SmallString<256> BundlePath(InputPath); + // Manually open up the bundle to avoid introducing additional dependencies. 
+ if (sys::fs::is_directory(BundlePath) && + sys::path::extension(BundlePath) == ".dSYM") { + std::error_code EC; + sys::path::append(BundlePath, "Contents", "Resources", "DWARF"); + for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + const std::string &Path = Dir->path(); + sys::fs::file_status Status; + EC = sys::fs::status(Path, Status); + error(Path, EC); + switch (Status.type()) { + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + BundlePaths.push_back(Path); + break; + default: /*ignore*/; + } + } + error(BundlePath, EC); + } + if (!BundlePaths.size()) + BundlePaths.push_back(InputPath); + return BundlePaths; +} + +static void verify(ObjectFile &Obj, raw_ostream &OS, + const std::string &OutFile) { + OS << "Verifying GSYM file \"" << OutFile << "\":\n"; + auto DICtx = DWARFContext::create(Obj); + + gsym::GsymReader gsymFile; + auto error = gsymFile.openFile(OutFile); + if (error) { + OS << "error: error opening \"" << OutFile << "\"\n"; + return; + } + + auto num_addrs = gsymFile.getNumAddresses(); + DILineInfoSpecifier DLIS( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + LookupResult gsymLookup; + std::string gsymFilename; + for (uint32_t i = 0; i < num_addrs; ++i) { + auto func_addr = gsymFile.getAddress(i); + FunctionInfo func_info; + if (gsymFile.getFunctionInfo(func_addr, func_info)) { + for (auto addr = func_addr; addr < func_addr + func_info.Size; ++addr) { + if (gsymFile.lookup(addr, gsymLookup)) { + auto dwarfInlineInfos = DICtx->getInliningInfoForAddress(addr, DLIS); + uint32_t numDwarfInlineInfos = dwarfInlineInfos.getNumberOfFrames(); + if (numDwarfInlineInfos == 0) { + dwarfInlineInfos.addFrame(DICtx->getLineInfoForAddress(addr, DLIS)); + } + + // Check for 1 entry that has no file and line info + if (numDwarfInlineInfos == 1 && + 
dwarfInlineInfos.getFrame(0).FileName == "") { + dwarfInlineInfos = DIInliningInfo(); + numDwarfInlineInfos = 0; + } + if (numDwarfInlineInfos > 0 && + numDwarfInlineInfos != gsymLookup.Locations.size()) { + OS << "error: address " << HEX64(addr) << " has " + << numDwarfInlineInfos << " DWARF inline frames and GSYM has " + << gsymLookup.Locations.size() << "\n"; + OS << " " << numDwarfInlineInfos << " DWARF frames:\n"; + for (size_t idx = 0; idx < numDwarfInlineInfos; ++idx) { + const auto dii = dwarfInlineInfos.getFrame(idx); + OS << " [" << idx << "]: " << dii.FunctionName << " @ " + << dii.FileName << ':' << dii.Line << '\n'; + } + fprintf(stderr, " %zu GSYM frames:\n", + gsymLookup.Locations.size()); + for (size_t idx = 0, count = gsymLookup.Locations.size(); + idx < count; ++idx) { + const auto &gii = gsymLookup.Locations[idx]; + OS << " [" << idx << "]: " << gii.Name << " @ " << gii.Dir + << '/' << gii.Base << ':' << gii.Line << '\n'; + } + dwarfInlineInfos = DICtx->getInliningInfoForAddress(addr, DLIS); + func_info.dump(outs(), gsymFile); + continue; + } + + for (size_t idx = 0, count = gsymLookup.Locations.size(); idx < count; + ++idx) { + const auto &gii = gsymLookup.Locations[idx]; + if (idx < numDwarfInlineInfos) { + const auto dii = dwarfInlineInfos.getFrame(idx); + gsymFilename = gsymLookup.getSourceFile(idx); + // Verify function name + if (dii.FunctionName.find(gii.Name) != 0) + fprintf(stderr, + "error: address 0x%8.8" PRIx64 " DWARF function \"%s\" " + "doesn't match GSYM " + "function \"%s\"\n", + addr, dii.FunctionName.c_str(), gii.Name); + // Verify source file path + if (dii.FileName != gsymFilename) + fprintf(stderr, + "error: address 0x%8.8" PRIx64 + " DWARF path \"%s\" doesn't match GSYM path \"%s\"\n", + addr, dii.FileName.c_str(), gsymFilename.c_str()); + // Verify source file line + if (dii.Line != gii.Line) + fprintf(stderr, + "error: address 0x%8.8" PRIx64 + " DWARF line %u != GSYM line %u\n", + addr, dii.Line, gii.Line); + } + } + } + 
} + } else { + fprintf(stderr, + "error: address lookup failed for address[%u] 0x%8.8" PRIx64 "\n", + i, func_addr); + } + } +} + +static uint32_t getCPUType(MachOObjectFile &MachO) { + if (MachO.is64Bit()) + return MachO.getHeader64().cputype; + else + return MachO.getHeader().cputype; +} + +/// Return true if the object file has not been filtered by an --arch option. +static bool filterArch(ObjectFile &Obj) { + if (ArchFilters.empty()) + return true; + + if (auto *MachO = dyn_cast(&Obj)) { + std::string ObjArch = + Triple::getArchTypeName(MachO->getArchTriple().getArch()); + + for (auto Arch : ArchFilters) { + // Match name. + if (Arch == ObjArch) + return true; + + // Match architecture number. + unsigned Value; + if (!StringRef(Arch).getAsInteger(0, Value)) + if (Value == getCPUType(*MachO)) + return true; + } + } + return false; +} + +static bool handleDWARFObjectFile(StringRef Filename, ObjectFile &Obj, + const std::string &OutFile) { + auto ThreadCount = + NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency(); + DwarfTransformer Transformer; + Transformer.setOutStream(&outs()); + Transformer.setNumThreads(ThreadCount); + + auto Dwarf = Transformer.loadDwarf(Obj); + auto SymbolTable = Transformer.loadSymbolTable(Obj); + Transformer.optimize(); + auto Result = Dwarf || SymbolTable; + if (SegmentSize > 0) { + GsymCreator::createSegmentsFrom( + Transformer.getData(), SegmentSize, + [&](const GsymCreator &c) { + char SegmentName[12]; + HEX64(c.getFirstAddr()).snprint(SegmentName, 12); + auto FileName = OutFile + "-" + SegmentName; + Result &= c.save(FileName.c_str()); + if (Verify) { + verify(Obj, outs(), FileName); + } + }, + ThreadCount); + } else { + Result &= + GsymCreator::createFrom(Transformer.getData()).save(OutFile.c_str()); + if (Verify) { + verify(Obj, llvm::outs(), OutFile); + } + } + + return Result; +} + +static bool handleDWARFBuffer(StringRef Filename, MemoryBufferRef Buffer, + const std::string &OutFile) { + Expected> BinOrErr = 
object::createBinary(Buffer); + error(Filename, errorToErrorCode(BinOrErr.takeError())); + + bool Result = true; + if (auto *Obj = dyn_cast(BinOrErr->get())) { + Result &= handleDWARFObjectFile(Filename, *Obj, OutFile.c_str()); + } else if (auto *Fat = dyn_cast(BinOrErr->get())) { + for (auto &ObjForArch : Fat->objects()) { + if (auto MachOOrErr = ObjForArch.getAsObjectFile()) { + auto &Obj = **MachOOrErr; + if (filterArch(Obj)) { + auto archName = ObjForArch.getArchFlagName(); + std::string ArchOutFile(OutFile); + ArchOutFile.append(1, '.'); + ArchOutFile.append(archName); + printf("Hanlding %s architecture...\n", archName.c_str()); + Result &= handleDWARFObjectFile(Filename, Obj, ArchOutFile); + } + } else { + consumeError(MachOOrErr.takeError()); + } + } + } + return Result; +} + +static bool handleDWARFFile(StringRef Filename, const std::string &OutFile) { + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + error(Filename, BuffOrErr.getError()); + std::unique_ptr Buffer = std::move(BuffOrErr.get()); + return handleDWARFBuffer(Filename, *Buffer, OutFile); +} + +static bool convertDWARFToGSYM(raw_ostream &OS) { + // Expand any .dSYM bundles to the individual object files contained therein. + std::vector Objects; + std::string OutFile = OutputFilename; + if (OutFile.empty()) { + OutFile = DWARFFilename; + OutFile += ".gsym"; + } + + OS << "Converting DWARF from \"" << DWARFFilename << "\" to GSYM in \"" << + OutFile << "\"\n"; + + auto Objs = expandBundle(DWARFFilename); + Objects.insert(Objects.end(), Objs.begin(), Objs.end()); + + for (auto Object : Objects) + handleDWARFFile(Object, OutFile); + return true; +} + +int main(int argc, char const *argv[]) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(argv[0]); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
+ + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + + const char * Overview = "A tool that displays information in GSYM files, or " + "converts other formats to GSYM.\n\nSpecify one or more GSYM " + "paths as arguments to dump or lookup information.\nUse the -dwarf " + "option to specify a DWARF file to convert to GSYM.\nUse the " + "-breakpad option to specify a breakpad file to convert to GSYM.\n"; + HideUnrelatedOptions({&SpecificOptions, &DWARFOptions, &BreakpadOptions, + &GSYMOptions}); + cl::ParseCommandLineOptions(argc, argv, Overview); + + if (Help) { + PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true); + return 0; + } + + raw_ostream &OS = outs(); + + if (!DWARFFilename.empty()) { + // Convert DWARF to GSYM + if (!InputFilenames.empty()) { + OS << "error: no input files cat be specified when using the -dwarf " + "option \n"; + return 1; + } + return convertDWARFToGSYM(OS) ? EXIT_SUCCESS : 1; + } else if (!BreakpadFilename.empty()) { + std::string OutFile = OutputFilename; + if (OutFile.empty()) { + OutFile = BreakpadFilename; + OutFile += ".gsym"; + } + error(BreakpadFilename, convertBreakpadToGSYM(BreakpadFilename.c_str(), + OutFile.c_str())); + } + + // Dump or access data inside GSYM files + for (const auto &GSYMPath : InputFilenames) { + gsym::GsymReader gsymFile; + auto Error = gsymFile.openFile(GSYMPath); + if (Error) { + OS << "error: error opening \"" << GSYMPath << "\"\n"; + return 1; + } + if (LookupAddress != UINT64_MAX) { + // Lookup an address in a GSYM file and print any matches. 
+ OS << "Looking up " << format_hex(LookupAddress, 18) << " in GSYM file \"" + << GSYMPath << "\":\n"; + LookupResult Result; + if (gsymFile.lookup(LookupAddress, Result)) { + Result.dump(OS); + } else { + OS << "error: address not found\n"; + } + } else if (!LookupName.empty()) { + // Lookup all address infos with matching names + OS << "Looking up \"" << LookupName << "\" in GSYM file \"" + << GSYMPath << "\":\n"; + gsymFile.dumpAddressInfos(OS, LookupName.c_str()); + } else { + OS << "Dumping GSYM file \"" << GSYMPath << "\":\n"; + gsymFile.dump(OS, Verbose); + } + } + return EXIT_SUCCESS; +} Index: unittests/DebugInfo/CMakeLists.txt =================================================================== --- unittests/DebugInfo/CMakeLists.txt +++ unittests/DebugInfo/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(CodeView) add_subdirectory(DWARF) +add_subdirectory(GSYM) add_subdirectory(MSF) add_subdirectory(PDB)