Index: lib/sanitizer_common/sanitizer_symbolizer_mac.cc
===================================================================
--- lib/sanitizer_common/sanitizer_symbolizer_mac.cc
+++ lib/sanitizer_common/sanitizer_symbolizer_mac.cc
@@ -19,6 +19,10 @@
 #include "sanitizer_mac.h"
 #include "sanitizer_symbolizer_mac.h"
 
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
 namespace __sanitizer {
 
 #include <dlfcn.h>
@@ -27,6 +31,314 @@
 #include <sys/wait.h>
 #include <unistd.h>
 #include <util.h>
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+
+/// A [begin, end) iterator pair that can drive a range-based for loop.
+template <typename IteratorT> class iterator_range {
+  IteratorT begin_iterator, end_iterator;
+
+ public:
+  template <typename Container>
+  explicit iterator_range(Container &&c)
+      : begin_iterator(c.begin()), end_iterator(c.end()) {}
+  iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
+      : begin_iterator(std::move(begin_iterator)),
+        end_iterator(std::move(end_iterator)) {}
+
+  IteratorT begin() const { return begin_iterator; }
+  IteratorT end() const { return end_iterator; }
+};
+
+/// \brief Convenience function for iterating over sub-ranges.
+///
+/// This provides a bit of syntactic sugar to make using sub-ranges
+/// in for loops a bit easier. Analogous to std::make_pair().
+template <class T> iterator_range<T> make_range(T x, T y) {
+  return iterator_range<T>(std::move(x), std::move(y));
+}
+
+// Steps through load commands by cmdsize. FIXME: bidirectional_iterator_tag?
+class load_command_iterator
+    : public std::iterator<std::forward_iterator_tag, const load_command> {
+  friend class MachOObject;
+  friend class segment_command_iterator;
+  const load_command *cmd;
+  load_command_iterator(const load_command *cmd) : cmd(cmd) { } //NOLINT
+
+ public:
+  load_command_iterator() : cmd(nullptr) {}
+  // FIXME: This needs bounds checking; a zero cmdsize would never advance.
+  load_command_iterator &operator++() {
+    cmd = (const load_command *)(((uintptr_t)cmd) + cmd->cmdsize);
+    return *this;
+  }
+  load_command_iterator operator++(int) {
+    load_command_iterator I(*this);
+    ++*this;
+    return I;
+  }
+  // FIXME: Assert that this isn't the end?
+  reference operator*() const { return *cmd; }
+  pointer operator->() const { return cmd; }
+
+  bool operator!=(const load_command_iterator &other) const {
+    return other.cmd != cmd;
+  }
+  bool operator==(const load_command_iterator &other) const {
+    return other.cmd == cmd;
+  }
+};
+
+class symbol_iterator
+    : public std::iterator<std::forward_iterator_tag, const nlist_64> {
+  friend class MachOObject;
+  const nlist_64 *p;
+  symbol_iterator(const nlist_64 *p) : p(p) {} //NOLINT
+
+ public:
+  symbol_iterator() : p(nullptr) {}
+  // FIXME: This needs bounds checking.
+  symbol_iterator &operator++() {
+    ++p;
+    return *this;
+  }
+  symbol_iterator operator++(int) {
+    symbol_iterator I(*this);
+    ++*this;
+    return I;
+  }
+  // FIXME: Assert that this isn't the end.
+  reference operator*() const { return *p; }
+  pointer operator->() const { return p; }
+  bool operator!=(const symbol_iterator &other) const { return p != other.p; }
+  bool operator==(const symbol_iterator &other) const { return p == other.p; }
+};
+
+class MachOObject;
+
+class segment_command_iterator
+    : public std::iterator<std::forward_iterator_tag,
+                           const segment_command_64> {
+  friend class MachOObject;
+  const segment_command_64 *p;
+  const MachOObject *mach_o;
+  segment_command_iterator(const segment_command_64 *p,
+                           const MachOObject *mach_o)
+      : p(p), mach_o(mach_o) {}
+
+ public:
+  segment_command_iterator() : p(nullptr), mach_o(nullptr) {}
+  segment_command_iterator &operator=(const segment_command_iterator &other) {
+    assert(mach_o == nullptr || mach_o == other.mach_o);
+    p = other.p;
+    mach_o = other.mach_o;
+    return *this;
+  }
+  segment_command_iterator &operator++();
+  segment_command_iterator operator++(int) {
+    segment_command_iterator I(*this);
+    ++*this;
+    return I;
+  }
+  reference operator*() const { return *p; }
+  pointer operator->() const { return p; }
+  bool operator!=(const segment_command_iterator &other) const {
+    return p != other.p;
+  }
+  bool operator==(const segment_command_iterator &other) const {
+    return p == other.p;
+  }
+};
+
+typedef iterator_range<symbol_iterator> symbol_range;
+typedef iterator_range<segment_command_iterator> segment_command_range;
+
+static inline bool isDebuggerSymbol(const nlist_64 &sym) {
+  return 0 != (sym.n_type & N_STAB);
+}
+
+static inline bool isUndefinedSymbol(const nlist_64 &sym) {
+  return (sym.n_type & N_TYPE) == N_UNDF;
+}
+
+class MachOObject {
+  const mach_header_64 *header;
+  symbol_iterator nextSymbol(symbol_iterator s) const;
+
+ public:
+  explicit MachOObject(const mach_header_64 *header) : header(header) {}
+  typedef iterator_range<load_command_iterator> load_command_range;
+  load_command_range loadCommands() const;
+  symbol_range symbols() const;
+  segment_command_range segmentCommands() const;
+  size_t getSymbolSize(const nlist_64 &s) const;
+  uintptr_t getSymbolAddress(const nlist_64 &s) const;
+  size_t getSymbolSize(const char *name) const;
+  const char *getSymbolName(const nlist_64 &S) const;
+};
+
+segment_command_iterator &segment_command_iterator::operator++() {
+  // Resume the load-command walk at the current segment command.
+  load_command_iterator cur_lc(reinterpret_cast<const load_command *>(p));
+  load_command_iterator last_lc = mach_o->loadCommands().end();
+
+  assert(cur_lc != last_lc);
+  ++cur_lc;
+
+  for (; cur_lc != last_lc; ++cur_lc)
+    if (cur_lc->cmd == LC_SEGMENT_64) {
+      p = reinterpret_cast<const segment_command_64 *>(std::addressof(*cur_lc));
+      break;
+    }
+
+  if (cur_lc == last_lc)
+    *this = mach_o->segmentCommands().end();
+
+  return *this;
+}
+
+MachOObject::load_command_range MachOObject::loadCommands() const {
+  const load_command *first =
+      (const load_command *)((uintptr_t)header + sizeof(mach_header_64));
+  const load_command *last =
+      (const load_command *)((uintptr_t)first + header->sizeofcmds);
+  return make_range(load_command_iterator(first),
+                    load_command_iterator(last));
+}
+
+symbol_range MachOObject::symbols() const {
+  load_command_range lc_range = loadCommands();
+  // NOTE(review): symoff is added to the header address below; presumably
+  // that needs the table mapped at its file offset -- confirm for dyld images.
+  load_command_iterator it =
+      std::find_if(lc_range.begin(), lc_range.end(),
+                   [](const load_command &lc) { return lc.cmd == LC_SYMTAB; });
+  if (it == lc_range.end())
+    return make_range(symbol_iterator(), symbol_iterator());
+  const symtab_command *symtab =
+      reinterpret_cast<const symtab_command *>(std::addressof(*it));
+  auto *first_sym = (const nlist_64 *)((uintptr_t)header + symtab->symoff);
+  return make_range<symbol_iterator>(first_sym, first_sym + symtab->nsyms);
+}
+
+segment_command_range MachOObject::segmentCommands() const {
+  load_command_range range = loadCommands();
+  if (range.begin() == range.end())
+    return make_range(segment_command_iterator(nullptr, nullptr),
+                      segment_command_iterator(nullptr, nullptr));
+
+  // The range starts at the first LC_SEGMENT_64 command (find_if, so an image
+  // whose segment commands are not adjacent is still handled).
+  load_command_iterator first_segment = std::find_if(
+      range.begin(), range.end(),
+      [](const load_command &lc) { return lc.cmd == LC_SEGMENT_64; });
+  if (first_segment == range.end())
+    return make_range(segment_command_iterator(nullptr, nullptr),
+                      segment_command_iterator(nullptr, nullptr));
+
+  load_command_iterator link_edit_segment =
+      std::find_if(first_segment, range.end(), [](const load_command &lc) {
+        if (lc.cmd != LC_SEGMENT_64)
+          return false;
+        const segment_command_64 &seg =
+            reinterpret_cast<const segment_command_64 &>(lc);
+        return strncmp(SEG_LINKEDIT, seg.segname, 10) == 0;
+      });
+  assert(link_edit_segment != range.end() &&
+         "Could not find link edit segment");
+  return make_range(
+      segment_command_iterator(reinterpret_cast<const segment_command_64 *>(
+                                   std::addressof(*first_segment)),
+                               this),
+      segment_command_iterator(reinterpret_cast<const segment_command_64 *>(
+                                   std::addressof(*++link_edit_segment)),
+                               this));
+}
+
+const char *MachOObject::getSymbolName(const nlist_64 &sym) const {
+  auto range = loadCommands();
+  load_command_iterator lc_symtab =
+      std::find_if(range.begin(), range.end(),
+                   [](const load_command &lc) { return lc.cmd == LC_SYMTAB; });
+
+  if (lc_symtab == range.end())
+    return nullptr;
+  const symtab_command *symtab =
+      reinterpret_cast<const symtab_command *>(std::addressof(*lc_symtab));
+  const char *string_table = (const char *)header + symtab->stroff;
+  return string_table + sym.n_un.n_strx;
+}
+
+uintptr_t MachOObject::getSymbolAddress(const nlist_64 &sym) const {
+  return (isUndefinedSymbol(sym) || isDebuggerSymbol(sym)) && sym.n_value == 0
+             ? uintptr_t(~0)
+             : sym.n_value;
+}
+
+symbol_iterator MachOObject::nextSymbol(symbol_iterator after) const {
+  const nlist_64 *smallest_larger = nullptr;
+  symbol_range sym_range = symbols();
+  for (const auto &sym : sym_range) {
+    if (sym.n_sect != after->n_sect || isDebuggerSymbol(sym))
+      continue;
+    if (sym.n_value > after->n_value) {
+      if (!smallest_larger || smallest_larger->n_value > sym.n_value)
+        smallest_larger = &sym;
+    }
+  }
+  if (!smallest_larger)
+    return sym_range.end();
+  return symbol_iterator(smallest_larger);
+}
+
+size_t MachOObject::getSymbolSize(const nlist_64 &sym) const {
+  uintptr_t addr = getSymbolAddress(sym);
+  if (addr == uintptr_t(~0))
+    return 0;
+  uintptr_t next_addr = uintptr_t(~0);
+  symbol_iterator cur_sym(&sym);
+  symbol_iterator next_sym = nextSymbol(cur_sym);
+  if (next_sym == std::end(symbols())) {
+    unsigned sect_count = 0;
+    auto range = segmentCommands();
+    const section_64 *sect = nullptr;
+    // Find the section numbered n_sect (1-based). FIXME: better algorithm?
+    segment_command_iterator seg_iter = std::find_if(
+        range.begin(), range.end(), [&](const segment_command_64 &seg) {
+          sect_count += seg.nsects;
+          if (cur_sym->n_sect != NO_SECT && sect_count >= cur_sym->n_sect) {
+            sect = reinterpret_cast<const section_64 *>(
+                       uintptr_t(std::addressof(seg)) +
+                       sizeof(segment_command_64)) +
+                   (seg.nsects - (sect_count - cur_sym->n_sect) - 1);
+            return true;
+          }
+          return false;
+        });
+    if (seg_iter == range.end())
+      return size_t(~0);  // Same "unknown" sentinel as the by-name overload.
+    assert(sect != nullptr && "Section not found");
+
+    next_addr = sect->addr + sect->size;
+  } else {
+    next_addr = getSymbolAddress(*next_sym);
+  }
+
+  return next_addr == uintptr_t(~0) ? 0 : next_addr - addr;
+}
+
+size_t MachOObject::getSymbolSize(const char *name) const {
+  symbol_range range = symbols();
+  auto sym_iter = std::find_if(
+      range.begin(), range.end(), [this, name](const nlist_64 &sym) {
+        return !strcmp(getSymbolName(sym), name) && !isUndefinedSymbol(sym) &&
+               !isDebuggerSymbol(sym) && sym.n_value != 0;
+      });
+  if (sym_iter == range.end())
+    return size_t(~0);
+  return getSymbolSize(*sym_iter);
+}
 
 bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   Dl_info info;
@@ -37,8 +349,37 @@
   return true;
 }
 
+static bool IsCXXName(const char *SymName) {
+  CHECK(SymName != nullptr);
+  // FIXME: This is valid only for Itanium C++ ABI.
+  if (internal_strlen(SymName) < 3)
+    return false;
+  return SymName[0] == '_' && SymName[1] == 'Z';
+}
+
+static bool Is64BitMachO(const void *hdr) {
+  return *((const int *)hdr) == MH_MAGIC_64;
+}
+
 bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
-  return false;
+  Dl_info dli;
+  int result = dladdr((const void *)addr, &dli);
+  if (!result || !dli.dli_sname)
+    return false;
+  if (Is64BitMachO(dli.dli_fbase)) {
+    MachOObject o(reinterpret_cast<const mach_header_64 *>(dli.dli_fbase));
+    info->size = o.getSymbolSize(dli.dli_sname);
+    // FIXME: Just return 0 from getSymbolSize?
+    if (info->size == static_cast<uptr>(~0))
+      info->size = 0;
+  }
+  // NOTE(review): this is addr minus module_offset, not dli.dli_saddr --
+  // presumably module_offset is filled in by the caller; confirm.
+  info->start = addr - info->module_offset;
+  const char *demangled =
+      IsCXXName(dli.dli_sname) ? DemangleCXXABI(dli.dli_sname) : dli.dli_sname;
+  info->name = internal_strdup(demangled);
+  return true;
 }
 
 class AtosSymbolizerProcess : public SymbolizerProcess {