diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1907,6 +1907,7 @@ std::vector m_section_infos; }; +#define TRIE_SYMBOL_IS_THUMB (1ULL << 63) struct TrieEntry { void Dump() const { printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", @@ -1920,7 +1921,9 @@ } ConstString name; uint64_t address = LLDB_INVALID_ADDRESS; - uint64_t flags = 0; + uint64_t flags = + 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, + // TRIE_SYMBOL_IS_THUMB uint64_t other = 0; ConstString import_name; }; @@ -1943,13 +1946,16 @@ }; static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset, - const bool is_arm, + const bool is_arm, addr_t text_seg_base_addr, std::vector &nameSlices, std::set &resolver_addresses, - std::vector &output) { + std::vector &reexports, + std::vector &ext_symbols) { if (!data.ValidOffset(offset)) return true; + // Terminal node -- end of a branch, possibly add this to + // the symbol table or resolver table. 
const uint64_t terminalSize = data.GetULEB128(&offset); lldb::offset_t children_offset = offset + terminalSize; if (terminalSize != 0) { @@ -1962,6 +1968,8 @@ import_name = data.GetCStr(&offset); } else { e.entry.address = data.GetULEB128(&offset); + if (text_seg_base_addr != LLDB_INVALID_ADDRESS) + e.entry.address += text_seg_base_addr; if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { e.entry.other = data.GetULEB128(&offset); uint64_t resolver_addr = e.entry.other; @@ -1971,9 +1979,18 @@ } else e.entry.other = 0; } - // Only add symbols that are reexport symbols with a valid import name - if (EXPORT_SYMBOL_FLAGS_REEXPORT & e.entry.flags && import_name && - import_name[0]) { + bool add_this_entry = false; + if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && + import_name && import_name[0]) { + // add symbols that are reexport symbols with a valid import name. + add_this_entry = true; + } else if (e.entry.flags == 0 && + (import_name == nullptr || import_name[0] == '\0')) { + // add externally visible symbols, in case the nlist record has + // been stripped/omitted. 
+ add_this_entry = true; + } + if (add_this_entry) { std::string name; if (!nameSlices.empty()) { for (auto name_slice : nameSlices) @@ -1987,7 +2004,15 @@ // Skip the leading '_' e.entry.import_name.SetCString(import_name + 1); } - output.push_back(e); + if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { + reexports.push_back(e); + } else { + if (is_arm && (e.entry.address & 1)) { + e.entry.flags |= TRIE_SYMBOL_IS_THUMB; + e.entry.address &= THUMB_ADDRESS_BIT_MASK; + } + ext_symbols.push_back(e); + } } } @@ -2000,8 +2025,9 @@ return false; // Corrupt data lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); if (childNodeOffset) { - if (!ParseTrieEntries(data, childNodeOffset, is_arm, nameSlices, - resolver_addresses, output)) { + if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr, + nameSlices, resolver_addresses, reexports, + ext_symbols)) { return false; } } @@ -2066,7 +2092,15 @@ struct symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0}; struct linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; struct dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // The data element of type bool indicates that this entry is thumb + // code. typedef AddressDataArray FunctionStarts; + + // Record the address of every function/data that we add to the symtab. + // We add symbols to the table in the order of most information (nlist + // records) to least (function starts), and avoid duplicating symbols + // via this set. 
+ std::set symbols_added; FunctionStarts function_starts; lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); uint32_t i; @@ -2091,29 +2125,6 @@ if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) == nullptr) // fill in symoff, nsyms, stroff, strsize fields return 0; - if (symtab_load_command.symoff == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.symoff == 0"); - return 0; - } - - if (symtab_load_command.stroff == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.stroff == 0"); - return 0; - } - - if (symtab_load_command.nsyms == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.nsyms == 0"); - return 0; - } - - if (symtab_load_command.strsize == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.strsize == 0"); - return 0; - } break; case LC_DYLD_INFO: @@ -2354,27 +2365,7 @@ } } - if (nlist_data.GetByteSize() == 0 && - memory_module_load_level == eMemoryModuleLoadLevelComplete) { - if (log) - module_sp->LogMessage(log, "failed to read nlist data"); - return 0; - } - const bool have_strtab_data = strtab_data.GetByteSize() > 0; - if (!have_strtab_data) { - if (process) { - if (strtab_addr == LLDB_INVALID_ADDRESS) { - if (log) - module_sp->LogMessage(log, "failed to locate the strtab in memory"); - return 0; - } - } else { - if (log) - module_sp->LogMessage(log, "failed to read strtab data"); - return 0; - } - } ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); ConstString g_segment_name_DATA = GetSegmentNameDATA(); @@ -2401,6 +2392,7 @@ section_list->FindSectionByName(g_section_name_eh_frame); const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM); + const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions(); // lldb works best if it knows the start address of all functions in a // module. 
Linker symbols or debug info are normally the best source of @@ -2426,6 +2418,14 @@ 0) { // Now append the current entry function_start_entry.addr += delta; + if (is_arm) { + if (function_start_entry.addr & 1) { + function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; + function_start_entry.data = true; + } else if (always_thumb) { + function_start_entry.data = true; + } + } function_starts.Append(function_start_entry); } } else { @@ -2448,6 +2448,14 @@ if (func) { FunctionStarts::Entry function_start_entry; function_start_entry.addr = func->base - text_base_addr; + if (is_arm) { + if (function_start_entry.addr & 1) { + function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK; + function_start_entry.data = true; + } else if (always_thumb) { + function_start_entry.data = true; + } + } function_starts.Append(function_start_entry); } } @@ -2508,25 +2516,21 @@ std::string memory_symbol_name; uint32_t unmapped_local_symbols_found = 0; - std::vector trie_entries; + std::vector reexport_trie_entries; + std::vector external_sym_trie_entries; std::set resolver_addresses; if (dyld_trie_data.GetByteSize() > 0) { - std::vector nameSlices; - ParseTrieEntries(dyld_trie_data, 0, is_arm, nameSlices, resolver_addresses, - trie_entries); - ConstString text_segment_name("__TEXT"); SectionSP text_segment_sp = GetSectionList()->FindSectionByName(text_segment_name); - if (text_segment_sp) { - const lldb::addr_t text_segment_file_addr = - text_segment_sp->GetFileAddress(); - if (text_segment_file_addr != LLDB_INVALID_ADDRESS) { - for (auto &e : trie_entries) - e.entry.address += text_segment_file_addr; - } - } + lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS; + if (text_segment_sp) + text_segment_file_addr = text_segment_sp->GetFileAddress(); + std::vector nameSlices; + ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr, + nameSlices, resolver_addresses, reexport_trie_entries, + external_sym_trie_entries); } typedef std::set IndirectSymbols; @@ -3598,6 +3602,9 @@ 
symbol_section); sym[GSYM_sym_idx].GetAddressRef().SetOffset( symbol_value); + symbols_added.insert(sym[GSYM_sym_idx] + .GetAddress() + .GetFileAddress()); // We just need the flags from the linker // symbol, so put these flags // into the N_GSYM flags to avoid duplicate @@ -3617,6 +3624,8 @@ if (set_value) { sym[sym_idx].GetAddressRef().SetSection(symbol_section); sym[sym_idx].GetAddressRef().SetOffset(symbol_value); + symbols_added.insert( + sym[sym_idx].GetAddress().GetFileAddress()); } sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); @@ -4428,6 +4437,8 @@ // invalid address of zero when the global is a common symbol. sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); + symbols_added.insert( + sym[GSYM_sym_idx].GetAddress().GetFileAddress()); // We just need the flags from the linker symbol, so put these // flags into the N_GSYM flags to avoid duplicate symbols in // the symbol table. @@ -4445,6 +4456,7 @@ if (set_value) { sym[sym_idx].GetAddressRef().SetSection(symbol_section); sym[sym_idx].GetAddressRef().SetOffset(symbol_value); + symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress()); } sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); if (nlist.n_desc & N_WEAK_REF) @@ -4501,12 +4513,60 @@ } } + // Count how many trie symbols we'll add to the symbol table + int trie_symbol_table_augment_count = 0; + for (auto &e : external_sym_trie_entries) { + if (symbols_added.find(e.entry.address) == symbols_added.end()) + trie_symbol_table_augment_count++; + } + + if (num_syms < sym_idx + trie_symbol_table_augment_count) { + num_syms = sym_idx + trie_symbol_table_augment_count; + sym = symtab->Resize(num_syms); + } uint32_t synthetic_sym_id = symtab_load_command.nsyms; + // Add symbols from the trie to the symbol table. 
+ for (auto &e : external_sym_trie_entries) { + if (symbols_added.find(e.entry.address) != symbols_added.end()) + continue; + + // Find the section that this trie address is in, use that to annotate + // symbol type as we add the trie address and name to the symbol table. + Address symbol_addr; + if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { + SectionSP symbol_section(symbol_addr.GetSection()); + if (symbol_section) { + sym[sym_idx].SetID(synthetic_sym_id++); + sym[sym_idx].GetMangled().SetMangledName(e.entry.name); + switch (symbol_section->GetType()) { + case eSectionTypeCode: + sym[sym_idx].SetType(eSymbolTypeCode); + break; + case eSectionTypeOther: + case eSectionTypeData: + case eSectionTypeZeroFill: + sym[sym_idx].SetType(eSymbolTypeData); + break; + default: + break; + } + sym[sym_idx].SetIsSynthetic(false); + sym[sym_idx].SetExternal(true); + sym[sym_idx].GetAddressRef() = symbol_addr; + symbols_added.insert(symbol_addr.GetFileAddress()); + if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) + sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); + ++sym_idx; + } + } + } + if (function_starts_count > 0) { uint32_t num_synthetic_function_symbols = 0; for (i = 0; i < function_starts_count; ++i) { - if (!function_starts.GetEntryRef(i).data) + if (symbols_added.find(function_starts.GetEntryRef(i).addr) == + symbols_added.end()) ++num_synthetic_function_symbols; } @@ -4518,14 +4578,11 @@ for (i = 0; i < function_starts_count; ++i) { const FunctionStarts::Entry *func_start_entry = function_starts.GetEntryAtIndex(i); - if (!func_start_entry->data) { + if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) { addr_t symbol_file_addr = func_start_entry->addr; uint32_t symbol_flags = 0; - if (is_arm) { - if (symbol_file_addr & 1) - symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; - symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - } + if (func_start_entry->data) + symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; Address symbol_addr; if 
(module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { SectionSP symbol_section(symbol_addr.GetSection()); @@ -4552,6 +4609,7 @@ sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; + symbols_added.insert(symbol_addr.GetFileAddress()); if (symbol_flags) sym[sym_idx].SetFlags(symbol_flags); if (symbol_byte_size) @@ -4652,6 +4710,7 @@ sym[sym_idx].SetType(eSymbolTypeResolver); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = so_addr; + symbols_added.insert(so_addr.GetFileAddress()); sym[sym_idx].SetByteSize(symbol_stub_byte_size); ++sym_idx; } @@ -4669,8 +4728,8 @@ } } - if (!trie_entries.empty()) { - for (const auto &e : trie_entries) { + if (!reexport_trie_entries.empty()) { + for (const auto &e : reexport_trie_entries) { if (e.entry.import_name) { // Only add indirect symbols from the Trie entries if we didn't have // a N_INDR nlist entry for this already diff --git a/lldb/test/API/macosx/dyld-trie-symbols/Makefile b/lldb/test/API/macosx/dyld-trie-symbols/Makefile new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/dyld-trie-symbols/Makefile @@ -0,0 +1,13 @@ +CXX_SOURCES := main.cpp +EXE := a.out +MAKE_DSYM := NO +LD_EXTRAS = -dynamiclib -image_base 0x8000 +CFLAGS = $(CFLAGS_NO_DEBUG) + +include Makefile.rules + +all: a.out a.out-stripped + +a.out-stripped: + cp a.out a.out-stripped + strip a.out-stripped diff --git a/lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py b/lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py @@ -0,0 +1,87 @@ +""" +Test that we read the exported symbols from the dyld trie +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class DyldTrieSymbolsTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + 
NO_DEBUG_INFO_TESTCASE = True + + @skipIfRemote + @skipUnlessDarwin + + def test_dyld_trie_symbols(self): + """Test that we create symbol table entries from the dyld trie data structure.""" + self.build() + unstripped_exe = self.getBuildArtifact("a.out") + stripped_exe = self.getBuildArtifact("a.out-stripped") + + unstripped_target = self.dbg.CreateTarget(unstripped_exe) + self.assertTrue(unstripped_target.IsValid(), "Got a vaid stripped target.") + + # Verify that the expected symbols are present in an unstripped + # binary, and that we didn't duplicate the entries in the symbol + # table. + unstripped_bazval_symbols = unstripped_target.FindSymbols("bazval") + self.assertEqual(unstripped_bazval_symbols.GetSize(), 1) + unstripped_patval_symbols = unstripped_target.FindSymbols("patval") + self.assertEqual(unstripped_patval_symbols.GetSize(), 1) + unstripped_Z3foo_symbols = unstripped_target.FindSymbols("_Z3foov") + self.assertEqual(unstripped_Z3foo_symbols.GetSize(), 1) + unstripped_foo_symbols = unstripped_target.FindSymbols("foo") + self.assertEqual(unstripped_foo_symbols.GetSize(), 1) + + # make sure we can look up the mangled name, demangled base name, + # demangled name with argument. + unstripped_Z3pat_symbols = unstripped_target.FindSymbols("_Z3pati") + self.assertEqual(unstripped_Z3pat_symbols.GetSize(), 1) + unstripped_pat_symbols = unstripped_target.FindSymbols("pat") + self.assertEqual(unstripped_pat_symbols.GetSize(), 1) + unstripped_patint_symbols = unstripped_target.FindSymbols("pat(int)") + self.assertEqual(unstripped_patint_symbols.GetSize(), 1) + + unstripped_bar_symbols = unstripped_target.FindSymbols("bar") + self.assertEqual(unstripped_bar_symbols.GetSize(), 1) + + + + # Verify that we can retrieve all the symbols with external + # linkage after the binary has been stripped; they should not + # exist in the nlist records at this point and can only be + # retrieved from the dyld trie structure. 
+ + stripped_target = self.dbg.CreateTarget(stripped_exe) + self.assertTrue(stripped_target.IsValid(), "Got a vaid stripped target.") + + # Check that we're able to still retrieve all the symbols after + # the binary has been stripped. Check that one we know will be + # removed is absent. + stripped_bazval_symbols = stripped_target.FindSymbols("bazval") + self.assertEqual(stripped_bazval_symbols.GetSize(), 1) + stripped_patval_symbols = stripped_target.FindSymbols("patval") + self.assertEqual(stripped_patval_symbols.GetSize(), 1) + stripped_Z3foo_symbols = stripped_target.FindSymbols("_Z3foov") + self.assertEqual(stripped_Z3foo_symbols.GetSize(), 1) + stripped_foo_symbols = stripped_target.FindSymbols("foo") + self.assertEqual(stripped_foo_symbols.GetSize(), 1) + + # make sure we can look up the mangled name, demangled base name, + # demangled name with argument. + stripped_Z3pat_symbols = stripped_target.FindSymbols("_Z3pati") + self.assertEqual(stripped_Z3pat_symbols.GetSize(), 1) + stripped_pat_symbols = stripped_target.FindSymbols("pat") + self.assertEqual(stripped_pat_symbols.GetSize(), 1) + stripped_patint_symbols = stripped_target.FindSymbols("pat(int)") + self.assertEqual(stripped_patint_symbols.GetSize(), 1) + + # bar should have been stripped. We should not find it, or the + # stripping went wrong. 
+ stripped_bar_symbols = stripped_target.FindSymbols("bar") + self.assertEqual(stripped_bar_symbols.GetSize(), 0) + diff --git a/lldb/test/API/macosx/dyld-trie-symbols/main.cpp b/lldb/test/API/macosx/dyld-trie-symbols/main.cpp new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/dyld-trie-symbols/main.cpp @@ -0,0 +1,29 @@ +int patval; // external symbol, will not be completely stripped +int pat(int in) { // external symbol, will not be completely stripped + if (patval == 0) + patval = in; + return patval; +} + +static int fooval; // static symbol, stripped +int foo() { // external symbol, will not be completely stripped + if (fooval == 0) + fooval = 5; + return fooval; +} + +int bazval = 10; // external symbol, will not be completely stripped +int baz () { // external symbol, will not be completely stripped + return foo() + bazval; +} + +static int barval = 15; // static symbol, stripped +static int bar () { // static symbol, stripped; __lldb_unnamed_symbol from func starts + return baz() + barval; +} + +int calculate () // external symbol, will not be completely stripped +{ + return bar(); +} +