diff --git a/lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c b/lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c new file mode 100644 --- /dev/null +++ b/lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c @@ -0,0 +1,7 @@ +// This function will be embedded within the .dynsym section of the main binary. +int functionInDynsym(int num) { return num * 3; } + +int main(int argc, char *argv[]) { + int y = functionInDynsym(argc); + return y; +} diff --git a/lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c b/lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c new file mode 100644 --- /dev/null +++ b/lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c @@ -0,0 +1,12 @@ +// This function will be embedded within the .symtab section of the +// .gnu_debugdata section. +int functionInSymtab(int num) { return num * 4; } + +// This function will be embedded within the .dynsym section of the main binary. +int functionInDynsym(int num) { return num * 3; } + +int main(int argc, char *argv[]) { + int x = functionInSymtab(argc); + int y = functionInDynsym(x); + return y; +} diff --git a/lldb/lit/Modules/ELF/load-from-dynsym-alone.test b/lldb/lit/Modules/ELF/load-from-dynsym-alone.test new file mode 100644 --- /dev/null +++ b/lldb/lit/Modules/ELF/load-from-dynsym-alone.test @@ -0,0 +1,33 @@ +# REQUIRES: system-linux + +# This test ensures that we will load .dynsym even if there's no .symtab section. +# We do this by compiling a small C program with a function and we direct the +# linker where to put the symbols so that in the end the layout is as follows: +# +# Symbol table '.dynsym' contains 4 entries: +# Num: Value Size Type Bind Vis Ndx Name +# 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __libc_start_main@GLIBC_2.2.5 +# 2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__ +# 3: 0000000000401110 13 FUNC GLOBAL DEFAULT 10 functionInDynsym + +# We want to keep the symbol "functionInDynsym" in the .dynsym section and not +# have it put the default .symtab section. +# RUN: echo "{functionInDynsym;};" > %T/dynamic-symbols.txt +# RUN: %clang -Wl,--dynamic-list=%T/dynamic-symbols.txt -g -o %t.binary %p/Inputs/load-from-dynsym-alone.c + +# Remove not needed symbols +# RUN: echo "functionInDynsym" > %t.keep_symbols +# RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary + +# Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact) +# RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary + +# RUN: %lldb -b -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s + +# CHECK: (lldb) b functionInDynsym +# CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}} + +# CHECK: (lldb) run +# CHECK-NEXT: Process {{.*}} stopped +# CHECK-NEXT: * thread #1, name = 'load-from-dynsy', stop reason = breakpoint 1.1 diff --git a/lldb/lit/Modules/ELF/load-symtab-and-dynsym.test b/lldb/lit/Modules/ELF/load-symtab-and-dynsym.test new file mode 100644 --- /dev/null +++ b/lldb/lit/Modules/ELF/load-symtab-and-dynsym.test @@ -0,0 +1,48 @@ +# REQUIRES: system-linux + +# This test ensures that we will load .dynsym even if there's a .symtab section. +# We do this by compiling a small C program with two functions and we direct the +# linker where to put the symbols so that in the end the layout is as follows: +# +# Symbol table '.dynsym' contains 4 entries: +# Num: Value Size Type Bind Vis Ndx Name +# 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __libc_start_main@GLIBC_2.2.5 +# 2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__ +# 3: 0000000000401120 13 FUNC GLOBAL DEFAULT 10 functionInDynsym +# +# Symbol table '.symtab' contains 2 entries: +# Num: Value Size Type Bind Vis Ndx Name +# 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# 1: 0000000000401110 15 FUNC GLOBAL DEFAULT 10 functionInSymtab + +# We want to keep the symbol "functionInDynsym" in the .dynsym section and not +# have it put the default .symtab section. +# RUN: echo "{functionInDynsym;};" > %T/dynamic-symbols.txt +# RUN: %clang -Wl,--dynamic-list=%T/dynamic-symbols.txt -g -o %t.binary %p/Inputs/load-symtab-and-dynsym.c + +# Remove not needed symbols +# RUN: echo "functionInSymtab" > %t.keep_symbols +# RUN: echo "functionInDynsym" >> %t.keep_symbols +# RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary + +# Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact) +# RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary + +# RUN: %lldb -b -o 'b functionInSymtab' -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s + +# CHECK: (lldb) b functionInSymtab +# CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInSymtab, address = 0x{{.*}} + +# CHECK: (lldb) b functionInDynsym +# CHECK-NEXT: Breakpoint 2: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}} + +# CHECK: (lldb) run +# CHECK-NEXT: Process {{.*}} stopped +# CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 1.1 + +# CHECK: (lldb) continue +# CHECK-NEXT: Process {{.*}} resuming +# CHECK-NEXT: Process {{.*}} stopped +# CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 2.1 + diff --git a/lldb/lit/Modules/ELF/merge-symbols.yaml b/lldb/lit/Modules/ELF/merge-symbols.yaml new file mode 100644 --- /dev/null +++ b/lldb/lit/Modules/ELF/merge-symbols.yaml @@ -0,0 +1,96 @@ +# This test ensures that two symbols with the same name are only merged if at +# least their size, or their address, or their section differ. + +# RUN: yaml2obj %s > %t + +# RUN: llvm-objcopy \ +# RUN: --redefine-sym=all_same2=all_same1 \ +# RUN: --redefine-sym=different_section2=different_section1 \ +# RUN: --redefine-sym=different_address2=different_address1 \ +# RUN: --redefine-sym=different_size2=different_size1 %t + +# RUN: %lldb -b -o 'image dump symtab' %t | FileCheck -v --color -dump-input=always %s + +# CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name +# CHECK: [ 0] 1 Code 0x0000000000500000 0x0000000000000008 0x00000002 all_same1 +# CHECK-NEXT: [ 1] 3 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_section1 +# CHECK-NEXT: [ 2] 4 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_section1 +# CHECK-NEXT: [ 3] 5 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_address1 +# CHECK-NEXT: [ 4] 6 Code 0x0000000000500001 0x0000000000000008 0x00000002 different_address1 +# CHECK-NEXT: [ 5] 7 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_size1 +# CHECK-NEXT: [ 6] 8 Code 0x0000000000500000 0x0000000000000009 0x00000002 different_size1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x0000000000400000 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000400000 + AddressAlign: 0x0000000000000010 + Content: DEADBEEFBAADF00D + - Name: .text2 + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000500000 + AddressAlign: 0x0000000000000010 + Content: DEADBEEFBAADF00D +Symbols: + # Symbols with same addresses, names, sizes but different sections + - Name: all_same1 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbol all_same2 will be renamed to all_same1 and should therefore + # disappear from the dumped smymtab because all fields are equal. + - Name: all_same2 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbols with same addresses, names, sizes and sections + - Name: different_section1 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbol different_section2 will be renamed to different_section1 and we + # should# see two symbols named different_section1 in the dumped symtab. + - Name: different_section2 + Type: STT_FUNC + Section: .text2 + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbols with same names, sizes and sections but different addresses + - Name: different_address1 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbol different_address2 will be renamed to different_address1 and we + # should see two symbols named different_address1 in the dumped symtab. + - Name: different_address2 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500001 + Size: 0x0000000000000008 + # Symbols with same names, addresses and sections but different sizes + - Name: different_size1 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000008 + # Symbol different_size2 will be renamed to different_size1 and we should + # see two symbols named different_size1 in the dumped symtab. + - Name: different_size2 + Type: STT_FUNC + Section: .text + Value: 0x0000000000500000 + Size: 0x0000000000000009 +... diff --git a/lldb/lit/helper/toolchain.py b/lldb/lit/helper/toolchain.py --- a/lldb/lit/helper/toolchain.py +++ b/lldb/lit/helper/toolchain.py @@ -126,6 +126,6 @@ support_tools = ['yaml2obj', 'obj2yaml', 'llvm-pdbutil', 'llvm-mc', 'llvm-readobj', 'llvm-objdump', - 'llvm-objcopy', 'lli'] + 'llvm-objcopy', 'lli', 'llvm-strip'] additional_tool_dirs += [config.lldb_tools_dir, config.llvm_tools_dir] llvm_config.add_tool_substitutions(support_tools, additional_tool_dirs) diff --git a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h --- a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h @@ -24,6 +24,8 @@ #include "lldb/lldb-enumerations.h" #include "lldb/lldb-types.h" +#include "lldb/Utility/ConstString.h" +#include "llvm/ADT/Hashing.h" namespace lldb_private { class DataExtractor; @@ -230,6 +232,8 @@ elf_half st_shndx; ///< Section to which this symbol applies. ELFSymbol(); + + ELFSymbol(const ELFSymbol &other); /// Returns the binding attribute of the st_info member. unsigned char getBinding() const { return st_info >> 4; } @@ -269,6 +273,40 @@ void Dump(lldb_private::Stream *s, uint32_t idx, const lldb_private::DataExtractor *strtab_data, const lldb_private::SectionList *section_list); + + /// \returns \c true when all fields of the right hand side object are the + /// same as the once of this object; otherwise \c false is returned. + bool operator==(const ELFSymbol &rhs) const noexcept; + + /// \c returns a combined hash value for the given \c ELFSymbol over all + /// struct fields. + std::size_t hash() const noexcept; +}; + +/// A \c NamedELFSymbol is an ELFSymbol that, alongside it's name and section +/// index, stores the name of the symbol and section as a string and only uses +/// those for comparison instead of the name or section index which could differ +/// depending on which section the symbol is defined in (e.g. .strtab or +/// .dynstr) or which object file a section belongs to (see .gnu_debugdata). +struct NamedELFSymbol : ELFSymbol { + lldb_private::ConstString st_name_string; ///< Actual name of the ELF symbol + lldb_private::ConstString + st_section_name_string; ///< Actual name of the section + + NamedELFSymbol(const ELFSymbol &sym, lldb_private::ConstString symbol_name, + lldb_private::ConstString section_name); + + /// \returns \c true when all fields (except name and section indexes) of the + /// right hand side object are the same as the once of this object; otherwise + /// \c false is returned. We ignore the name and section index in order to + /// only compare actual name strings and not where strings are located. + bool operator==(const NamedELFSymbol &rhs) const noexcept; + + /// \c returns a combined hash value for the given \c NamedELFSymbol over all + /// struct fields but ignores the name and section index of the base struct in + /// order to only compare actual name strings and not where strings are + /// located. + std::size_t hash() const noexcept; }; /// \class ELFDynamic @@ -391,4 +429,22 @@ } // End namespace elf. +namespace std { + +// Define specializations of the std::hash struct for ELFSymbol and +// NamedELFSymbol so they can be used in an std::unordered_set + +template <> struct hash { + std::size_t operator()(const elf::ELFSymbol &s) const noexcept { + return s.hash(); + } +}; + +template <> struct hash { + std::size_t operator()(const elf::NamedELFSymbol &s) const noexcept { + return s.hash(); + } +}; +} // namespace std + #endif // #ifndef liblldb_ELFHeader_h_ diff --git a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp --- a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp @@ -250,6 +250,11 @@ ELFSymbol::ELFSymbol() { memset(this, 0, sizeof(ELFSymbol)); } +ELFSymbol::ELFSymbol(const ELFSymbol &other) + : st_value(other.st_value), st_size(other.st_size), st_name(other.st_name), + st_info(other.st_info), st_other(other.st_other), + st_shndx(other.st_shndx) {} + #define ENUM_TO_CSTR(e) \ case e: \ return #e @@ -353,6 +358,51 @@ return true; } +bool ELFSymbol::operator==(const ELFSymbol &rhs) const noexcept { + return st_value == rhs.st_value && st_size == rhs.st_size && + st_name == rhs.st_name && st_info == rhs.st_info && + st_other == rhs.st_other && st_shndx == rhs.st_shndx; +} + +std::size_t ELFSymbol::hash() const noexcept { + std::size_t h1 = std::hash()(st_value); + std::size_t h2 = std::hash()(st_size); + std::size_t h3 = std::hash()(st_name); + std::size_t h4 = std::hash()(st_info); + std::size_t h5 = std::hash()(st_other); + std::size_t h6 = std::hash()(st_shndx); + return llvm::hash_combine(h1, h2, h3, h4, h5, h6); +} + +// NamedELFSymbol + +NamedELFSymbol::NamedELFSymbol(const ELFSymbol &other, + lldb_private::ConstString symbol_name, + lldb_private::ConstString section_name) + : ELFSymbol(other), st_name_string(symbol_name), + st_section_name_string(section_name) {} + +bool NamedELFSymbol::operator==(const NamedELFSymbol &rhs) const noexcept { + NamedELFSymbol r = rhs; + r.st_name = st_name; + r.st_shndx = st_shndx; + return elf::ELFSymbol::operator==(r) && + st_name_string == rhs.st_name_string && + st_section_name_string == rhs.st_section_name_string; +} + +std::size_t NamedELFSymbol::hash() const noexcept { + elf::ELFSymbol tmp = *this; + // ignore the name and section index when hashing the ELFSymbol + tmp.st_name = 0; + tmp.st_shndx = 0; + std::size_t h1 = std::hash()(tmp); + std::size_t h2 = std::hash()(st_name_string.AsCString()); + std::size_t h3 = + std::hash()(st_section_name_string.AsCString()); + return llvm::hash_combine(h1, h2, h3); +} + // ELFProgramHeader ELFProgramHeader::ELFProgramHeader() { diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -12,6 +12,7 @@ #include #include +#include #include "lldb/Symbol/ObjectFile.h" #include "lldb/Utility/ArchSpec.h" @@ -55,7 +56,7 @@ /// This class provides a generic ELF (32/64 bit) reader plugin implementing /// the ObjectFile protocol. class ObjectFileELF : public lldb_private::ObjectFile { -public: +public: // Static Functions static void Initialize(); @@ -184,6 +185,8 @@ typedef std::map FileAddressToAddressClassMap; + + typedef std::unordered_set UniqueElfSymbolColl; /// Version of this reader common to all plugins based on this class. static const uint32_t m_plugin_version = 1; @@ -278,7 +281,8 @@ /// will parse the symbols only once. Returns the number of symbols parsed. unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id, - lldb_private::Section *symtab); + lldb_private::Section *symtab, + UniqueElfSymbolColl &unique_elf_symbols); /// Helper routine for ParseSymbolTable(). unsigned ParseSymbols(lldb_private::Symtab *symbol_table, @@ -286,7 +290,8 @@ lldb_private::SectionList *section_list, const size_t num_symbols, const lldb_private::DataExtractor &symtab_data, - const lldb_private::DataExtractor &strtab_data); + const lldb_private::DataExtractor &strtab_data, + UniqueElfSymbolColl &unique_elf_symbols); /// Scans the relocation entries and adds a set of artificial symbols to the /// given symbol table for each PLT slot. Returns the number of symbols diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1871,7 +1871,8 @@ SectionList *section_list, const size_t num_symbols, const DataExtractor &symtab_data, - const DataExtractor &strtab_data) { + const DataExtractor &strtab_data, + UniqueElfSymbolColl &unique_elf_symbols) { ELFSymbol symbol; lldb::offset_t offset = 0; @@ -2196,20 +2197,27 @@ symbol_size_valid, // Symbol size is valid has_suffix, // Contains linker annotations? flags); // Symbol flags. - symtab->AddSymbol(dc_symbol); + + NamedELFSymbol needle(symbol, ConstString(symbol_ref), + ConstString(symbol_section_sp->GetName())); + if (unique_elf_symbols.insert(needle).second) { + symtab->AddSymbol(dc_symbol); + } } return i; } -unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, - user_id_t start_id, - lldb_private::Section *symtab) { +unsigned +ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id, + lldb_private::Section *symtab, + UniqueElfSymbolColl &unique_elf_symbols) { if (symtab->GetObjectFile() != this) { // If the symbol table section is owned by a different object file, have it // do the parsing. ObjectFileELF *obj_file_elf = static_cast(symtab->GetObjectFile()); - return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab); + return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab, + unique_elf_symbols); } // Get section list for this object file. @@ -2237,7 +2245,7 @@ size_t num_symbols = symtab_data.GetByteSize() / symtab_hdr->sh_entsize; return ParseSymbols(symbol_table, start_id, section_list, num_symbols, - symtab_data, strtab_data); + symtab_data, strtab_data, unique_elf_symbols); } } @@ -2405,7 +2413,7 @@ true, // Size is valid false, // Contains linker annotations? 0); // Symbol flags. - + symbol_table->AddSymbol(jump_symbol); } @@ -2635,6 +2643,8 @@ return module_obj_file->GetSymtab(); if (m_symtab_up == nullptr) { + // A unique set of ELF symbols added to the symtab + UniqueElfSymbolColl unique_elf_symbols; SectionList *section_list = module_sp->GetSectionList(); if (!section_list) return nullptr; @@ -2644,22 +2654,31 @@ // Sharable objects and dynamic executables usually have 2 distinct symbol // tables, one named ".symtab", and the other ".dynsym". The dynsym is a - // smaller version of the symtab that only contains global symbols. The - // information found in the dynsym is therefore also found in the symtab, - // while the reverse is not necessarily true. + // smaller version of the symtab that only contains global symbols. + // Information in the dynsym section is *usually* also found in the symtab, + // but this is not required as symtab entries can be removed after linking. + // The minidebuginfo format makes use of this facility to create smaller + // symbol tables. Section *symtab = section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); - if (!symtab) { - // The symtab section is non-allocable and can be stripped, so if it - // doesn't exist then use the dynsym section which should always be - // there. - symtab = - section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) - .get(); - } if (symtab) { m_symtab_up.reset(new Symtab(symtab->GetObjectFile())); - symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab); + symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab, + unique_elf_symbols); + } + + // The symtab section is non-allocable and can be stripped, while the dynsym + // section which should always be always be there. If both exist we load + // both to support the minidebuginfo case. Otherwise we just load the dynsym + // section. + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) + .get(); + if (dynsym) { + if (!m_symtab_up) + m_symtab_up.reset(new Symtab(dynsym->GetObjectFile())); + symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym, + unique_elf_symbols); } // DT_JMPREL