diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,8 +712,6 @@ /// false otherwise. bool SetModulesArchitecture(const ArchSpec &new_arch); - ConstString GetNextSyntheticSymbolName(); - static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,14 +113,20 @@ lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return m_mangled.GuessLanguage(); + return GetMangled().GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { return m_mangled; } + Mangled &GetMangled() { + SynthesizeNameIfNeeded(); + return m_mangled; + } - const Mangled &GetMangled() const { return m_mangled; } + const Mangled &GetMangled() const { + SynthesizeNameIfNeeded(); + return m_mangled; + } ConstString GetReExportedSymbolName() const; @@ -166,9 +172,9 @@ bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -223,6 +229,10 @@ bool ContainsFileAddress(lldb::addr_t file_addr) const; + static llvm::StringRef GetSyntheticSymbolPrefix() { + return "___lldb_unnamed_symbol"; + } + protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -233,6 +243,8 @@ lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; + void SynthesizeNameIfNeeded() const; + uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -258,7 +270,7 @@ // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - Mangled m_mangled; // uniqued symbol name/mangled name pair + mutable Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,6 +219,26 @@ return false; } + /// A helper function that looks up full function names. + /// + /// We generate unique names for synthetic symbols so that users can look + /// them up by name when needed. But because doing so is uncommon in normal + /// debugger use, we trade off some performance at lookup time for faster + /// symbol table building by detecting these symbols and generating their + /// names lazily, rather than adding them to the normal symbol indexes. This + /// function does the job of first consulting the name indexes, and if that + /// fails it extracts the information it needs from the synthetic name and + /// locates the symbol. + /// + /// @param[in] symbol_name The symbol name to search for. + /// + /// @param[out] indexes The vector if symbol indexes to update with results. + /// + /// @returns The number of indexes added to the index vector. Zero if no + /// matches were found. + uint32_t GetNameIndexes(ConstString symbol_name, + std::vector &indexes); + void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,31 +2813,37 @@ if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - Symbol symbol(symbol_id, - GetNextSyntheticSymbolName().GetCString(), // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - entry_point_addr.GetSection(), // Section where this - // symbol is defined. - 0, // Offset in section or symbol value. - 0, // Size. - false, // Size is valid. - false, // Contains linker annotations? - 0); // Symbol flags. - m_symtab_up->AddSymbol(symbol); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + SectionSP section_sp = entry_point_addr.GetSection(); + Symbol symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/0, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression // evaluation relies on correctly setting a breakpoint at this // address. if (arch.GetMachine() == llvm::Triple::arm && - (entry_point_file_addr & 1)) + (entry_point_file_addr & 1)) { + symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); m_address_class_map[entry_point_file_addr ^ 1] = AddressClass::eCodeAlternateISA; - else + } else { m_address_class_map[entry_point_file_addr] = AddressClass::eCode; + } + m_symtab_up->AddSymbol(symbol); } } @@ -2917,22 +2923,24 @@ section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); - const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. Symbol eh_symbol( - symbol_id, // Symbol table index. - symbol_name, // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - section_sp, // Section in which this symbol is defined or null. - offset, // Offset in section or symbol value. - 0, // Size: Don't specify the size as an FDE can - false, // Size is valid: cover multiple symbols. - false, // Contains linker annotations? - 0); // Symbol flags. + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/offset, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,8 +4696,10 @@ symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - sym[sym_idx].GetMangled().SetDemangledName( - GetNextSyntheticSymbolName()); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + sym[sym_idx].GetMangled().SetDemangledName(ConstString()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,16 +616,6 @@ return symbol_type_hint; } -ConstString ObjectFile::GetNextSyntheticSymbolName() { - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - ConstString file_name = GetModule()->GetFileSpec().GetFilename(); - ++m_synthetic_symbol_idx; - os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" - << file_name.GetStringRef(); - return ConstString(os.str()); -} - std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ } ConstString Symbol::GetDisplayName() const { - return m_mangled.GetDisplayDemangledName(); + return GetMangled().GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = m_mangled.GetDemangledName(); + ConstString demangled = GetMangled().GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = m_mangled.GetName(name_preference); + ConstString name = GetMangled().GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,9 +330,11 @@ } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) - return m_mangled.GetMangledName() == name || - m_mangled.GetDemangledName() == name; + if (type == eSymbolTypeAny || m_type == type) { + const Mangled &mangled = GetMangled(); + return mangled.GetMangledName() == name || + mangled.GetDemangledName() == name; + } return false; } @@ -495,10 +497,10 @@ return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return m_mangled.GetName(); } +ConstString Symbol::GetName() const { return GetMangled().GetName(); } ConstString Symbol::GetNameNoArguments() const { - return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); + return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -565,3 +567,21 @@ bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } + +void Symbol::SynthesizeNameIfNeeded() const { + if (m_is_synthetic && !m_mangled) { + // Synthetic symbol names don't mean anything, but they do uniquely + // identify individual symbols so we give them a unique name. The name + // starts with the synthetic symbol prefix, followed by a unique number. + // Typically the UserID of a real symbol is the symbol table index of the + // symbol in the object file's symbol table(s), so it will be the same + // every time you read in the object file. We want the same persistence for + // synthetic symbols so that users can identify them across multiple debug + // sessions, to understand crashes in those symbols and to reliably set + // breakpoints on them. + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + os << GetSyntheticSymbolPrefix() << GetID(); + m_mangled.SetDemangledName(ConstString(os.str())); + } +} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. - if (symbol->IsTrampoline()) + if (symbol->IsTrampoline() || symbol->IsSynthetic()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,6 +628,36 @@ } } +uint32_t Symtab::GetNameIndexes(ConstString symbol_name, + std::vector &indexes) { + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + const uint32_t count = name_to_index.GetValues(symbol_name, indexes); + if (count) + return count; + // Synthetic symbol names are not added to the name indexes, but they start + // with a prefix and end with a the symbol UserID. This allows users to find + // these symbols without having to add them to the name indexes. These + // queries will not happen very often since the names don't mean anything, so + // performance is not paramount in this case. + llvm::StringRef name = symbol_name.GetStringRef(); + // String the synthetic prefix if the name starts with it. + if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) + return 0; // Not a synthetic symbol name + + // Extract the user ID from the symbol name + user_id_t uid = 0; + if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) + return 0; // Failed to extract the user ID as an integer + Symbol *symbol = FindSymbolByID(uid); + if (symbol == nullptr) + return 0; + const uint32_t symbol_idx = GetIndexForSymbol(symbol); + if (symbol_idx == UINT32_MAX) + return 0; + indexes.push_back(symbol_idx); + return 1; +} + uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -637,8 +667,7 @@ if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - return name_to_index.GetValues(symbol_name, indexes); + return GetNameIndexes(symbol_name, indexes); } return 0; } @@ -655,10 +684,9 @@ if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - name_to_index.GetValues(symbol_name, all_name_indexes); + GetNameIndexes(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2