diff --git a/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/Makefile b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/Makefile new file mode 100644 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/Makefile @@ -0,0 +1,52 @@ +LEVEL = ../../make +C_SOURCES := main.c + +all: clean binary + +.PHONY: binary +binary: a.out + $(RM) -r $@ + cp a.out binary + + # Extract the dynamic symbols from the main binary, there is no need + # to also have these in the normal symbol table. + llvm-nm -D binary --format=posix --defined-only \ + | awk '{ print $$1 }' | sort > dynsyms + + # Extract all the text (i.e. function) symbols from the debuginfo. + # (Note that we actually also accept "D" symbols, for the benefit + # of platforms like PowerPC64 that use function descriptors.) + llvm-nm binary --format=posix --defined-only \ + | awk '{ if ($$2 == "T" || $$2 == "t" || $$2 == "D") print $$1 }' \ + | sort > funcsyms + + # Keep all the function symbols not already in the dynamic symbol + # table. + comm -13 dynsyms funcsyms > keep_symbols + + # Separate full debug info into debug binary. + $(OBJCOPY) --only-keep-debug binary debug + + # Copy the full debuginfo, keeping only a minimal set of symbols and + # removing some unnecessary sections. + $(OBJCOPY) -S --remove-section .gdb_index --remove-section .comment \ + --keep-symbols=keep_symbols debug mini_debuginfo + + # Drop the full debug info from the original binary. + llvm-strip --strip-all -R .comment binary + + # Inject the compressed data into the .gnu_debugdata section of the + # original binary. 
+ xz --keep mini_debuginfo + $(OBJCOPY) --add-section .gnu_debugdata=mini_debuginfo.xz binary + +clean:: + $(RM) -r mini_debuginfo \ + mini_debuginfo.xz \ + binary \ + debug \ + keep_symbols \ + funcsyms \ + dynsyms + +include $(LEVEL)/Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/TestMiniDebugInfo.py b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/TestMiniDebugInfo.py new file mode 100644 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/TestMiniDebugInfo.py @@ -0,0 +1,36 @@ +""" Testing debugging of a binary with "mini debuginfo" in .gnu_debugdata section. """ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestMinidebugInfo(TestBase): + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + TestBase.setUp(self) + + @no_debug_info_test # Prevent the generation of the dwarf version of this test + @add_test_categories(["dwo"]) + @skipUnlessPlatform(["linux"]) + def test_mini_debug_info(self): + """Test that we can set and hit a breakpoint on a symbol from .gnu_debugdata.""" + + self.build() + exe = self.getBuildArtifact("binary") + + self.target = self.dbg.CreateTarget(exe) + self.assertTrue(self.target, VALID_TARGET) + + main_bp = self.target.BreakpointCreateByName("multiplyByThree", "binary") + self.assertTrue(main_bp, VALID_BREAKPOINT) + + self.process = self.target.LaunchSimple( + None, None, self.get_process_working_directory()) + self.assertTrue(self.process, PROCESS_IS_VALID) + + # The stop reason of the thread should be breakpoint. 
+ self.assertTrue(self.process.GetState() == lldb.eStateStopped, + STOPPED_DUE_TO_BREAKPOINT) + diff --git a/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/main.c b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/main.c new file mode 100644 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/linux/minidebuginfo/main.c @@ -0,0 +1,6 @@ +int multiplyByThree(int num) { return num * 3; } + +int main(int argc, char *argv[]) { + (void)argv; + return multiplyByThree(argc); +} diff --git a/lldb/source/Plugins/ObjectFile/ELF/CMakeLists.txt b/lldb/source/Plugins/ObjectFile/ELF/CMakeLists.txt --- a/lldb/source/Plugins/ObjectFile/ELF/CMakeLists.txt +++ b/lldb/source/Plugins/ObjectFile/ELF/CMakeLists.txt @@ -1,3 +1,17 @@ +include(CMakeDependentOption) +set(minidebuginfo_libs) +find_package(LibLZMA) +cmake_dependent_option(LLDB_ENABLE_ELF_LZMA "Compile LLDB with LZMA" ON "LIBLZMA_FOUND" OFF) +if (LLDB_ENABLE_ELF_LZMA) + if (LIBLZMA_FOUND) + add_definitions(-DLLDB_ENABLE_ELF_MINIDEBUGINFO) + set(minidebuginfo_libs ${LIBLZMA_LIBRARIES}) + include_directories(${LIBLZMA_INCLUDE_DIRS}) + else() + message(FATAL_ERROR "LZMA is required when LLDB_ENABLE_ELF_LZMA is On.") + endif() +endif() + add_lldb_library(lldbPluginObjectFileELF PLUGIN ELFHeader.cpp ObjectFileELF.cpp @@ -7,6 +21,7 @@ lldbHost lldbSymbol lldbTarget + ${minidebuginfo_libs} LINK_COMPONENTS BinaryFormat Object diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -154,6 +154,13 @@ void RelocateSection(lldb_private::Section *section) override; + /// Takes the .gnu_debugdata and returns the decompressed object file that is + /// stored within that section. + /// + /// \returns either the decompressed object file stored within the + /// .gnu_debugdata section or \c nullptr if an error occurred. 
+ std::shared_ptr GetGnuDebugDataObjectFile(); + protected: std::vector @@ -383,6 +390,8 @@ lldb_private::UUID &uuid); bool AnySegmentHasPhysicalAddress(); + + std::shared_ptr m_gnuDebugDataObjectFile; }; #endif // liblldb_ObjectFileELF_h_ diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -12,6 +12,10 @@ #include #include +#ifdef LLDB_ENABLE_ELF_MINIDEBUGINFO +#include +#endif // LLDB_ENABLE_ELF_MINIDEBUGINFO + #include "lldb/Core/FileSpecList.h" #include "lldb/Core/Module.h" #include "lldb/Core/ModuleSpec.h" @@ -1844,6 +1848,167 @@ unified_section_list = *m_sections_up; } +#ifdef LLDB_ENABLE_ELF_MINIDEBUGINFO +static std::string lzma_ret_to_string(lzma_ret code) { + switch (code) { +#define X(n) \ + case n: \ + return #n; + X(LZMA_OK) + X(LZMA_STREAM_END) + X(LZMA_NO_CHECK) + X(LZMA_UNSUPPORTED_CHECK) + X(LZMA_GET_CHECK) + X(LZMA_MEM_ERROR) + X(LZMA_MEMLIMIT_ERROR) + X(LZMA_FORMAT_ERROR) + X(LZMA_OPTIONS_ERROR) + X(LZMA_DATA_ERROR) + X(LZMA_BUF_ERROR) + X(LZMA_PROG_ERROR) +#undef X + } + return "Unknown LZMA return code: " + std::to_string(code); +} + +/// Decompresses the xz-compressed content at \a data_start of size \a +/// data_size. +/// +/// \param[in] data_start +/// Pointer to the start of the data. +/// +/// \param[in] data_size +/// Size of the compressed data. +/// +/// \returns a vector with the decompressed content or an error. 
+llvm::Expected> decompressXZ(const uint8_t *data_start, + uint64_t data_size) { + if (!data_start || data_size == 0) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "address (%p) or size (%lu) of xz-compressed blob cannot be 0", + data_start, data_size); + } + auto opts = lzma_stream_flags{}; + if (data_size < LZMA_STREAM_HEADER_SIZE) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "size of xz-compressed blob (%lu bytes) is smaller than the " + "LZMA_STREAM_HEADER_SIZE (%d bytes)", + data_size, LZMA_STREAM_HEADER_SIZE); + } + + // Decode xz footer. + auto xzerr = lzma_stream_footer_decode(&opts, data_start + data_size - + LZMA_STREAM_HEADER_SIZE); + if (xzerr != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_stream_footer_decode()={%s}", + lzma_ret_to_string(xzerr).c_str()); + } + if (data_size < (opts.backward_size + LZMA_STREAM_HEADER_SIZE)) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "xz-compressed buffer size ({%lu} bytes) too small (required at " + "least {%lu} bytes) ", + data_size, (opts.backward_size + LZMA_STREAM_HEADER_SIZE)); + } + + // Decode xz index. + lzma_index *xzindex; + uint64_t memlimit(UINT64_MAX); + size_t inpos = 0; + xzerr = lzma_index_buffer_decode( + &xzindex, &memlimit, nullptr, + data_start + data_size - LZMA_STREAM_HEADER_SIZE - opts.backward_size, + &inpos, opts.backward_size); + if (xzerr != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_index_buffer_decode()={%s}", + lzma_ret_to_string(xzerr).c_str()); + } + + // Get size of uncompressed file to construct an in-memory buffer of the + // same size. 
+ const auto uncompressed_file_size = lzma_index_uncompressed_size(xzindex); + std::vector result(uncompressed_file_size); + // std::vector decompressed_buf(uncompressed_file_size); + if (result.size() != uncompressed_file_size) { + if (xzindex != nullptr) { + lzma_index_end(xzindex, nullptr); + xzindex = nullptr; + } + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "failed to allocate decompression buffer of size {%lu} bytes", + uncompressed_file_size); + } + + // Deallocate xz index as it is no longer needed. + lzma_index_end(xzindex, nullptr); + + // Decompress xz buffer to buffer. + inpos = 0; + size_t outpos = 0; + xzerr = lzma_stream_buffer_decode(&memlimit, 0, nullptr, data_start, &inpos, + data_size, result.data(), &outpos, + result.size()); + if (xzerr != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_stream_buffer_decode()={%s}", + lzma_ret_to_string(xzerr).c_str()); + } + + return std::move(result); +} +#endif // LLDB_ENABLE_ELF_MINIDEBUGINFO + +std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { + if (m_gnuDebugDataObjectFile != nullptr) { + return m_gnuDebugDataObjectFile; + } + + SectionSP section = + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata")); + if (!section) { + return nullptr; + } + +#ifndef LLDB_ENABLE_ELF_MINIDEBUGINFO + GetModule()->ReportWarning( + "No LZMA support found for reading .gnu_debugdata section"); + return nullptr; +#else + + auto data = DataExtractor(); + section->GetSectionData(data); + auto res = decompressXZ(data.GetDataStart(), data.GetByteSize()); + if (!res) { + GetModule()->ReportWarning( + "An error occurred while decompressing the section %s: %s", + section->GetName().AsCString(), Status(res.takeError()).AsCString()); + return nullptr; + } + + // Construct ObjectFileELF object from decompressed buffer + DataBufferSP gdd_data_buf(new DataBufferHeap(res->data(), res->size())); + auto fspec = GetFileSpec().CopyByAppendingPathComponent( +
llvm::StringRef("gnu_debugdata")); + m_gnuDebugDataObjectFile.reset(new ObjectFileELF( + GetModule(), gdd_data_buf, 0, &fspec, 0, gdd_data_buf->GetByteSize())); + + // This line is essential; otherwise a breakpoint can be set but not hit. + m_gnuDebugDataObjectFile->SetType(ObjectFile::eTypeDebugInfo); + + ArchSpec spec = m_gnuDebugDataObjectFile->GetArchitecture(); + if (spec && m_gnuDebugDataObjectFile->SetModulesArchitecture(spec)) { + return m_gnuDebugDataObjectFile; + } + m_gnuDebugDataObjectFile.reset(); + return nullptr; +#endif // LLDB_ENABLE_ELF_MINIDEBUGINFO +} + // Find the arm/aarch64 mapping symbol character in the given symbol name. // Mapping symbols have the form of "$[.]*". Additionally we // recognize cases when the mapping symbol prefixed by an arbitrary string @@ -2649,19 +2814,25 @@ // while the reverse is not necessarily true. Section *symtab = section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); - if (!symtab) { - // The symtab section is non-allocable and can be stripped, so if it - // doesn't exist then use the dynsym section which should always be - // there. - symtab = - section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) - .get(); - } if (symtab) { m_symtab_up.reset(new Symtab(symtab->GetObjectFile())); symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab); } + // The symtab section is non-allocable and can be stripped, so if it + // doesn't exist then use the dynsym section which should always be + // there. If both, .symtab and .dynsym exist, we load both. + auto dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) + .get(); + if (dynsym) { + if (!m_symtab_up) { + auto sec = symtab ? 
symtab : dynsym; + m_symtab_up.reset(new Symtab(sec->GetObjectFile())); + } + symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym); + } + // DT_JMPREL // If present, this entry's d_ptr member holds the address of // relocation diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -59,6 +59,8 @@ SymbolVendor * SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, lldb_private::Stream *feedback_strm) { + (void)feedback_strm; + if (!module_sp) return nullptr; @@ -67,6 +69,30 @@ if (!obj_file) return nullptr; + std::unique_ptr symbol_vendor( + new SymbolVendorELF(module_sp)); + SectionList *module_section_list = module_sp->GetSectionList(); + + // If there's a .gnu_debugdata section, we'll try to read the .symtab that's + // embedded in there and replace the one in the original object file (if any). + // If there's none in the original object file, we add it to it. + if (auto gdd_obj_file = + obj_file->GetGnuDebugDataObjectFile()) { + if (auto gdd_objfile_section_list = gdd_obj_file->GetSectionList()) { + if (SectionSP symtab_section_sp = + gdd_objfile_section_list->FindSectionByType( + eSectionTypeELFSymbolTable, true)) { + SectionSP module_section_sp = module_section_list->FindSectionByType( + eSectionTypeELFSymbolTable, true); + if (module_section_sp) + module_section_list->ReplaceSection(module_section_sp->GetID(), + symtab_section_sp); + else + module_section_list->AddSection(symtab_section_sp); + } + } + } + lldb_private::UUID uuid = obj_file->GetUUID(); if (!uuid) return nullptr; @@ -111,11 +137,9 @@ // have stripped the code sections, etc. dsym_objfile_sp->SetType(ObjectFile::eTypeDebugInfo); - SymbolVendorELF *symbol_vendor = new SymbolVendorELF(module_sp); - // Get the module unified section list and add our debug sections to // that. 
- SectionList *module_section_list = module_sp->GetSectionList(); + module_section_list = module_sp->GetSectionList(); SectionList *objfile_section_list = dsym_objfile_sp->GetSectionList(); static const SectionType g_sections[] = { @@ -141,7 +165,8 @@ } symbol_vendor->AddSymbolFileRepresentation(dsym_objfile_sp); - return symbol_vendor; + + return symbol_vendor.release(); } // PluginInterface protocol diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -314,8 +314,10 @@ // cleaner and nicer if we read them from the YAML as a separate // top-level key, which automatically ensures that invariants like there // being a single SHT_SYMTAB section are upheld. - std::vector Symbols; + Optional> Symbols; std::vector DynamicSymbols; + + bool HasSymbolsEntryInYAML() const { return Symbols.hasValue(); } }; } // end namespace ELFYAML diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -268,7 +268,7 @@ T reverseBits(T Val) { unsigned char in[sizeof(Val)]; unsigned char out[sizeof(Val)]; - std::memcpy(in, &Val, sizeof(Val)); + std::memcpy(in, &Val, sizeof(Val)); for (unsigned i = 0; i < sizeof(Val); ++i) out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; std::memcpy(&Val, out, sizeof(Val)); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -192,7 +192,12 @@ std::make_unique( ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true)); - std::vector ImplicitSections = {".symtab", ".strtab", ".shstrtab"}; + std::vector ImplicitSections = {".strtab", ".shstrtab"}; + // We only generate .symtab ELF section if there was a "Symbols" entry in the + // YAML file. 
+ if (Doc.HasSymbolsEntryInYAML()) { + ImplicitSections.insert(ImplicitSections.begin(), {".symtab"}); + } if (!Doc.DynamicSymbols.empty()) ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"}); @@ -478,7 +483,8 @@ ELFYAML::Section *YAMLSec) { bool IsStatic = STType == SymtabType::Static; - const auto &Symbols = IsStatic ? Doc.Symbols : Doc.DynamicSymbols; + const auto &Symbols = + IsStatic && Doc.Symbols ? *Doc.Symbols : Doc.DynamicSymbols; ELFYAML::RawContentSection *RawSec = dyn_cast_or_null(YAMLSec); @@ -1013,14 +1019,15 @@ } template bool ELFState::buildSymbolIndexes() { - return buildSymbolsMap(Doc.Symbols, SymN2I) && + return (!Doc.Symbols || buildSymbolsMap(*Doc.Symbols, SymN2I)) && buildSymbolsMap(Doc.DynamicSymbols, DynSymN2I); } template void ELFState::finalizeStrings() { // Add the regular symbol names to .strtab section. - for (const ELFYAML::Symbol &Sym : Doc.Symbols) - DotStrtab.add(dropUniqueSuffix(Sym.Name)); + if (Doc.Symbols) + for (const ELFYAML::Symbol &Sym : *Doc.Symbols) + DotStrtab.add(dropUniqueSuffix(Sym.Name)); DotStrtab.finalize(); // Add the dynamic symbol names to .dynstr section. 
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -911,6 +911,7 @@ StringRef MappingTraits::validate(IO &IO, ELFYAML::Symbol &Symbol) { + (void)IO; if (Symbol.Index && Symbol.Section.data()) return "Index and Section cannot both be specified for Symbol"; if (Symbol.NameIndex && !Symbol.Name.empty()) diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -192,8 +192,10 @@ return TableOrErr.takeError(); ShndxTable = *TableOrErr; } - if (SymTab) - if (Error E = dumpSymbols(SymTab, Y->Symbols)) + if (SymTab) { + Y->Symbols.emplace(); + if (Error E = dumpSymbols(SymTab, *Y->Symbols)) return std::move(E); + } if (DynSymTab) if (Error E = dumpSymbols(DynSymTab, Y->DynamicSymbols))