diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -98,7 +98,7 @@
     message(FATAL_ERROR "LLDB test compilers not specified. Tests will not run.")
   endif()
 
-  set(LLDB_TEST_DEPS lldb)
+  set(LLDB_TEST_DEPS lldb llvm-nm llvm-strip)
 
   # darwin-debug is an hard dependency for the testsuite.
   if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -1,5 +1,6 @@
 include(CheckCXXSymbolExists)
 include(CheckTypeSize)
+include(CMakeDependentOption)
 
 set(LLDB_PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 set(LLDB_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/source")
@@ -332,6 +333,12 @@
 set(LLDB_VERSION "${LLDB_VERSION_MAJOR}.${LLDB_VERSION_MINOR}.${LLDB_VERSION_PATCH}${LLDB_VERSION_SUFFIX}")
 message(STATUS "LLDB version: ${LLDB_VERSION}")
 
+find_package(LibLZMA)
+cmake_dependent_option(LLDB_ENABLE_LZMA "Support LZMA compression" ON "LIBLZMA_FOUND" OFF)
+if (LLDB_ENABLE_LZMA)
+  include_directories(${LIBLZMA_INCLUDE_DIRS})
+endif()
+
 include_directories(BEFORE
   ${CMAKE_CURRENT_BINARY_DIR}/include
   ${CMAKE_CURRENT_SOURCE_DIR}/include
diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake
--- a/lldb/include/lldb/Host/Config.h.cmake
+++ b/lldb/include/lldb/Host/Config.h.cmake
@@ -35,4 +35,6 @@
 #cmakedefine HAVE_LIBCOMPRESSION
 #endif
 
+#cmakedefine LLDB_ENABLE_LZMA
+
 #endif // #ifndef LLDB_HOST_CONFIG_H
diff --git a/lldb/include/lldb/Host/LZMA.h b/lldb/include/lldb/Host/LZMA.h
new file mode 100644
--- /dev/null
+++ b/lldb/include/lldb/Host/LZMA.h
@@ -0,0 +1,40 @@
+//===-- LZMA.h --------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef liblldb_Host_LZMA_h_
+#define liblldb_Host_LZMA_h_
+
+#include <cstdint>
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class Error;
+} // End of namespace llvm
+
+namespace lldb_private {
+
+namespace lzma {
+
+/// Returns true if LLDB was built with LZMA support (LLDB_ENABLE_LZMA).
+bool isAvailable();
+
+/// Determines the decompressed size of the xz stream in \p InputBuffer and
+/// stores it in \p uncompressedSize. Must only be called if isAvailable().
+llvm::Error getUncompressedSize(llvm::ArrayRef<uint8_t> InputBuffer,
+                                uint64_t &uncompressedSize);
+
+/// Decompresses the xz stream in \p InputBuffer into \p Uncompressed. Must only
+/// be called if isAvailable().
+llvm::Error uncompress(llvm::ArrayRef<uint8_t> InputBuffer,
+                       llvm::SmallVectorImpl<uint8_t> &Uncompressed);
+
+} // End of namespace lzma
+
+} // End of namespace lldb_private
+
+#endif // liblldb_Host_LZMA_h_
diff --git a/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.h b/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.h
new file mode 100644
--- /dev/null
+++ b/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.h
@@ -0,0 +1,2 @@
+// This function will be embedded within the .dynsym section.
+int multiplyByThree(int num);
diff --git a/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.c b/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.c
new file mode 100644
--- /dev/null
+++ b/lldb/lit/Breakpoint/Inputs/minidebuginfo-lib.c
@@ -0,0 +1,5 @@
+#include "minidebuginfo-lib.h"
+
+int multiplyByThree(int num) {
+  return num * 3;
+}
diff --git a/lldb/lit/Breakpoint/Inputs/minidebuginfo-main.c b/lldb/lit/Breakpoint/Inputs/minidebuginfo-main.c
new file mode 100644
--- /dev/null
+++ b/lldb/lit/Breakpoint/Inputs/minidebuginfo-main.c
@@ -0,0 +1,11 @@
+#include "minidebuginfo-lib.h"
+
+// This function will be embedded within the .symtab section of the
+// .gnu_debugdata section.
+int multiplyByFour(int num) { return num * 4; }
+
+int main(int argc, char *argv[]) {
+  int x = multiplyByThree(argc);
+  int y = multiplyByFour(x);
+  return y;
+}
diff --git a/lldb/lit/Breakpoint/minidebuginfo.test b/lldb/lit/Breakpoint/minidebuginfo.test
new file mode 100644
--- /dev/null
+++ b/lldb/lit/Breakpoint/minidebuginfo.test
@@ -0,0 +1,58 @@
+# REQUIRES: system-linux, lzma
+
+# RUN: %clang -g -shared -fpic -o %T/mylib.so -I%p/Inputs %p/Inputs/minidebuginfo-lib.c
+# RUN: %clang -g -o %t.binary %T/mylib.so %p/Inputs/minidebuginfo-main.c
+
+# The following section is adapted from GDB's official documentation:
+# http://sourceware.org/gdb/current/onlinedocs/gdb/MiniDebugInfo.html#MiniDebugInfo
+
+# Extract the dynamic symbols from the main binary, there is no need
+# to also have these in the normal symbol table.
+
+# RUN: llvm-nm -D %t.binary --format=posix --defined-only | awk '{ print $1 }' | sort > %t.dynsyms
+
+# Extract all the text (i.e. function) symbols from the debuginfo.
+# (Note that we actually also accept "D" symbols, for the benefit
+# of platforms like PowerPC64 that use function descriptors.)
+
+# RUN: llvm-nm %t.binary --format=posix --defined-only | awk '{ if ($2 == "T" || $2 == "t" || $2 == "D") print $1 }' | sort > %t.funcsyms
+
+# Keep all the function symbols not already in the dynamic symbol
+# table.
+
+# RUN: comm -13 %t.dynsyms %t.funcsyms > %t.keep_symbols
+
+# Separate full debug info into debug binary.
+
+# RUN: llvm-objcopy --only-keep-debug %t.binary %t.debug
+
+# Copy the full debuginfo, keeping only a minimal set of symbols and
+# removing some unnecessary sections.
+
+# RUN: llvm-objcopy -S --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.debug %t.mini_debuginfo
+
+# Drop the full debug info from the original binary.
+
+# RUN: llvm-strip --strip-all -R .comment %t.binary
+
+# Inject the compressed data into the .gnu_debugdata section of the
+# original binary.
+
+# RUN: xz --force --keep %t.mini_debuginfo
+
+# RUN: llvm-objcopy --add-section .gnu_debugdata=%t.mini_debuginfo.xz %t.binary
+
+# Now run the binary and see that we can set and hit a breakpoint
+# from within the .dynsym section (multiplyByThree) and one from
+# the .symtab section embedded in the .gnu_debugdata section (multiplyByFour).
+
+# RUN: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:%T %lldb -x -b -o 'b multiplyByThree' -o 'b multiplyByFour' -o 'breakpoint list -v' -o 'run' -o 'continue' %t.binary | FileCheck %s
+
+# CHECK: (lldb) b multiplyByThree
+# CHECK-NEXT: Breakpoint 1: where = mylib.so`multiplyByThree + 7 at minidebuginfo-lib.c:4:13, address = 0x{{.*}}
+
+# CHECK: (lldb) b multiplyByFour
+# CHECK-NEXT: Breakpoint 2: where = minidebuginfo.test.tmp.binary`multiplyByFour, address = 0x{{.*}}
+
+# CHECK: * thread #1, name = 'minidebuginfo.t', stop reason = breakpoint 1.2
+# CHECK: * thread #1, name = 'minidebuginfo.t', stop reason = breakpoint 2.1
diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt
--- a/lldb/lit/CMakeLists.txt
+++ b/lldb/lit/CMakeLists.txt
@@ -53,9 +53,11 @@
   lli
   llvm-config
   llvm-dwarfdump
+  llvm-nm
   llvm-mc
   llvm-objcopy
   llvm-readobj
+  llvm-strip
   )
 
 if(TARGET lld)
diff --git a/lldb/lit/lit.cfg.py b/lldb/lit/lit.cfg.py
--- a/lldb/lit/lit.cfg.py
+++ b/lldb/lit/lit.cfg.py
@@ -101,3 +101,6 @@
 
 if not config.lldb_disable_python:
     config.available_features.add('python')
+
+if config.lldb_enable_lzma == "ON":
+    config.available_features.add('lzma')
diff --git a/lldb/lit/lit.site.cfg.py.in b/lldb/lit/lit.site.cfg.py.in
--- a/lldb/lit/lit.site.cfg.py.in
+++ b/lldb/lit/lit.site.cfg.py.in
@@ -15,6 +15,7 @@
 config.target_triple = "@TARGET_TRIPLE@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
 config.have_zlib = @LLVM_ENABLE_ZLIB@
+config.lldb_enable_lzma = "@LLDB_ENABLE_LZMA@"
 config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.lldb_bitness = 64 if @LLDB_IS_64_BITS@ else 32
 config.lldb_disable_python = @LLDB_DISABLE_PYTHON@
diff --git
a/lldb/source/Host/CMakeLists.txt b/lldb/source/Host/CMakeLists.txt
--- a/lldb/source/Host/CMakeLists.txt
+++ b/lldb/source/Host/CMakeLists.txt
@@ -29,6 +29,7 @@
   common/HostProcess.cpp
   common/HostThread.cpp
   common/LockFileBase.cpp
+  common/LZMA.cpp
   common/MainLoop.cpp
   common/MonitoringProcessLauncher.cpp
   common/NativeProcessProtocol.cpp
@@ -157,6 +158,9 @@
 if (NOT LLDB_DISABLE_LIBEDIT)
   list(APPEND EXTRA_LIBS ${libedit_LIBRARIES})
 endif()
+if (LLDB_ENABLE_LZMA)
+  list(APPEND EXTRA_LIBS ${LIBLZMA_LIBRARIES})
+endif()
 
 if (NOT LLDB_DISABLE_LIBEDIT)
   list(APPEND LLDB_LIBEDIT_LIBS ${libedit_LIBRARIES})
diff --git a/lldb/source/Host/common/LZMA.cpp b/lldb/source/Host/common/LZMA.cpp
new file mode 100644
--- /dev/null
+++ b/lldb/source/Host/common/LZMA.cpp
@@ -0,0 +1,169 @@
+//===-- LZMA.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Config.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#ifdef LLDB_ENABLE_LZMA
+#include <lzma.h>
+#endif // LLDB_ENABLE_LZMA
+
+namespace lldb_private {
+
+namespace lzma {
+
+#ifndef LLDB_ENABLE_LZMA
+// Stubs for builds without liblzma: isAvailable() reports false and the
+// decoding entry points must not be reached.
+bool isAvailable() { return false; }
+llvm::Error getUncompressedSize(llvm::ArrayRef<uint8_t> InputBuffer,
+                                uint64_t &uncompressedSize) {
+  llvm_unreachable("lzma::getUncompressedSize is unavailable");
+}
+
+llvm::Error uncompress(llvm::ArrayRef<uint8_t> InputBuffer,
+                       llvm::SmallVectorImpl<uint8_t> &Uncompressed) {
+  llvm_unreachable("lzma::uncompress is unavailable");
+}
+
+#else // LLDB_ENABLE_LZMA
+
+bool isAvailable() { return true; }
+
+// Maps a liblzma status code to a printable string for error messages.
+static const char *convertLZMACodeToString(lzma_ret Code) {
+  switch (Code) {
+  case LZMA_STREAM_END:
+    return "lzma error: LZMA_STREAM_END";
+  case LZMA_NO_CHECK:
+    return "lzma error: LZMA_NO_CHECK";
+  case LZMA_UNSUPPORTED_CHECK:
+    return "lzma error: LZMA_UNSUPPORTED_CHECK";
+  case LZMA_GET_CHECK:
+    return "lzma error: LZMA_GET_CHECK";
+  case LZMA_MEM_ERROR:
+    return "lzma error: LZMA_MEM_ERROR";
+  case LZMA_MEMLIMIT_ERROR:
+    return "lzma error: LZMA_MEMLIMIT_ERROR";
+  case LZMA_FORMAT_ERROR:
+    return "lzma error: LZMA_FORMAT_ERROR";
+  case LZMA_OPTIONS_ERROR:
+    return "lzma error: LZMA_OPTIONS_ERROR";
+  case LZMA_DATA_ERROR:
+    return "lzma error: LZMA_DATA_ERROR";
+  case LZMA_BUF_ERROR:
+    return "lzma error: LZMA_BUF_ERROR";
+  case LZMA_PROG_ERROR:
+    return "lzma error: LZMA_PROG_ERROR";
+  default:
+    llvm_unreachable("unknown or unexpected lzma status code");
+  }
+}
+
+// Reads the xz stream footer and index at the end of \p InputBuffer and stores
+// the size the stream will have once decompressed in \p uncompressedSize.
+// Returns an error if the buffer is too small or liblzma rejects the data.
+llvm::Error getUncompressedSize(llvm::ArrayRef<uint8_t> InputBuffer,
+                                uint64_t &uncompressedSize) {
+  if (InputBuffer.size() == 0)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "size of xz-compressed blob cannot be 0");
+
+  auto opts = lzma_stream_flags{};
+  if (InputBuffer.size() < LZMA_STREAM_HEADER_SIZE) {
+    // Cast explicitly: LZMA_STREAM_HEADER_SIZE is an int constant and size_t
+    // is not unsigned long everywhere, so "%lu" did not match the arguments.
+    return llvm::createStringError(
+        llvm::inconvertibleErrorCode(),
+        "size of xz-compressed blob (%llu bytes) is smaller than the "
+        "LZMA_STREAM_HEADER_SIZE (%llu bytes)",
+        static_cast<unsigned long long>(InputBuffer.size()),
+        static_cast<unsigned long long>(LZMA_STREAM_HEADER_SIZE));
+  }
+
+  // Decode xz footer.
+  auto xzerr = lzma_stream_footer_decode(
+      &opts, InputBuffer.data() + InputBuffer.size() - LZMA_STREAM_HEADER_SIZE);
+  if (xzerr != LZMA_OK) {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "lzma_stream_footer_decode()=%s",
+                                   convertLZMACodeToString(xzerr));
+  }
+  if (InputBuffer.size() < (opts.backward_size + LZMA_STREAM_HEADER_SIZE)) {
+    return llvm::createStringError(
+        llvm::inconvertibleErrorCode(),
+        "xz-compressed buffer size (%llu bytes) too small (required at "
+        "least %llu bytes) ",
+        static_cast<unsigned long long>(InputBuffer.size()),
+        static_cast<unsigned long long>(opts.backward_size +
+                                        LZMA_STREAM_HEADER_SIZE));
+  }
+
+  // Decode xz index. The index occupies the opts.backward_size bytes right
+  // before the footer, so only that many bytes are handed to the decoder;
+  // passing InputBuffer.size() here would let it read past the buffer's end.
+  lzma_index *xzindex = nullptr;
+  uint64_t memlimit(UINT64_MAX);
+  size_t inpos = 0;
+  xzerr =
+      lzma_index_buffer_decode(&xzindex, &memlimit, nullptr,
+                               InputBuffer.data() + InputBuffer.size() -
+                                   LZMA_STREAM_HEADER_SIZE - opts.backward_size,
+                               &inpos, static_cast<size_t>(opts.backward_size));
+  if (xzerr != LZMA_OK) {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "lzma_index_buffer_decode()=%s",
+                                   convertLZMACodeToString(xzerr));
+  }
+
+  // Get size of uncompressed file to construct an in-memory buffer of the
+  // same size on the calling end (if needed).
+  uncompressedSize = lzma_index_uncompressed_size(xzindex);
+
+  // Deallocate xz index as it is no longer needed.
+  lzma_index_end(xzindex, nullptr);
+
+  return llvm::Error::success();
+}
+
+// Decompresses the xz stream in \p InputBuffer into \p Uncompressed, which is
+// resized to the uncompressed size recorded in the stream's index.
+llvm::Error uncompress(llvm::ArrayRef<uint8_t> InputBuffer,
+                       llvm::SmallVectorImpl<uint8_t> &Uncompressed) {
+  if (InputBuffer.size() == 0)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "size of xz-compressed blob cannot be 0");
+
+  uint64_t uncompressedSize = 0;
+  auto err = getUncompressedSize(InputBuffer, uncompressedSize);
+  if (err)
+    return err;
+
+  Uncompressed.resize(uncompressedSize);
+
+  // Decompress xz buffer to buffer.
+  uint64_t memlimit(UINT64_MAX);
+  size_t inpos = 0;
+  size_t outpos = 0;
+  auto xzerr = lzma_stream_buffer_decode(
+      &memlimit, 0, nullptr, InputBuffer.data(), &inpos, InputBuffer.size(),
+      Uncompressed.data(), &outpos, Uncompressed.size());
+  if (xzerr != LZMA_OK) {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "lzma_stream_buffer_decode()=%s",
+                                   convertLZMACodeToString(xzerr));
+  }
+
+  return llvm::Error::success();
+}
+
+#endif // LLDB_ENABLE_LZMA
+
+} // end of namespace lzma
+} // namespace lldb_private
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -208,6 +208,10 @@
   /// Collection of symbols from the dynamic table.
   DynamicSymbolColl m_dynamic_symbols;
 
+  /// Object file parsed from .gnu_debugdata section (\sa
+  /// GetGnuDebugDataObjectFile())
+  std::shared_ptr<ObjectFileELF> m_gnu_debug_data_object_file;
+
   /// List of file specifications corresponding to the modules (shared
   /// libraries) on which this object file depends.
   mutable std::unique_ptr<lldb_private::FileSpecList> m_filespec_up;
@@ -383,6 +387,14 @@
                             lldb_private::UUID &uuid);
 
   bool AnySegmentHasPhysicalAddress();
+
+  /// Takes the .gnu_debugdata and returns the decompressed object file that is
+  /// stored within that section.
+  ///
+  /// \returns either the decompressed object file stored within the
+  /// .gnu_debugdata section or \c nullptr if an error occurred or if there's no
+  /// section with that name.
+  std::shared_ptr<ObjectFileELF> GetGnuDebugDataObjectFile();
 };
 
 #endif // liblldb_ObjectFileELF_h_
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -18,6 +18,7 @@
 #include "lldb/Core/PluginManager.h"
 #include "lldb/Core/Section.h"
 #include "lldb/Host/FileSystem.h"
+#include "lldb/Host/LZMA.h"
 #include "lldb/Symbol/DWARFCallFrameInfo.h"
 #include "lldb/Symbol/SymbolContext.h"
 #include "lldb/Target/SectionLoadList.h"
@@ -1842,6 +1843,77 @@
   // unified section list.
   if (GetType() != eTypeDebugInfo)
     unified_section_list = *m_sections_up;
+
+  // If there's a .gnu_debugdata section, we'll try to read the .symtab that's
+  // embedded in there and replace the one in the original object file (if any).
+  // If there's none in the original object file, we add it to it.
+  SectionList *module_section_list = GetModule()->GetSectionList();
+  if (auto gdd_obj_file = GetGnuDebugDataObjectFile()) {
+    if (auto gdd_objfile_section_list = gdd_obj_file->GetSectionList()) {
+      if (SectionSP symtab_section_sp =
+              gdd_objfile_section_list->FindSectionByType(
+                  eSectionTypeELFSymbolTable, true)) {
+        SectionSP module_section_sp = module_section_list->FindSectionByType(
+            eSectionTypeELFSymbolTable, true);
+        if (module_section_sp)
+          module_section_list->ReplaceSection(module_section_sp->GetID(),
+                                              symtab_section_sp);
+        else
+          module_section_list->AddSection(symtab_section_sp);
+      }
+    }
+  }
+}
+
+// Decompresses the .gnu_debugdata section on first use and wraps the result in
+// an ObjectFileELF, which is cached in m_gnu_debug_data_object_file.
+std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() {
+  if (m_gnu_debug_data_object_file != nullptr)
+    return m_gnu_debug_data_object_file;
+
+  SectionSP section =
+      GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"));
+  if (!section)
+    return nullptr;
+
+  if (!lldb_private::lzma::isAvailable()) {
+    GetModule()->ReportWarning(
+        "No LZMA support found for reading .gnu_debugdata section");
+    return nullptr;
+  }
+
+  // Uncompress the data
+  DataExtractor data;
+  section->GetSectionData(data);
+  llvm::ArrayRef<uint8_t> compressedData(data.GetDataStart(), data.GetByteSize());
+  llvm::SmallVector<uint8_t, 0> uncompressedData;
+  auto err = lldb_private::lzma::uncompress(compressedData, uncompressedData);
+  if (err) {
+    GetModule()->ReportWarning(
+        "An error occurred while decompressing the section %s: %s",
+        section->GetName().AsCString(), llvm::toString(std::move(err)).c_str());
+    return nullptr;
+  }
+
+  // Construct ObjectFileELF object from decompressed buffer
+  DataBufferSP gdd_data_buf(
+      new DataBufferHeap(uncompressedData.data(), uncompressedData.size()));
+  auto fspec = GetFileSpec().CopyByAppendingPathComponent(
+      llvm::StringRef("gnu_debugdata"));
+  m_gnu_debug_data_object_file.reset(new ObjectFileELF(
+      GetModule(), gdd_data_buf, 0, &fspec, 0, gdd_data_buf->GetByteSize()));
+
+  // This line is essential; otherwise a breakpoint can be set but not hit.
+  m_gnu_debug_data_object_file->SetType(ObjectFile::eTypeDebugInfo);
+
+  ArchSpec spec = m_gnu_debug_data_object_file->GetArchitecture();
+  if (spec && m_gnu_debug_data_object_file->SetModulesArchitecture(spec))
+    return m_gnu_debug_data_object_file;
+
+  // Do not keep a rejected object cached: the early return at the top would
+  // otherwise hand it out on the next call even though this call failed.
+  m_gnu_debug_data_object_file.reset();
+  return nullptr;
 }
 
 // Find the arm/aarch64 mapping symbol character in the given symbol name.
@@ -2649,19 +2721,25 @@
     // while the reverse is not necessarily true.
     Section *symtab =
         section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
-    if (!symtab) {
-      // The symtab section is non-allocable and can be stripped, so if it
-      // doesn't exist then use the dynsym section which should always be
-      // there.
-      symtab =
-          section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
-              .get();
-    }
     if (symtab) {
       m_symtab_up.reset(new Symtab(symtab->GetObjectFile()));
       symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab);
     }
 
+    // The symtab section is non-allocable and can be stripped. If both .symtab
+    // and .dynsym exist, we load both. And if only .dynsym exists, we load it
+    // alone.
+    auto dynsym =
+        section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
+            .get();
+    if (dynsym) {
+      if (!m_symtab_up) {
+        auto sec = symtab ? symtab : dynsym;
+        m_symtab_up.reset(new Symtab(sec->GetObjectFile()));
+      }
+      symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym);
+    }
+
     // DT_JMPREL
     //      If present, this entry's d_ptr member holds the address of
     //      relocation