diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -16,18 +16,21 @@ class DWARFDebugArangeSet { public: struct Header { - uint32_t length; // The total length of the entries for that set, not - // including the length field itself. - uint16_t version; // The DWARF version number - uint32_t cu_offset; // The offset from the beginning of the .debug_info - // section of the compilation unit entry referenced by - // the table. - uint8_t addr_size; // The size in bytes of an address on the target - // architecture. For segmented addressing, this is the - // size of the offset portion of the address - uint8_t seg_size; // The size in bytes of a segment descriptor on the target - // architecture. If the target system uses a flat address - // space, this value is 0. + /// The total length of the entries for that set, not including the length + /// field itself. + uint32_t length = 0; + /// The DWARF version number. + uint16_t version = 0; + /// The offset from the beginning of the .debug_info section of the + /// compilation unit entry referenced by the table. + uint32_t cu_offset = 0; + /// The size in bytes of an address on the target architecture. For + /// segmented addressing, this is the size of the offset portion of the + /// address. + uint8_t addr_size = 0; + /// The size in bytes of a segment descriptor on the target architecture. + /// If the target system uses a flat address space, this value is 0. 
+ uint8_t seg_size = 0; }; struct Descriptor { @@ -44,7 +47,7 @@ dw_offset_t FindAddress(dw_addr_t address) const; size_t NumDescriptors() const { return m_arange_descriptors.size(); } const Header &GetHeader() const { return m_header; } - + dw_offset_t GetNextOffset() const { return m_next_offset; } const Descriptor &GetDescriptorRef(uint32_t i) const { return m_arange_descriptors[i]; } @@ -54,7 +57,8 @@ typedef DescriptorColl::iterator DescriptorIter; typedef DescriptorColl::const_iterator DescriptorConstIter; - uint32_t m_offset; + dw_offset_t m_offset; + dw_offset_t m_next_offset; Header m_header; DescriptorColl m_arange_descriptors; }; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -8,22 +8,18 @@ #include "DWARFDebugArangeSet.h" #include "DWARFDataExtractor.h" +#include "LogChannelDWARF.h" #include "llvm/Object/Error.h" #include using namespace lldb_private; DWARFDebugArangeSet::DWARFDebugArangeSet() - : m_offset(DW_INVALID_OFFSET), m_header(), m_arange_descriptors() { - m_header.length = 0; - m_header.version = 0; - m_header.cu_offset = 0; - m_header.addr_size = 0; - m_header.seg_size = 0; -} + : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} void DWARFDebugArangeSet::Clear() { m_offset = DW_INVALID_OFFSET; + m_next_offset = DW_INVALID_OFFSET; m_header.length = 0; m_header.version = 0; m_header.cu_offset = 0; @@ -54,6 +50,12 @@ // consists of an address and a length, each in the size appropriate for an // address on the target architecture. m_header.length = data.GetDWARFInitialLength(offset_ptr); + // The length could be 4 bytes or 12 bytes, so use the current offset to + // determine the next offset correctly. 
+ if (m_header.length > 0) + m_next_offset = *offset_ptr + m_header.length; + else + m_next_offset = DW_INVALID_OFFSET; m_header.version = data.GetU16(offset_ptr); m_header.cu_offset = data.GetDWARFOffset(offset_ptr); m_header.addr_size = data.GetU8(offset_ptr); @@ -105,17 +107,45 @@ "DWARFDebugArangeSet::Descriptor.address and " "DWARFDebugArangeSet::Descriptor.length must have same size"); - while (data.ValidOffset(*offset_ptr)) { + const lldb::offset_t next_offset = GetNextOffset(); + assert(next_offset != DW_INVALID_OFFSET); + uint32_t num_terminators = 0; + bool last_was_terminator = false; + while (*offset_ptr < next_offset) { arangeDescriptor.address = data.GetMaxU64(offset_ptr, m_header.addr_size); arangeDescriptor.length = data.GetMaxU64(offset_ptr, m_header.addr_size); // Each set of tuples is terminated by a 0 for the address and 0 for - // the length. - if (!arangeDescriptor.address && !arangeDescriptor.length) - return llvm::ErrorSuccess(); - - m_arange_descriptors.push_back(arangeDescriptor); + // the length. Some linkers can emit .debug_aranges with multiple + // terminator pair entries that are still within the length of the + // DWARFDebugArangeSet. We want to be sure to parse all entries for + // this DWARFDebugArangeSet so that we don't stop parsing early and end up + // treating addresses as a header of the next DWARFDebugArangeSet. We also + // need to make sure we parse all valid address pairs so we don't omit them + // from the aranges result, so we can't stop at the first terminator entry + // we find. + if (arangeDescriptor.address == 0 && arangeDescriptor.length == 0) { + ++num_terminators; + last_was_terminator = true; + } else { + last_was_terminator = false; + // Only add .debug_aranges address entries that have a non zero size. + // Some linkers will zero out the length field for some .debug_aranges + // entries if they were stripped. We also could watch out for multiple + // entries at address zero and remove those as well. 
+ if (arangeDescriptor.length > 0) + m_arange_descriptors.push_back(arangeDescriptor); + } + } + if (num_terminators > 1) { + Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + LLDB_LOG(log, + "warning: DWARFDebugArangeSet at %#" PRIx64 " contains %u " + "terminator entries", + m_offset, num_terminators); } + if (last_was_terminator) + return llvm::ErrorSuccess(); return llvm::make_error( "arange descriptors not terminated by null entry"); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -26,8 +26,7 @@ void Clear() { m_aranges.Clear(); } - llvm::Error - extract(const lldb_private::DWARFDataExtractor &debug_aranges_data); + void extract(const lldb_private::DWARFDataExtractor &debug_aranges_data); // Use append range multiple times and then call sort void AppendRange(dw_offset_t cu_offset, dw_addr_t low_pc, dw_addr_t high_pc); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -9,6 +9,7 @@ #include "DWARFDebugAranges.h" #include "DWARFDebugArangeSet.h" #include "DWARFUnit.h" +#include "LogChannelDWARF.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Timer.h" @@ -31,31 +32,40 @@ }; // Extract -llvm::Error -DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) { +void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) { lldb::offset_t offset = 0; DWARFDebugArangeSet set; Range range; while (debug_aranges_data.ValidOffset(offset)) { - llvm::Error error = set.extract(debug_aranges_data, &offset); - if (error) - return error; + const lldb::offset_t set_offset = offset; + if (llvm::Error error = 
set.extract(debug_aranges_data, &offset)) { + Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + LLDB_LOG_ERROR(log, std::move(error), + "DWARFDebugAranges::extract failed to extract " + ".debug_aranges set at offset %#" PRIx64, + set_offset); + } else { + const uint32_t num_descriptors = set.NumDescriptors(); + if (num_descriptors > 0) { + const dw_offset_t cu_offset = set.GetHeader().cu_offset; - const uint32_t num_descriptors = set.NumDescriptors(); - if (num_descriptors > 0) { - const dw_offset_t cu_offset = set.GetHeader().cu_offset; - - for (uint32_t i = 0; i < num_descriptors; ++i) { - const DWARFDebugArangeSet::Descriptor &descriptor = - set.GetDescriptorRef(i); - m_aranges.Append(RangeToDIE::Entry(descriptor.address, - descriptor.length, cu_offset)); + for (uint32_t i = 0; i < num_descriptors; ++i) { + const DWARFDebugArangeSet::Descriptor &descriptor = + set.GetDescriptorRef(i); + m_aranges.Append(RangeToDIE::Entry(descriptor.address, + descriptor.length, cu_offset)); + } } } + // Always use the previous DWARFDebugArangeSet's information to calculate + // the offset of the next DWARFDebugArangeSet in case we encounter an + // error in the current DWARFDebugArangeSet and our offset position is + // still in the middle of the data. If we do this, we can parse all valid + // DWARFDebugArangeSet objects without returning invalid errors. 
+ offset = set.GetNextOffset(); set.Clear(); } - return llvm::ErrorSuccess(); } void DWARFDebugAranges::Dump(Log *log) const { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -54,7 +54,7 @@ (1 << 2) // Show all parent DIEs when dumping single DIEs }; - llvm::Expected GetCompileUnitAranges(); + const DWARFDebugAranges &GetCompileUnitAranges(); protected: typedef std::vector UnitColl; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -34,17 +34,18 @@ lldb_private::DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} -llvm::Expected DWARFDebugInfo::GetCompileUnitAranges() { +const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { if (m_cu_aranges_up) return *m_cu_aranges_up; m_cu_aranges_up = std::make_unique(); const DWARFDataExtractor &debug_aranges_data = m_context.getOrLoadArangesData(); - if (llvm::Error error = m_cu_aranges_up->extract(debug_aranges_data)) - return std::move(error); - // Make a list of all CUs represented by the arange data in the file. + // Extract what we can from the .debug_aranges first. + m_cu_aranges_up->extract(debug_aranges_data); + + // Make a list of all CUs represented by the .debug_aranges data. std::set cus_with_data; for (size_t n = 0; n < m_cu_aranges_up->GetNumRanges(); n++) { dw_offset_t offset = m_cu_aranges_up->OffsetAtIndex(n); @@ -52,8 +53,7 @@ cus_with_data.insert(offset); } - // Manually build arange data for everything that wasn't in the - // .debug_aranges table. + // Manually build arange data for everything that wasn't in .debug_aranges. 
const size_t num_units = GetNumUnits(); for (size_t idx = 0; idx < num_units; ++idx) { DWARFUnit *cu = GetUnitAtIndex(idx); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1862,17 +1862,8 @@ lldb::addr_t file_vm_addr = so_addr.GetFileAddress(); DWARFDebugInfo &debug_info = DebugInfo(); - llvm::Expected aranges = - debug_info.GetCompileUnitAranges(); - if (!aranges) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); - LLDB_LOG_ERROR(log, aranges.takeError(), - "SymbolFileDWARF::ResolveSymbolContext failed to get cu " - "aranges. {0}"); - return 0; - } - - const dw_offset_t cu_offset = aranges->FindAddress(file_vm_addr); + const DWARFDebugAranges &aranges = debug_info.GetCompileUnitAranges(); + const dw_offset_t cu_offset = aranges.FindAddress(file_vm_addr); if (cu_offset == DW_INVALID_OFFSET) { // Global variables are not in the compile unit address ranges. The only // way to currently find global variables is to iterate over the diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp --- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp @@ -347,12 +347,101 @@ EXPECT_EQ(off, 12U); // Parser should read no further than the segment size } +TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) { + // This .debug_aranges set has multiple terminator entries which appear in + // binaries produced by popular linux compilers and linker combinations. We + // must be able to parse all the way through the data for each + // DWARFDebugArangeSet. 
Previously the DWARFDebugArangeSet::extract() + // function would stop parsing as soon as we ran into a terminator even + // though the length field stated that there was more data that follows. This + // would cause the next DWARFDebugArangeSet to be parsed immediately + // following the first terminator and it would attempt to decode the + // DWARFDebugArangeSet header using the remaining segment + address pairs + // from the remaining bytes. + unsigned char binary_data[] = { + 0, 0, 0, 0, // unit_length that will be set correctly after this + 0, 2, // DWARF version number (uint16_t) + 0, 0, 0, 0, // CU offset (ignored for the purposes of this test) + 4, // address size + 0, // segment size + 0, 0, 0, 0, // alignment for the first tuple + // BEGIN TUPLES + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // premature terminator + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // premature terminator + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x10, // [0x2000-0x2010) + // END TUPLES + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // terminator + }; + // Set the big endian length correctly. + const offset_t binary_data_size = sizeof(binary_data); + binary_data[3] = (uint8_t)binary_data_size - 4; + DWARFDataExtractor data; + data.SetData(static_cast(binary_data), sizeof binary_data, + lldb::ByteOrder::eByteOrderBig); + DWARFDebugArangeSet set; + offset_t off = 0; + llvm::Error error = set.extract(data, &off); + // Multiple terminators are not fatal as they do appear in binaries. + EXPECT_FALSE(bool(error)); + // Parser should read all terminators to the end of the length specified. 
+ EXPECT_EQ(off, binary_data_size); + ASSERT_EQ(set.NumDescriptors(), 2U); + ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000); + ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100); + ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000); + ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10); +} + +TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) { + // This .debug_aranges set has some address ranges which have zero length + // and we ensure that these are ignored by our DWARFDebugArangeSet parser + // and not included in the descriptors that are returned. + unsigned char binary_data[] = { + 0, 0, 0, 0, // unit_length that will be set correctly after this + 0, 2, // DWARF version number (uint16_t) + 0, 0, 0, 0, // CU offset (ignored for the purposes of this test) + 4, // address size + 0, // segment size + 0, 0, 0, 0, // alignment for the first tuple + // BEGIN TUPLES + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100) + 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, // [0x1100-0x1100) + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x10, // [0x2000-0x2010) + 0x00, 0x00, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, // [0x2010-0x2010) + // END TUPLES + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // terminator + }; + // Set the big endian length correctly. + const offset_t binary_data_size = sizeof(binary_data); + binary_data[3] = (uint8_t)binary_data_size - 4; + DWARFDataExtractor data; + data.SetData(static_cast(binary_data), sizeof binary_data, + lldb::ByteOrder::eByteOrderBig); + DWARFDebugArangeSet set; + offset_t off = 0; + llvm::Error error = set.extract(data, &off); + // Zero-length address ranges are not fatal; they are simply skipped. + EXPECT_FALSE(bool(error)); + // Parser should read all terminators to the end of the length specified. 
+ // Previously the DWARFDebugArangeSet would stop at the first terminator + // entry and leave the offset in the middle of the current + // DWARFDebugArangeSet data, and that would cause the next extracted + // DWARFDebugArangeSet to fail. + EXPECT_EQ(off, binary_data_size); + ASSERT_EQ(set.NumDescriptors(), 2U); + ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000); + ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100); + ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000); + ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10); +} + TEST_F(SymbolFileDWARFTests, ParseAranges) { // Test we can successfully parse a DWARFDebugAranges. The initial error // checking code had a bug where it would always return an empty address // ranges for everything in .debug_aranges and no error. - const unsigned char binary_data[] = { - 60, 0, 0, 0, // unit_length + unsigned char binary_data[] = { + 0, 0, 0, 0, // unit_length that will be set correctly after this 2, 0, // DWARF version number 255, 0, 0, 0, // offset into the .debug_info_table 8, // address size @@ -367,14 +456,14 @@ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Size 0x0100 // Terminating tuple 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // Terminator }; + // Set the little endian length correctly. 
+ binary_data[0] = sizeof(binary_data) - 4; DWARFDataExtractor data; data.SetData(static_cast(binary_data), sizeof binary_data, lldb::ByteOrder::eByteOrderLittle); DWARFDebugAranges debug_aranges; - llvm::Error error = debug_aranges.extract(data); - ASSERT_FALSE(bool(error)); + debug_aranges.extract(data); EXPECT_EQ(debug_aranges.GetNumRanges(), 2u); EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET); EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u); @@ -385,3 +474,58 @@ EXPECT_EQ(debug_aranges.FindAddress(0x2100 - 1), 255u); EXPECT_EQ(debug_aranges.FindAddress(0x2100), DW_INVALID_OFFSET); } + +TEST_F(SymbolFileDWARFTests, ParseArangesSkipErrors) { + // Test we can successfully parse a DWARFDebugAranges that contains some + // valid DWARFDebugArangeSet objects and some with errors as long as their + // length is set correctly. This helps LLDB ensure that it can parse newer + // .debug_aranges version that LLDB currently doesn't support, or ignore + // errors in individual DWARFDebugArangeSet objects as long as the length + // is set correctly. + const unsigned char binary_data[] = { + // This DWARFDebugArangeSet is well formed and has a single address range + // for [0x1000-0x1100) with a CU offset of 0x00000000. + 0, 0, 0, 28, // unit_length (28 bytes follow this field) + 0, 2, // DWARF version number (uint16_t) + 0, 0, 0, 0, // CU offset = 0x00000000 + 4, // address size + 0, // segment size + 0, 0, 0, 0, // alignment for the first tuple + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // terminator + // This DWARFDebugArangeSet has the correct length, but an invalid + // version. We need to be able to skip this correctly and ignore it. 
+ 0, 0, 0, 20, // unit_length (20 bytes follow this field) + 0, 44, // invalid DWARF version number (uint16_t) + 0, 0, 1, 0, // CU offset = 0x00000100 + 4, // address size + 0, // segment size + 0, 0, 0, 0, // alignment for the first tuple + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // terminator + // This DWARFDebugArangeSet is well formed and has a single address range + // for [0x2000-0x2100) with a CU offset of 0x00000200. + 0, 0, 0, 28, // unit_length (28 bytes follow this field) + 0, 2, // DWARF version number (uint16_t) + 0, 0, 2, 0, // CU offset = 0x00000200 + 4, // address size + 0, // segment size + 0, 0, 0, 0, // alignment for the first tuple + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x2000-0x2100) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // terminator + }; + + DWARFDataExtractor data; + data.SetData(static_cast(binary_data), sizeof binary_data, + lldb::ByteOrder::eByteOrderBig); + DWARFDebugAranges debug_aranges; + debug_aranges.extract(data); + EXPECT_EQ(debug_aranges.GetNumRanges(), 2u); + EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x1000), 0u); + EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 0u); + EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET); + EXPECT_EQ(debug_aranges.FindAddress(0x2000), 0x200u); + EXPECT_EQ(debug_aranges.FindAddress(0x2100 - 1), 0x200u); + EXPECT_EQ(debug_aranges.FindAddress(0x2100), DW_INVALID_OFFSET); +}