[DebugInfo] Report malformed DWARF units and DIEs with specific warnings

What this patch does (everything below is visible in the hunks):
  * DWARFContext gains getMaxSupportedVersion() and getSupportedAddressSizes()
    so that warning messages can name the supported values.
  * DWARFAbbreviationDeclarationSet gains getCodeRange(), which renders the
    abbreviation codes of the set as sorted, comma-separated ranges
    (the new test expects "1, 3-5" for the codes 1, 5, 3, 4).
  * DWARFUnit gains getAbbreviationsOffset(), a wrapper around
    Header.getAbbrOffset().
  * DWARFDebugInfoEntry::extractFast() emits a warning through the context's
    warning handler for each failure path: reading at/after the unit end,
    a missing abbreviation set, an unknown abbreviation code, and a form
    whose value cannot be skipped.
  * DWARFUnitHeader::extract() replaces the single combined "is everything OK"
    test with one warning per failed check (header parse error, type offset,
    unit length, version, address size).
  * DWARFUnit::extractDIEsToVector() stops after a childless top-level DIE and
    drops the old "extends beyond its bounds" warning.
  * A new llvm-mc/llvm-dwarfdump test exercises the warnings.

Review fixes folded into this revision of the patch (all are in-place line
replacements, so every hunk header count is unchanged):
  * getCodeRange(): the vector element type is spelled out (uint32_t, the type
    the loop reads the codes into); a bare "std::vector Codes;" cannot compile.
  * getCodeRange(): returns Stream.str(), which flushes the stream before
    handing back the string, instead of returning the backing buffer while
    the stream is still alive.
  * getCodeRange(): comment typo "look" corrected to "loop".
  * extractFast(): AbbrCode is a uint64_t, so it is formatted with PRIu64
    rather than "%u".
  * DWARFUnitHeader::extract(): Size and debug_info.size() are widened to
    uint64_t explicitly because they are consumed by PRIx64 conversions.
    NOTE(review): Size is declared outside the hunks shown here; the nearby
    "<= 255" assert suggests it is narrower than 64 bits - confirm.
  * Test: the end-of-list comments of abbreviation 5 said "abbrev 4".

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -364,10 +364,12 @@
   getLocalsForAddress(object::SectionedAddress Address) override;
 
   bool isLittleEndian() const { return DObj->isLittleEndian(); }
+  static unsigned getMaxSupportedVersion() { return 5; }
   static bool isSupportedVersion(unsigned version) {
-    return version == 2 || version == 3 || version == 4 || version == 5;
+    return version >= 2 && version <= getMaxSupportedVersion();
   }
 
+  static const char *getSupportedAddressSizes() { return "2, 4, 8"; }
   static bool isAddressSizeSupported(unsigned AddressSize) {
     return AddressSize == 2 || AddressSize == 4 || AddressSize == 8;
   }
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -47,6 +47,8 @@
     return Decls.end();
   }
 
+  std::string getCodeRange() const;
+
 private:
   void clear();
 };
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -357,6 +357,8 @@
     return StringOffsetsTableContribution->Base;
   }
 
+  uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -67,6 +67,37 @@
   return &Decls[AbbrCode - FirstAbbrCode];
 }
 
+std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
+  // Create a sorted list of all abbrev codes.
+  std::vector<uint32_t> Codes;
+  Codes.reserve(Decls.size());
+  llvm::transform(
+      Decls, std::back_inserter(Codes),
+      [](const DWARFAbbreviationDeclaration &Decl) { return Decl.getCode(); });
+  llvm::sort(Codes);
+
+  std::string Buffer;
+  raw_string_ostream Stream(Buffer);
+  // Each iteration through this loop represents a single contiguous range in
+  // the set of codes.
+  for (auto Current = Codes.begin(), End = Codes.end(); Current != End;) {
+    uint32_t RangeStart = *Current;
+    // Add the current range start.
+    Stream << *Current;
+    uint32_t RangeEnd = RangeStart;
+    // Find the end of the current range.
+    while (++Current != End && *Current == RangeEnd + 1)
+      ++RangeEnd;
+    // If there is more than one value in the range, add the range end too.
+    if (RangeStart != RangeEnd)
+      Stream << "-" << RangeEnd;
+    // If there is at least one more range, add a separator.
+    if (Current != End)
+      Stream << ", ";
+  }
+  return Stream.str();
+}
+
 DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); }
 
 void DWARFDebugAbbrev::clear() {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
@@ -30,17 +31,41 @@
                                       uint64_t UEndOffset, uint32_t D) {
   Offset = *OffsetPtr;
   Depth = D;
-  if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
+  if (Offset >= UEndOffset) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "tries to read DIEs at offset 0x%8.8" PRIx64,
+                          U.getOffset(), U.getNextUnitOffset(), *OffsetPtr));
     return false;
+  }
+  assert(DebugInfoData.isValidOffset(UEndOffset - 1));
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
   if (0 == AbbrCode) {
     // NULL debug tag entry.
     AbbrevDecl = nullptr;
     return true;
   }
-  if (const auto *AbbrevSet = U.getAbbreviations())
-    AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
-  if (nullptr == AbbrevDecl) {
+  const auto *AbbrevSet = U.getAbbreviations();
+  if (!AbbrevSet) {
+    U.getContext().getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+        "contains invalid abbreviation set offset 0x%" PRIx64,
+        U.getOffset(), U.getNextUnitOffset(), U.getAbbreviationsOffset()));
+    // Restore the original offset.
+    *OffsetPtr = Offset;
+    return false;
+  }
+  AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+  if (!AbbrevDecl) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "contains invalid abbreviation %" PRIu64 " at "
+                          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+                          U.getOffset(), U.getNextUnitOffset(), AbbrCode,
+                          *OffsetPtr, AbbrevSet->getCodeRange().c_str()));
     // Restore the original offset.
     *OffsetPtr = Offset;
     return false;
@@ -62,6 +87,11 @@
                                           OffsetPtr, U.getFormParams())) {
       // We failed to skip this attribute's value, restore the original offset
       // and return the failure status.
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+          "contains invalid FORM_* 0x%x at offset 0x%8.8" PRIx64,
+          U.getOffset(), U.getNextUnitOffset(), AttrSpec.Form, *OffsetPtr));
       *OffsetPtr = Offset;
       return false;
     }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -259,8 +259,14 @@
   } else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
     DWOId = debug_info.getU64(offset_ptr, &Err);
 
-  if (errorToBool(std::move(Err)))
+  if (Err) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit at 0x%8.8" PRIx64 " "
+                          "cannot be parsed: %s",
+                          Offset, toString(std::move(Err)).c_str()));
     return false;
+  }
 
   // Header fields all parsed, capture the size of this unit header.
   assert(*offset_ptr - Offset <= 255 && "unexpected header size");
@@ -268,17 +274,47 @@
 
   // Type offset is unit-relative; should be after the header and before
   // the end of the current unit.
-  bool TypeOffsetOK =
-      !isTypeUnit()
-          ? true
-          : TypeOffset >= Size &&
-                TypeOffset < getLength() + getUnitLengthFieldByteSize();
-  bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
-  bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
-  bool AddrSizeOK = DWARFContext::isAddressSizeSupported(getAddressByteSize());
-
-  if (!LengthOK || !VersionOK || !AddrSizeOK || !TypeOffsetOK)
+  uint64_t NextCUOffset = Offset + getUnitLengthFieldByteSize() + getLength();
+  if (isTypeUnit() &&
+      (TypeOffset < Size ||
+       TypeOffset >= getUnitLengthFieldByteSize() + getLength())) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF Type unit "
+                          "[0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "has its relative Type offset 0x%8.8" PRIx64 " "
+                          "outside of its relative boundary "
+                          "[0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ")",
+                          Offset, NextCUOffset, TypeOffset, uint64_t(Size),
+                          getLength() + getUnitLengthFieldByteSize()));
     return false;
+  }
+  if (!debug_info.isValidOffset(getNextUnitOffset() - 1)) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "extends past section size 0x%8.8" PRIx64,
+                          Offset, NextCUOffset, uint64_t(debug_info.size())));
+    return false;
+  }
+  if (!DWARFContext::isSupportedVersion(getVersion())) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "has unsupported version %u, supported are 2-%u",
+                          Offset, NextCUOffset, getVersion(),
+                          DWARFContext::getMaxSupportedVersion()));
+    return false;
+  }
+  if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit [0x%8.8" PRIx64 ", 0x%8.8" PRIx64 ") "
+                          "has unsupported address size %u, supported are %s",
+                          Offset, NextCUOffset, getAddressByteSize(),
+                          DWARFContext::getSupportedAddressSizes()));
+    return false;
+  }
 
   // Keep track of the highest DWARF version we encounter across all units.
   Context.setMaxVersionIfGreater(getVersion());
@@ -361,6 +397,8 @@
   uint64_t NextCUOffset = getNextUnitOffset();
   DWARFDebugInfoEntry DIE;
   DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
+  // The end offset has been already checked by DWARFUnitHeader::extract.
+  assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
   uint32_t Depth = 0;
   bool IsCUDie = true;
 
@@ -385,6 +423,8 @@
       // Normal DIE
       if (AbbrDecl->hasChildren())
         ++Depth;
+      else if (Depth == 0)
+        break; // This unit has a single DIE with no children.
     } else {
       // NULL DIE.
       if (Depth > 0)
@@ -393,17 +433,6 @@
         break; // We are done with this compile unit!
     }
   }
-
-  // Give a little bit of info if we encounter corrupt DWARF (our offset
-  // should always terminate at or before the start of the next compilation
-  // unit header).
-  if (DIEOffset > NextCUOffset)
-    Context.getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF compile unit extends beyond its "
-                          "bounds cu 0x%8.8" PRIx64 " "
-                          "at 0x%8.8" PRIx64 "\n",
-                          getOffset(), DIEOffset));
 }
 
 void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -794,7 +823,7 @@
 
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   if (!Abbrevs)
-    Abbrevs = Abbrev->getAbbreviationDeclarationSet(Header.getAbbrOffset());
+    Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
   return Abbrevs;
 }
 
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
@@ -0,0 +1,106 @@
+## Test llvm-dwarfdump detects and reports invalid DWARF format of the file.
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=CUEND=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=CUEND %s
+# CUEND: warning: DWARF unit [0x0000000c, 0x0000002b) tries to read DIEs at offset 0x0000002b
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVSETINVALID=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVSETINVALID %s
+# ABBREVSETINVALID: warning: DWARF unit [0x0000000c, 0x0000002c) contains invalid abbreviation set offset 0x0
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVNO=2 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVNO %s
+# ABBREVNO: warning: DWARF unit [0x0000000c, 0x0000002c) contains invalid abbreviation 2 at offset 0x00000018, valid abbreviations are 1, 3-5
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=FORMNO=0xdead \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=FORMNO %s
+# FORMNO: warning: DWARF unit [0x0000000c, 0x0000002c) contains invalid FORM_* 0xdead at offset 0x00000018
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=SHORTINITLEN=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=SHORTINITLEN %s
+# SHORTINITLEN: warning: DWARF unit at 0x0000002c cannot be parsed: unexpected end of data at offset 0x2d while reading [0x2c, 0x30)
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNIT %s
+# BADTYPEUNIT: warning: DWARF Type unit [0x0000002c, 0x00000045) has its relative Type offset 0x00000007 outside of its relative boundary [0x00000018, 0x00000019)
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=TOOLONG=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=TOOLONG %s
+# TOOLONG: warning: DWARF unit [0x0000000c, 0x0000002d) extends past section size 0x0000002c
+
+        .section .debug_abbrev,"",@progbits
+.ifndef ABBREVSETINVALID
+        .uleb128 1                      # Abbreviation Code
+        .uleb128 17                     # DW_TAG_compile_unit
+        .uleb128 1                      # DW_CHILDREN_yes
+        .uleb128 37                     # DW_AT_producer
+.ifndef FORMNO
+        .uleb128 8                      # DW_FORM_string
+.else
+        .uleb128 FORMNO
+.endif
+        .uleb128 0                      # end abbrev 1 DW_AT_*
+        .uleb128 0                      # end abbrev 1 DW_FORM_*
+        .uleb128 5                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 5 DW_AT_*
+        .uleb128 0                      # end abbrev 5 DW_FORM_*
+        .uleb128 3                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 3 DW_AT_*
+        .uleb128 0                      # end abbrev 3 DW_FORM_*
+        .uleb128 4                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 4 DW_AT_*
+        .uleb128 0                      # end abbrev 4 DW_FORM_*
+        .uleb128 0                      # end abbrevs section
+.endif
+
+        .section .debug_info,"",@progbits
+## The first CU is here to shift the next CU being really tested to non-zero CU
+## offset to check more for error messages.
+        .long   .Lcu_endp-.Lcu_startp   # Length of Unit
+.Lcu_startp:
+        .short  4                       # DWARF version number
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   8                       # Address Size (in bytes)
+        .uleb128 0                      # End Of Children Mark
+.Lcu_endp:
+
+.ifndef TOOLONG
+.equ TOOLONG, 0
+.endif
+        .long   .Lcu_end0-.Lcu_start0 + TOOLONG # Length of Unit
+.Lcu_start0:
+        .short  4                       # DWARF version number
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   8                       # Address Size (in bytes)
+.ifndef ABBREVNO
+        .uleb128 1                      # Abbrev [1] DW_TAG_compile_unit
+.else
+        .uleb128 ABBREVNO
+.endif
+        .asciz  "hand-written DWARF"    # DW_AT_producer
+.ifndef CUEND
+        .uleb128 0                      # End Of Children Mark
+.endif
+.Lcu_end0:
+
+.ifdef SHORTINITLEN
+        .byte   0x55                    # too short Length of Unit
+.endif
+.ifdef BADTYPEUNIT
+        .long   .Lcu_end1-.Lcu_start1   # Length of Unit
+.Lcu_start1:
+        .short  5                       # DWARF version number
+        .byte   2                       # DW_UT_type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .quad   0xbaddefacedfacade      # Type Signature
+        .long   7                       # Type DIE Offset
+        .uleb128 0                      # End Of Children Mark
+.Lcu_end1:
+.endif