diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -364,12 +364,19 @@
   getLocalsForAddress(object::SectionedAddress Address) override;
 
   bool isLittleEndian() const { return DObj->isLittleEndian(); }
+  /// Highest DWARF version accepted by isSupportedVersion().
+  static unsigned getMaxSupportedVersion() { return 5; }
   static bool isSupportedVersion(unsigned version) {
-    return version == 2 || version == 3 || version == 4 || version == 5;
+    return version >= 2 && version <= getMaxSupportedVersion();
   }
+  /// Address sizes (in bytes) accepted by isAddressSizeSupported().
+  static SmallVector<uint8_t, 3> getSupportedAddressSizes() {
+    return {2, 4, 8};
+  }
   static bool isAddressSizeSupported(unsigned AddressSize) {
-    return AddressSize == 2 || AddressSize == 4 || AddressSize == 8;
+    return llvm::any_of(getSupportedAddressSizes(),
+                        [=](auto Elem) { return Elem == AddressSize; });
   }
 
   std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath);
 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -47,6 +47,10 @@
     return Decls.end();
   }
 
+  /// Returns the abbreviation codes of this set as a printable list in which
+  /// runs of consecutive codes are collapsed into ranges, e.g. "1, 5, 3-4".
+  std::string getCodeRange() const;
+
 private:
   void clear();
 };
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -357,6 +357,9 @@
     return StringOffsetsTableContribution->Base;
   }
 
+  /// Offset of this unit's abbreviation set, as stored in the unit header.
+  uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -67,6 +67,37 @@
   return &Decls[AbbrCode - FirstAbbrCode];
 }
 
+std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
+  // Collect all abbrev codes in the order the set stores them. The list is
+  // not sorted, so only codes adjacent in that order are merged into a range.
+  std::vector<uint32_t> Codes;
+  Codes.reserve(Decls.size());
+  for (const auto &Decl : Decls)
+    Codes.push_back(Decl.getCode());
+
+  std::string Buffer;
+  raw_string_ostream Stream(Buffer);
+  // Each iteration through this loop represents a single contiguous range in
+  // the set of codes.
+  for (auto Current = Codes.begin(), End = Codes.end(); Current != End;) {
+    uint32_t RangeStart = *Current;
+    // Add the current range start.
+    Stream << *Current;
+    uint32_t RangeEnd = RangeStart;
+    // Find the end of the current range.
+    while (++Current != End && *Current == RangeEnd + 1)
+      ++RangeEnd;
+    // If there is more than one value in the range, add the range end too.
+    if (RangeStart != RangeEnd)
+      Stream << "-" << RangeEnd;
+    // If there is at least one more range, add a separator.
+    if (Current != End)
+      Stream << ", ";
+  }
+  // str() flushes the stream, so no pending output is lost on return.
+  return Stream.str();
+}
+
 DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); }
 
 void DWARFDebugAbbrev::clear() {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
@@ -30,17 +31,42 @@
                                       uint64_t UEndOffset, uint32_t D) {
   Offset = *OffsetPtr;
   Depth = D;
-  if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
+  if (Offset >= UEndOffset) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+                          "to offset 0x%8.8" PRIx64 " excl. "
+                          "tries to read DIEs at offset 0x%8.8" PRIx64,
+                          U.getOffset(), U.getNextUnitOffset(), *OffsetPtr));
     return false;
+  }
+  assert(DebugInfoData.isValidOffset(UEndOffset - 1));
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
   if (0 == AbbrCode) {
     // NULL debug tag entry.
     AbbrevDecl = nullptr;
     return true;
   }
-  if (const auto *AbbrevSet = U.getAbbreviations())
-    AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
-  if (nullptr == AbbrevDecl) {
+  const auto *AbbrevSet = U.getAbbreviations();
+  if (!AbbrevSet) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit at offset 0x%8.8" PRIx64 " "
+                          "contains invalid abbreviation set offset 0x%" PRIx64,
+                          U.getOffset(), U.getAbbreviationsOffset()));
+    // Restore the original offset.
+    *OffsetPtr = Offset;
+    return false;
+  }
+  AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+  if (!AbbrevDecl) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit at offset 0x%8.8" PRIx64 " "
+                          "contains invalid abbreviation %" PRIu64 " at "
+                          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+                          U.getOffset(), AbbrCode, *OffsetPtr,
+                          AbbrevSet->getCodeRange().c_str()));
     // Restore the original offset.
     *OffsetPtr = Offset;
     return false;
@@ -62,6 +88,11 @@
                                             OffsetPtr, U.getFormParams())) {
         // We failed to skip this attribute's value, restore the original offset
         // and return the failure status.
+        U.getContext().getWarningHandler()(createStringError(
+            errc::invalid_argument,
+            "DWARF unit at offset 0x%8.8" PRIx64 " "
+            "contains invalid FORM_* 0x%" PRIx16 " at offset 0x%8.8" PRIx64,
+            U.getOffset(), AttrSpec.Form, *OffsetPtr));
         *OffsetPtr = Offset;
         return false;
       }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -259,26 +259,73 @@
   } else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
     DWOId = debug_info.getU64(offset_ptr, &Err);
 
-  if (errorToBool(std::move(Err)))
+  if (Err) {
+    Context.getWarningHandler()(joinErrors(
+        createStringError(
+            errc::invalid_argument,
+            "DWARF unit at 0x%8.8" PRIx64 " cannot be parsed:", Offset),
+        std::move(Err)));
     return false;
+  }
 
   // Header fields all parsed, capture the size of this unit header.
   assert(*offset_ptr - Offset <= 255 && "unexpected header size");
   Size = uint8_t(*offset_ptr - Offset);
+  uint64_t NextCUOffset = Offset + getUnitLengthFieldByteSize() + getLength();
+
+  if (!debug_info.isValidOffset(getNextUnitOffset() - 1)) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+                          "to offset 0x%8.8" PRIx64 " excl. "
+                          "extends past section size 0x%8.8zx",
+                          Offset, NextCUOffset, debug_info.size()));
+    return false;
+  }
+
+  if (!DWARFContext::isSupportedVersion(getVersion())) {
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF unit at offset 0x%8.8" PRIx64 " "
+        "has unsupported version %" PRIu16 ", supported are 2-%u",
+        Offset, getVersion(), DWARFContext::getMaxSupportedVersion()));
+    return false;
+  }
 
   // Type offset is unit-relative; should be after the header and before
   // the end of the current unit.
-  bool TypeOffsetOK =
-      !isTypeUnit()
-          ? true
-          : TypeOffset >= Size &&
-                TypeOffset < getLength() + getUnitLengthFieldByteSize();
-  bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
-  bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
-  bool AddrSizeOK = DWARFContext::isAddressSizeSupported(getAddressByteSize());
-
-  if (!LengthOK || !VersionOK || !AddrSizeOK || !TypeOffsetOK)
+  if (isTypeUnit() && TypeOffset < Size) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF type unit at offset "
+                          "0x%8.8" PRIx64 " "
+                          "has its relocated type_offset 0x%8.8" PRIx64 " "
+                          "pointing inside the header",
+                          Offset, Offset + TypeOffset));
+    return false;
+  }
+  if (isTypeUnit() &&
+      TypeOffset >= getUnitLengthFieldByteSize() + getLength()) {
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF type unit from offset 0x%8.8" PRIx64 " incl. "
+        "to offset 0x%8.8" PRIx64 " excl. has its "
+        "relocated type_offset 0x%8.8" PRIx64 " pointing past the unit end",
+        Offset, NextCUOffset, Offset + TypeOffset));
+    return false;
+  }
+
+  if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
+    SmallVector<std::string, 3> Sizes;
+    for (auto AddrSize : DWARFContext::getSupportedAddressSizes())
+      Sizes.push_back(std::to_string(AddrSize));
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF unit at offset 0x%8.8" PRIx64 " "
+        "has unsupported address size %" PRIu8 ", supported are %s",
+        Offset, getAddressByteSize(), llvm::join(Sizes, ", ").c_str()));
     return false;
+  }
 
   // Keep track of the highest DWARF version we encounter across all units.
   Context.setMaxVersionIfGreater(getVersion());
@@ -361,6 +408,8 @@
   uint64_t NextCUOffset = getNextUnitOffset();
   DWARFDebugInfoEntry DIE;
   DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
+  // The end offset has been already checked by DWARFUnitHeader::extract.
+  assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
   uint32_t Depth = 0;
   bool IsCUDie = true;
 
@@ -385,6 +434,8 @@
       // Normal DIE
       if (AbbrDecl->hasChildren())
         ++Depth;
+      else if (Depth == 0)
+        break; // This unit has a single DIE with no children.
     } else {
       // NULL DIE.
       if (Depth > 0)
@@ -393,17 +444,6 @@
         break; // We are done with this compile unit!
     }
   }
-
-  // Give a little bit of info if we encounter corrupt DWARF (our offset
-  // should always terminate at or before the start of the next compilation
-  // unit header).
-  if (DIEOffset > NextCUOffset)
-    Context.getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF compile unit extends beyond its "
-                          "bounds cu 0x%8.8" PRIx64 " "
-                          "at 0x%8.8" PRIx64 "\n",
-                          getOffset(), DIEOffset));
 }
 
 void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -794,7 +834,7 @@
 
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   if (!Abbrevs)
-    Abbrevs = Abbrev->getAbbreviationDeclarationSet(Header.getAbbrOffset());
+    Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
   return Abbrevs;
 }
 
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
@@ -0,0 +1,111 @@
+## Test llvm-dwarfdump detects and reports invalid DWARF format of the file.
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=CUEND=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=CUEND %s
+# CUEND: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002b excl. tries to read DIEs at offset 0x0000002b
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVSETINVALID=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVSETINVALID %s
+# ABBREVSETINVALID: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation set offset 0x0
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVNO=2 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVNO %s
+# ABBREVNO: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation 2 at offset 0x00000018, valid abbreviations are 1, 5, 3-4
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=FORMNO=0xdead \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=FORMNO %s
+# FORMNO: warning: DWARF unit at offset 0x0000000c contains invalid FORM_* 0xdead at offset 0x00000018
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=SHORTINITLEN=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=SHORTINITLEN %s
+# SHORTINITLEN: warning: DWARF unit at 0x0000002c cannot be parsed:
+# SHORTINITLEN-NEXT: warning: unexpected end of data at offset 0x2d while reading [0x2c, 0x30)
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITBEFORE %s
+# BADTYPEUNITBEFORE: warning: DWARF type unit at offset 0x0000002c has its relocated type_offset 0x0000002d pointing inside the header
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=0x100 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITAFTER %s
+# BADTYPEUNITAFTER: warning: DWARF type unit from offset 0x0000002c incl. to offset 0x00000045 excl. has its relocated type_offset 0x0000012c pointing past the unit end
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=TOOLONG=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=TOOLONG %s
+# TOOLONG: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002d excl. extends past section size 0x0000002c
+
+        .section .debug_abbrev,"",@progbits
+.ifndef ABBREVSETINVALID
+        .uleb128 1                      # Abbreviation Code
+        .uleb128 17                     # DW_TAG_compile_unit
+        .uleb128 1                      # DW_CHILDREN_yes
+        .uleb128 37                     # DW_AT_producer
+.ifndef FORMNO
+        .uleb128 8                      # DW_FORM_string
+.else
+        .uleb128 FORMNO
+.endif
+        .uleb128 0                      # end abbrev 1 DW_AT_*
+        .uleb128 0                      # end abbrev 1 DW_FORM_*
+        .uleb128 5                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 5 DW_AT_*
+        .uleb128 0                      # end abbrev 5 DW_FORM_*
+        .uleb128 3                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 3 DW_AT_*
+        .uleb128 0                      # end abbrev 3 DW_FORM_*
+        .uleb128 4                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 4 DW_AT_*
+        .uleb128 0                      # end abbrev 4 DW_FORM_*
+        .uleb128 0                      # end abbrevs section
+.endif
+
+        .section .debug_info,"",@progbits
+## The first CU only moves the CU under test to a non-zero offset, so that the
+## unit offsets printed in the error messages are checked as well.
+        .long .Lcu_endp-.Lcu_startp     # Length of Unit
+.Lcu_startp:
+        .short 4                        # DWARF version number
+        .long .debug_abbrev             # Offset Into Abbrev. Section
+        .byte 8                         # Address Size (in bytes)
+        .uleb128 0                      # End Of Children Mark
+.Lcu_endp:
+
+.ifndef TOOLONG
+.equ TOOLONG, 0
+.endif
+        .long .Lcu_end0-.Lcu_start0 + TOOLONG # Length of Unit
+.Lcu_start0:
+        .short 4                        # DWARF version number
+        .long .debug_abbrev             # Offset Into Abbrev. Section
+        .byte 8                         # Address Size (in bytes)
+.ifndef ABBREVNO
+        .uleb128 1                      # Abbrev [1] DW_TAG_compile_unit
+.else
+        .uleb128 ABBREVNO
+.endif
+        .asciz "hand-written DWARF"     # DW_AT_producer
+.ifndef CUEND
+        .uleb128 0                      # End Of Children Mark
+.endif
+.Lcu_end0:
+
+.ifdef SHORTINITLEN
+        .byte 0x55                      # Too short Length of Unit
+.endif
+.ifdef BADTYPEUNIT
+        .long .Lcu_end1-.Lcu_start1     # Length of Unit
+.Lcu_start1:
+        .short 5                        # DWARF version number
+        .byte 2                         # DW_UT_type
+        .byte 8                         # Address Size (in bytes)
+        .long .debug_abbrev             # Offset Into Abbrev. Section
+        .quad 0xbaddefacedfacade        # Type Signature
+        .long BADTYPEUNIT               # Type DIE Offset
+        .uleb128 0                      # End Of Children Mark
+.Lcu_end1:
+.endif