Index: include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -245,7 +245,7 @@ struct Header : public HeaderPOD { SmallString<8> AugmentationString; - Error extract(const DWARFDataExtractor &AS, uint32_t *Offset); + void extract(DWARFDataStream &DS); void dump(ScopedPrinter &W) const; }; @@ -406,12 +406,11 @@ Optional Hash) const; void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const; - Expected extractAttributeEncoding(uint32_t *Offset); + AttributeEncoding extractAttributeEncoding(DataStream &DS); - Expected> - extractAttributeEncodings(uint32_t *Offset); + std::vector extractAttributeEncodings(DataStream &DS); - Expected extractAbbrev(uint32_t *Offset); + Abbrev extractAbbrev(DataStream &DS); public: NameIndex(const DWARFDebugNames &Section, uint32_t Base) Index: include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -54,6 +54,24 @@ size_t size() const { return Section == nullptr ? 0 : Section->Data.size(); } }; +class DWARFDataStream : public DataStream { +public: + DWARFDataStream(const DWARFDataExtractor &DE, uint32_t Offset) + : DataStream(DE, Offset) {} + + const DWARFDataExtractor &getExtractor() const { + return static_cast(DataStream::getExtractor()); + } + + uint64_t getRelocatedAddress(uint64_t *SecIx = nullptr) { + uint32_t SavedOffset = Offset; + uint64_t Result = getExtractor().getRelocatedAddress(&Offset, SecIx); + if (SavedOffset == Offset) + Error = true; + return Result; + } +}; + } // end namespace llvm #endif // LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H Index: include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -68,8 +68,7 @@ /// Return the location list at the given offset or nullptr. LocationList const *getLocationListAtOffset(uint64_t Offset) const; - static Expected parseOneLocationList(DWARFDataExtractor Data, - uint32_t *Offset); + static Expected parseOneLocationList(DWARFDataStream &DS); }; class DWARFDebugLoclists { @@ -106,8 +105,8 @@ /// Return the location list at the given offset or nullptr. LocationList const *getLocationListAtOffset(uint64_t Offset) const; - static Expected - parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version); + static Expected parseOneLocationList(DataStream &DS, + unsigned Version); }; } // end namespace llvm Index: include/llvm/Support/DataExtractor.h =================================================================== --- include/llvm/Support/DataExtractor.h +++ include/llvm/Support/DataExtractor.h @@ -422,6 +422,59 @@ } }; +class DataStream { +public: + DataStream(const DataExtractor &DE, uint32_t Offset = 0) + : DE(DE), Offset(Offset), Error(false) {} + DataStream(const DataStream &) = delete; + DataStream &operator=(const DataStream &) = delete; + + uint32_t tell() const { return Offset; } + explicit operator bool() const { return !Error; } + bool eof() const { return Offset == DE.getData().size(); } + void skip(uint32_t Value) { + if (DE.isValidOffsetForDataOfSize(Offset, Value)) + Offset += Value; + else + Error = true; + } + + uint8_t getU8() { return wrap(&DataExtractor::getU8); } + uint8_t *getU8(uint8_t *dst, uint32_t count) { + // If count == 0, Offset is validly not updated. + return count ? wrap(&DataExtractor::getU8, dst, count) : dst; + } + uint16_t getU16() { return wrap(&DataExtractor::getU16); } + uint32_t getU32() { return wrap(&DataExtractor::getU32); } + uint64_t getULEB128() { return wrap(&DataExtractor::getULEB128); } + uint64_t getAddress() { return wrap(&DataExtractor::getAddress); } + StringRef getBytes(uint32_t Size) { + if (!DE.isValidOffsetForDataOfSize(Offset, Size)) { + Error = true; + return StringRef(); + } + StringRef Bytes = DE.getData().substr(Offset, Size); + Offset += Size; + return Bytes; + } + + const DataExtractor &getExtractor() const { return DE; } + +protected: + const DataExtractor &DE; + uint32_t Offset; + bool Error; + + template + Ret wrap(Ret (DataExtractor::*Fun)(uint32_t *, Ts...) const, Ts... Vals) { + uint32_t SavedOffset = Offset; + Ret Result = (DE.*Fun)(&Offset, Vals...); + if (SavedOffset == Offset) + Error = true; + return Result; + } +}; + } // namespace llvm #endif Index: lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -42,19 +42,26 @@ DWARFAcceleratorTable::~DWARFAcceleratorTable() = default; Error AppleAcceleratorTable::extract() { - uint32_t Offset = 0; + DataStream DS(AccelSection); - // Check that we can at least read the header. - if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4)) - return createStringError(errc::illegal_byte_sequence, - "Section too small: cannot read header."); + Hdr.Magic = DS.getU32(); + Hdr.Version = DS.getU16(); + Hdr.HashFunction = DS.getU16(); + Hdr.BucketCount = DS.getU32(); + Hdr.HashCount = DS.getU32(); + Hdr.HeaderDataLength = DS.getU32(); + + HdrData.DIEOffsetBase = DS.getU32(); + uint32_t NumAtoms = DS.getU32(); + + for (unsigned i = 0; i < NumAtoms; ++i) { + uint16_t AtomType = DS.getU16(); + auto AtomForm = static_cast(DS.getU16()); + HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm)); + } - Hdr.Magic = AccelSection.getU32(&Offset); - Hdr.Version = AccelSection.getU16(&Offset); - Hdr.HashFunction = AccelSection.getU16(&Offset); - Hdr.BucketCount = AccelSection.getU32(&Offset); - Hdr.HashCount = AccelSection.getU32(&Offset); - Hdr.HeaderDataLength = AccelSection.getU32(&Offset); + if (!DS) + return createStringError(errc::illegal_byte_sequence, "Section too small"); // Check that we can read all the hashes and offsets from the // section (see SourceLevelDebugging.rst for the structure of the index). @@ -66,15 +73,6 @@ errc::illegal_byte_sequence, "Section too small: cannot read buckets and hashes."); - HdrData.DIEOffsetBase = AccelSection.getU32(&Offset); - uint32_t NumAtoms = AccelSection.getU32(&Offset); - - for (unsigned i = 0; i < NumAtoms; ++i) { - uint16_t AtomType = AccelSection.getU16(&Offset); - auto AtomForm = static_cast(AccelSection.getU16(&Offset)); - HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm)); - } - IsValid = true; return Error::success(); } @@ -376,32 +374,21 @@ W.startLine() << "Augmentation: '" << AugmentationString << "'\n"; } -Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS, - uint32_t *Offset) { - // Check that we can read the fixed-size part. - if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1)) - return createStringError(errc::illegal_byte_sequence, - "Section too small: cannot read header."); - - UnitLength = AS.getU32(Offset); - Version = AS.getU16(Offset); - Padding = AS.getU16(Offset); - CompUnitCount = AS.getU32(Offset); - LocalTypeUnitCount = AS.getU32(Offset); - ForeignTypeUnitCount = AS.getU32(Offset); - BucketCount = AS.getU32(Offset); - NameCount = AS.getU32(Offset); - AbbrevTableSize = AS.getU32(Offset); - AugmentationStringSize = alignTo(AS.getU32(Offset), 4); - - if (!AS.isValidOffsetForDataOfSize(*Offset, AugmentationStringSize)) - return createStringError( - errc::illegal_byte_sequence, - "Section too small: cannot read header augmentation."); +void DWARFDebugNames::Header::extract(DWARFDataStream &DS) { + UnitLength = DS.getU32(); + Version = DS.getU16(); + Padding = DS.getU16(); + CompUnitCount = DS.getU32(); + LocalTypeUnitCount = DS.getU32(); + ForeignTypeUnitCount = DS.getU32(); + BucketCount = DS.getU32(); + NameCount = DS.getU32(); + AbbrevTableSize = DS.getU32(); + + AugmentationStringSize = alignTo(DS.getU32(), 4); AugmentationString.resize(AugmentationStringSize); - AS.getU8(Offset, reinterpret_cast(AugmentationString.data()), + DS.getU8(reinterpret_cast(AugmentationString.data()), AugmentationStringSize); - return Error::success(); } void DWARFDebugNames::Abbrev::dump(ScopedPrinter &W) const { @@ -436,87 +423,82 @@ return DWARFDebugNames::Abbrev(~0, dwarf::Tag(0), {}); } -Expected -DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) { - if (*Offset >= EntriesBase) { - return createStringError(errc::illegal_byte_sequence, - "Incorrectly terminated abbreviation table."); - } - - uint32_t Index = Section.AccelSection.getULEB128(Offset); - uint32_t Form = Section.AccelSection.getULEB128(Offset); +DWARFDebugNames::AttributeEncoding +DWARFDebugNames::NameIndex::extractAttributeEncoding(DataStream &DS) { + uint32_t Index = DS.getULEB128(); + uint32_t Form = DS.getULEB128(); return AttributeEncoding(dwarf::Index(Index), dwarf::Form(Form)); } -Expected> -DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) { +std::vector +DWARFDebugNames::NameIndex::extractAttributeEncodings(DataStream &DS) { std::vector Result; for (;;) { - auto AttrEncOr = extractAttributeEncoding(Offset); - if (!AttrEncOr) - return AttrEncOr.takeError(); - if (isSentinel(*AttrEncOr)) - return std::move(Result); + AttributeEncoding AttrEnc = extractAttributeEncoding(DS); + if (isSentinel(AttrEnc)) + return Result; - Result.emplace_back(*AttrEncOr); + Result.emplace_back(AttrEnc); } } -Expected -DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) { - if (*Offset >= EntriesBase) { - return createStringError(errc::illegal_byte_sequence, - "Incorrectly terminated abbreviation table."); - } - - uint32_t Code = Section.AccelSection.getULEB128(Offset); +DWARFDebugNames::Abbrev +DWARFDebugNames::NameIndex::extractAbbrev(DataStream &DS) { + uint32_t Code = DS.getULEB128(); if (Code == 0) return sentinelAbbrev(); - uint32_t Tag = Section.AccelSection.getULEB128(Offset); - auto AttrEncOr = extractAttributeEncodings(Offset); - if (!AttrEncOr) - return AttrEncOr.takeError(); - return Abbrev(Code, dwarf::Tag(Tag), std::move(*AttrEncOr)); + uint32_t Tag = DS.getULEB128(); + std::vector AttrEnc = extractAttributeEncodings(DS); + return Abbrev(Code, dwarf::Tag(Tag), std::move(AttrEnc)); } Error DWARFDebugNames::NameIndex::extract() { - const DWARFDataExtractor &AS = Section.AccelSection; - uint32_t Offset = Base; - if (Error E = Hdr.extract(AS, &Offset)) - return E; - - CUsBase = Offset; - Offset += Hdr.CompUnitCount * 4; - Offset += Hdr.LocalTypeUnitCount * 4; - Offset += Hdr.ForeignTypeUnitCount * 8; - BucketsBase = Offset; - Offset += Hdr.BucketCount * 4; - HashesBase = Offset; + DWARFDataStream DS(Section.AccelSection, Base); + Hdr.extract(DS); + + CUsBase = DS.tell(); + DS.skip(Hdr.CompUnitCount * 4); + DS.skip(Hdr.LocalTypeUnitCount * 4); + DS.skip(Hdr.ForeignTypeUnitCount * 8); + BucketsBase = DS.tell(); + DS.skip(Hdr.BucketCount * 4); + HashesBase = DS.tell(); if (Hdr.BucketCount > 0) - Offset += Hdr.NameCount * 4; - StringOffsetsBase = Offset; - Offset += Hdr.NameCount * 4; - EntryOffsetsBase = Offset; - Offset += Hdr.NameCount * 4; - - if (!AS.isValidOffsetForDataOfSize(Offset, Hdr.AbbrevTableSize)) + DS.skip(Hdr.NameCount * 4); + StringOffsetsBase = DS.tell(); + DS.skip(Hdr.NameCount * 4); + EntryOffsetsBase = DS.tell(); + DS.skip(Hdr.NameCount * 4); + + if (!DS) + return createStringError(errc::illegal_byte_sequence, "Section too small."); + + uint32_t AbbrevBase = DS.tell(); + if (!DS.getExtractor().isValidOffsetForDataOfSize(AbbrevBase, + Hdr.AbbrevTableSize)) return createStringError(errc::illegal_byte_sequence, "Section too small: cannot read abbreviations."); - EntriesBase = Offset + Hdr.AbbrevTableSize; + EntriesBase = DS.tell() + Hdr.AbbrevTableSize; + DataExtractor AbbrevExtractor( + DS.getExtractor().getData().substr(AbbrevBase, Hdr.AbbrevTableSize), + DS.getExtractor().isLittleEndian(), DS.getExtractor().getAddressSize()); + DataStream AbbrevStream(AbbrevExtractor, 0); for (;;) { - auto AbbrevOr = extractAbbrev(&Offset); - if (!AbbrevOr) - return AbbrevOr.takeError(); - if (isSentinel(*AbbrevOr)) - return Error::success(); + Abbrev Abbr = extractAbbrev(AbbrevStream); + if (isSentinel(Abbr)) + break; - if (!Abbrevs.insert(std::move(*AbbrevOr)).second) + if (!Abbrevs.insert(std::move(Abbr)).second) return createStringError(errc::invalid_argument, "Duplicate abbreviation code."); } + if (!AbbrevStream) + return createStringError(errc::illegal_byte_sequence, + "Incorrectly terminated abbreviation table."); + return Error::success(); } DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr) Index: lib/DebugInfo/DWARF/DWARFDebugLoc.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -93,39 +93,32 @@ } Expected -DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) { +DWARFDebugLoc::parseOneLocationList(DWARFDataStream &DS) { LocationList LL; - LL.Offset = *Offset; + LL.Offset = DS.tell(); // 2.6.2 Location Lists // A location list entry consists of: while (true) { Entry E; - if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) - return createOverflowError("debug_loc"); // 1. A beginning address offset. ... - E.Begin = Data.getRelocatedAddress(Offset); + E.Begin = DS.getRelocatedAddress(); // 2. An ending address offset. ... - E.End = Data.getRelocatedAddress(Offset); + E.End = DS.getRelocatedAddress(); + if (!DS) + return createOverflowError("debug_loc"); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the // ending address offset. if (E.Begin == 0 && E.End == 0) return LL; - if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) - return createOverflowError("debug_loc"); - - unsigned Bytes = Data.getU16(Offset); - if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) - return createOverflowError("debug_loc"); - + unsigned Bytes = DS.getU16(); // A single location description describing the location of the object... - StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; + StringRef str = DS.getBytes(Bytes); E.Loc.reserve(str.size()); llvm::copy(str, std::back_inserter(E.Loc)); LL.Entries.push_back(std::move(E)); @@ -136,101 +129,73 @@ IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset + data.getAddressSize() - 1)) { - if (auto LL = parseOneLocationList(data, &Offset)) + DWARFDataStream DS(data, 0); + while (!DS.eof()) { + if (auto LL = parseOneLocationList(DS)) Locations.push_back(std::move(*LL)); else { logAllUnhandledErrors(LL.takeError(), WithColor::error()); break; } } - if (data.isValidOffset(Offset)) - WithColor::error() << "failed to consume entire .debug_loc section\n"; } Expected -DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset, - unsigned Version) { +DWARFDebugLoclists::parseOneLocationList(DataStream &DS, unsigned Version) { LocationList LL; - LL.Offset = *Offset; - - while (true) { - if (!Data.isValidOffset(*Offset)) - return createOverflowError("debug_loclists"); + LL.Offset = DS.tell(); + // dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list. + while (auto Kind = static_cast(DS.getU8())) { Entry E; - E.Kind = static_cast(Data.getU8(Offset)); - if (E.Kind == dwarf::DW_LLE_end_of_list) - return LL; - - // Verify that the reads were sucessful by checking the offset field before - // and after the read. This relies on DataExtractor not updating the offset - // on failure. - // TODO: Simplify this code once we have a better way of handling - // DataExtractor errors. - unsigned SavedOffset = *Offset; - switch (E.Kind) { - case dwarf::DW_LLE_startx_length: - case dwarf::DW_LLE_offset_pair: - E.Value0 = Data.getULEB128(Offset); - break; - case dwarf::DW_LLE_start_length: - case dwarf::DW_LLE_base_address: - E.Value0 = Data.getAddress(Offset); - break; - default: - return createError("LLE of kind %x not implemented", (int)E.Kind); - } - if (SavedOffset == *Offset) - return createOverflowError("debug_loclists"); - - SavedOffset = *Offset; - switch (E.Kind) { + E.Kind = Kind; + switch (Kind) { case dwarf::DW_LLE_startx_length: + E.Value0 = DS.getULEB128(); // Pre-DWARF 5 has different interpretation of the length field. We have // to support both pre- and standartized styles for the compatibility. if (Version < 5) - E.Value1 = Data.getU32(Offset); + E.Value1 = DS.getU32(); else - E.Value1 = Data.getULEB128(Offset); + E.Value1 = DS.getULEB128(); break; case dwarf::DW_LLE_start_length: + E.Value0 = DS.getAddress(); + E.Value1 = DS.getULEB128(); + break; case dwarf::DW_LLE_offset_pair: - E.Value1 = Data.getULEB128(Offset); + E.Value0 = DS.getULEB128(); + E.Value1 = DS.getULEB128(); break; case dwarf::DW_LLE_base_address: + E.Value0 = DS.getAddress(); break; + default: + return createError("LLE of kind %x not implemented", (int)E.Kind); } - if (E.Kind != dwarf::DW_LLE_base_address) { - if (SavedOffset == *Offset) - return createOverflowError("debug_loclists"); - - SavedOffset = *Offset; - unsigned Bytes = - Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset); - if (SavedOffset == *Offset || - !Data.isValidOffsetForDataOfSize(*Offset, Bytes)) - return createOverflowError("debug_loclists"); + if (Kind != dwarf::DW_LLE_base_address) { + unsigned Bytes = Version >= 5 ? DS.getULEB128() : DS.getU16(); // A single location description describing the location of the object... - StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; + StringRef str = DS.getBytes(Bytes); E.Loc.resize(str.size()); llvm::copy(str, E.Loc.begin()); } LL.Entries.push_back(std::move(E)); } + if (!DS) + return createOverflowError("debug_loclists"); + return LL; } void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset)) { - if (auto LL = parseOneLocationList(data, &Offset, Version)) + DataStream DS(data); + while (!DS.eof()) { + if (auto LL = parseOneLocationList(DS, Version)) Locations.push_back(std::move(*LL)); else { logAllUnhandledErrors(LL.takeError(), WithColor::error()); Index: lib/DebugInfo/DWARF/DWARFDie.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDie.cpp +++ lib/DebugInfo/DWARF/DWARFDie.cpp @@ -97,7 +97,8 @@ DWARFDebugLoc DebugLoc; DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(), Obj.getAddressSize()); - if (auto LL = DebugLoc.parseOneLocationList(Data, &Offset)) { + DWARFDataStream DS(Data, Offset); + if (auto LL = DebugLoc.parseOneLocationList(DS)) { uint64_t BaseAddr = 0; if (Optional BA = U->getBaseAddress()) BaseAddr = BA->Address; @@ -121,8 +122,9 @@ // Modern locations list (.debug_loclists) are used starting from v5. // Ideally we should take the version from the .debug_loclists section // header, but using CU's version for simplicity. + DataStream DS(Data, Offset); auto LL = DWARFDebugLoclists::parseOneLocationList( - Data, &Offset, UseLocLists ? U->getVersion() : 4); + DS, UseLocLists ? U->getVersion() : 4); uint64_t BaseAddr = 0; if (Optional BA = U->getBaseAddress()) Index: test/tools/llvm-dwarfdump/X86/debug-names-verify-short1.s =================================================================== --- test/tools/llvm-dwarfdump/X86/debug-names-verify-short1.s +++ test/tools/llvm-dwarfdump/X86/debug-names-verify-short1.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ # RUN: not llvm-dwarfdump -verify - | FileCheck %s -# CHECK: Section too small: cannot read header. +# CHECK: Section too small. .section .debug_str,"MS",@progbits,1 .Lstring_producer: Index: test/tools/llvm-dwarfdump/X86/debug-names-verify-short2.s =================================================================== --- test/tools/llvm-dwarfdump/X86/debug-names-verify-short2.s +++ test/tools/llvm-dwarfdump/X86/debug-names-verify-short2.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ # RUN: not llvm-dwarfdump -verify - | FileCheck %s -# CHECK: Section too small: cannot read header augmentation. +# CHECK: Section too small. .section .debug_str,"MS",@progbits,1 .Lstring_producer: