Index: include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -21,6 +21,9 @@ const DWARFObject *Obj = nullptr; const DWARFSection *Section = nullptr; + uint64_t relocateValue(size_t Offset, uint64_t Value, + uint64_t *SectionIndex) const; + public: /// Constructor for the normal case of extracting data from a DWARF section. /// The DWARFSection's lifetime must be at least as long as the extractor's. @@ -38,12 +41,19 @@ uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off, uint64_t *SectionIndex = nullptr) const; + uint64_t getRelocatedValue(uint32_t Size, Cursor &C, + uint64_t *SectionIndex = nullptr) const; + /// Extracts an address-sized value and applies a relocation to the result if /// one exists for the given offset. uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const { return getRelocatedValue(getAddressSize(), Off, SecIx); } + uint64_t getRelocatedAddress(Cursor &C, uint64_t *SecIx = nullptr) const { + return getRelocatedValue(getAddressSize(), C, SecIx); + } + /// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding. /// There is a DWARF encoding that uses a PC-relative adjustment. 
/// For these values, \p AbsPosOffset is used to fix them, which should Index: include/llvm/Support/DataExtractor.h =================================================================== --- include/llvm/Support/DataExtractor.h +++ include/llvm/Support/DataExtractor.h @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Error.h" namespace llvm { @@ -41,7 +42,35 @@ StringRef Data; uint8_t IsLittleEndian; uint8_t AddressSize; + + template + bool isValidOffsetForDataOfSizeT(T Offset, T Length) const { + return Offset + Length >= Offset && isValidOffset(Offset + Length - 1); + } + + template + T getU(OffsetT *OffsetPtr, llvm::Error *Err) const; + + template + T *getUs(OffsetT *OffsetPtr, T *Dst, OffsetT Count, llvm::Error *Err) const; + + template + uint64_t getULEB128(OffsetT *OffsetPtr, llvm::Error *Err) const; + public: + class Cursor { + size_t Offset; + Error Err; + + friend class DataExtractor; + + public: + explicit Cursor(size_t Offset) : Offset(Offset), Err(Error::success()) {} + explicit operator bool() { return !Err; } + uint32_t tell() const { return Offset; } + Error takeError() { return std::move(Err); } + }; + /// Construct with a buffer that is owned by the caller. /// /// This constructor allows us to use data that is owned by the @@ -128,6 +157,7 @@ /// The unsigned integer value that was extracted, or zero on /// failure. uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; + uint64_t getUnsigned(Cursor &C, uint32_t Size) const; /// Extract an signed integer of size \a byte_size from \a *offset_ptr. /// @@ -174,6 +204,7 @@ uint64_t getAddress(uint32_t *offset_ptr) const { return getUnsigned(offset_ptr, AddressSize); } + uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); } /// Extract a uint8_t value from \a *offset_ptr. /// @@ -190,6 +221,7 @@ /// @return /// The extracted uint8_t value. 
uint8_t getU8(uint32_t *offset_ptr) const; + uint8_t getU8(Cursor &C) const; /// Extract \a count uint8_t values from \a *offset_ptr. /// @@ -215,6 +247,19 @@ /// \a dst if all values were properly extracted and copied, /// NULL otherise. uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; + uint8_t *getU8(Cursor &C, uint8_t *Dst, size_t Count) const; + + template + void getU8(Cursor &C, SmallVectorImpl &Dst, size_t Count) const { + static_assert( + std::is_same::value || std::is_same::value, ""); + if (isValidOffsetForDataOfSizeT(C.Offset, Count)) + Dst.resize(Count); + + // This relies on the fact that getU8 will not attempt to write to the + // buffer if isValidOffsetForDataOfSizeT(C.Offset, Count) is false. + getU8(C, reinterpret_cast(Dst.data()), Count); + } //------------------------------------------------------------------ /// Extract a uint16_t value from \a *offset_ptr. @@ -233,6 +278,7 @@ /// The extracted uint16_t value. //------------------------------------------------------------------ uint16_t getU16(uint32_t *offset_ptr) const; + uint16_t getU16(Cursor &C) const; /// Extract \a count uint16_t values from \a *offset_ptr. /// @@ -291,6 +337,7 @@ /// @return /// The extracted uint32_t value. uint32_t getU32(uint32_t *offset_ptr) const; + uint32_t getU32(Cursor &C) const; /// Extract \a count uint32_t values from \a *offset_ptr. /// @@ -332,6 +379,7 @@ /// @return /// The extracted uint64_t value. uint64_t getU64(uint32_t *offset_ptr) const; + uint64_t getU64(Cursor &C) const; /// Extract \a count uint64_t values from \a *offset_ptr. /// @@ -393,13 +441,17 @@ /// @return /// The extracted unsigned integer value. uint64_t getULEB128(uint32_t *offset_ptr) const; + uint64_t getULEB128(Cursor &C) const; + + void skip(Cursor &C, size_t Length) const; + bool eof(const Cursor &C) const { return Data.size() == C.Offset; } /// Test the validity of \a offset. 
/// /// @return /// \b true if \a offset is a valid offset into the data in this /// object, \b false otherwise. - bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } + bool isValidOffset(size_t Offset) const { return Data.size() > Offset; } /// Test the availability of \a length bytes of data from \a offset. /// @@ -407,7 +459,7 @@ /// \b true if \a offset is a valid offset and there are \a /// length bytes available at that offset, \b false otherwise. bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { - return offset + length >= offset && isValidOffset(offset + length - 1); + return isValidOffsetForDataOfSizeT(offset, length); } /// Test the availability of enough bytes of data for a pointer from Index: lib/DebugInfo/DWARF/DWARFDataExtractor.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -12,24 +12,35 @@ using namespace llvm; -uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SecNdx) const { - if (SecNdx) - *SecNdx = object::SectionedAddress::UndefSection; +uint64_t DWARFDataExtractor::relocateValue(size_t Offset, uint64_t Value, + uint64_t *SectionIndex) const { + if (SectionIndex) + *SectionIndex = object::SectionedAddress::UndefSection; if (!Section) - return getUnsigned(Off, Size); - Optional E = Obj->find(*Section, *Off); - uint64_t A = getUnsigned(Off, Size); + return Value; + Optional E = Obj->find(*Section, Offset); if (!E) - return A; - if (SecNdx) - *SecNdx = E->SectionIndex; - uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, A); + return Value; + if (SectionIndex) + *SectionIndex = E->SectionIndex; + uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, Value); if (E->Reloc2) R = E->Resolver(*E->Reloc2, E->SymbolValue2, R); return R; } +uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, + uint64_t *SectionIndex) const { + 
size_t Offset = *Off; + return relocateValue(Offset, getUnsigned(Off, Size), SectionIndex); +} + +uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, Cursor &C, + uint64_t *SectionIndex) const { + size_t Offset = C.tell(); + return relocateValue(Offset, getUnsigned(C, Size), SectionIndex); +} + Optional DWARFDataExtractor::getEncodedPointer(uint32_t *Offset, uint8_t Encoding, uint64_t PCRelOffset) const { Index: lib/Support/DataExtractor.cpp =================================================================== --- lib/Support/DataExtractor.cpp +++ lib/Support/DataExtractor.cpp @@ -7,96 +7,131 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/LEB128.h" + using namespace llvm; -template -static T getU(uint32_t *offset_ptr, const DataExtractor *de, - bool isLittleEndian, const char *Data) { - T val = 0; - uint32_t offset = *offset_ptr; - if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { - std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::IsLittleEndianHost != isLittleEndian) - sys::swapByteOrder(val); +static void unexpectedEndReached(Error *E) { + if (E) + *E = createStringError(errc::illegal_byte_sequence, + "unexpected end of data"); +} + +static bool isError(Error *E) { return E && *E; } - // Advance the offset - *offset_ptr += sizeof(val); +template +T DataExtractor::getU(OffsetT *offset_ptr, llvm::Error *Err) const { + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return T(0); + + OffsetT offset = *offset_ptr; + if (!isValidOffsetForDataOfSizeT(offset, sizeof(T))) { + unexpectedEndReached(Err); + return T(0); } + + T val = 0; + std::memcpy(&val, &Data.data()[offset], sizeof(val)); + if (sys::IsLittleEndianHost != IsLittleEndian) + sys::swapByteOrder(val); + + // Advance the offset + 
*offset_ptr += sizeof(val); return val; } -template -static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, - const DataExtractor *de, bool isLittleEndian, const char *Data){ - uint32_t offset = *offset_ptr; +template +T *DataExtractor::getUs(OffsetT *offset_ptr, T *dst, OffsetT count, + llvm::Error *Err) const { + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return nullptr; - if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { - for (T *value_ptr = dst, *end = dst + count; value_ptr != end; - ++value_ptr, offset += sizeof(*dst)) - *value_ptr = getU(offset_ptr, de, isLittleEndian, Data); - // Advance the offset - *offset_ptr = offset; - // Return a non-NULL pointer to the converted data as an indicator of - // success - return dst; + size_t offset = *offset_ptr; + if (!isValidOffsetForDataOfSizeT(offset, sizeof(*dst) * count)) { + unexpectedEndReached(Err); + return nullptr; } - return nullptr; + + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU(offset_ptr, Err); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; } uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); + return getU(offset_ptr, nullptr); +} + +uint8_t DataExtractor::getU8(Cursor &C) const { + return getU(&C.Offset, &C.Err); } uint8_t * DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { - return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + return getUs(offset_ptr, dst, count, nullptr); } +uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, size_t Count) const { + return getUs(&C.Offset, Dst, Count, &C.Err); +} uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); + return getU(offset_ptr, nullptr); 
+} + +uint16_t DataExtractor::getU16(Cursor &C) const { + return getU(&C.Offset, &C.Err); } uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const { - return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + return getUs(offset_ptr, dst, count, nullptr); } uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { - uint24_t ExtractedVal = - getU(offset_ptr, this, IsLittleEndian, Data.data()); + uint24_t ExtractedVal = getU(offset_ptr, nullptr); // The 3 bytes are in the correct byte order for the host. return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); } uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); + return getU(offset_ptr, nullptr); +} + +uint32_t DataExtractor::getU32(Cursor &C) const { + return getU(&C.Offset, &C.Err); } uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const { - return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + return getUs(offset_ptr, dst, count, nullptr); } uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); + return getU(offset_ptr, nullptr); +} + +uint64_t DataExtractor::getU64(Cursor &C) const { + return getU(&C.Offset, &C.Err); } uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const { - return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + return getUs(offset_ptr, dst, count, nullptr); } -uint64_t -DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { +uint64_t DataExtractor::getUnsigned(uint32_t *offset_ptr, + uint32_t byte_size) const { switch (byte_size) { case 1: return getU8(offset_ptr); @@ -110,6 +145,20 @@ llvm_unreachable("getUnsigned unhandled case!"); } +uint64_t DataExtractor::getUnsigned(Cursor &C, uint32_t byte_size) const { + switch (byte_size) { + case 1: + return 
getU8(C); + case 2: + return getU16(C); + case 4: + return getU32(C); + case 8: + return getU64(C); + } + llvm_unreachable("getUnsigned unhandled case!"); +} + int64_t DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { switch (byte_size) { @@ -145,20 +194,35 @@ return StringRef(); } -uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { +template +uint64_t DataExtractor::getULEB128(OffsetT *offset_ptr, + llvm::Error *Err) const { assert(*offset_ptr <= Data.size()); + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return 0; const char *error; unsigned bytes_read; uint64_t result = decodeULEB128( reinterpret_cast(Data.data() + *offset_ptr), &bytes_read, reinterpret_cast(Data.data() + Data.size()), &error); - if (error) + if (error) { + unexpectedEndReached(Err); return 0; + } *offset_ptr += bytes_read; return result; } +uint64_t DataExtractor::getULEB128(uint32_t *OffsetPtr) const { + return getULEB128(OffsetPtr, nullptr); +} + +uint64_t DataExtractor::getULEB128(Cursor &C) const { + return getULEB128(&C.Offset, &C.Err); +} + int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { assert(*offset_ptr <= Data.size()); @@ -172,3 +236,14 @@ *offset_ptr += bytes_read; return result; } + +void DataExtractor::skip(Cursor &C, size_t Length) const { + ErrorAsOutParameter ErrAsOut(&C.Err); + if (isError(&C.Err)) + return; + + if (isValidOffsetForDataOfSizeT(C.Offset, Length)) + C.Offset += Length; + else + unexpectedEndReached(&C.Err); +} Index: unittests/Support/DataExtractorTest.cpp =================================================================== --- unittests/Support/DataExtractorTest.cpp +++ unittests/Support/DataExtractorTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" using namespace llvm; @@ -126,4 +127,147 @@ EXPECT_EQ(0U, DE.getSLEB128(&Offset)); 
EXPECT_EQ(0U, Offset); } + +TEST(DataExtractorTest, Cursor_tell) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + // A successful read operation advances the cursor + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ(1u, C.tell()); + + // An unsuccessful one doesn't. + EXPECT_EQ(0u, DE.getU16(C)); + EXPECT_EQ(1u, C.tell()); + + // And neither do any subsequent operations. + EXPECT_EQ(0, DE.getU8(C)); + EXPECT_EQ(1u, C.tell()); + + consumeError(C.takeError()); +} + +TEST(DataExtractorTest, Cursor_takeError) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + // Initially, the cursor is in the "success" state. + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + + // It remains "success" after a successful read. + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + + // An unsuccessful read sets the error state. + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + + // Once set the error sticks until explicitly cleared. + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_EQ(0, DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + + // At which point reads can succeed again. + EXPECT_EQ('B', DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + +TEST(DataExtractorTest, Cursor_chaining) { + DataExtractor DE(StringRef("ABCD"), false, 8); + DataExtractor::Cursor C(0); + + // Multiple reads can be chained without triggering any assertions. + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ('B', DE.getU8(C)); + EXPECT_EQ('C', DE.getU8(C)); + EXPECT_EQ('D', DE.getU8(C)); + // And the error checked at the end. 
+ EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + +#if defined(GTEST_HAS_DEATH_TEST) && defined(_DEBUG) +TEST(DataExtractorDeathTest, Cursor) { + DataExtractor DE(StringRef("AB"), false, 8); + + // Even an unused cursor must be checked for errors: + EXPECT_DEATH(DataExtractor::Cursor(0), + "Success values must still be checked prior to being destroyed"); + + { + DataExtractor::Cursor C(0); + EXPECT_EQ(0u, DE.getU32(C)); + // It must also be checked after an unsuccessful operation, prior to + // destruction. + EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ('A', DE.getU8(C)); + // Same goes for a successful one. + EXPECT_DEATH( + C.~Cursor(), + "Success values must still be checked prior to being destroyed"); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ(0u, DE.getU32(C)); + // Even if a successful operation is followed by an unsuccessful one. + EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_EQ(0, DE.getU8(C)); + // Even if an unsuccessful operation is followed by one that would normally + // succeed. 
+ EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } +} +#endif + +TEST(DataExtractorTest, getU8_vector) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + SmallString<2> S; + + DE.getU8(C, S, 4); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + EXPECT_EQ("", S); + + DE.getU8(C, S, 2); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + EXPECT_EQ("AB", S); +} + +TEST(DataExtractorTest, skip) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + + DE.skip(C, 4); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + EXPECT_EQ(0u, C.tell()); + + DE.skip(C, 2); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + EXPECT_EQ(2u, C.tell()); +} + +TEST(DataExtractorTest, eof) { + DataExtractor DE(StringRef("A"), false, 8); + DataExtractor::Cursor C(0); + + EXPECT_FALSE(DE.eof(C)); + + EXPECT_EQ(0, DE.getU16(C)); + EXPECT_FALSE(DE.eof(C)); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_TRUE(DE.eof(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + }