Index: include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -36,13 +36,18 @@ /// Extracts a value and applies a relocation to the result if /// one exists for the given offset. uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SectionIndex = nullptr) const; + uint64_t *SectionIndex = nullptr, + Error *Err = nullptr) const; /// Extracts an address-sized value and applies a relocation to the result if /// one exists for the given offset. uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const { return getRelocatedValue(getAddressSize(), Off, SecIx); } + uint64_t getRelocatedAddress(Cursor &C, uint64_t *SecIx = nullptr) const { + return getRelocatedValue(getAddressSize(), &getOffset(C), SecIx, + &getError(C)); + } /// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding. /// There is a DWARF encoding that uses a PC-relative adjustment. Index: include/llvm/Support/DataExtractor.h =================================================================== --- include/llvm/Support/DataExtractor.h +++ include/llvm/Support/DataExtractor.h @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Error.h" namespace llvm { @@ -42,6 +43,21 @@ uint8_t IsLittleEndian; uint8_t AddressSize; public: + + class Cursor { + uint32_t Offset; + Error Err; + + friend class DataExtractor; + + public: + explicit Cursor(uint32_t Offset) + : Offset(Offset), Err(Error::success()) {} + explicit operator bool() { return !Err; } + uint32_t tell() const { return Offset; } + Error takeError() { return std::move(Err); } + }; + /// Construct with a buffer that is owned by the caller. /// /// This constructor allows us to use data that is owned by the @@ -127,7 +143,11 @@ /// @return /// The unsigned integer value that was extracted, or zero on /// failure. - uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; + uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size, + Error *Err = nullptr) const; + uint64_t getUnsigned(Cursor &C, uint32_t Size) const { + return getUnsigned(&C.Offset, Size, &C.Err); + } /// Extract an signed integer of size \a byte_size from \a *offset_ptr. /// @@ -174,6 +194,7 @@ uint64_t getAddress(uint32_t *offset_ptr) const { return getUnsigned(offset_ptr, AddressSize); } + uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); } /// Extract a uint8_t value from \a *offset_ptr. /// @@ -189,7 +210,8 @@ /// /// @return /// The extracted uint8_t value. - uint8_t getU8(uint32_t *offset_ptr) const; + uint8_t getU8(uint32_t *offset_ptr, Error *Err = nullptr) const; + uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); } /// Extract \a count uint8_t values from \a *offset_ptr. /// @@ -215,6 +237,19 @@ /// \a dst if all values were properly extracted and copied, /// NULL otherise. uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; + uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const; + + template + void getU8(Cursor &C, SmallVectorImpl &Dst, uint32_t Count) const { + static_assert( + std::is_same::value || std::is_same::value, ""); + if (isValidOffsetForDataOfSize(C.Offset, Count)) + Dst.resize(Count); + + // This relies on the fact that getU8 will not attempt to write to the + // buffer it isValidOffsetForDataOfSize(C.Offset, Count) is false. + getU8(C, reinterpret_cast(Dst.data()), Count); + } //------------------------------------------------------------------ /// Extract a uint16_t value from \a *offset_ptr. @@ -232,7 +267,8 @@ /// @return /// The extracted uint16_t value. //------------------------------------------------------------------ - uint16_t getU16(uint32_t *offset_ptr) const; + uint16_t getU16(uint32_t *offset_ptr, Error *Err = nullptr) const; + uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); } /// Extract \a count uint16_t values from \a *offset_ptr. /// @@ -290,7 +326,8 @@ /// /// @return /// The extracted uint32_t value. - uint32_t getU32(uint32_t *offset_ptr) const; + uint32_t getU32(uint32_t *offset_ptr, Error *Err = nullptr) const; + uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); } /// Extract \a count uint32_t values from \a *offset_ptr. /// @@ -331,7 +368,8 @@ /// /// @return /// The extracted uint64_t value. - uint64_t getU64(uint32_t *offset_ptr) const; + uint64_t getU64(uint32_t *offset_ptr, Error *Err = nullptr) const; + uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); } /// Extract \a count uint64_t values from \a *offset_ptr. /// @@ -392,7 +430,11 @@ /// /// @return /// The extracted unsigned integer value. - uint64_t getULEB128(uint32_t *offset_ptr) const; + uint64_t getULEB128(uint32_t *offset_ptr, llvm::Error *Err = nullptr) const; + uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); } + + void skip(Cursor &C, uint32_t Length) const; + bool eof(const Cursor &C) const { return Data.size() == C.Offset; } /// Test the validity of \a offset. /// @@ -420,6 +462,12 @@ bool isValidOffsetForAddress(uint32_t offset) const { return isValidOffsetForDataOfSize(offset, AddressSize); } + +protected: + // Make it possible for subclasses to access these fields without making them + // public. + static uint32_t &getOffset(Cursor &C) { return C.Offset; } + static Error &getError(Cursor &C) { return C.Err; } }; } // namespace llvm Index: lib/DebugInfo/DWARF/DWARFDataExtractor.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -13,13 +13,14 @@ using namespace llvm; uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SecNdx) const { + uint64_t *SecNdx, + Error *Err) const { if (SecNdx) *SecNdx = object::SectionedAddress::UndefSection; if (!Section) - return getUnsigned(Off, Size); + return getUnsigned(Off, Size, Err); Optional E = Obj->find(*Section, *Off); - uint64_t A = getUnsigned(Off, Size); + uint64_t A = getUnsigned(Off, Size, Err); if (!E) return A; if (SecNdx) Index: lib/Support/DataExtractor.cpp =================================================================== --- lib/Support/DataExtractor.cpp +++ lib/Support/DataExtractor.cpp @@ -7,104 +7,136 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" + using namespace llvm; +static Error createError() { + return createStringError(errc::illegal_byte_sequence, + "unexpected end of data"); +} + +static void unexpectedEndReached(Error *E) { + if (E) + *E = createError(); +} + +static bool isError(Error *E) { return E && *E; } + template static T getU(uint32_t *offset_ptr, const DataExtractor *de, - bool isLittleEndian, const char *Data) { - T val = 0; - uint32_t offset = *offset_ptr; - if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { - std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::IsLittleEndianHost != isLittleEndian) - sys::swapByteOrder(val); + bool isLittleEndian, const char *Data, llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + if (LLVM_UNLIKELY(isError(Err))) + return T(0); - // Advance the offset - *offset_ptr += sizeof(val); + uint32_t offset = *offset_ptr; + if (LLVM_UNLIKELY(!de->isValidOffsetForDataOfSize(offset, sizeof(T)))) { + unexpectedEndReached(Err); + return T(0); } + + T val = 0; + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::IsLittleEndianHost != isLittleEndian) + sys::swapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); return val; } template static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, - const DataExtractor *de, bool isLittleEndian, const char *Data){ - uint32_t offset = *offset_ptr; + const DataExtractor *de, bool isLittleEndian, const char *Data, + llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + if (LLVM_UNLIKELY(isError(Err))) + return nullptr; - if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { - for (T *value_ptr = dst, *end = dst + count; value_ptr != end; - ++value_ptr, offset += sizeof(*dst)) - *value_ptr = getU(offset_ptr, de, isLittleEndian, Data); - // Advance the offset - *offset_ptr = offset; - // Return a non-NULL pointer to the converted data as an indicator of - // success - return dst; + uint32_t offset = *offset_ptr; + if (LLVM_UNLIKELY( + !de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count))) { + unexpectedEndReached(Err); + return nullptr; } - return nullptr; + + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU(offset_ptr, de, isLittleEndian, Data, Err); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; } -uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint8_t DataExtractor::getU8(uint32_t *offset_ptr, Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint8_t * DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } +uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { + return getUs(&C.Offset, Dst, Count, this, IsLittleEndian, + Data.data(), &C.Err); +} -uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint16_t DataExtractor::getU16(uint32_t *offset_ptr, Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { uint24_t ExtractedVal = - getU(offset_ptr, this, IsLittleEndian, Data.data()); + getU(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); // The 3 bytes are in the correct byte order for the host. return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); } -uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint32_t DataExtractor::getU32(uint32_t *offset_ptr, Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint64_t DataExtractor::getU64(uint32_t *offset_ptr, Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t -DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { +uint64_t DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size, + Error *Err) const { switch (byte_size) { case 1: - return getU8(offset_ptr); + return getU8(offset_ptr, Err); case 2: - return getU16(offset_ptr); + return getU16(offset_ptr, Err); case 4: - return getU32(offset_ptr); + return getU32(offset_ptr, Err); case 8: - return getU64(offset_ptr); + return getU64(offset_ptr, Err); } llvm_unreachable("getUnsigned unhandled case!"); } @@ -144,11 +176,13 @@ return StringRef(); } -uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { - uint64_t result = 0; - if (Data.empty()) +uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr, + llvm::Error *Err) const { + ErrorAsOutParameter ErrAsOut(Err); + if (LLVM_UNLIKELY(isError(Err))) return 0; + uint64_t result = 0; unsigned shift = 0; uint32_t offset = *offset_ptr; uint8_t byte = 0; @@ -162,6 +196,7 @@ return result; } } + unexpectedEndReached(Err); return 0; } @@ -189,3 +224,14 @@ } return 0; } + +void DataExtractor::skip(Cursor &C, uint32_t Length) const { + ErrorAsOutParameter ErrAsOut(&C.Err); + if (isError(&C.Err)) + return; + + if (isValidOffsetForDataOfSize(C.Offset, Length)) + C.Offset += Length; + else + unexpectedEndReached(&C.Err); +} Index: unittests/Support/DataExtractorTest.cpp =================================================================== --- unittests/Support/DataExtractorTest.cpp +++ unittests/Support/DataExtractorTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" using namespace llvm; @@ -126,4 +127,142 @@ EXPECT_EQ(0U, DE.getSLEB128(&Offset)); EXPECT_EQ(0U, Offset); } + +TEST(DataExtractorTest, Cursor_tell) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + // A successful read operation advances the cursor + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ(1u, C.tell()); + + // An unsuccessful one doesn't. + EXPECT_EQ(0u, DE.getU16(C)); + EXPECT_EQ(1u, C.tell()); + + // And neither do any subsequent operations. + EXPECT_EQ(0, DE.getU8(C)); + EXPECT_EQ(1u, C.tell()); + + consumeError(C.takeError()); +} + +TEST(DataExtractorTest, Cursor_takeError) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + // Initially, the cursor is in the "success" state. + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + + // It remains "success" after a successful read. + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + + // An unsuccessful read sets the error state. + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + + // Once set the error sticks until explicitly cleared. + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_EQ(0, DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + + // At which point reads can be succeed again. + EXPECT_EQ('B', DE.getU8(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + +TEST(DataExtractorTest, Cursor_chaining) { + DataExtractor DE(StringRef("ABCD"), false, 8); + DataExtractor::Cursor C(0); + + // Multiple reads can be chained without trigerring any assertions. + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ('B', DE.getU8(C)); + EXPECT_EQ('C', DE.getU8(C)); + EXPECT_EQ('D', DE.getU8(C)); + // And the error checked at the end. + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + +#if defined(GTEST_HAS_DEATH_TEST) && defined(_DEBUG) +TEST(DataExtractorDeathTest, Cursor) { + DataExtractor DE(StringRef("AB"), false, 8); + + // Even an unused cursor must be checked for errors: + EXPECT_DEATH(DataExtractor::Cursor(0), + "Success values must still be checked prior to being destroyed"); + + { + DataExtractor::Cursor C(0); + EXPECT_EQ(0u, DE.getU32(C)); + // But after a read operation, it must be checked for errors before + // destruction. + EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ('A', DE.getU8(C)); + // Even if the operation succeeded. + EXPECT_DEATH( + C.~Cursor(), + "Success values must still be checked prior to being destroyed"); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_EQ(0u, DE.getU32(C)); + // Even if a successful operation is followed by an unsuccessful one. + EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } + { + DataExtractor::Cursor C(0); + EXPECT_EQ(0u, DE.getU32(C)); + EXPECT_EQ(0, DE.getU8(C)); + // Even if an unsuccessful operation is followed by one that would normally + // succeed. + EXPECT_DEATH(C.~Cursor(), "unexpected end of data"); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + } +} +#endif + +TEST(DataExtractorTest, getU8_vector) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + SmallString<2> S; + + DE.getU8(C, S, 4); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + EXPECT_EQ("", S); + + DE.getU8(C, S, 2); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + EXPECT_EQ("AB", S); +} + +TEST(DataExtractorTest, skip) { + DataExtractor DE(StringRef("AB"), false, 8); + DataExtractor::Cursor C(0); + + DE.skip(C, 4); + EXPECT_THAT_ERROR(C.takeError(), Failed()); + EXPECT_EQ(0u, C.tell()); + + DE.skip(C, 2); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); + EXPECT_EQ(2u, C.tell()); +} + +TEST(DataExtractorTest, eof) { + DataExtractor DE(StringRef("A"), false, 8); + DataExtractor::Cursor C(0); + + EXPECT_FALSE(DE.eof(C)); + EXPECT_EQ('A', DE.getU8(C)); + EXPECT_TRUE(DE.eof(C)); + EXPECT_THAT_ERROR(C.takeError(), Succeeded()); +} + }