Index: llvm/include/llvm/Support/LEB128.h
===================================================================
--- llvm/include/llvm/Support/LEB128.h
+++ llvm/include/llvm/Support/LEB128.h
@@ -168,6 +168,98 @@
   ///@}
 };
 
+/// Decodes LEB128-encoded bytes into a value.
+/// Matches the interface of std output iterators.
+///
+/// The decoding progress (shift, error, completion) lives in the iterator
+/// itself, so an algorithm that takes the iterator by value advances its own
+/// copy; query error() and complete() on the iterator the algorithm returns.
+template <typename ValueT> class LEB128OutputIterator {
+  /// The value into which the decoded bytes are written.
+  ValueT &Value;
+
+  /// Whether this iterator is outputting a signed or unsigned value.
+  bool IsSigned;
+
+  /// Contains the error message of the last error. Null if there was no error.
+  const char *Error;
+
+  /// The bit index inside Value where we'll write the next decoded bits.
+  unsigned Shift;
+
+  /// True once a byte without a continuation bit has been seen.
+  bool IsComplete;
+
+public:
+  /// Boilerplate typedefs for C++ iterators.
+  ///@{
+  using iterator_category = std::output_iterator_tag;
+  using value_type = void;
+  using difference_type = void;
+  using pointer = void;
+  using reference = void;
+  ///@}
+
+  /// Constructs an output iterator that decodes LEB128-encoded bytes and writes
+  /// them into the specified \p Value.
+  LEB128OutputIterator(ValueT &Value, bool IsSigned)
+      : Value(Value), IsSigned(IsSigned), Error(nullptr), Shift(0),
+        IsComplete(false) {
+    // Initially zero before any decoded bits are written into it.
+    this->Value = 0;
+  }
+
+  /// Decodes one byte from a stream of LEB128-encoded bytes, and stores the
+  /// decoded bits in the Value that was used to construct the output iterator.
+  LEB128OutputIterator &operator=(uint8_t Byte) {
+    assert(!IsComplete && "Already saw final LEB128 byte. Can't decode more.");
+    assert(!Error && "Should abandon LEB128 decoding when an error happens.");
+
+    uint64_t Slice = Byte & 0x7f;
+    if (Shift >= 64) {
+      // Shifting a 64-bit value by 64 or more bits is undefined behavior, so
+      // reject over-long encodings for both signednesses before shifting.
+      Error = IsSigned ? "sleb128 too big for int64"
+                       : "uleb128 too big for uint64";
+      return *this;
+    }
+    if (!IsSigned && ((Slice << Shift) >> Shift) != Slice) {
+      // Some payload bits of this byte would be shifted out of the value.
+      // TODO: Diagnose the equivalent loss of bits for signed values. (The
+      // code this replaces did not, so it is left unimplemented here.)
+      Error = "uleb128 too big for uint64";
+      return *this;
+    }
+    Value |= Slice << Shift;
+    Shift += 7;
+
+    if (!(Byte & 0x80)) {
+      // No continuation bit, therefore this was the final byte.
+      IsComplete = true;
+      // Sign extend negative numbers if needed.
+      if (IsSigned && Shift < 64 && (Byte & 0x40))
+        Value |= -1ULL << Shift;
+    }
+    return *this;
+  }
+
+  /// No-op. Provided to satisfy the requirements of LegacyOutputIterator.
+  ///@{
+  LEB128OutputIterator &operator*() { return *this; }
+  LEB128OutputIterator &operator++() { return *this; }
+  LEB128OutputIterator &operator++(int) { return *this; }
+  ///@}
+
+  /// Returns string describing the last error that happened during decoding.
+  /// If there was no error, returns nullptr.
+  const char *error() const { return Error; }
+
+  /// Returns true if the stream of LEB128-encoded bytes has reached a byte
+  /// without a continuation bit. False otherwise.
+  /// In other words, returns true if a LEB128 value was completely decoded.
+  bool complete() const { return IsComplete; }
+};
+
 /// Utility function to encode a SLEB128 or ULEB128 value to a buffer. Returns
 /// the length in bytes of the encoded value.
 template <typename ValueT>
@@ -216,67 +308,57 @@
   return encodeLEB128(Value, /* IsSigned */ false, p, PadTo);
 }
 
-/// Utility function to decode a ULEB128 value.
-inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
-                              const uint8_t *end = nullptr,
-                              const char **error = nullptr) {
+/// Utility function to decode a ULEB128/SLEB128 value, where the signedness is
+/// defined by \p IsSigned.
+///
+/// Reads bytes starting at \p p and never reads at or beyond \p end (a null
+/// \p end disables that bounds check). If \p n is non-null it receives the
+/// number of bytes consumed. If \p error is non-null it receives a message
+/// describing the failure, or nullptr on success. Returns 0 on failure.
+template <typename ValueT>
+ValueT decodeLEB128(const uint8_t *p, bool IsSigned, unsigned *n = nullptr,
+                    const uint8_t *end = nullptr,
+                    const char **error = nullptr) {
+  ValueT Value;
+  LEB128OutputIterator<ValueT> Decoder(Value, IsSigned);
   const uint8_t *orig_p = p;
-  uint64_t Value = 0;
-  unsigned Shift = 0;
-  if (error)
-    *error = nullptr;
-  do {
+  const char *found_error = nullptr;
+  while (!Decoder.complete()) {
     if (p == end) {
-      if (error)
-        *error = "malformed uleb128, extends past end";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
+      if (IsSigned)
+        found_error = "malformed sleb128, extends past end";
+      else
+        found_error = "malformed uleb128, extends past end";
+      break;
     }
-    uint64_t Slice = *p & 0x7f;
-    if (Shift >= 64 || Slice << Shift >> Shift != Slice) {
-      if (error)
-        *error = "uleb128 too big for uint64";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
-    }
-    Value += uint64_t(*p & 0x7f) << Shift;
-    Shift += 7;
-  } while (*p++ >= 128);
+    // Feed the byte to the decoder, but only consume it once it was accepted,
+    // so that on error *n is the offset of the offending byte.
+    *Decoder = *p;
+    if ((found_error = Decoder.error()))
+      break;
+    ++p;
+  }
   if (n)
     *n = (unsigned)(p - orig_p);
+  if (error)
+    *error = found_error;
+  if (found_error)
+    return 0;
   return Value;
 }
 
+/// Utility function to decode a ULEB128 value.
+inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
+                              const uint8_t *end = nullptr,
+                              const char **error = nullptr) {
+  return decodeLEB128<uint64_t>(p, /* IsSigned */ false, n, end, error);
+}
+
 /// Utility function to decode a SLEB128 value.
 inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
                              const uint8_t *end = nullptr,
                              const char **error = nullptr) {
-  const uint8_t *orig_p = p;
-  int64_t Value = 0;
-  unsigned Shift = 0;
-  uint8_t Byte;
-  if (error)
-    *error = nullptr;
-  do {
-    if (p == end) {
-      if (error)
-        *error = "malformed sleb128, extends past end";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
-    }
-    Byte = *p++;
-    Value |= (uint64_t(Byte & 0x7f) << Shift);
-    Shift += 7;
-  } while (Byte >= 128);
-  // Sign extend negative numbers if needed.
-  if (Shift < 64 && (Byte & 0x40))
-    Value |= (-1ULL) << Shift;
-  if (n)
-    *n = (unsigned)(p - orig_p);
-  return Value;
+  return decodeLEB128<int64_t>(p, /* IsSigned */ true, n, end, error);
 }
 
 /// Utility function to get the size of the ULEB128-encoded value.
Index: llvm/unittests/Support/LEB128Test.cpp
===================================================================
--- llvm/unittests/Support/LEB128Test.cpp
+++ llvm/unittests/Support/LEB128Test.cpp
@@ -370,4 +370,33 @@
   EXPECT_EQ(10u, getULEB128Size(UINT64_MAX));
 }
 
-} // anonymous namespace
+template <typename ValueT>
+ValueT decodeWithCopy(StringRef LEBBytes, bool IsSigned) {
+  ValueT Value;
+  std::copy(LEBBytes.begin(), LEBBytes.end(),
+            LEB128OutputIterator<ValueT>(Value, IsSigned));
+  return Value;
+}
+
+TEST(LEB128Test, OutputIteratorCopy) {
+  // Test using LEB128OutputIterator as the output of a std algorithm.
+  // Mainly checking that the output iterator interface is compatible with std.
+  EXPECT_EQ(decodeWithCopy<int64_t>("\xbf\x7f", /* IsSigned */ true), -65);
+  EXPECT_EQ(decodeWithCopy<uint64_t>("\x80\x01", /* IsSigned */ false), 0x80U);
+}
+
+TEST(LEB128Test, DecodeTooLongReportsOffendingByte) {
+  // Ten continuation bytes use up all 64 bits; an eleventh byte cannot fit.
+  const uint8_t Bytes[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+                           0x80, 0x80, 0x80, 0x80, 0x00};
+  unsigned N = 0;
+  const char *Error = nullptr;
+  EXPECT_EQ(0u, decodeULEB128(Bytes, &N, Bytes + sizeof(Bytes), &Error));
+  EXPECT_STREQ("uleb128 too big for uint64", Error);
+  EXPECT_EQ(10u, N);
+  EXPECT_EQ(0, decodeSLEB128(Bytes, &N, Bytes + sizeof(Bytes), &Error));
+  EXPECT_STREQ("sleb128 too big for int64", Error);
+  EXPECT_EQ(10u, N);
+}
+
+} // anonymous namespace