Index: llvm/include/llvm/Support/LEB128.h =================================================================== --- llvm/include/llvm/Support/LEB128.h +++ llvm/include/llvm/Support/LEB128.h @@ -15,113 +15,206 @@ #define LLVM_SUPPORT_LEB128_H #include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { -/// Utility function to encode a SLEB128 value to an output stream. Returns -/// the length in bytes of the encoded value. -inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, - unsigned PadTo = 0) { +/// Converts a value into bytes encoded in LEB128 style. +/// Matches the interface of std input iterators. +template class LEB128InputIterator { + /// Denotes whether this object represents an end() iterator. + bool IsEnd; + + /// The current value that the iterator is converting into LEB128. + ValueT Value; + + /// Whether this iterator is outputting a signed or unsigned LEB128 data. + bool IsSigned; + + /// Whether there will be more output after the previously outputted byte. bool More; - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); + + /// The output will be sext-ed/zext-ed to this number of bytes if necessary. + unsigned PadTo; + + /// The current number of outputted bytes. + unsigned Count; + + /// The current LEB128 encoded byte that this iterator will return. + uint8_t CurrByte; + + /// Consumes 7 bits from Value and encodes them as a LEB128 byte in CurrByte. + void encodeNextByte() { + CurrByte = Value & 0x7f; + if (IsSigned) { + int64_t SValue = static_cast(Value); + // NOTE: this assumes that this signed shift is an arithmetic right shift. + SValue >>= 7; + More = !((((SValue == 0) && ((CurrByte & 0x40) == 0)) || + ((SValue == -1) && ((CurrByte & 0x40) != 0)))); + Value = static_cast(SValue); + } else { + // Logical right shift. + uint64_t UValue = static_cast(Value); + UValue >>= 7; + More = UValue != 0; + Value = static_cast(UValue); + } Count++; if (More || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - OS << char(Byte); - } while (More); - - // Pad with 0x80 and emit a terminating byte at the end. - if (Count < PadTo) { - uint8_t PadValue = Value < 0 ? 0x7f : 0x00; - for (; Count < PadTo - 1; ++Count) - OS << char(PadValue | 0x80); - OS << char(PadValue); - Count++; + CurrByte |= 0x80; // Mark this byte to show that more bytes will follow. + } + +public: + /// Boilerplate typedefs for C++ iterators. + ///@{ + using iterator_category = std::input_iterator_tag; + using value_type = uint8_t; + using difference_type = std::ptrdiff_t; + using pointer = const uint8_t *; + using reference = const uint8_t &; + ///@} + + /// Constructs the end-of-input iterator. + LEB128InputIterator() : IsEnd(true) {} + + /// Initializes the iterator that converts \p Value to LEB128. + /// + /// \param Value The value to convert to LEB128 bytes. + /// + /// \param IsSigned How to treat the signedness of \p Value. In other words: + /// If IsSigned is true, then it encodes as SLEB128. If it's false, it encodes + /// as ULEB128. + /// + /// \param PadTo Pads the output to this number of bytes if fewer than this + /// number of bytes have been outputted. If IsSigned is true, then the padding + /// is sign-extended. If IsSigned is false, then it's zero-extended. + explicit LEB128InputIterator(ValueT Value, bool IsSigned, unsigned PadTo) + : IsEnd(false), Value(std::move(Value)), IsSigned(IsSigned), PadTo(PadTo), + Count(0) { + // Initialize the iterator to the first LEB128-encoded byte. + encodeNextByte(); + } + + /// Constructs a copy of \p Other. + LEB128InputIterator(const LEB128InputIterator &Other) = default; + + /// Get the current LEB128-encoded byte. + ///@{ + const uint8_t &operator*() const { + assert(!IsEnd && "operator*() called on past-the-end LEB128InputIterator"); + return CurrByte; + } + const uint8_t *operator->() const { return &operator*(); } + ///@} + + /// Increment the iterator to the next LEB128-encoded byte. + ///@{ + LEB128InputIterator &operator++() { + // At first, keep encoding bytes from Value until there is nothing left. + if (More) { + encodeNextByte(); + return *this; + } + + // Pad with 0s or 1s depending on whether we want to zext or sext. + uint8_t PadValue; + if (IsSigned && static_cast(Value) < 0) + PadValue = 0x7f; + else + PadValue = 0x00; + + // Output padding as necessary. + if (Count < PadTo) { + CurrByte = PadValue; + // Add a continuation bit to signal that there is more padding after this. + if (Count < PadTo - 1) + CurrByte |= 0x80; + Count++; + return *this; + } + + // Nothing left to output, so we've reached the end. + assert(!IsEnd && "operator++() called on past-the-end LEB128InputIterator"); + IsEnd = true; + return *this; + } + LEB128InputIterator operator++(int) { + LEB128InputIterator Prev = *this; + operator++(); + return Prev; + } + ///@} + + /// Checks whether both iterators are equal. Two iterators are equal if both + /// of them are end-of-input iterators or both of them would generate the same + /// sequence of outputs. + ///@{ + bool operator==(const LEB128InputIterator &Other) const { + // Both are end-of-input iterators, so they compare the same. + if (IsEnd && Other.IsEnd) + return true; + + // Otherwise, consider two iterators equal if they would generate the same + // sequence of bytes. + return IsEnd == Other.IsEnd && Value == Other.Value && + IsSigned == Other.IsSigned && More == Other.More && + PadTo == Other.PadTo && Count == Other.Count && + CurrByte == Other.CurrByte; } - return Count; + bool operator!=(const LEB128InputIterator &Other) const { + return !operator==(Other); + } + ///@} +}; + +/// Utility function to encode a SLEB128 or ULEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +template +unsigned encodeLEB128(const ValueT &Value, bool IsSigned, uint8_t *p, + unsigned PadTo = 0) { + uint8_t *orig_p = p; + p = std::copy(LEB128InputIterator(Value, IsSigned, PadTo), + LEB128InputIterator(), p); + return (unsigned)(p - orig_p); +} + +/// Utility function to encode a SLEB128 or ULEB128 value to an output stream. +/// Returns the length in bytes of the encoded value. +template +unsigned encodeLEB128(const ValueT &Value, bool IsSigned, raw_ostream &OS, + unsigned PadTo = 0) { + uint64_t TellBefore = OS.tell(); + std::copy(LEB128InputIterator(Value, IsSigned, PadTo), + LEB128InputIterator(), raw_ostream_iterator(OS)); + return (unsigned)(OS.tell() - TellBefore); +} + +/// Utility function to encode a SLEB128 value to an output stream. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, + unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ true, OS, PadTo); } /// Utility function to encode a SLEB128 value to a buffer. Returns /// the length in bytes of the encoded value. inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { - uint8_t *orig_p = p; - unsigned Count = 0; - bool More; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); - Count++; - if (More || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - *p++ = Byte; - } while (More); - - // Pad with 0x80 and emit a terminating byte at the end. - if (Count < PadTo) { - uint8_t PadValue = Value < 0 ? 0x7f : 0x00; - for (; Count < PadTo - 1; ++Count) - *p++ = (PadValue | 0x80); - *p++ = PadValue; - } - return (unsigned)(p - orig_p); + return encodeLEB128(Value, /* IsSigned */ true, p, PadTo); } /// Utility function to encode a ULEB128 value to an output stream. Returns /// the length in bytes of the encoded value. inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo = 0) { - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - Count++; - if (Value != 0 || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - OS << char(Byte); - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Count < PadTo) { - for (; Count < PadTo - 1; ++Count) - OS << '\x80'; - OS << '\x00'; - Count++; - } - return Count; + return encodeLEB128(Value, /* IsSigned */ false, OS, PadTo); } /// Utility function to encode a ULEB128 value to a buffer. Returns /// the length in bytes of the encoded value. -inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, - unsigned PadTo = 0) { - uint8_t *orig_p = p; - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - Count++; - if (Value != 0 || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - *p++ = Byte; - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Count < PadTo) { - for (; Count < PadTo - 1; ++Count) - *p++ = '\x80'; - *p++ = '\x00'; - } - - return (unsigned)(p - orig_p); +inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ false, p, PadTo); } /// Utility function to decode a ULEB128 value.