Index: llvm/include/llvm/Support/LEB128.h =================================================================== --- llvm/include/llvm/Support/LEB128.h +++ llvm/include/llvm/Support/LEB128.h @@ -15,113 +15,250 @@ #define LLVM_SUPPORT_LEB128_H #include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { -/// Utility function to encode a SLEB128 value to an output stream. Returns -/// the length in bytes of the encoded value. -inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, - unsigned PadTo = 0) { - bool More; - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); +namespace detail { +/// Encodes one byte's worth of LEB128 data from the LSB of the given value. +/// This function is used in the inner loop of LEB128 encoding. +/// +/// \param IsSigned How to treat the signedness of the value. If IsSigned is +/// true, then it encodes as SLEB128. If it's false, it encodes as ULEB128. This +/// is not particularly useful for types like int64_t/uint64_t since we already +/// know the signedness from their types, but it's important to extend LEB128 +/// encoding to types like APInt which don't inherently have a signedness. +/// +/// \param ValueT The type of the value of the source of the LEB128 conversion. +/// +/// \param Value The value to convert to LEB128 bytes. Shifted in-place as +/// bits are consumed from its LSB to create LEB128-encoded bytes. +/// +/// \param AllValueBitsEncoded Whether there are still more non-padding bits +/// from the value to encode. Updated in-place when all that's left is padding. +/// +/// \param Count The number of bytes encoded so far. Updated in-place to account +/// for newly encoded bytes. +/// +/// \param PadTo Pads the output to this number of bytes if fewer than this +/// number of bytes have been outputted. If IsSigned is true, then the padding +/// is sign-extended. If IsSigned is false, then it's zero-extended. +/// +/// \return The next LEB128-encoded byte, or None if the encoding is complete. +template +Optional encodeLEB128Byte(ValueT &Value, bool &AllValueBitsEncoded, + unsigned &Count, unsigned PadTo = 0) { + // If the value is not done being encoded, then read the next bits from it and + // encode them as LEB128. + if (!AllValueBitsEncoded) { + // Get the next byte from the input value. + uint8_t CurrByte = Value & 0x7f; + if (IsSigned) { + int64_t SValue = static_cast(Value); + // NOTE: this assumes that this signed shift is an arithmetic right shift. + SValue >>= 7; + AllValueBitsEncoded = + // The value was positive, all set bits have been encoded, and this + // byte sets the sign bit to 0, so encoding the value is complete. + (SValue == 0 && (CurrByte & 0x40) == 0) || + // The value was negative, only negative sign bits are left, and this + // byte sets the sign bit to 1, so encoding the value is complete. + (SValue == -1 && (CurrByte & 0x40) != 0); + Value = static_cast(SValue); + } else { + // Logical right shift. + uint64_t UValue = static_cast(Value); + UValue >>= 7; + AllValueBitsEncoded = UValue == 0; + Value = static_cast(UValue); + } + if (!AllValueBitsEncoded || Count + 1 < PadTo) + CurrByte |= 0x80; // Mark this byte to show that more bytes will follow. Count++; - if (More || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - OS << char(Byte); - } while (More); + return CurrByte; + } - // Pad with 0x80 and emit a terminating byte at the end. + // If there are no more bytes to read from the value itself, then encode a + // padding byte next. if (Count < PadTo) { - uint8_t PadValue = Value < 0 ? 0x7f : 0x00; - for (; Count < PadTo - 1; ++Count) - OS << char(PadValue | 0x80); - OS << char(PadValue); + // Pad with 0s or 1s depending on whether we want to zext or sext. + assert((Value == 0 || static_cast(Value) == -1) && + "Assuming the leftover Value can be used as signed padding."); + uint8_t PadByte = Value & 0x7f; + // Add a continuation bit to say that there is more padding after this. + if (Count + 1 < PadTo) + PadByte |= 0x80; Count++; + return PadByte; } - return Count; + + // At this point, all necessary padding has been added, so there are no + // more bytes to output. + return None; } +} // namespace detail -/// Utility function to encode a SLEB128 value to a buffer. Returns -/// the length in bytes of the encoded value. -inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { - uint8_t *orig_p = p; +/// Input iterator interface to encode a value as LEB128 bytes. +template class LEB128InputIterator { + /// The value that the iterator reads from to encode bits as LEB128. + ValueT Value = ValueT(); + + /// The output will be sext-ed/zext-ed to this number of bytes if necessary. + unsigned PadTo = 0; + + /// The number of bytes encoded so far. unsigned Count = 0; - bool More; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); - Count++; - if (More || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - *p++ = Byte; - } while (More); - // Pad with 0x80 and emit a terminating byte at the end. - if (Count < PadTo) { - uint8_t PadValue = Value < 0 ? 0x7f : 0x00; - for (; Count < PadTo - 1; ++Count) - *p++ = (PadValue | 0x80); - *p++ = PadValue; + /// Whether there are still more non-padding bits from the Value to encode. + bool AllValueBitsEncoded = false; + + /// The current LEB128 encoded byte that this iterator will return. + /// If this is None then this object represents an end() iterator. + Optional CurrByte; + + /// Consumes 7 bits from Value and encodes them as a LEB128 byte in CurrByte. + void getNextByte() { + CurrByte = detail::encodeLEB128Byte( + Value, AllValueBitsEncoded, Count, PadTo); + } + +public: + /// Boilerplate typedefs for C++ iterators. + ///@{ + using iterator_category = std::input_iterator_tag; + using value_type = uint8_t; + using difference_type = std::ptrdiff_t; + using pointer = const uint8_t *; + using reference = const uint8_t &; + ///@} + + /// Constructs an end() iterator. + LEB128InputIterator() = default; + + /// Initializes an iterator wrapper for \ref getLEB128Byte. + explicit LEB128InputIterator(ValueT Value, unsigned PadTo = 0) + : Value(std::move(Value)), PadTo(PadTo) { + // Initialize the iterator to the first LEB128-encoded byte. + getNextByte(); + } + + /// Constructs a copy of \p Other. + LEB128InputIterator(const LEB128InputIterator &Other) = default; + + /// Get the current LEB128-encoded byte. + ///@{ + const uint8_t &operator*() const { + assert(CurrByte && "operator*() used on past-the-end LEB128InputIterator"); + return *CurrByte; } + const uint8_t *operator->() const { return &operator*(); } + ///@} + + /// Increment the iterator to the next LEB128-encoded byte. + ///@{ + LEB128InputIterator &operator++() { + assert(CurrByte && "operator++() used on past-the-end LEB128InputIterator"); + getNextByte(); + return *this; + } + LEB128InputIterator operator++(int) { + LEB128InputIterator Prev = *this; + operator++(); + return Prev; + } + ///@} + + /// Checks whether both iterators are equal. Two iterators are equal if both + /// of them are end() iterators or both of them would generate the same + /// sequence of outputs. + ///@{ + bool operator==(const LEB128InputIterator &Other) const { + // Both are end() iterators, so they compare the same. + // NOTE: This is similar to the design of istream_iterator, and it + // simplifies the comparison logic below. + if (!CurrByte && !Other.CurrByte) + return true; + // Otherwise, consider two iterators equal if they would generate the same + // sequence of bytes. + return CurrByte == Other.CurrByte && Value == Other.Value && + PadTo == Other.PadTo && Count == Other.Count && + AllValueBitsEncoded == Other.AllValueBitsEncoded; + } + bool operator!=(const LEB128InputIterator &Other) const { + return !operator==(Other); + } + ///@} +}; + +// Convenience function for LEB128 input iterator ranges. +template , + class... ArgTs> +auto makeLEB128InputRange(ValueT Value, ArgTs &&... Args) { + return make_range( + InputIteratorT(std::move(Value), std::forward(Args)...), + InputIteratorT()); +} + +// Convenience function for SLEB128 input iterator ranges. +template auto makeSLEB128InputRange(ArgTs &&... Args) { + return makeLEB128InputRange( + std::forward(Args)...); +} + +// Convenience function for ULEB128 input iterator ranges. +template auto makeULEB128InputRange(ArgTs &&... Args) { + return makeLEB128InputRange( + std::forward(Args)...); +} + +/// Utility function to encode a SLEB128 or ULEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +template > +unsigned encodeLEB128(const ValueT &Value, uint8_t *p, unsigned PadTo = 0) { + uint8_t *orig_p = p; + p = std::copy(InputIteratorT(Value, PadTo), InputIteratorT(), p); return (unsigned)(p - orig_p); } -/// Utility function to encode a ULEB128 value to an output stream. Returns +/// Utility function to encode a SLEB128 or ULEB128 value to an output stream. +/// Returns the length in bytes of the encoded value. +template > +unsigned encodeLEB128(const ValueT &Value, raw_ostream &OS, + unsigned PadTo = 0) { + uint64_t TellBefore = OS.tell(); + std::copy(InputIteratorT(Value, PadTo), InputIteratorT(), + raw_ostream_iterator(OS)); + return (unsigned)(OS.tell() - TellBefore); +} + +/// Utility function to encode a SLEB128 value to an output stream. Returns /// the length in bytes of the encoded value. -inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, +inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo = 0) { - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - Count++; - if (Value != 0 || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - OS << char(Byte); - } while (Value != 0); + return encodeLEB128(Value, OS, PadTo); +} - // Pad with 0x80 and emit a null byte at the end. - if (Count < PadTo) { - for (; Count < PadTo - 1; ++Count) - OS << '\x80'; - OS << '\x00'; - Count++; - } - return Count; +/// Utility function to encode a SLEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { + return encodeLEB128(Value, p, PadTo); } -/// Utility function to encode a ULEB128 value to a buffer. Returns +/// Utility function to encode a ULEB128 value to an output stream. Returns /// the length in bytes of the encoded value. -inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, +inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo = 0) { - uint8_t *orig_p = p; - unsigned Count = 0; - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - Count++; - if (Value != 0 || Count < PadTo) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - *p++ = Byte; - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Count < PadTo) { - for (; Count < PadTo - 1; ++Count) - *p++ = '\x80'; - *p++ = '\x00'; - } + return encodeLEB128(Value, OS, PadTo); +} - return (unsigned)(p - orig_p); +/// Utility function to encode a ULEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, unsigned PadTo = 0) { + return encodeLEB128(Value, p, PadTo); } /// Utility function to decode a ULEB128 value. Index: llvm/utils/TableGen/FixedLenDecoderEmitter.cpp =================================================================== --- llvm/utils/TableGen/FixedLenDecoderEmitter.cpp +++ llvm/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -689,9 +689,7 @@ } else { Table.push_back(MCD::OPC_FilterValue); // Encode and emit the value to filter against. - uint8_t Buffer[16]; - unsigned Len = encodeULEB128(Filter.first, Buffer); - Table.insert(Table.end(), Buffer, Buffer + Len); + copy(makeULEB128InputRange(Filter.first), std::back_inserter(Table)); // Reserve space for the NumToSkip entry. We'll backpatch the value // later. PrevFilter = Table.size(); @@ -1287,14 +1285,10 @@ // Figure out the index into the predicate table for the predicate just // computed. unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); - SmallString<16> PBytes; - raw_svector_ostream S(PBytes); - encodeULEB128(PIdx, S); TableInfo.Table.push_back(MCD::OPC_CheckPredicate); // Predicate index - for (unsigned i = 0, e = PBytes.size(); i != e; ++i) - TableInfo.Table.push_back(PBytes[i]); + copy(makeULEB128InputRange(PIdx), std::back_inserter(TableInfo.Table)); // Push location for NumToSkip backpatching. TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); TableInfo.Table.push_back(0); @@ -1346,19 +1340,14 @@ TableInfo.Table.push_back(MCD::OPC_SoftFail); - SmallString<16> MaskBytes; - raw_svector_ostream S(MaskBytes); if (NeedPositiveMask) { - encodeULEB128(PositiveMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); + copy(makeULEB128InputRange(PositiveMask.getZExtValue()), + std::back_inserter(TableInfo.Table)); } else TableInfo.Table.push_back(0); if (NeedNegativeMask) { - MaskBytes.clear(); - encodeULEB128(NegativeMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); + copy(makeULEB128InputRange(NegativeMask.getZExtValue()), + std::back_inserter(TableInfo.Table)); } else TableInfo.Table.push_back(0); } @@ -1386,11 +1375,8 @@ TableInfo.Table.push_back(MCD::OPC_CheckField); TableInfo.Table.push_back(StartBits[I-1]); TableInfo.Table.push_back(NumBits); - uint8_t Buffer[16], *p; - encodeULEB128(FieldVals[I-1], Buffer); - for (p = Buffer; *p >= 128 ; ++p) - TableInfo.Table.push_back(*p); - TableInfo.Table.push_back(*p); + copy(makeULEB128InputRange(FieldVals[I - 1]), + std::back_inserter(TableInfo.Table)); // Push location for NumToSkip backpatching. TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); // The fixup is always 24-bits, so go ahead and allocate the space @@ -1420,19 +1406,10 @@ TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode : MCD::OPC_TryDecode); NumEncodingsSupported++; - uint8_t Buffer[16], *p; - encodeULEB128(Opc.Opcode, Buffer); - for (p = Buffer; *p >= 128 ; ++p) - TableInfo.Table.push_back(*p); - TableInfo.Table.push_back(*p); - - SmallString<16> Bytes; - raw_svector_ostream S(Bytes); - encodeULEB128(DIdx, S); + copy(makeULEB128InputRange(Opc.Opcode), std::back_inserter(TableInfo.Table)); // Decoder index - for (unsigned i = 0, e = Bytes.size(); i != e; ++i) - TableInfo.Table.push_back(Bytes[i]); + copy(makeULEB128InputRange(DIdx), std::back_inserter(TableInfo.Table)); if (!HasCompleteDecoder) { // Push location for NumToSkip backpatching.