Index: llvm/include/llvm/Support/LEB128.h =================================================================== --- llvm/include/llvm/Support/LEB128.h +++ llvm/include/llvm/Support/LEB128.h @@ -14,6 +14,7 @@ #ifndef LLVM_SUPPORT_LEB128_H #define LLVM_SUPPORT_LEB128_H +#include "llvm/Support/LEB128CodecInfo.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -22,7 +23,8 @@ /// Converts a value into bytes encoded in LEB128 style. /// Matches the interface of std input iterators. -template class LEB128InputIterator { +template > +class LEB128InputIterator { /// Denotes whether this object represents an end() iterator. bool IsEnd; @@ -46,20 +48,14 @@ /// Consumes 7 bits from Value and encodes them as a LEB128 byte in CurrByte. void encodeNextByte() { - CurrByte = Value & 0x7f; + CurrByte = CodecInfoT::getLo7Bits(Value); if (IsSigned) { - int64_t SValue = static_cast(Value); - // NOTE: this assumes that this signed shift is an arithmetic right shift. - SValue >>= 7; - More = !((((SValue == 0) && ((CurrByte & 0x40) == 0)) || - ((SValue == -1) && ((CurrByte & 0x40) != 0)))); - Value = static_cast(SValue); + CodecInfoT::ashr7InPlace(Value); + More = !((!Value && ((CurrByte & 0x40) == 0)) || + (CodecInfoT::isAllOnesValue(Value) && ((CurrByte & 0x40) != 0))); } else { - // Logical right shift. - uint64_t UValue = static_cast(Value); - UValue >>= 7; - More = UValue != 0; - Value = static_cast(UValue); + CodecInfoT::lshr7InPlace(Value); + More = !!Value; } Count++; if (More || Count < PadTo) @@ -120,7 +116,7 @@ // Pad with 0s or 1s depending on whether we want to zext or sext. uint8_t PadValue; - if (IsSigned && static_cast(Value) < 0) + if (IsSigned && CodecInfoT::isAllOnesValue(Value)) PadValue = 0x7f; else PadValue = 0x00; @@ -171,7 +167,8 @@ /// Decodes LEB128-encoded bytes into a value. /// Matches the interface of std output iterators. 
-template class LEB128OutputIterator { +template > +class LEB128OutputIterator { /// The value into which the decoded bytes are written. ValueT &Value; @@ -212,25 +209,28 @@ assert(!IsComplete && "Already saw final LEB128 byte. Can't decode more."); assert(!Error && "Should abandon LEB128 decoding when an error happens."); - uint64_t Slice = Byte & 0x7f; + uint8_t Slice = Byte & 0x7f; if (!IsSigned) { // TODO: Implement an equivalent check for signed values? // (The existing code did not check this error for signed values, so // this refactor is leaving it unimplemented.) - if (Shift >= 64 || Slice << Shift >> Shift != Slice) { + unsigned RequiredBits = Shift + (8 - countLeadingZeros(Slice)); + if (Shift >= CodecInfoT::getMaxNumBits() || + RequiredBits > CodecInfoT::getMaxNumBits()) { + // TODO: Update this error message to be more general. Error = "uleb128 too big for uint64"; return *this; } } - Value |= Slice << Shift; + CodecInfoT::insertLo7InPlace(Value, Slice, Shift); Shift += 7; if (!(Byte & 0x80)) { // No continuation bit, therefore this was the final byte. IsComplete = true; // Sign extend negative numbers if needed. - if (IsSigned && Shift < 64 && (Byte & 0x40)) - Value |= -1ULL << Shift; + if (IsSigned && Shift < CodecInfoT::getMaxNumBits() && (Byte & 0x40)) + CodecInfoT::negSExtInPlace(Value, Shift); } return *this; } Index: llvm/include/llvm/Support/LEB128CodecInfo.h =================================================================== --- /dev/null +++ llvm/include/llvm/Support/LEB128CodecInfo.h @@ -0,0 +1,104 @@ +//===- llvm/Support/LEB128CodecInfo.h - Type traits for LEB128 --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines LEB128CodecInfo traits for [SU]LEB128 encoding/decoding.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_LEB128CODECINFO_H
#define LLVM_SUPPORT_LEB128CODECINFO_H

#include <cstdint>
#include <type_traits>

namespace llvm {

/// The basic operations required to be supported by a type in order to be used
/// as the source of LEB128 encoding or the destination of LEB128 decoding.
///
/// The primary template is intentionally empty: it only documents the
/// interface that every specialization has to provide.
///
/// Note: There are also some additional requirements not mentioned here. For
/// example, checking that the value is zero is done with operator!().
template <typename ValueT, typename Enable = void> struct LEB128CodecInfo {
  /// Get the 7 least significant bits from Value and return them as a uint8_t.
  ///
  /// static uint8_t getLo7Bits(const ValueT &Value);

  /// Do an in-place logical right shift of 7 bits on Value.
  ///
  /// static void lshr7InPlace(ValueT &Value);

  /// Do an in-place arithmetic right shift of 7 bits on Value.
  ///
  /// static void ashr7InPlace(ValueT &Value);

  /// Return true if all bits in Value are 1. False otherwise.
  /// In practice, this is used to check if we have the value of a negative
  /// signed integer that has been arithmetically shifted to the right an
  /// infinite number of times.
  ///
  /// static bool isAllOnesValue(const ValueT &Value);

  /// Get the maximum number of bits possible to represent with ValueT.
  /// May return a very large number if ValueT has no particular limit of bits.
  ///
  /// static unsigned getMaxNumBits();

  /// Set the half-open range of bits [Shift, Shift + 7) of Value to match the
  /// contents of Lo7. The value of Shift increases monotonically with every
  /// call to this function. The ranges of bits of successive calls do not
  /// overlap. You may assume that the bits in the range were previously zero.
  /// You may assume that Lo7 always has a value that fits in 7 bits.
  ///
  /// static void insertLo7InPlace(ValueT &Value, uint8_t Lo7, unsigned Shift);

  /// Sign extends Value, where Value is an integer that has a number
  /// of bits corresponding to NumUnextendedBits, and where it is assumed that
  /// Value is a negative number. In other words, sets all bits in the
  /// representation of Value to 1 except for the number of least significant
  /// bits stored in NumUnextendedBits.
  ///
  /// static void negSExtInPlace(ValueT &Value, unsigned NumUnextendedBits);
};

/// Specialization of LEB128CodecInfo for integral types.
///
/// All bit manipulation is carried out on a uint64_t intermediate and then
/// converted back to ValueT. Working in an unsigned 64-bit type avoids two
/// classes of problems: integer promotion of types narrower than int, and
/// shifts of negative signed values (undefined before C++20).
template <typename ValueT>
struct LEB128CodecInfo<
    ValueT, typename std::enable_if<std::is_integral<ValueT>::value>::type> {
  static_assert(sizeof(ValueT) <= sizeof(uint64_t),
                "integral LEB128CodecInfo uses 64-bit intermediates");

  /// Returns the 7 least significant bits of \p Value.
  static uint8_t getLo7Bits(const ValueT &Value) {
    return static_cast<uint8_t>(Value & 0x7f);
  }

  /// Logical right shift of \p Value by 7 bits, within the width of ValueT.
  static void lshr7InPlace(ValueT &Value) {
    // Mask to ValueT's own width first. Without the mask, a negative value of
    // a signed type narrower than 64 bits is sign-extended, the bits shifted
    // in from above are ones, and the truncated result never reaches zero.
    uint64_t UValue = static_cast<uint64_t>(Value) & getWidthMask();
    UValue >>= 7;
    Value = static_cast<ValueT>(UValue);
  }

  /// Arithmetic right shift of \p Value by 7 bits.
  static void ashr7InPlace(ValueT &Value) {
    int64_t SValue = static_cast<int64_t>(Value);
    // NOTE: this assumes that this signed shift is an arithmetic right shift.
    SValue >>= 7;
    Value = static_cast<ValueT>(SValue);
  }

  /// Returns true if every bit of \p Value (within the width of ValueT) is 1.
  static bool isAllOnesValue(const ValueT &Value) {
    // Compare in uint64_t. Writing `Value == ~static_cast<ValueT>(0)` promotes
    // both sides to int for narrow types, so e.g. uint8_t 0xFF (255) would be
    // compared against -1 and never match.
    const uint64_t Mask = getWidthMask();
    return (static_cast<uint64_t>(Value) & Mask) == Mask;
  }

  /// Returns the number of bits in ValueT.
  static constexpr unsigned getMaxNumBits() {
    return static_cast<unsigned>(sizeof(ValueT) * 8);
  }

  /// ORs the 7-bit slice \p Lo7 into bits [Shift, Shift + 7) of \p Value.
  /// Bits that would land at or above getMaxNumBits() are discarded.
  static void insertLo7InPlace(ValueT &Value, uint8_t Lo7, unsigned Shift) {
    // Shifting by the full width or more is undefined; the mathematically
    // truncated result is "no bits inserted", so make that explicit.
    if (Shift >= getMaxNumBits())
      return;
    const uint64_t Slice = static_cast<uint64_t>(Lo7) << Shift;
    Value = static_cast<ValueT>(static_cast<uint64_t>(Value) | Slice);
  }

  /// Sets every bit of \p Value at position \p NumUnextendedBits and above
  /// to 1, leaving the lower bits untouched.
  static void negSExtInPlace(ValueT &Value, unsigned NumUnextendedBits) {
    // Nothing left to extend once the value already fills ValueT.
    if (NumUnextendedBits >= getMaxNumBits())
      return;
    // Build the mask from an unsigned all-ones value; left-shifting a negative
    // signed value is undefined before C++20.
    const uint64_t HighOnes = ~static_cast<uint64_t>(0) << NumUnextendedBits;
    Value = static_cast<ValueT>(static_cast<uint64_t>(Value) | HighOnes);
  }

private:
  /// Returns a uint64_t with the low getMaxNumBits() bits set.
  static uint64_t getWidthMask() {
    return ~static_cast<uint64_t>(0) >> (64 - getMaxNumBits());
  }
};

} // namespace llvm

#endif // LLVM_SUPPORT_LEB128CODECINFO_H