Index: llvm/include/llvm/Support/LEB128.h =================================================================== --- llvm/include/llvm/Support/LEB128.h +++ llvm/include/llvm/Support/LEB128.h @@ -181,6 +181,11 @@ /// The bit index inside Value where we'll write the next decoded bits. unsigned Shift; + /// Whether the value is known to be negative. + /// For signed values, this can only be known after we insert the last bit + /// into the representation, since the last bit is the sign bit. + bool IsNegative; + /// Returns true if we've seen a byte without a continuation bit. bool IsComplete; @@ -198,7 +203,7 @@ /// them into the specified \p Value. LEB128OutputIterator(ValueT &Value, bool IsSigned) : Value(Value), IsSigned(IsSigned), Error(nullptr), Shift(0), - IsComplete(false) { + IsNegative(false), IsComplete(false) { // Initially zero before any decoded bits are written into it. this->Value = ValueT(); } @@ -209,29 +214,54 @@ assert(!IsComplete && "Already saw final LEB128 byte. Can't decode more."); assert(!Error && "Should abandon LEB128 decoding when an error happens."); - uint8_t Slice = Byte & 0x7f; - if (!IsSigned) { - // TODO: Implement an equivalent check for signed values? - // (The existing code did not check this error for signed values, so - // this refactor is leaving it unimplemented.) - unsigned RequiredBits = Shift + (8 - countLeadingZeros(Slice)); - if (Shift >= CodecInfoT::getMaxNumBits() || - RequiredBits > CodecInfoT::getMaxNumBits()) { - // TODO: Update this error message to be more general. - Error = "uleb128 too big for uint64"; - return *this; - } + // Insert bits if the representation still has room for them. + if (Shift < CodecInfoT::getMaxNumBits()) { + // Try to avoid shifting in bits that don't fit in the representation. + // This is to avoid signed overflow, which is undefined behavior. + unsigned NumBitsToInsert = + std::min(7U, CodecInfoT::getMaxNumBits() - Shift); + uint8_t SliceBitsToInsert = + Byte & maskTrailingOnes(NumBitsToInsert); + CodecInfoT::insertLo7InPlace(Value, SliceBitsToInsert, Shift); + + // If we inserted the last bit of the representation in this operation, + // then we can determine if this was a negative number based on the value + // of the last inserted bit. + if (IsSigned && Shift + NumBitsToInsert == CodecInfoT::getMaxNumBits()) + IsNegative = (SliceBitsToInsert & (1 << (NumBitsToInsert - 1))) != 0; } - CodecInfoT::insertLo7InPlace(Value, Slice, Shift); Shift += 7; if (!(Byte & 0x80)) { // No continuation bit, therefore this was the final byte. IsComplete = true; // Sign extend negative numbers if needed. - if (IsSigned && Shift < CodecInfoT::getMaxNumBits() && (Byte & 0x40)) + if (IsSigned && Shift < CodecInfoT::getMaxNumBits() && (Byte & 0x40)) { CodecInfoT::negSExtInPlace(Value, Shift); + IsNegative = true; + } } + + // After all the bits that fit in the representation have been set, any + // further bits should be padding bits. These padding bits should all + // correspond to a zext/sext of what is stored in the representation. If + // they don't, then the number can't be completely decoded into the + // representation, and we signal an error. + if (Shift > CodecInfoT::getMaxNumBits()) { + unsigned NumPaddingBits = + std::min(7U, Shift - CodecInfoT::getMaxNumBits()); + uint8_t SignBitMask = maskLeadingOnes(NumPaddingBits) >> 1; + uint8_t ExpectedSignBits = IsNegative ? SignBitMask : 0; + bool Overflow = (SignBitMask & Byte) != ExpectedSignBits; + if (Overflow) { + // TODO: Update these error messages to be more accurate. + if (IsSigned) + Error = "sleb128 too big for sint64"; + else + Error = "uleb128 too big for uint64"; + } + } + return *this; } Index: llvm/include/llvm/Support/LEB128CodecInfo.h =================================================================== --- llvm/include/llvm/Support/LEB128CodecInfo.h +++ llvm/include/llvm/Support/LEB128CodecInfo.h @@ -54,7 +54,9 @@ /// contents of Lo7. The value of Shift increases monotonically with every /// call to this function. The ranges of bits of successive calls do not /// overlap. You may assume that the bits in the range were previously zero. - /// You may assume that Lo7 always has a value that fits in 7 bits. + /// You may assume that Lo7 always has a value that fits in 7 bits. You may + /// assume that the insertion will not attempt to set bits outside the range + /// of getMaxNumBits(). /// /// static void insertLo7InPlace(ValueT &Value, uint8_t Lo7, unsigned Shift); Index: llvm/unittests/Support/LEB128Test.cpp =================================================================== --- llvm/unittests/Support/LEB128Test.cpp +++ llvm/unittests/Support/LEB128Test.cpp @@ -646,4 +646,253 @@ EXPECT_EQ(decodeWithCopy("\x80\x01", /* IsSigned */ false), 0x80U); } +/// Checks if the Value can be represented with SizeInBits bits. +template +bool canRepresentWithBits(I64T Value, unsigned SizeInBits) { + if (SizeInBits == 8) + return static_cast(Value) == Value; + else if (SizeInBits == 16) + return static_cast(Value) == Value; + else if (SizeInBits == 32) + return static_cast(Value) == Value; + else if (SizeInBits == 64) + return true; + else + assert(false && "Unhandled SizeInBits"); +} + +/// Calls encodeLEB128 with a template argument "ValueT" that matches the +/// specified SizeInBits. +template +unsigned +encodeLEB128WithDynamicSize(I64T Value, unsigned SizeInBits, bool IsSigned, + BufferOrStreamT &BufferOrStream, unsigned PadTo) { + if (SizeInBits == 8) + return encodeLEB128(static_cast(Value), IsSigned, BufferOrStream, + PadTo); + else if (SizeInBits == 16) + return encodeLEB128(static_cast(Value), IsSigned, BufferOrStream, + PadTo); + else if (SizeInBits == 32) + return encodeLEB128(static_cast(Value), IsSigned, BufferOrStream, + PadTo); + else if (SizeInBits == 64) + return encodeLEB128(static_cast(Value), IsSigned, BufferOrStream, + PadTo); + else + assert(false && "Unhandled SizeInBits"); +} + +/// Calls decodeLEB128 with a template argument "ValueT" that matches the +/// specified SizeInBits. +template +I64T decodeLEB128WithDynamicSize(const uint8_t *p, unsigned SizeInBits, + bool IsSigned, unsigned *n) { + if (SizeInBits == 8) + return decodeLEB128(p, IsSigned, n); + else if (SizeInBits == 16) + return decodeLEB128(p, IsSigned, n); + else if (SizeInBits == 32) + return decodeLEB128(p, IsSigned, n); + else if (SizeInBits == 64) + return decodeLEB128(p, IsSigned, n); + else + assert(false && "Unhandled SizeInBits"); +} + +// Test calling encodeSLEB128 using int8_t, int16_t, int32_t, int64_t. +TEST(LEB128Test, EncodeSLEB128MultiSize) { +#define EXPECT_SLEB128_EQ(EXPECTED, VALUE, PAD) \ + do { \ + std::string Expected(EXPECTED, sizeof(EXPECTED) - 1); \ + \ + for (unsigned SizeInBits = 8; SizeInBits <= 64; SizeInBits *= 2) { \ + /* Skip tests that aren't possible for the size. */ \ + if (!canRepresentWithBits(VALUE, SizeInBits)) \ + continue; \ + /* encodeSLEB128(uint[SizeInBits]_t, raw_ostream &, unsigned) */ \ + std::string Actual1; \ + raw_string_ostream Stream(Actual1); \ + encodeLEB128WithDynamicSize( \ + VALUE, SizeInBits, /* IsSigned */ true, Stream, PAD); \ + Stream.flush(); \ + EXPECT_EQ(Expected, Actual1); \ + \ + /* encodeSLEB128(uint[SizeInBits]_t, uint8_t *, unsigned) */ \ + uint8_t Buffer[32]; \ + unsigned Size = encodeLEB128WithDynamicSize( \ + VALUE, SizeInBits, /* IsSigned */ true, Buffer, PAD); \ + std::string Actual2(reinterpret_cast(Buffer), Size); \ + EXPECT_EQ(Expected, Actual2); \ + } /* End SizeInBits loop */ \ + } while (0) + + // Encode SLEB128 + EXPECT_SLEB128_EQ("\x00", 0, 0); + EXPECT_SLEB128_EQ("\x01", 1, 0); + EXPECT_SLEB128_EQ("\x7f", -1, 0); + EXPECT_SLEB128_EQ("\x3f", 63, 0); + EXPECT_SLEB128_EQ("\x41", -63, 0); + EXPECT_SLEB128_EQ("\x40", -64, 0); + EXPECT_SLEB128_EQ("\xbf\x7f", -65, 0); + EXPECT_SLEB128_EQ("\xc0\x00", 64, 0); + + // Encode SLEB128 with some extra padding bytes + EXPECT_SLEB128_EQ("\x80\x00", 0, 2); + EXPECT_SLEB128_EQ("\x80\x80\x00", 0, 3); + EXPECT_SLEB128_EQ("\xff\x80\x00", 0x7f, 3); + EXPECT_SLEB128_EQ("\xff\x80\x80\x00", 0x7f, 4); + EXPECT_SLEB128_EQ("\x80\x81\x00", 0x80, 3); + EXPECT_SLEB128_EQ("\x80\x81\x80\x00", 0x80, 4); + EXPECT_SLEB128_EQ("\xc0\x7f", -0x40, 2); + + EXPECT_SLEB128_EQ("\xc0\xff\x7f", -0x40, 3); + EXPECT_SLEB128_EQ("\x80\xff\x7f", -0x80, 3); + EXPECT_SLEB128_EQ("\x80\xff\xff\x7f", -0x80, 4); + +#undef EXPECT_SLEB128_EQ +} + +// Test calling encodeULEB128 using uint8_t, uint16_t, uint32_t, uint64_t. +TEST(LEB128Test, EncodeULEB128MultiSize) { +#define EXPECT_ULEB128_EQ(EXPECTED, VALUE, PAD) \ + do { \ + std::string Expected(EXPECTED, sizeof(EXPECTED) - 1); \ + \ + for (unsigned SizeInBits = 8; SizeInBits <= 64; SizeInBits *= 2) { \ + /* Skip tests that aren't possible for the size. */ \ + if (!canRepresentWithBits(VALUE, SizeInBits)) \ + continue; \ + /* encodeSLEB128(uint[SizeInBits]_t, raw_ostream &, unsigned) */ \ + std::string Actual1; \ + raw_string_ostream Stream(Actual1); \ + encodeLEB128WithDynamicSize( \ + VALUE, SizeInBits, /* IsSigned */ false, Stream, PAD); \ + Stream.flush(); \ + EXPECT_EQ(Expected, Actual1); \ + \ + /* encodeSLEB128(uint[SizeInBits]_t, uint8_t *, unsigned) */ \ + uint8_t Buffer[32]; \ + unsigned Size = encodeLEB128WithDynamicSize( \ + VALUE, SizeInBits, /* IsSigned */ false, Buffer, PAD); \ + std::string Actual2(reinterpret_cast(Buffer), Size); \ + EXPECT_EQ(Expected, Actual2); \ + } /* End SizeInBits loop */ \ + } while (0) + + // Encode ULEB128 + EXPECT_ULEB128_EQ("\x00", 0, 0); + EXPECT_ULEB128_EQ("\x01", 1, 0); + EXPECT_ULEB128_EQ("\x3f", 63, 0); + EXPECT_ULEB128_EQ("\x40", 64, 0); + EXPECT_ULEB128_EQ("\x7f", 0x7f, 0); + EXPECT_ULEB128_EQ("\x80\x01", 0x80, 0); + EXPECT_ULEB128_EQ("\x81\x01", 0x81, 0); + EXPECT_ULEB128_EQ("\x90\x01", 0x90, 0); + EXPECT_ULEB128_EQ("\xff\x01", 0xff, 0); + EXPECT_ULEB128_EQ("\x80\x02", 0x100, 0); + EXPECT_ULEB128_EQ("\x81\x02", 0x101, 0); + + // Encode ULEB128 with some extra padding bytes + EXPECT_ULEB128_EQ("\x80\x00", 0, 2); + EXPECT_ULEB128_EQ("\x80\x80\x00", 0, 3); + EXPECT_ULEB128_EQ("\xff\x00", 0x7f, 2); + EXPECT_ULEB128_EQ("\xff\x80\x00", 0x7f, 3); + EXPECT_ULEB128_EQ("\x80\x81\x00", 0x80, 3); + EXPECT_ULEB128_EQ("\x80\x81\x80\x00", 0x80, 4); + +#undef EXPECT_ULEB128_EQ +} + +// Test calling decodeSLEB128 using int8_t, int16_t, int32_t, int64_t. +TEST(LEB128Test, DecodeSLEB128MultiSize) { +#define EXPECT_DECODE_SLEB128_EQ(EXPECTED, VALUE) \ + do { \ + for (unsigned SizeInBits = 8; SizeInBits <= 64; SizeInBits *= 2) { \ + /* Skip tests that aren't possible for the size. */ \ + if (!canRepresentWithBits(EXPECTED, SizeInBits)) \ + continue; \ + unsigned ActualSize = 0; \ + int64_t Actual = decodeLEB128WithDynamicSize( \ + reinterpret_cast(VALUE), SizeInBits, \ + /* IsSigned */ true, &ActualSize); \ + EXPECT_EQ(sizeof(VALUE) - 1, ActualSize); \ + EXPECT_EQ(EXPECTED, Actual); \ + } /* End SizeInBits loop */ \ + } while (0) + + // Don't crash + EXPECT_EQ(0, decodeSLEB128(nullptr, nullptr, nullptr)); + + // Decode SLEB128 + EXPECT_DECODE_SLEB128_EQ(0L, "\x00"); + EXPECT_DECODE_SLEB128_EQ(1L, "\x01"); + EXPECT_DECODE_SLEB128_EQ(63L, "\x3f"); + EXPECT_DECODE_SLEB128_EQ(-64L, "\x40"); + EXPECT_DECODE_SLEB128_EQ(-63L, "\x41"); + EXPECT_DECODE_SLEB128_EQ(-1L, "\x7f"); + EXPECT_DECODE_SLEB128_EQ(128L, "\x80\x01"); + EXPECT_DECODE_SLEB128_EQ(129L, "\x81\x01"); + EXPECT_DECODE_SLEB128_EQ(-129L, "\xff\x7e"); + EXPECT_DECODE_SLEB128_EQ(-128L, "\x80\x7f"); + EXPECT_DECODE_SLEB128_EQ(-127L, "\x81\x7f"); + EXPECT_DECODE_SLEB128_EQ(64L, "\xc0\x00"); + EXPECT_DECODE_SLEB128_EQ(-12345L, "\xc7\x9f\x7f"); + + // Decode unnormalized SLEB128 with extra padding bytes. + EXPECT_DECODE_SLEB128_EQ(0L, "\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0L, "\x80\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0x7fL, "\xff\x00"); + EXPECT_DECODE_SLEB128_EQ(0x7fL, "\xff\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0x80L, "\x80\x81\x00"); + EXPECT_DECODE_SLEB128_EQ(0x80L, "\x80\x81\x80\x00"); + +#undef EXPECT_DECODE_SLEB128_EQ +} + +// Test calling decodeULEB128 using uint8_t, uint16_t, uint32_t, uint64_t. +TEST(LEB128Test, DecodeULEB128MultiSize) { +#define EXPECT_DECODE_ULEB128_EQ(EXPECTED, VALUE) \ + do { \ + for (unsigned SizeInBits = 8; SizeInBits <= 64; SizeInBits *= 2) { \ + /* Skip tests that aren't possible for the size. */ \ + if (!canRepresentWithBits(EXPECTED, SizeInBits)) \ + continue; \ + unsigned ActualSize = 0; \ + uint64_t Actual = decodeLEB128WithDynamicSize( \ + reinterpret_cast(VALUE), SizeInBits, \ + /* IsSigned */ false, &ActualSize); \ + EXPECT_EQ(sizeof(VALUE) - 1, ActualSize); \ + EXPECT_EQ(EXPECTED, Actual); \ + } /* End SizeInBits loop */ \ + } while (0) + + // Don't crash + EXPECT_EQ(0u, decodeULEB128(nullptr, nullptr, nullptr)); + + // Decode ULEB128 + EXPECT_DECODE_ULEB128_EQ(0u, "\x00"); + EXPECT_DECODE_ULEB128_EQ(1u, "\x01"); + EXPECT_DECODE_ULEB128_EQ(63u, "\x3f"); + EXPECT_DECODE_ULEB128_EQ(64u, "\x40"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\x7f"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x01"); + EXPECT_DECODE_ULEB128_EQ(0x81u, "\x81\x01"); + EXPECT_DECODE_ULEB128_EQ(0x90u, "\x90\x01"); + EXPECT_DECODE_ULEB128_EQ(0xffu, "\xff\x01"); + EXPECT_DECODE_ULEB128_EQ(0x100u, "\x80\x02"); + EXPECT_DECODE_ULEB128_EQ(0x101u, "\x81\x02"); + EXPECT_DECODE_ULEB128_EQ(4294975616ULL, "\x80\xc1\x80\x80\x10"); + + // Decode ULEB128 with extra padding bytes + EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x00"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x00"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x00"); + +#undef EXPECT_DECODE_ULEB128_EQ +} + } // anonymous namespace