Index: llvm/include/llvm/Support/LEB128.h =================================================================== --- llvm/include/llvm/Support/LEB128.h +++ llvm/include/llvm/Support/LEB128.h @@ -300,6 +300,34 @@ return encodeLEB128(Value, /* IsSigned */ false, p, PadTo); } +/// Utility function to encode an APInt as a SLEB128 value to an output stream. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeSLEB128(const APInt &Value, raw_ostream &OS, + unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ true, OS, PadTo); +} + +/// Utility function to encode an APInt as a SLEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeSLEB128(const APInt &Value, uint8_t *p, + unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ true, p, PadTo); +} + +/// Utility function to encode an APInt as a ULEB128 value to an output stream. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeULEB128(const APInt &Value, raw_ostream &OS, + unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ false, OS, PadTo); +} + +/// Utility function to encode an APInt as a ULEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeULEB128(const APInt &Value, uint8_t *p, + unsigned PadTo = 0) { + return encodeLEB128(Value, /* IsSigned */ false, p, PadTo); +} + /// Utility function to decode a ULEB128/SLEB128 value, where the signedness is /// defined by \p IsSigned. template Index: llvm/include/llvm/Support/LEB128CodecInfo.h =================================================================== --- llvm/include/llvm/Support/LEB128CodecInfo.h +++ llvm/include/llvm/Support/LEB128CodecInfo.h @@ -13,6 +13,8 @@ #ifndef LLVM_SUPPORT_LEB128CODECINFO_H #define LLVM_SUPPORT_LEB128CODECINFO_H +#include "llvm/ADT/APInt.h" +#include #include #include @@ -99,6 +101,54 @@ } }; +/// Specialization of LEB128CodecInfo for APInt. 
+template <> struct LEB128CodecInfo { + static uint8_t getLo7Bits(const APInt &Value) { + return Value.extractBitsAsZExtValue(getNumLoBits(Value), 0); + } + + static void lshr7InPlace(APInt &Value) { + Value.lshrInPlace(getNumLoBits(Value)); + } + + static void ashr7InPlace(APInt &Value) { + Value.ashrInPlace(getNumLoBits(Value)); + } + + static bool isAllOnesValue(const APInt &Value) { + return Value.isAllOnesValue(); + } + + static unsigned getMaxNumBits() { + // In practice, APInt has an unbounded number of bits. + return ~0U; + } + + static void insertLo7InPlace(APInt &Value, uint8_t Lo7, unsigned Shift) { + // Resize to have room to put the new bits. + Value = Value.zext(Shift + 7); + // Copy the bits into the new room. + Value.insertBits(Lo7, Shift, 7); + } + + static void negSExtInPlace(APInt &Value, unsigned NumUnextendedBits) { + // Don't need to do anything. APInts don't need any more than one sign + // bit, and we know that Value is a negative number as a precondition to + // this function, so any further sign extension would be redundant. + // Extending it any further would only be useful if we wanted to match a + // specific bit width, which is not required here. + } + +private: + /// If an APInt has fewer than 7 bits, trying to do operations on its 7 lowest + /// bits will fail due to constraints of the API of APInt. To counter this, do + /// operations on fewer than 7 bits when necessary, using this function to + /// count the number of bits to use. 
+ static unsigned getNumLoBits(const APInt &Value) { + return std::min(7U, Value.getBitWidth()); + } +}; + } // namespace llvm #endif // LLVM_SUPPORT_LEB128CODECINFO_H Index: llvm/unittests/Support/LEB128Test.cpp =================================================================== --- llvm/unittests/Support/LEB128Test.cpp +++ llvm/unittests/Support/LEB128Test.cpp @@ -15,6 +15,44 @@ namespace { +// To test encoding/decoding with APInts, the previously existing tests are +// reused but prepended with a "big prefix" that turns the number into something +// too big for an ordinary 64-bit integer. +const unsigned BigPrefixWordSizeInBits = 16; +const unsigned BigPrefixNumWords = 8; +const unsigned BigPrefixNumBytes = BigPrefixNumWords * 2; + +// Adds the "big prefix" to a string that represents an encoded test case. +std::string addBigPrefix(const std::string &Base) { + const char *BigPrefixWord = "\xff\x81"; + std::string BigPrefix; + for (unsigned i = 0; i < BigPrefixNumWords; i++) + BigPrefix += BigPrefixWord; + return BigPrefix + Base; +} + +// Takes a value used for the normal unit tests, and shifts in a big number before +// it to turn it into something that wouldn't fit in a 64-bit integer. +APInt makeBigValue(int64_t BaseValue) { + // This function deals with values that aren't yet LEB128 encoded. Therefore, + // the continuation bits are not necessary. The -2 here and the difference in + // the BigPrefixWord from addBigPrefix comes from these continuation bits + // being removed. + const unsigned BigPrefixWordSizeInBitsWithoutCont = + BigPrefixWordSizeInBits - 2; + const uint16_t BigPrefixWord = 0x00ff; + // Allocate enough for the prefix and the original value afterwards. + APInt Big(BigPrefixNumWords * BigPrefixWordSizeInBitsWithoutCont + 64, 0); + // Fill in the prefix bits. 
+ for (unsigned i = 0; i < BigPrefixNumWords; i++) + Big.insertBits(BigPrefixWord, i * BigPrefixWordSizeInBitsWithoutCont, + BigPrefixWordSizeInBitsWithoutCont); + // Put the original value in the top 64 bits, above the prefix. + Big.insertBits(BaseValue, + BigPrefixNumWords * BigPrefixWordSizeInBitsWithoutCont, 64); + return Big; +} + TEST(LEB128Test, EncodeSLEB128) { #define EXPECT_SLEB128_EQ(EXPECTED, VALUE, PAD) \ do { \ @@ -60,6 +98,67 @@ #undef EXPECT_SLEB128_EQ } +TEST(LEB128Test, EncodeSLEB128APInt) { +#define EXPECT_SLEB128_EQ(EXPECTED, VALUE, PAD) \ + do { \ + std::string ShortExpected(EXPECTED, sizeof(EXPECTED) - 1); \ + std::string BigExpected = addBigPrefix(ShortExpected); \ + APInt ShortValue = APInt(64, VALUE); \ + APInt BigValue = makeBigValue(VALUE); \ + unsigned ShortPad = PAD; \ + unsigned BigPad = PAD ? PAD + BigPrefixNumBytes : 0; \ + for (int Big = 0; Big <= 1; Big++) { \ + \ + /* Prefixing zero doesn't give a canonical representation */ \ + /* (unless it's padding), so skip. */ \ + if (Big && VALUE == 0 && PAD == 0) \ + continue; \ + const std::string &Expected = Big ? BigExpected : ShortExpected; \ + const APInt& Value = Big ? BigValue : ShortValue; \ + unsigned Pad = Big ? 
BigPad : ShortPad; \ + \ + /* encodeSLEB128(APInt, raw_ostream &, unsigned) */ \ + std::string Actual1; \ + raw_string_ostream Stream(Actual1); \ + encodeSLEB128(Value, Stream, Pad); \ + Stream.flush(); \ + EXPECT_EQ(Expected, Actual1); \ + \ + /* encodeSLEB128(APInt, uint8_t *, unsigned) */ \ + uint8_t Buffer[32]; \ + unsigned Size = encodeSLEB128(Value, Buffer, Pad); \ + std::string Actual2(reinterpret_cast(Buffer), Size); \ + EXPECT_EQ(Expected, Actual2); \ + \ + } /* end "Big" loop */ \ + } while (0) + + // Encode SLEB128 + EXPECT_SLEB128_EQ("\x00", 0, 0); + EXPECT_SLEB128_EQ("\x01", 1, 0); + EXPECT_SLEB128_EQ("\x7f", -1, 0); + EXPECT_SLEB128_EQ("\x3f", 63, 0); + EXPECT_SLEB128_EQ("\x41", -63, 0); + EXPECT_SLEB128_EQ("\x40", -64, 0); + EXPECT_SLEB128_EQ("\xbf\x7f", -65, 0); + EXPECT_SLEB128_EQ("\xc0\x00", 64, 0); + + // Encode SLEB128 with some extra padding bytes + EXPECT_SLEB128_EQ("\x80\x00", 0, 2); + EXPECT_SLEB128_EQ("\x80\x80\x00", 0, 3); + EXPECT_SLEB128_EQ("\xff\x80\x00", 0x7f, 3); + EXPECT_SLEB128_EQ("\xff\x80\x80\x00", 0x7f, 4); + EXPECT_SLEB128_EQ("\x80\x81\x00", 0x80, 3); + EXPECT_SLEB128_EQ("\x80\x81\x80\x00", 0x80, 4); + EXPECT_SLEB128_EQ("\xc0\x7f", -0x40, 2); + + EXPECT_SLEB128_EQ("\xc0\xff\x7f", -0x40, 3); + EXPECT_SLEB128_EQ("\x80\xff\x7f", -0x80, 3); + EXPECT_SLEB128_EQ("\x80\xff\xff\x7f", -0x80, 4); + +#undef EXPECT_SLEB128_EQ +} + TEST(LEB128Test, EncodeULEB128) { #define EXPECT_ULEB128_EQ(EXPECTED, VALUE, PAD) \ do { \ @@ -103,6 +202,65 @@ #undef EXPECT_ULEB128_EQ } +TEST(LEB128Test, EncodeULEB128APInt) { +#define EXPECT_ULEB128_EQ(EXPECTED, VALUE, PAD) \ + do { \ + std::string ShortExpected(EXPECTED, sizeof(EXPECTED) - 1); \ + std::string BigExpected = addBigPrefix(ShortExpected); \ + APInt ShortValue = APInt(64, VALUE); \ + APInt BigValue = makeBigValue(VALUE); \ + unsigned ShortPad = PAD; \ + unsigned BigPad = PAD ? 
PAD + BigPrefixNumBytes : 0; \ + for (int Big = 0; Big <= 1; Big++) { \ + \ + /* Prefixing zero doesn't give a canonical representation */ \ + /* (unless it's padding), so skip. */ \ + if (Big && VALUE == 0 && PAD == 0) \ + continue; \ + const std::string &Expected = Big ? BigExpected : ShortExpected; \ + const APInt& Value = Big ? BigValue : ShortValue; \ + unsigned Pad = Big ? BigPad : ShortPad; \ + \ + /* encodeULEB128(APInt, raw_ostream &, unsigned) */ \ + std::string Actual1; \ + raw_string_ostream Stream(Actual1); \ + encodeULEB128(Value, Stream, Pad); \ + Stream.flush(); \ + EXPECT_EQ(Expected, Actual1); \ + \ + /* encodeULEB128(APInt, uint8_t *, unsigned) */ \ + uint8_t Buffer[32]; \ + unsigned Size = encodeULEB128(Value, Buffer, Pad); \ + std::string Actual2(reinterpret_cast(Buffer), Size); \ + EXPECT_EQ(Expected, Actual2); \ + \ + } /* end "Big" loop */ \ + } while (0) + + // Encode ULEB128 + EXPECT_ULEB128_EQ("\x00", 0, 0); + EXPECT_ULEB128_EQ("\x01", 1, 0); + EXPECT_ULEB128_EQ("\x3f", 63, 0); + EXPECT_ULEB128_EQ("\x40", 64, 0); + EXPECT_ULEB128_EQ("\x7f", 0x7f, 0); + EXPECT_ULEB128_EQ("\x80\x01", 0x80, 0); + EXPECT_ULEB128_EQ("\x81\x01", 0x81, 0); + EXPECT_ULEB128_EQ("\x90\x01", 0x90, 0); + EXPECT_ULEB128_EQ("\xff\x01", 0xff, 0); + EXPECT_ULEB128_EQ("\x80\x02", 0x100, 0); + EXPECT_ULEB128_EQ("\x81\x02", 0x101, 0); + + // Encode ULEB128 with some extra padding bytes + EXPECT_ULEB128_EQ("\x80\x00", 0, 2); + EXPECT_ULEB128_EQ("\x80\x80\x00", 0, 3); + EXPECT_ULEB128_EQ("\xff\x00", 0x7f, 2); + EXPECT_ULEB128_EQ("\xff\x80\x00", 0x7f, 3); + EXPECT_ULEB128_EQ("\x80\x81\x00", 0x80, 3); + EXPECT_ULEB128_EQ("\x80\x81\x80\x00", 0x80, 4); + +#undef EXPECT_ULEB128_EQ +} + TEST(LEB128Test, DecodeULEB128) { #define EXPECT_DECODE_ULEB128_EQ(EXPECTED, VALUE) \ do { \ @@ -141,6 +299,57 @@ #undef EXPECT_DECODE_ULEB128_EQ } +TEST(LEB128Test, DecodeULEB128APInt) { +#define EXPECT_DECODE_ULEB128_EQ(EXPECTED, VALUE) \ + do { \ + std::string ShortValue(VALUE, sizeof(VALUE) - 
1); \ + std::string BigValue = addBigPrefix(ShortValue); \ + APInt ShortExpected = APInt(64, EXPECTED); \ + APInt BigExpected = makeBigValue(EXPECTED); \ + for (int Big = 0; Big <= 1; Big++) { \ + \ + const std::string &Value = Big ? BigValue : ShortValue; \ + const APInt& Expected = Big ? BigExpected : ShortExpected; \ + unsigned ExpectedSize = sizeof(VALUE) - 1 + (Big ? BigPrefixNumBytes : 0); \ + \ + unsigned ActualSize = 0; \ + APInt Actual = \ + decodeLEB128(reinterpret_cast(Value.data()), \ + /* IsSigned */ false, &ActualSize); \ + EXPECT_EQ(ExpectedSize, ActualSize); \ + EXPECT_EQ(Expected, Actual.zextOrSelf(Expected.getBitWidth())); \ + \ + } /* end "Big" loop */ \ + } while (0) + + // Don't crash on null inputs + EXPECT_EQ(0u, decodeULEB128(nullptr, nullptr, nullptr)); + + // Decode ULEB128 + EXPECT_DECODE_ULEB128_EQ(0u, "\x00"); + EXPECT_DECODE_ULEB128_EQ(1u, "\x01"); + EXPECT_DECODE_ULEB128_EQ(63u, "\x3f"); + EXPECT_DECODE_ULEB128_EQ(64u, "\x40"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\x7f"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x01"); + EXPECT_DECODE_ULEB128_EQ(0x81u, "\x81\x01"); + EXPECT_DECODE_ULEB128_EQ(0x90u, "\x90\x01"); + EXPECT_DECODE_ULEB128_EQ(0xffu, "\xff\x01"); + EXPECT_DECODE_ULEB128_EQ(0x100u, "\x80\x02"); + EXPECT_DECODE_ULEB128_EQ(0x101u, "\x81\x02"); + EXPECT_DECODE_ULEB128_EQ(4294975616ULL, "\x80\xc1\x80\x80\x10"); + + // Decode ULEB128 with extra padding bytes + EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x00"); + EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x80\x00"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x00"); + EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x00"); + +#undef EXPECT_DECODE_ULEB128_EQ +} + TEST(LEB128Test, DecodeSLEB128) { #define EXPECT_DECODE_SLEB128_EQ(EXPECTED, VALUE) \ do { \ @@ -180,6 +389,58 @@ #undef EXPECT_DECODE_SLEB128_EQ } +TEST(LEB128Test, DecodeSLEB128APInt) { +#define EXPECT_DECODE_SLEB128_EQ(EXPECTED, VALUE) \ + do { \ + std::string 
ShortValue(VALUE, sizeof(VALUE) - 1); \ + std::string BigValue = addBigPrefix(ShortValue); \ + APInt ShortExpected = APInt(64, EXPECTED); \ + APInt BigExpected = makeBigValue(EXPECTED); \ + for (int Big = 0; Big <= 1; Big++) { \ + \ + const std::string &Value = Big ? BigValue : ShortValue; \ + const APInt& Expected = Big ? BigExpected : ShortExpected; \ + unsigned ExpectedSize = sizeof(VALUE) - 1 + (Big ? BigPrefixNumBytes : 0); \ + \ + unsigned ActualSize = 0; \ + APInt Actual = \ + decodeLEB128(reinterpret_cast(Value.data()), \ + /* IsSigned */ true, &ActualSize); \ + EXPECT_EQ(ExpectedSize, ActualSize); \ + EXPECT_EQ(Expected, Actual.sextOrSelf(Expected.getBitWidth())); \ + \ + } /* end "Big" loop */ \ + } while (0) + + // Don't crash on null inputs + EXPECT_EQ(0, decodeSLEB128(nullptr, nullptr, nullptr)); + + // Decode SLEB128 + EXPECT_DECODE_SLEB128_EQ(0L, "\x00"); + EXPECT_DECODE_SLEB128_EQ(1L, "\x01"); + EXPECT_DECODE_SLEB128_EQ(63L, "\x3f"); + EXPECT_DECODE_SLEB128_EQ(-64L, "\x40"); + EXPECT_DECODE_SLEB128_EQ(-63L, "\x41"); + EXPECT_DECODE_SLEB128_EQ(-1L, "\x7f"); + EXPECT_DECODE_SLEB128_EQ(128L, "\x80\x01"); + EXPECT_DECODE_SLEB128_EQ(129L, "\x81\x01"); + EXPECT_DECODE_SLEB128_EQ(-129L, "\xff\x7e"); + EXPECT_DECODE_SLEB128_EQ(-128L, "\x80\x7f"); + EXPECT_DECODE_SLEB128_EQ(-127L, "\x81\x7f"); + EXPECT_DECODE_SLEB128_EQ(64L, "\xc0\x00"); + EXPECT_DECODE_SLEB128_EQ(-12345L, "\xc7\x9f\x7f"); + + // Decode unnormalized SLEB128 with extra padding bytes. + EXPECT_DECODE_SLEB128_EQ(0L, "\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0L, "\x80\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0x7fL, "\xff\x00"); + EXPECT_DECODE_SLEB128_EQ(0x7fL, "\xff\x80\x00"); + EXPECT_DECODE_SLEB128_EQ(0x80L, "\x80\x81\x00"); + EXPECT_DECODE_SLEB128_EQ(0x80L, "\x80\x81\x80\x00"); + +#undef EXPECT_DECODE_SLEB128_EQ +} + TEST(LEB128Test, SLEB128Size) { // Positive Value Testing Plan: // (1) 128 ^ n - 1 ........ need (n+1) bytes