Skip to content

Commit 20f9cd8

Browse files
committedAug 22, 2018
[BinaryFormat] Add MessagePack reader/writer
Add support for reading and writing MessagePack, a binary object serialization format which aims to be more compact than text formats like JSON or YAML. The specification can be found at https://github.com/msgpack/msgpack/blob/master/spec.md Will be used for encoding metadata in AMDGPU code objects. Differential Revision: https://reviews.llvm.org/D44429 llvm-svn: 340457
1 parent f3c39a7 commit 20f9cd8

File tree

10 files changed

+2361
-0
lines changed

10 files changed

+2361
-0
lines changed
 
+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// Macros for running through MessagePack enumerators.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#if !( \
16+
defined HANDLE_MP_FIRST_BYTE || defined HANDLE_MP_FIX_BITS || \
17+
defined HANDLE_MP_FIX_BITS_MASK || defined HANDLE_MP_FIX_MAX || \
18+
defined HANDLE_MP_FIX_LEN || defined HANDLE_MP_FIX_MIN)
19+
#error "Missing macro definition of HANDLE_MP*"
20+
#endif
21+
22+
#ifndef HANDLE_MP_FIRST_BYTE
23+
#define HANDLE_MP_FIRST_BYTE(ID, NAME)
24+
#endif
25+
26+
#ifndef HANDLE_MP_FIX_BITS
27+
#define HANDLE_MP_FIX_BITS(ID, NAME)
28+
#endif
29+
30+
#ifndef HANDLE_MP_FIX_BITS_MASK
31+
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME)
32+
#endif
33+
34+
#ifndef HANDLE_MP_FIX_MAX
35+
#define HANDLE_MP_FIX_MAX(ID, NAME)
36+
#endif
37+
38+
#ifndef HANDLE_MP_FIX_LEN
39+
#define HANDLE_MP_FIX_LEN(ID, NAME)
40+
#endif
41+
42+
#ifndef HANDLE_MP_FIX_MIN
43+
#define HANDLE_MP_FIX_MIN(ID, NAME)
44+
#endif
45+
46+
HANDLE_MP_FIRST_BYTE(0xc0, Nil)
47+
HANDLE_MP_FIRST_BYTE(0xc2, False)
48+
HANDLE_MP_FIRST_BYTE(0xc3, True)
49+
HANDLE_MP_FIRST_BYTE(0xc4, Bin8)
50+
HANDLE_MP_FIRST_BYTE(0xc5, Bin16)
51+
HANDLE_MP_FIRST_BYTE(0xc6, Bin32)
52+
HANDLE_MP_FIRST_BYTE(0xc7, Ext8)
53+
HANDLE_MP_FIRST_BYTE(0xc8, Ext16)
54+
HANDLE_MP_FIRST_BYTE(0xc9, Ext32)
55+
HANDLE_MP_FIRST_BYTE(0xca, Float32)
56+
HANDLE_MP_FIRST_BYTE(0xcb, Float64)
57+
HANDLE_MP_FIRST_BYTE(0xcc, UInt8)
58+
HANDLE_MP_FIRST_BYTE(0xcd, UInt16)
59+
HANDLE_MP_FIRST_BYTE(0xce, UInt32)
60+
HANDLE_MP_FIRST_BYTE(0xcf, UInt64)
61+
HANDLE_MP_FIRST_BYTE(0xd0, Int8)
62+
HANDLE_MP_FIRST_BYTE(0xd1, Int16)
63+
HANDLE_MP_FIRST_BYTE(0xd2, Int32)
64+
HANDLE_MP_FIRST_BYTE(0xd3, Int64)
65+
HANDLE_MP_FIRST_BYTE(0xd4, FixExt1)
66+
HANDLE_MP_FIRST_BYTE(0xd5, FixExt2)
67+
HANDLE_MP_FIRST_BYTE(0xd6, FixExt4)
68+
HANDLE_MP_FIRST_BYTE(0xd7, FixExt8)
69+
HANDLE_MP_FIRST_BYTE(0xd8, FixExt16)
70+
HANDLE_MP_FIRST_BYTE(0xd9, Str8)
71+
HANDLE_MP_FIRST_BYTE(0xda, Str16)
72+
HANDLE_MP_FIRST_BYTE(0xdb, Str32)
73+
HANDLE_MP_FIRST_BYTE(0xdc, Array16)
74+
HANDLE_MP_FIRST_BYTE(0xdd, Array32)
75+
HANDLE_MP_FIRST_BYTE(0xde, Map16)
76+
HANDLE_MP_FIRST_BYTE(0xdf, Map32)
77+
78+
HANDLE_MP_FIX_BITS(0x00, PositiveInt)
79+
HANDLE_MP_FIX_BITS(0x80, Map)
80+
HANDLE_MP_FIX_BITS(0x90, Array)
81+
HANDLE_MP_FIX_BITS(0xa0, String)
82+
HANDLE_MP_FIX_BITS(0xe0, NegativeInt)
83+
84+
HANDLE_MP_FIX_BITS_MASK(0x80, PositiveInt)
85+
HANDLE_MP_FIX_BITS_MASK(0xf0, Map)
86+
HANDLE_MP_FIX_BITS_MASK(0xf0, Array)
87+
HANDLE_MP_FIX_BITS_MASK(0xe0, String)
88+
HANDLE_MP_FIX_BITS_MASK(0xe0, NegativeInt)
89+
90+
HANDLE_MP_FIX_MAX(0x7f, PositiveInt)
91+
HANDLE_MP_FIX_MAX(0x0f, Map)
92+
HANDLE_MP_FIX_MAX(0x0f, Array)
93+
HANDLE_MP_FIX_MAX(0x1f, String)
94+
95+
HANDLE_MP_FIX_LEN(0x01, Ext1)
96+
HANDLE_MP_FIX_LEN(0x02, Ext2)
97+
HANDLE_MP_FIX_LEN(0x04, Ext4)
98+
HANDLE_MP_FIX_LEN(0x08, Ext8)
99+
HANDLE_MP_FIX_LEN(0x10, Ext16)
100+
101+
HANDLE_MP_FIX_MIN(-0x20, NegativeInt)
102+
103+
#undef HANDLE_MP_FIRST_BYTE
104+
#undef HANDLE_MP_FIX_BITS
105+
#undef HANDLE_MP_FIX_BITS_MASK
106+
#undef HANDLE_MP_FIX_MAX
107+
#undef HANDLE_MP_FIX_LEN
108+
#undef HANDLE_MP_FIX_MIN
+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file contains constants used for implementing MessagePack support.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_BINARYFORMAT_MSGPACK_H
16+
#define LLVM_BINARYFORMAT_MSGPACK_H
17+
18+
#include "llvm/Support/DataTypes.h"
19+
#include "llvm/Support/Endian.h"
20+
21+
namespace llvm {
22+
namespace msgpack {
23+
24+
/// The endianness of all multi-byte encoded values in MessagePack.
25+
constexpr support::endianness Endianness = support::big;
26+
27+
/// The first byte identifiers of MessagePack object formats.
28+
namespace FirstByte {
29+
#define HANDLE_MP_FIRST_BYTE(ID, NAME) constexpr uint8_t NAME = ID;
30+
#include "llvm/BinaryFormat/MsgPack.def"
31+
}
32+
33+
/// Most significant bits used to identify "Fix" variants in MessagePack.
34+
///
35+
/// For example, FixStr objects encode their size in the five least significant
36+
/// bits of their first byte, which is identified by the bit pattern "101" in
37+
/// the three most significant bits. So FixBits::String contains 0b10100000.
38+
///
39+
/// A corresponding mask of the bit pattern is found in \c FixBitsMask.
40+
namespace FixBits {
41+
#define HANDLE_MP_FIX_BITS(ID, NAME) constexpr uint8_t NAME = ID;
42+
#include "llvm/BinaryFormat/MsgPack.def"
43+
}
44+
45+
/// Mask of bits used to identify "Fix" variants in MessagePack.
46+
///
47+
/// For example, FixStr objects encode their size in the five least significant
48+
/// bits of their first byte, which is identified by the bit pattern "101" in
49+
/// the three most significant bits. So FixBitsMask::String contains
50+
/// 0b11100000.
51+
///
52+
/// The corresponding bit pattern to mask for is found in FixBits.
53+
namespace FixBitsMask {
54+
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) constexpr uint8_t NAME = ID;
55+
#include "llvm/BinaryFormat/MsgPack.def"
56+
}
57+
58+
/// The maximum value or size encodable in "Fix" variants of formats.
59+
///
60+
/// For example, FixStr objects encode their size in the five least significant
61+
/// bits of their first byte, so the largest encodable size is 0b00011111.
62+
namespace FixMax {
63+
#define HANDLE_MP_FIX_MAX(ID, NAME) constexpr uint8_t NAME = ID;
64+
#include "llvm/BinaryFormat/MsgPack.def"
65+
}
66+
67+
/// The exact size encodable in "Fix" variants of formats.
68+
///
69+
/// The only objects for which an exact size makes sense are of Extension type.
70+
///
71+
/// For example, FixExt4 stores an extension type containing exactly four bytes.
72+
namespace FixLen {
73+
#define HANDLE_MP_FIX_LEN(ID, NAME) constexpr uint8_t NAME = ID;
74+
#include "llvm/BinaryFormat/MsgPack.def"
75+
}
76+
77+
/// The minimum value or size encodable in "Fix" variants of formats.
78+
///
79+
/// The only object for which a minimum makes sense is a negative FixNum.
80+
///
81+
/// Negative FixNum objects encode their signed integer value in one byte, but
82+
/// they must have the pattern "111" as their three most significant bits. This
83+
/// means all values are negative, and the smallest representable value is
84+
/// 0b11100000.
85+
namespace FixMin {
86+
#define HANDLE_MP_FIX_MIN(ID, NAME) constexpr int8_t NAME = ID;
87+
#include "llvm/BinaryFormat/MsgPack.def"
88+
}
89+
90+
} // end namespace msgpack
91+
} // end namespace llvm
92+
93+
#endif // LLVM_BINARYFORMAT_MSGPACK_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This is a MessagePack reader.
12+
///
13+
/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
14+
/// standard.
15+
///
16+
/// Typical usage:
17+
/// \code
18+
/// StringRef input = GetInput();
19+
/// msgpack::Reader MPReader(input);
20+
/// msgpack::Object Obj;
21+
///
22+
/// while (MPReader.read(Obj)) {
23+
/// switch (Obj.Kind) {
24+
/// case msgpack::Type::Int:
25+
/// // Use Obj.Int
26+
/// break;
27+
/// // ...
28+
/// }
29+
/// }
30+
/// \endcode
31+
///
32+
//===----------------------------------------------------------------------===//
33+
34+
#ifndef LLVM_SUPPORT_MSGPACKREADER_H
35+
#define LLVM_SUPPORT_MSGPACKREADER_H
36+
37+
#include "llvm/Support/MemoryBuffer.h"
38+
#include "llvm/Support/raw_ostream.h"
39+
#include <cstdint>
40+
41+
namespace llvm {
42+
namespace msgpack {
43+
44+
/// MessagePack types as defined in the standard, with the exception of Integer
45+
/// being divided into a signed Int and unsigned UInt variant in order to map
46+
/// directly to C++ types.
47+
///
48+
/// The types map onto corresponding union members of the \c Object struct.
49+
enum class Type : uint8_t {
50+
Int,
51+
UInt,
52+
Nil,
53+
Boolean,
54+
Float,
55+
String,
56+
Binary,
57+
Array,
58+
Map,
59+
Extension,
60+
};
61+
62+
/// Extension types are composed of a user-defined type ID and an uninterpreted
63+
/// sequence of bytes.
64+
struct ExtensionType {
65+
/// User-defined extension type.
66+
int8_t Type;
67+
/// Raw bytes of the extension object.
68+
StringRef Bytes;
69+
};
70+
71+
/// MessagePack object, represented as a tagged union of C++ types.
72+
///
73+
/// All types except \c Type::Nil (which has only one value, and so is
74+
/// completely represented by the \c Kind itself) map to exactly one union
75+
/// member.
76+
struct Object {
77+
Type Kind;
78+
union {
79+
/// Value for \c Type::Int.
80+
int64_t Int;
81+
/// Value for \c Type::UInt.
82+
uint64_t UInt;
83+
/// Value for \c Type::Boolean.
84+
bool Bool;
85+
/// Value for \c Type::Float.
86+
double Float;
87+
/// Value for \c Type::String and \c Type::Binary.
88+
StringRef Raw;
89+
/// Value for \c Type::Array and \c Type::Map.
90+
size_t Length;
91+
/// Value for \c Type::Extension.
92+
ExtensionType Extension;
93+
};
94+
95+
Object() : Kind(Type::Int), Int(0) {}
96+
};
97+
98+
/// Reads MessagePack objects from memory, one at a time.
99+
class Reader {
100+
public:
101+
/// Construct a reader, keeping a reference to the \p InputBuffer.
102+
Reader(MemoryBufferRef InputBuffer);
103+
/// Construct a reader, keeping a reference to the \p Input.
104+
Reader(StringRef Input);
105+
106+
Reader(const Reader &) = delete;
107+
Reader &operator=(const Reader &) = delete;
108+
109+
/// Read one object from the input buffer, advancing past it.
110+
///
111+
/// The \p Obj is updated with the kind of the object read, and the
112+
/// corresponding union member is updated.
113+
///
114+
/// For the collection objects (Array and Map), only the length is read, and
115+
/// the caller must make an additional \c N calls (in the case of Array) or
116+
/// \c N*2 calls (in the case of Map) to \c read to retrieve the collection
117+
/// elements.
118+
///
119+
/// \param [out] Obj filled with next object on success.
120+
///
121+
/// \returns true when object successfully read, false when at end of
122+
/// input (and so \p Obj was not updated), otherwise an error.
123+
Expected<bool> read(Object &Obj);
124+
125+
private:
126+
MemoryBufferRef InputBuffer;
127+
StringRef::iterator Current;
128+
StringRef::iterator End;
129+
130+
size_t remainingSpace() {
131+
// The rest of the code maintains the invariant that End >= Current, so
132+
// that this cast is always defined behavior.
133+
return static_cast<size_t>(End - Current);
134+
}
135+
136+
template <class T> Expected<bool> readRaw(Object &Obj);
137+
template <class T> Expected<bool> readInt(Object &Obj);
138+
template <class T> Expected<bool> readUInt(Object &Obj);
139+
template <class T> Expected<bool> readLength(Object &Obj);
140+
template <class T> Expected<bool> readExt(Object &Obj);
141+
Expected<bool> createRaw(Object &Obj, uint32_t Size);
142+
Expected<bool> createExt(Object &Obj, uint32_t Size);
143+
};
144+
145+
} // end namespace msgpack
146+
} // end namespace llvm
147+
148+
#endif // LLVM_SUPPORT_MSGPACKREADER_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//===- MsgPackWriter.h - Simple MsgPack writer ------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file contains a MessagePack writer.
12+
///
13+
/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
14+
/// specification.
15+
///
16+
/// Typical usage:
17+
/// \code
18+
/// raw_ostream &output = GetOutputStream();
19+
/// msgpack::Writer MPWriter(output);
20+
/// MPWriter.writeNil();
21+
/// MPWriter.write(false);
22+
/// MPWriter.write("string");
23+
/// // ...
24+
/// \endcode
25+
///
26+
///
27+
//===----------------------------------------------------------------------===//
28+
29+
#ifndef LLVM_SUPPORT_MSGPACKPARSER_H
30+
#define LLVM_SUPPORT_MSGPACKPARSER_H
31+
32+
#include "llvm/BinaryFormat/MsgPack.h"
33+
#include "llvm/Support/EndianStream.h"
34+
#include "llvm/Support/MemoryBuffer.h"
35+
#include "llvm/Support/raw_ostream.h"
36+
37+
namespace llvm {
38+
namespace msgpack {
39+
40+
/// Writes MessagePack objects to an output stream, one at a time.
41+
class Writer {
42+
public:
43+
/// Construct a writer, optionally enabling "Compatibility Mode" as defined
44+
/// in the MessagePack specification.
45+
///
46+
/// When in \p Compatible mode, the writer will write \c Str16 formats
47+
/// instead of \c Str8 formats, and will refuse to write any \c Bin formats.
48+
///
49+
/// \param OS stream to output MessagePack objects to.
50+
/// \param Compatible when set, write in "Compatibility Mode".
51+
Writer(raw_ostream &OS, bool Compatible = false);
52+
53+
Writer(const Writer &) = delete;
54+
Writer &operator=(const Writer &) = delete;
55+
56+
/// Write a \em Nil to the output stream.
57+
///
58+
/// The output will be the \em nil format.
59+
void writeNil();
60+
61+
/// Write a \em Boolean to the output stream.
62+
///
63+
/// The output will be a \em bool format.
64+
void write(bool b);
65+
66+
/// Write a signed integer to the output stream.
67+
///
68+
/// The output will be in the smallest possible \em int format.
69+
///
70+
/// The format chosen may be for an unsigned integer.
71+
void write(int64_t i);
72+
73+
/// Write an unsigned integer to the output stream.
74+
///
75+
/// The output will be in the smallest possible \em int format.
76+
void write(uint64_t u);
77+
78+
/// Write a floating point number to the output stream.
79+
///
80+
/// The output will be in the smallest possible \em float format.
81+
void write(double d);
82+
83+
/// Write a string to the output stream.
84+
///
85+
/// The output will be in the smallest possible \em str format.
86+
void write(StringRef s);
87+
88+
/// Write a memory buffer to the output stream.
89+
///
90+
/// The output will be in the smallest possible \em bin format.
91+
///
92+
/// \warning Do not use this overload if in \c Compatible mode.
93+
void write(MemoryBufferRef Buffer);
94+
95+
/// Write the header for an \em Array of the given size.
96+
///
97+
/// The output will be in the smallest possible \em array format.
98+
///
99+
/// The header contains an identifier for the \em array format used, as well
100+
/// as an encoding of the size of the array.
101+
///
102+
/// N.B. The caller must subsequently call \c write an additional \p Size
103+
/// times to complete the array.
104+
void writeArraySize(uint32_t Size);
105+
106+
/// Write the header for a \em Map of the given size.
107+
///
108+
/// The output will be in the smallest possible \em map format.
109+
///
110+
/// The header contains an identifier for the \em map format used, as well
111+
/// as an encoding of the size of the map.
112+
///
113+
/// N.B. The caller must subsequently call \c write an additional \c Size*2
114+
/// times to complete the map. Each even numbered call to \c write defines a
115+
/// new key, and each odd numbered call defines the previous key's value.
116+
void writeMapSize(uint32_t Size);
117+
118+
/// Write a typed memory buffer (an extension type) to the output stream.
119+
///
120+
/// The output will be in the smallest possible \em ext format.
121+
void writeExt(int8_t Type, MemoryBufferRef Buffer);
122+
123+
private:
124+
support::endian::Writer EW;
125+
bool Compatible;
126+
};
127+
128+
} // end namespace msgpack
129+
} // end namespace llvm
130+
131+
#endif // LLVM_SUPPORT_MSGPACKPARSER_H

‎llvm/lib/BinaryFormat/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
add_llvm_library(LLVMBinaryFormat
22
Dwarf.cpp
33
Magic.cpp
4+
MsgPackReader.cpp
5+
MsgPackWriter.cpp
46
Wasm.cpp
57

68
ADDITIONAL_HEADER_DIRS
+255
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
//===- MsgPackReader.cpp - Simple MsgPack reader ----------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file implements a MessagePack reader.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "llvm/BinaryFormat/MsgPackReader.h"
16+
#include "llvm/BinaryFormat/MsgPack.h"
17+
#include "llvm/Support/Endian.h"
18+
19+
using namespace llvm;
20+
using namespace llvm::support;
21+
using namespace msgpack;
22+
23+
/// Build a reader over \p InputBuffer. The read cursor starts at the first
/// byte of the buffer and the end marker is taken from the buffer's end.
Reader::Reader(MemoryBufferRef InputBuffer)
    : InputBuffer(InputBuffer),
      Current(InputBuffer.getBufferStart()),
      End(InputBuffer.getBufferEnd()) {
}
26+
27+
/// Build a reader over \p Input by wrapping it in a buffer reference named
/// "MsgPack" and delegating to the buffer-based constructor.
Reader::Reader(StringRef Input) : Reader(MemoryBufferRef(Input, "MsgPack")) {}
28+
29+
/// Decode the next object from the input into \p Obj.
///
/// Yields false (leaving \p Obj untouched) when the cursor is already at the
/// end of the input, true once an object has been decoded, and an error when
/// the leading byte is not a known format or the payload it announces does not
/// fit in the remaining input. Note that \c Obj.Kind is assigned before the
/// payload is validated.
Expected<bool> Reader::read(Object &Obj) {
  if (Current == End)
    return false;

  // Consume the format byte; everything below operates on the payload.
  const uint8_t Lead = static_cast<uint8_t>(*Current++);

  // Formats identified by one exact leading byte.
  switch (Lead) {
  // Values carried entirely by the leading byte.
  case FirstByte::Nil:
    Obj.Kind = Type::Nil;
    return true;
  case FirstByte::False:
    Obj.Kind = Type::Boolean;
    Obj.Bool = false;
    return true;
  case FirstByte::True:
    Obj.Kind = Type::Boolean;
    Obj.Bool = true;
    return true;

  // Signed integers of fixed width.
  case FirstByte::Int8:
    Obj.Kind = Type::Int;
    return readInt<int8_t>(Obj);
  case FirstByte::Int16:
    Obj.Kind = Type::Int;
    return readInt<int16_t>(Obj);
  case FirstByte::Int32:
    Obj.Kind = Type::Int;
    return readInt<int32_t>(Obj);
  case FirstByte::Int64:
    Obj.Kind = Type::Int;
    return readInt<int64_t>(Obj);

  // Unsigned integers of fixed width.
  case FirstByte::UInt8:
    Obj.Kind = Type::UInt;
    return readUInt<uint8_t>(Obj);
  case FirstByte::UInt16:
    Obj.Kind = Type::UInt;
    return readUInt<uint16_t>(Obj);
  case FirstByte::UInt32:
    Obj.Kind = Type::UInt;
    return readUInt<uint32_t>(Obj);
  case FirstByte::UInt64:
    Obj.Kind = Type::UInt;
    return readUInt<uint64_t>(Obj);

  // Floating point values; both widths are stored in Obj.Float (a double).
  case FirstByte::Float32:
    Obj.Kind = Type::Float;
    if (remainingSpace() < sizeof(float))
      return make_error<StringError>(
          "Invalid Float32 with insufficient payload",
          std::make_error_code(std::errc::invalid_argument));
    Obj.Float = BitsToFloat(endian::read<uint32_t, Endianness>(Current));
    Current += sizeof(float);
    return true;
  case FirstByte::Float64:
    Obj.Kind = Type::Float;
    if (remainingSpace() < sizeof(double))
      return make_error<StringError>(
          "Invalid Float64 with insufficient payload",
          std::make_error_code(std::errc::invalid_argument));
    Obj.Float = BitsToDouble(endian::read<uint64_t, Endianness>(Current));
    Current += sizeof(double);
    return true;

  // Length-prefixed strings.
  case FirstByte::Str8:
    Obj.Kind = Type::String;
    return readRaw<uint8_t>(Obj);
  case FirstByte::Str16:
    Obj.Kind = Type::String;
    return readRaw<uint16_t>(Obj);
  case FirstByte::Str32:
    Obj.Kind = Type::String;
    return readRaw<uint32_t>(Obj);

  // Length-prefixed binary blobs.
  case FirstByte::Bin8:
    Obj.Kind = Type::Binary;
    return readRaw<uint8_t>(Obj);
  case FirstByte::Bin16:
    Obj.Kind = Type::Binary;
    return readRaw<uint16_t>(Obj);
  case FirstByte::Bin32:
    Obj.Kind = Type::Binary;
    return readRaw<uint32_t>(Obj);

  // Collection headers: only the element count is read here.
  case FirstByte::Array16:
    Obj.Kind = Type::Array;
    return readLength<uint16_t>(Obj);
  case FirstByte::Array32:
    Obj.Kind = Type::Array;
    return readLength<uint32_t>(Obj);
  case FirstByte::Map16:
    Obj.Kind = Type::Map;
    return readLength<uint16_t>(Obj);
  case FirstByte::Map32:
    Obj.Kind = Type::Map;
    return readLength<uint32_t>(Obj);

  // Extensions whose payload size is implied by the format byte.
  case FirstByte::FixExt1:
    Obj.Kind = Type::Extension;
    return createExt(Obj, FixLen::Ext1);
  case FirstByte::FixExt2:
    Obj.Kind = Type::Extension;
    return createExt(Obj, FixLen::Ext2);
  case FirstByte::FixExt4:
    Obj.Kind = Type::Extension;
    return createExt(Obj, FixLen::Ext4);
  case FirstByte::FixExt8:
    Obj.Kind = Type::Extension;
    return createExt(Obj, FixLen::Ext8);
  case FirstByte::FixExt16:
    Obj.Kind = Type::Extension;
    return createExt(Obj, FixLen::Ext16);

  // Extensions with an explicit size prefix.
  case FirstByte::Ext8:
    Obj.Kind = Type::Extension;
    return readExt<uint8_t>(Obj);
  case FirstByte::Ext16:
    Obj.Kind = Type::Extension;
    return readExt<uint16_t>(Obj);
  case FirstByte::Ext32:
    Obj.Kind = Type::Extension;
    return readExt<uint32_t>(Obj);
  }

  // "Fix" formats: the high bits of the leading byte select the format and the
  // remaining low bits carry the value or the length.

  if ((Lead & FixBitsMask::NegativeInt) == FixBits::NegativeInt) {
    // The whole byte is the value: copy its bits into a signed 8-bit integer.
    int8_t Signed;
    static_assert(sizeof(Signed) == sizeof(Lead), "Unexpected type sizes");
    memcpy(&Signed, &Lead, sizeof(Lead));
    Obj.Kind = Type::Int;
    Obj.Int = Signed;
    return true;
  }

  if ((Lead & FixBitsMask::PositiveInt) == FixBits::PositiveInt) {
    Obj.Kind = Type::UInt;
    Obj.UInt = Lead;
    return true;
  }

  if ((Lead & FixBitsMask::String) == FixBits::String) {
    Obj.Kind = Type::String;
    // Low bits outside the mask hold the string length.
    return createRaw(Obj, static_cast<uint8_t>(Lead & ~FixBitsMask::String));
  }

  if ((Lead & FixBitsMask::Array) == FixBits::Array) {
    Obj.Kind = Type::Array;
    Obj.Length = Lead & ~FixBitsMask::Array;
    return true;
  }

  if ((Lead & FixBitsMask::Map) == FixBits::Map) {
    Obj.Kind = Type::Map;
    Obj.Length = Lead & ~FixBitsMask::Map;
    return true;
  }

  // Nothing matched: the byte is not a format this reader understands.
  return make_error<StringError>(
      "Invalid first byte", std::make_error_code(std::errc::invalid_argument));
}
181+
182+
/// Read a big-endian length prefix of type \p T and then hand over to
/// createRaw() for the payload that follows it. Fails if the prefix itself
/// does not fit in the remaining input.
template <class T> Expected<bool> Reader::readRaw(Object &Obj) {
  if (remainingSpace() < sizeof(T))
    return make_error<StringError>(
        "Invalid Raw with insufficient payload",
        std::make_error_code(std::errc::invalid_argument));

  const T PayloadLength = endian::read<T, Endianness>(Current);
  Current += sizeof(T);
  return createRaw(Obj, PayloadLength);
}
191+
192+
/// Read a signed integer of type \p T from the cursor, widen it to 64 bits and
/// store it in \c Obj.Int. Fails if fewer than sizeof(T) bytes remain.
template <class T> Expected<bool> Reader::readInt(Object &Obj) {
  if (remainingSpace() < sizeof(T))
    return make_error<StringError>(
        "Invalid Int with insufficient payload",
        std::make_error_code(std::errc::invalid_argument));

  const T Value = endian::read<T, Endianness>(Current);
  Current += sizeof(T);
  Obj.Int = static_cast<int64_t>(Value);
  return true;
}
201+
202+
/// Read an unsigned integer of type \p T from the cursor, widen it to 64 bits
/// and store it in \c Obj.UInt.
///
/// \returns true on success, or an error if fewer than sizeof(T) bytes remain.
template <class T> Expected<bool> Reader::readUInt(Object &Obj) {
  if (sizeof(T) > remainingSpace())
    // Name the unsigned format in the diagnostic so it can be told apart from
    // the signed readInt() failure.
    return make_error<StringError>(
        "Invalid UInt with insufficient payload",
        std::make_error_code(std::errc::invalid_argument));
  Obj.UInt = static_cast<uint64_t>(endian::read<T, Endianness>(Current));
  Current += sizeof(T);
  return true;
}
211+
212+
/// Read an element count of type \p T for an Array or Map header and store it
/// in \c Obj.Length. Fails if fewer than sizeof(T) bytes remain.
template <class T> Expected<bool> Reader::readLength(Object &Obj) {
  if (remainingSpace() < sizeof(T))
    return make_error<StringError>(
        "Invalid Map/Array with invalid length",
        std::make_error_code(std::errc::invalid_argument));

  const T Count = endian::read<T, Endianness>(Current);
  Current += sizeof(T);
  Obj.Length = static_cast<size_t>(Count);
  return true;
}
221+
222+
/// Read a size prefix of type \p T for an extension object and then hand over
/// to createExt() for the type byte and payload. Fails if the prefix itself
/// does not fit in the remaining input.
template <class T> Expected<bool> Reader::readExt(Object &Obj) {
  if (remainingSpace() < sizeof(T))
    return make_error<StringError>(
        "Invalid Ext with invalid length",
        std::make_error_code(std::errc::invalid_argument));

  const T PayloadLength = endian::read<T, Endianness>(Current);
  Current += sizeof(T);
  return createExt(Obj, PayloadLength);
}
231+
232+
/// Point \c Obj.Raw at the next \p Size bytes of the input (no copy is made)
/// and advance the cursor past them. Fails if fewer than \p Size bytes remain.
Expected<bool> Reader::createRaw(Object &Obj, uint32_t Size) {
  if (remainingSpace() < Size)
    return make_error<StringError>(
        "Invalid Raw with insufficient payload",
        std::make_error_code(std::errc::invalid_argument));

  StringRef::iterator PayloadBegin = Current;
  Current += Size;
  Obj.Raw = StringRef(PayloadBegin, Size);
  return true;
}
241+
242+
/// Decode an extension body: one type byte followed by \p Size payload bytes.
///
/// The type byte is consumed before the payload size is validated, so the
/// cursor has already moved past it when the payload check fails.
Expected<bool> Reader::createExt(Object &Obj, uint32_t Size) {
  if (Current == End)
    return make_error<StringError>(
        "Invalid Ext with no type",
        std::make_error_code(std::errc::invalid_argument));
  Obj.Extension.Type = *Current++;

  if (remainingSpace() < Size)
    return make_error<StringError>(
        "Invalid Ext with insufficient payload",
        std::make_error_code(std::errc::invalid_argument));

  StringRef::iterator PayloadBegin = Current;
  Current += Size;
  Obj.Extension.Bytes = StringRef(PayloadBegin, Size);
  return true;
}
+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
//===- MsgPackWriter.cpp - Simple MsgPack writer ----------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file implements a MessagePack writer.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "llvm/BinaryFormat/MsgPackWriter.h"
16+
#include "llvm/BinaryFormat/MsgPack.h"
#include <cmath>
#include <limits>
17+
18+
using namespace llvm;
19+
using namespace msgpack;
20+
21+
/// Build a writer that emits to \p OS using the MessagePack byte order, and
/// remember whether \p Compatible mode was requested.
Writer::Writer(raw_ostream &OS, bool Compatible)
    : EW(OS, Endianness),
      Compatible(Compatible) {
}
23+
24+
/// Emit the single-byte Nil format.
void Writer::writeNil() {
  EW.write(FirstByte::Nil);
}
25+
26+
/// Emit the single-byte True or False format according to \p b.
void Writer::write(bool b) {
  if (b)
    EW.write(FirstByte::True);
  else
    EW.write(FirstByte::False);
}
27+
28+
void Writer::write(int64_t i) {
29+
if (i >= 0) {
30+
write(static_cast<uint64_t>(i));
31+
return;
32+
}
33+
34+
if (i >= FixMin::NegativeInt) {
35+
EW.write(static_cast<int8_t>(i));
36+
return;
37+
}
38+
39+
if (i >= INT8_MIN) {
40+
EW.write(FirstByte::Int8);
41+
EW.write(static_cast<int8_t>(i));
42+
return;
43+
}
44+
45+
if (i >= INT16_MIN) {
46+
EW.write(FirstByte::Int16);
47+
EW.write(static_cast<int16_t>(i));
48+
return;
49+
}
50+
51+
if (i >= INT32_MIN) {
52+
EW.write(FirstByte::Int32);
53+
EW.write(static_cast<int32_t>(i));
54+
return;
55+
}
56+
57+
EW.write(FirstByte::Int64);
58+
EW.write(i);
59+
}
60+
61+
void Writer::write(uint64_t u) {
62+
if (u <= FixMax::PositiveInt) {
63+
EW.write(static_cast<uint8_t>(u));
64+
return;
65+
}
66+
67+
if (u <= UINT8_MAX) {
68+
EW.write(FirstByte::UInt8);
69+
EW.write(static_cast<uint8_t>(u));
70+
return;
71+
}
72+
73+
if (u <= UINT16_MAX) {
74+
EW.write(FirstByte::UInt16);
75+
EW.write(static_cast<uint16_t>(u));
76+
return;
77+
}
78+
79+
if (u <= UINT32_MAX) {
80+
EW.write(FirstByte::UInt32);
81+
EW.write(static_cast<uint32_t>(u));
82+
return;
83+
}
84+
85+
EW.write(FirstByte::UInt64);
86+
EW.write(u);
87+
}
88+
89+
/// Emit \p d as Float32 when it survives a round trip through \c float
/// unchanged, and as Float64 otherwise.
///
/// NaN never compares equal to itself, so it is always written as Float64.
void Writer::write(double d) {
  // Converting a finite double whose magnitude exceeds the largest float is
  // undefined behaviour, so such values are sent to Float64 without ever
  // performing the narrowing cast. Infinities and NaN are left to the
  // round-trip comparison below.
  const bool SafeToNarrow =
      !std::isfinite(d) || std::fabs(d) <= std::numeric_limits<float>::max();

  if (SafeToNarrow) {
    // If no loss of precision, encode as a Float32.
    float f = static_cast<float>(d);
    if (static_cast<double>(f) == d) {
      EW.write(FirstByte::Float32);
      EW.write(f);
      return;
    }
  }

  EW.write(FirstByte::Float64);
  EW.write(d);
}
100+
101+
/// Emit a string header in the narrowest usable format, followed by the bytes
/// of \p s. In Compatible mode the Str8 format is skipped in favour of Str16.
void Writer::write(StringRef s) {
  size_t Size = s.size();

  // Pick the header from the widest format down to the fix format.
  if (Size > UINT16_MAX) {
    assert(Size <= UINT32_MAX && "String object too long to be encoded");
    EW.write(FirstByte::Str32);
    EW.write(static_cast<uint32_t>(Size));
  } else if (Size > UINT8_MAX || (Compatible && Size > FixMax::String)) {
    EW.write(FirstByte::Str16);
    EW.write(static_cast<uint16_t>(Size));
  } else if (Size > FixMax::String) {
    EW.write(FirstByte::Str8);
    EW.write(static_cast<uint8_t>(Size));
  } else {
    // Fix format: the length lives in the low bits of the format byte.
    EW.write(static_cast<uint8_t>(FixBits::String | Size));
  }

  EW.OS.write(s.data(), s.size());
}
120+
121+
/// Emit a Bin header in the narrowest format that fits the buffer size,
/// followed by the buffer contents. Asserts that the writer is not in
/// Compatible mode.
void Writer::write(MemoryBufferRef Buffer) {
  assert(!Compatible && "Attempt to write Bin format in compatible mode");

  size_t Size = Buffer.getBufferSize();

  // Pick the header from the widest format down to the narrowest.
  if (Size > UINT16_MAX) {
    assert(Size <= UINT32_MAX && "Binary object too long to be encoded");
    EW.write(FirstByte::Bin32);
    EW.write(static_cast<uint32_t>(Size));
  } else if (Size > UINT8_MAX) {
    EW.write(FirstByte::Bin16);
    EW.write(static_cast<uint16_t>(Size));
  } else {
    EW.write(FirstByte::Bin8);
    EW.write(static_cast<uint8_t>(Size));
  }

  EW.OS.write(Buffer.getBufferStart(), Size);
}
140+
141+
void Writer::writeArraySize(uint32_t Size) {
142+
if (Size <= FixMax::Array) {
143+
EW.write(static_cast<uint8_t>(FixBits::Array | Size));
144+
return;
145+
}
146+
147+
if (Size <= UINT16_MAX) {
148+
EW.write(FirstByte::Array16);
149+
EW.write(static_cast<uint16_t>(Size));
150+
return;
151+
}
152+
153+
EW.write(FirstByte::Array32);
154+
EW.write(Size);
155+
}
156+
157+
void Writer::writeMapSize(uint32_t Size) {
158+
if (Size <= FixMax::Map) {
159+
EW.write(static_cast<uint8_t>(FixBits::Map | Size));
160+
return;
161+
}
162+
163+
if (Size <= UINT16_MAX) {
164+
EW.write(FirstByte::Map16);
165+
EW.write(static_cast<uint16_t>(Size));
166+
return;
167+
}
168+
169+
EW.write(FirstByte::Map32);
170+
EW.write(Size);
171+
}
172+
173+
void Writer::writeExt(int8_t Type, MemoryBufferRef Buffer) {
174+
size_t Size = Buffer.getBufferSize();
175+
176+
switch (Size) {
177+
case FixLen::Ext1:
178+
EW.write(FirstByte::FixExt1);
179+
break;
180+
case FixLen::Ext2:
181+
EW.write(FirstByte::FixExt2);
182+
break;
183+
case FixLen::Ext4:
184+
EW.write(FirstByte::FixExt4);
185+
break;
186+
case FixLen::Ext8:
187+
EW.write(FirstByte::FixExt8);
188+
break;
189+
case FixLen::Ext16:
190+
EW.write(FirstByte::FixExt16);
191+
break;
192+
default:
193+
if (Size <= UINT8_MAX) {
194+
EW.write(FirstByte::Ext8);
195+
EW.write(static_cast<uint8_t>(Size));
196+
} else if (Size <= UINT16_MAX) {
197+
EW.write(FirstByte::Ext16);
198+
EW.write(static_cast<uint16_t>(Size));
199+
} else {
200+
assert(Size <= UINT32_MAX && "Ext size too large to be encoded");
201+
EW.write(FirstByte::Ext32);
202+
EW.write(static_cast<uint32_t>(Size));
203+
}
204+
}
205+
206+
EW.write(Type);
207+
EW.OS.write(Buffer.getBufferStart(), Size);
208+
}

‎llvm/unittests/BinaryFormat/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ set(LLVM_LINK_COMPONENTS
55
add_llvm_unittest(BinaryFormatTests
66
DwarfTest.cpp
77
MachOTest.cpp
8+
MsgPackReaderTest.cpp
9+
MsgPackWriterTest.cpp
810
TestFileMagic.cpp
911
)
1012

‎llvm/unittests/BinaryFormat/MsgPackReaderTest.cpp

+891
Large diffs are not rendered by default.

‎llvm/unittests/BinaryFormat/MsgPackWriterTest.cpp

+523
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.