diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h --- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h +++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h @@ -62,6 +62,8 @@ }; public: + // Default constructor gives an empty node with no associated Document. All + // you can do with it is "isEmpty()". DocNode() : KindAndDoc(nullptr) {} // Type methods @@ -70,8 +72,10 @@ bool isScalar() const { return !isMap() && !isArray(); } bool isString() const { return getKind() == Type::String; } - // Accessors - bool isEmpty() const { return !KindAndDoc; } + // Accessors. isEmpty() returns true for both a default-constructed DocNode + // that has no associated Document, and the result of getEmptyNode(), which + // does have an associated document. + bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; } Type getKind() const { return KindAndDoc->Kind; } Document *getDocument() const { return KindAndDoc->Doc; } @@ -146,10 +150,10 @@ friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) { // This has to cope with one or both of the nodes being default-constructed, // such that KindAndDoc is not set. + if (Rhs.isEmpty()) + return false; if (Lhs.KindAndDoc != Rhs.KindAndDoc) { - if (!Rhs.KindAndDoc) - return false; - if (!Lhs.KindAndDoc) + if (Lhs.isEmpty()) return true; return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind(); } @@ -222,14 +226,15 @@ // Array access methods. size_t size() const { return Array->size(); } bool empty() const { return !size(); } + DocNode &back() const { return Array->back(); } ArrayTy::iterator begin() { return Array->begin(); } ArrayTy::iterator end() { return Array->end(); } void push_back(DocNode N) { - assert(N.getDocument() == getDocument()); + assert(N.isEmpty() || N.getDocument() == getDocument()); Array->push_back(N); } - /// Element access. This extends the array if necessary. + /// Element access. This extends the array if necessary, with empty nodes. DocNode &operator[](size_t Index); }; @@ -247,7 +252,7 @@ DocNode Root; // The KindAndDocument structs pointed to by nodes in the document. - KindAndDocument KindAndDocs[size_t(Type::Extension) + 1]; + KindAndDocument KindAndDocs[size_t(Type::Empty) + 1]; // Whether YAML output uses hex for UInt. bool HexMode = false; @@ -255,7 +260,7 @@ public: Document() { clear(); - for (unsigned T = 0; T != size_t(Type::Extension) + 1; ++T) + for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T) KindAndDocs[T] = {this, Type(T)}; } @@ -263,7 +268,13 @@ DocNode &getRoot() { return Root; } /// Restore the Document to an empty state. - void clear() { getRoot() = getNode(); } + void clear() { getRoot() = getEmptyNode(); } + + /// Create an empty node associated with this Document. + DocNode getEmptyNode() { + auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]); + return N; + } /// Create a nil node associated with this Document. DocNode getNode() { @@ -345,15 +356,35 @@ return N.getArray(); } - /// Read a MsgPack document from a binary MsgPack blob. - /// The blob data must remain valid for the lifetime of this Document (because - /// a string object in the document contains a StringRef into the original - /// blob). - /// If Multi, then this sets root to an array and adds top-level objects to - /// it. If !Multi, then it only reads a single top-level object, even if there - /// are more, and sets root to that. - /// Returns false if failed due to illegal format. - bool readFromBlob(StringRef Blob, bool Multi); + /// Read a document from a binary msgpack blob, merging into anything already + /// in the Document. The blob data must remain valid for the lifetime of this + /// Document (because a string object in the document contains a StringRef + /// into the original blob). If Multi, then this sets root to an array and + /// adds top-level objects to it. If !Multi, then it only reads a single + /// top-level object, even if there are more, and sets root to that. Returns + /// false if failed due to illegal format or merge error. + /// + /// The Merger arg is a callback function that is called when the merge has a + /// conflict, that is, it is trying to set an item that is already set. If the + /// conflict cannot be resolved, the callback function returns -1. If the + /// conflict can be resolved, the callback returns a non-negative number and + /// sets *DestNode to the resolved node. The returned non-negative number is + /// significant only for an array node; it is then the array index to start + /// populating at. That allows Merger to choose whether to merge array + /// elements (returns 0) or append new elements (returns existing size). + /// + /// If SrcNode is an array or map, the resolution must be that *DestNode is an + /// array or map respectively, although it could be the array or map + /// (respectively) that was already there. MapKey is the key if *DestNode is a + /// map entry, a nil node otherwise. + /// + /// The default for Merger is to disallow any conflict. + bool readFromBlob( + StringRef Blob, bool Multi, + function_ref + Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { + return -1; + }); /// Write a MsgPack document to a binary MsgPack blob. void writeToBlob(std::string &Blob); diff --git a/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/llvm/include/llvm/BinaryFormat/MsgPackReader.h --- a/llvm/include/llvm/BinaryFormat/MsgPackReader.h +++ b/llvm/include/llvm/BinaryFormat/MsgPackReader.h @@ -57,6 +57,7 @@ Array, Map, Extension, + Empty, // Used by MsgPackDocument to represent an empty node }; /// Extension types are composed of a user-defined type ID and an uninterpreted diff --git a/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/llvm/lib/BinaryFormat/MsgPackDocument.cpp --- a/llvm/lib/BinaryFormat/MsgPackDocument.cpp +++ b/llvm/lib/BinaryFormat/MsgPackDocument.cpp @@ -40,46 +40,56 @@ /// Member access for MapDocNode. DocNode &MapDocNode::operator[](DocNode Key) { assert(!Key.isEmpty()); - MapTy::value_type Entry(Key, DocNode()); - auto ItAndInserted = Map->insert(Entry); - if (ItAndInserted.second) { + DocNode &N = (*Map)[Key]; + if (N.isEmpty()) { // Ensure a new element has its KindAndDoc initialized. - ItAndInserted.first->second = getDocument()->getNode(); + N = getDocument()->getEmptyNode(); } - return ItAndInserted.first->second; + return N; } /// Array element access. This extends the array if necessary. DocNode &ArrayDocNode::operator[](size_t Index) { if (size() <= Index) { // Ensure new elements have their KindAndDoc initialized. - Array->resize(Index + 1, getDocument()->getNode()); + Array->resize(Index + 1, getDocument()->getEmptyNode()); } return (*Array)[Index]; } // A level in the document reading stack. struct StackLevel { + StackLevel(DocNode Node, size_t StartIndex, size_t Length, + DocNode *MapEntry = nullptr) + : Node(Node), Index(StartIndex), End(StartIndex + Length), + MapEntry(MapEntry) {} DocNode Node; - size_t Length; + size_t Index; + size_t End; // Points to map entry when we have just processed a map key. DocNode *MapEntry; + DocNode MapKey; }; -// Read a document from a binary msgpack blob. +// Read a document from a binary msgpack blob, merging into anything already in +// the Document. // The blob data must remain valid for the lifetime of this Document (because a // string object in the document contains a StringRef into the original blob). // If Multi, then this sets root to an array and adds top-level objects to it. // If !Multi, then it only reads a single top-level object, even if there are // more, and sets root to that. -// Returns false if failed due to illegal format. -bool Document::readFromBlob(StringRef Blob, bool Multi) { +// Returns false if failed due to illegal format or merge error. + +bool Document::readFromBlob( + StringRef Blob, bool Multi, + function_ref + Merger) { msgpack::Reader MPReader(Blob); SmallVector Stack; if (Multi) { // Create the array for multiple top-level objects. Root = getArrayNode(); - Stack.push_back(StackLevel({Root, (size_t)-1, nullptr})); + Stack.push_back(StackLevel(Root, 0, (size_t)-1)); } do { // On to next element (or key if doing a map key next). @@ -124,29 +134,47 @@ } // Store it. + DocNode *DestNode = nullptr; if (Stack.empty()) - Root = Node; + DestNode = &Root; else if (Stack.back().Node.getKind() == Type::Array) { // Reading an array entry. auto &Array = Stack.back().Node.getArray(); - Array.push_back(Node); + DestNode = &Array[Stack.back().Index++]; } else { auto &Map = Stack.back().Node.getMap(); if (!Stack.back().MapEntry) { // Reading a map key. + Stack.back().MapKey = Node; Stack.back().MapEntry = &Map[Node]; - } else { - // Reading the value for the map key read in the last iteration. - *Stack.back().MapEntry = Node; - Stack.back().MapEntry = nullptr; + continue; } + // Reading the value for the map key read in the last iteration. + DestNode = Stack.back().MapEntry; + Stack.back().MapEntry = nullptr; + ++Stack.back().Index; } + int MergeResult = 0; + if (!DestNode->isEmpty()) { + // In a merge, there is already a value at this position. Call the + // callback to attempt to resolve the conflict. The resolution must result + // in an array or map if Node is an array or map respectively. + DocNode MapKey = !Stack.empty() && !Stack.back().MapKey.isEmpty() + ? Stack.back().MapKey + : getNode(); + MergeResult = Merger(DestNode, Node, MapKey); + if (MergeResult < 0) + return false; // Merge conflict resolution failed + assert(!((Node.isMap() && !DestNode->isMap()) || + (Node.isArray() && !DestNode->isArray()))); + } else + *DestNode = Node; // See if we're starting a new array or map. - switch (Node.getKind()) { + switch (DestNode->getKind()) { case msgpack::Type::Array: case msgpack::Type::Map: - Stack.push_back(StackLevel({Node, Obj.Length, nullptr})); + Stack.push_back(StackLevel(*DestNode, MergeResult, Obj.Length, nullptr)); break; default: break; @@ -154,14 +182,10 @@ // Pop finished stack levels. while (!Stack.empty()) { - if (Stack.back().Node.getKind() == msgpack::Type::Array) { - if (Stack.back().Node.getArray().size() != Stack.back().Length) - break; - } else { - if (Stack.back().MapEntry || - Stack.back().Node.getMap().size() != Stack.back().Length) - break; - } + if (Stack.back().MapEntry) + break; + if (Stack.back().Index != Stack.back().End) + break; Stack.pop_back(); } } while (!Stack.empty()); diff --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp --- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp +++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp @@ -21,7 +21,7 @@ ASSERT_EQ(Doc.getRoot().getInt(), 0); } -TEST(MsgPackDocument, TestReadArray) { +TEST(MsgPackDocument, TestReadMergeArray) { Document Doc; bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false); ASSERT_TRUE(Ok); @@ -33,9 +33,64 @@ ASSERT_EQ(SI.getInt(), 1); auto SN = A[1]; ASSERT_EQ(SN.getKind(), Type::Nil); + + Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false, + [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { + // Allow array, merging into existing elements, ORing + // ints. + if (DestNode->getKind() == Type::Int && + SrcNode.getKind() == Type::Int) { + *DestNode = DestNode->getDocument()->getNode( + DestNode->getInt() | SrcNode.getInt()); + return 0; + } + return DestNode->isArray() && SrcNode.isArray() ? 0 + : -1; + }); + ASSERT_TRUE(Ok); + A = Doc.getRoot().getArray(); + ASSERT_EQ(A.size(), 2u); + SI = A[0]; + ASSERT_EQ(SI.getKind(), Type::Int); + ASSERT_EQ(SI.getInt(), 43); + SN = A[1]; + ASSERT_EQ(SN.getKind(), Type::Nil); +} + +TEST(MsgPackDocument, TestReadAppendArray) { + Document Doc; + bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false); + ASSERT_TRUE(Ok); + ASSERT_EQ(Doc.getRoot().getKind(), Type::Array); + auto A = Doc.getRoot().getArray(); + ASSERT_EQ(A.size(), 2u); + auto SI = A[0]; + ASSERT_EQ(SI.getKind(), Type::Int); + ASSERT_EQ(SI.getInt(), 1); + auto SN = A[1]; + ASSERT_EQ(SN.getKind(), Type::Nil); + + Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false, + [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { + // Allow array, appending after existing elements + return DestNode->isArray() && SrcNode.isArray() + ? DestNode->getArray().size() + : -1; + }); + ASSERT_TRUE(Ok); + A = Doc.getRoot().getArray(); + ASSERT_EQ(A.size(), 3u); + SI = A[0]; + ASSERT_EQ(SI.getKind(), Type::Int); + ASSERT_EQ(SI.getInt(), 1); + SN = A[1]; + ASSERT_EQ(SN.getKind(), Type::Nil); + SI = A[2]; + ASSERT_EQ(SI.getKind(), Type::Int); + ASSERT_EQ(SI.getInt(), 42); } -TEST(MsgPackDocument, TestReadMap) { +TEST(MsgPackDocument, TestReadMergeMap) { Document Doc; bool Ok = Doc.readFromBlob(StringRef("\x82\xa3" "foo" @@ -53,6 +108,68 @@ auto BarS = M["bar"]; ASSERT_EQ(BarS.getKind(), Type::Int); ASSERT_EQ(BarS.getInt(), 2); + + Ok = Doc.readFromBlob(StringRef("\x82\xa3" + "foz" + "\xd0\x03\xa3" + "baz" + "\xd0\x04"), + /*Multi=*/false, + [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { + return DestNode->isMap() && SrcNode.isMap() ? 0 : -1; + }); + ASSERT_TRUE(Ok); + ASSERT_EQ(M.size(), 4u); + FooS = M["foo"]; + ASSERT_EQ(FooS.getKind(), Type::Int); + ASSERT_EQ(FooS.getInt(), 1); + BarS = M["bar"]; + ASSERT_EQ(BarS.getKind(), Type::Int); + ASSERT_EQ(BarS.getInt(), 2); + auto FozS = M["foz"]; + ASSERT_EQ(FozS.getKind(), Type::Int); + ASSERT_EQ(FozS.getInt(), 3); + auto BazS = M["baz"]; + ASSERT_EQ(BazS.getKind(), Type::Int); + ASSERT_EQ(BazS.getInt(), 4); + + Ok = Doc.readFromBlob( + StringRef("\x82\xa3" + "foz" + "\xd0\x06\xa3" + "bay" + "\xd0\x08"), + /*Multi=*/false, [](DocNode *Dest, DocNode Src, DocNode MapKey) { + // Merger function that merges two ints by ORing their values, as long + // as the map key is "foz". + if (Src.isMap()) + return Dest->isMap(); + if (Src.isArray()) + return Dest->isArray(); + if (MapKey.isString() && MapKey.getString() == "foz" && + Dest->getKind() == Type::Int && Src.getKind() == Type::Int) { + *Dest = Src.getDocument()->getNode(Dest->getInt() | Src.getInt()); + return true; + } + return false; + }); + ASSERT_TRUE(Ok); + ASSERT_EQ(M.size(), 5u); + FooS = M["foo"]; + ASSERT_EQ(FooS.getKind(), Type::Int); + ASSERT_EQ(FooS.getInt(), 1); + BarS = M["bar"]; + ASSERT_EQ(BarS.getKind(), Type::Int); + ASSERT_EQ(BarS.getInt(), 2); + FozS = M["foz"]; + ASSERT_EQ(FozS.getKind(), Type::Int); + ASSERT_EQ(FozS.getInt(), 7); + BazS = M["baz"]; + ASSERT_EQ(BazS.getKind(), Type::Int); + ASSERT_EQ(BazS.getInt(), 4); + auto BayS = M["bay"]; + ASSERT_EQ(BayS.getKind(), Type::Int); + ASSERT_EQ(BayS.getInt(), 8); } TEST(MsgPackDocument, TestWriteInt) {