Index: include/llvm/Support/JSON.h =================================================================== --- /dev/null +++ include/llvm/Support/JSON.h @@ -0,0 +1,459 @@ +//===--- JSON.h - JSON representation, parser, serializer -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file allows creating, mutating, and (de-)serializing JSON. +// +// A Document is an owned JSON tree, and is the arena owning all values. +// Values are JSON expressions owned by a Document. This is a union of: +// StringRef, bool, double, nullptr_t: for primitive types +// Array: similar to vector +// Object: similar to map +// Arrays and Objects are mutable, inserted values are copied into the document. +// +// Literal allows you to use natural C++ expressions to construct JSON values. +// +// An example: +// // Parse a JSON file and do "error checking". +// Expected Doc = Document::parse("[1,true,3]"); +// assert(Doc && Doc->array()); +// +// Array& A = *Doc->array(); +// for (int I = 0; I < A.size(); ++I) { +// if (auto N = A[I].number()) { +// A.set(I, {{"number", *N}}); +// } +// } +// +// llvm::outs() << Doc; +// // [{"number":1},true,{"number":3}] +// +//===----------------------------------------------------------------------===// +// +// Caveats (fix me!): +// - integers could be embedded directly in the ValueRep +// - parser is recursive-descent, so can overflow on malicious input +// - arrays may allocate outside the arena (SmallVector is not allocator-aware) +// - arrays-from-literals could choose the perfect small-size optimization +// - StringMap may be suboptimal for tiny objects +// - nested literals are copied at every level (as initializer_list is const) +// - UTF-8 in strings is not validated +// - UTF-16 and UTF-32 streams are not supported +// - encapsulation is weak - too many friend declarations +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H + +#include "llvm/ADT/iterator.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/JSONDetail.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/raw_os_ostream.h" + +namespace llvm { +namespace json { +class Object; +class Array; + +// A Literal is a temporary JSON structure used to construct Values. +// You can implicitly construct Literals holding: +// - strings: std::string, SmallString, StringRef, Twine, char*, formatv +// - numbers +// - booleans +// - null: nullptr +// - arrays: {"foo", 42.0, false} +// - objects: {{"key1", val1}, {"key2", val2}} +// - json::Values +// Literals can be nested: Objects and Arrays contain arbitrary literals. +// +// Normally you don't need to name the type, just writing directly: +// Document D = {"one", 2, "three"}; // Calls Document(const Literal&) +// If you do create Literals, be aware they don't own referenced strings/Values: +// Literal L = {AString, {42, AValue}}; +// L is only valid as long as AString and AValue are alive. +// Think of a Literal as the JSON version of an llvm::Twine. +using Literal = detail::Literal; +// Literals like {} and {{string, any}, {string, any}} are Objects by default. +// To force them to be treated as (nested) arrays, use array(...). +// (json::Values that happen to be strings do not trigger the Object behavior). +inline Literal array(std::initializer_list V) { return Literal(V, 1); } +inline Literal array() { return array({}); } + +// Value is the public representation of a generic JSON value. +// It's not directly copyable or movable, users should take references to it. +// +// It has a pointer into the document's allocator where the data is stored, +// knows what type the data is, and runs its destructor in ~Value(). +// However it doesn't know about the allocator in use, and doesn't free memory. +class Value { + public: + Value(Value &&) = delete; + Value(Value &) = delete; + Value &operator=(Value &&) = delete; + Value &operator=(const Value &) = delete; + ~Value(); + + // Typed accessors return None/nullptr of the Value is not of this type. + Optional number() const { + if (detail::SmallInt I = U.get()) return double(I); + if (double *D = U.get()) return *D; + return None; + } + Optional string() const { + if (detail::PString *PS = U.get()) + return StringRef(*PS); + if (const char *S = *U.get()) + return StringRef(S); + return None; + } + Optional boolean() const { + if (U == detail::TrueRep) return true; + if (U == detail::FalseRep) return false; + return None; + } + Object *object() { return U.get(); } + const Object *object() const { return U.get(); } + Array *array() { return U.get(); } + const Array *array() const { return U.get(); } + bool null() const { return U == detail::NullRep; } + + // Returns the JSON serialization of this Value. + std::string str() const { + std::string S; + raw_string_ostream OS(S); + OS << *this; + return OS.str(); + } + + // Not part of the public interface (but only internals can name it) + explicit operator detail::ValueRep() const { return U; } + + private: + using Rep = detail::ValueRep; + Value(Rep R) : U(R) {} + + Rep U; + friend detail::ValueWrapper; + friend bool operator==(const Value &, const Value &); + friend raw_ostream &operator<<(raw_ostream &, const class Value &); +}; + +// ValueWrapper provides move semantics for Value so containers can hold it. +// Only moves within a document are valid! +class detail::ValueWrapper { + public: + explicit ValueWrapper(ValueRep R) : V(R) {} + ValueWrapper(const ValueWrapper &Copy) = delete; + ValueWrapper(ValueWrapper &&Move) : V(Move.V.U) { Move.V.U = NullRep; } + ValueWrapper &operator=(const ValueWrapper &Copy) = delete; + ValueWrapper &operator=(ValueWrapper &&Move) { + V.~Value(); + new (&V) Value(Move.V.U); + Move.V.U = detail::NullRep; + return *this; + } + + operator Value &() { return V; } + operator const Value &() const { return V; } + +private: + Value V; +}; + +class Document { + public: + // Parses a document from a UTF-8 JSON stream. + // The only error reported is due to malformed input. + static Expected parse(StringRef text); + + // An empty document, with value null. + Document() : Alloc(nullptr), Root(detail::NullRep) {} + Document(Document&&) = default; + // Create a document from a literal JSON expression (see Literal above). + // Note that a json::Value may be passed here, it will be deep-copied. + Document(const Literal &V) + : Alloc(new detail::Arena()), Root(V.construct(*Alloc)) {} + // This constructor is needed to disambiguate Document({{"foo"}}), which could + // be Document(Document{Literal{"foo"}}) or Document(Literal{Literal{"foo"}}). + Document(std::initializer_list V): Document(Literal(V)) {}; + + Document &operator=(Document &&Move) { + // Default implementation would delete the allocator, then the Value! + Root.~ValueWrapper(); + Alloc = std::move(Move.Alloc); + new (&Root) detail::ValueWrapper(std::move(Move.Root)); + return *this; + } + Document &operator=(const Literal &V) { return *this = Document(V); } + Document &operator=(std::initializer_list V) { + return *this = Literal(V); + } + + Value &root() { return Root; } + const Value &root() const { return Root; } + + // For most purposes, a Document can be treated as its root Value. + Optional number() const { return root().number(); } + Optional string() const { return root().string(); } + Optional boolean() const { return root().boolean(); } + bool null() const { return root().null(); } + Object *object() {return root().object(); } + const Object *object() const {return root().object(); } + Array *array() {return root().array(); } + const Array *array() const { return root().array(); } + std::string str() const { return root().str(); } + operator Value &() { return Root; } + operator const Value &() const { return Root; } + + // Clean up memory used by values orphaned by mutations. + // This invalidates all Value references. + void trim() { *this = clone(); } + // Does a deep-copy (the copy-constructor is deleted as it is expensive). + Document clone() const { return Document((const Value&)*this); } + // Space actually used on the arena. + size_t usedBytes() const { return Alloc ? Alloc->getBytesAllocated() : 0; } + // Space consumed by the arena. + size_t allocatedBytes() const { return Alloc ? Alloc->getTotalMemory() : 0; } + + class Parser; + + private: + Document(detail::ValueWrapper V, std::unique_ptr Alloc) + : Alloc(std::move(Alloc)), Root(std::move(V)) {} + + std::unique_ptr Alloc; + detail::ValueWrapper Root; +}; + +class Array { + using Rep = SmallVector; + Rep Elems; + detail::Arena &Alloc; + friend class Document::Parser; + friend detail::ValueRep detail::deepCopy(const detail::ValueRep &U, + detail::Arena &Alloc); + + public: + using value_type = Value; + class const_iterator; + class iterator; + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = std::reverse_iterator; + + Array(detail::Arena &Alloc) : Alloc(Alloc) {} + Array(size_t Size, detail::Arena &Alloc) : Alloc(Alloc) { + Elems.reserve(Size); + } + Array(Array &&) = delete; + Array(Array &) = delete; + Array &operator=(Array &&) = delete; + Array &operator=(const Array &) = delete; + + iterator begin() { return iterator(Elems.begin()); } + const_iterator begin() const { return const_iterator(Elems.begin()); } + iterator end() { return iterator(Elems.end()); } + const_iterator end() const { return const_iterator(Elems.end()); } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + size_t size() const { return Elems.size(); } + void push_back(const Literal &V) { Elems.emplace_back(V.construct(Alloc)); } + + Value &operator[](size_t I) { return Elems[I]; } + const Value &operator[](size_t I) const { return Elems[I]; } + + Optional number(size_t I) const { return (*this)[I].number(); } + Optional string(size_t I) const { return (*this)[I].string(); } + Optional boolean(size_t I) const { return (*this)[I].boolean(); } + bool null(size_t I) const { return (*this)[I].null(); } + Array *array(size_t I) { return (*this)[I].array(); } + const Array *array(size_t I) const { return (*this)[I].array(); } + Object *object(size_t I) { return (*this)[I].object(); } + const Object *object(size_t I) const { return (*this)[I].object(); } + + class iterator : public iterator_adaptor_base { + public: + iterator() = default; + explicit iterator(Rep::iterator I) : iterator_adaptor_base(std::move(I)) {} + value_type &operator*() { return *this->wrapped(); } + }; + + class const_iterator + : public iterator_adaptor_base { + public: + const_iterator() = default; + explicit const_iterator(Rep::const_iterator I) + : iterator_adaptor_base(std::move(I)) {} + const value_type &operator*() { return *this->wrapped(); } + }; +}; + +class Object { + using Rep = StringMap; + Rep Props; + friend Document::Parser; + friend detail::ValueRep detail::deepCopy(const detail::ValueRep &U, + detail::Arena &Alloc); + + public: + using key_type = StringRef; + using mapped_type = Value; + struct value_type { + value_type(StringRef first, Value &second) : first(first), second(second) {} + const StringRef first; + Value &second; + }; + class const_iterator; + class iterator; + + Object(detail::Arena &Alloc) : Props(Alloc) {} + Object(size_t Size, detail::Arena &Alloc) : Props(Size, Alloc) {} + Object(Object &&) = delete; + Object(Object &) = delete; + Object &operator=(Object &&) = delete; + Object &operator=(const Object &) = delete; + + iterator begin() { return iterator(Props.begin()); } + const_iterator begin() const { return const_iterator(Props.begin()); } + iterator end() { return iterator(Props.end()); } + const_iterator end() const { return const_iterator(Props.end()); } + + size_t size() const { return Props.size(); } + + void set(StringRef K, const Literal &V) { + auto R = Props.try_emplace(K, V.construct(Props.getAllocator())); + if (!R.second) { + R.first->second.~ValueWrapper(); + new (&R.first->second) + detail::ValueWrapper(V.construct(Props.getAllocator())); + } + } + + bool erase(StringRef K) { return Props.erase(K); } + + Value *get(StringRef K) { + auto I = Props.find(K); + return (I == Props.end()) ? nullptr : &(Value&)(I->second); + } + + const Value *get(StringRef K) const { + auto I = Props.find(K); + return (I == Props.end()) ? nullptr : &(const Value&)(I->second); + } + + Optional number(StringRef I) const { + if (const Value *V = get(I)) return V->number(); + return None; + } + Optional string(StringRef I) const { + if (const Value *V = get(I)) return V->string(); + return None; + } + Optional boolean(StringRef I) const { + if (const Value *V = get(I)) return V->boolean(); + return None; + } + bool null(StringRef I) const { + if (const Value *V = get(I)) return V->null(); + return false; + } + Array *array(StringRef I) { + if (Value *V = get(I)) return V->array(); + return nullptr; + } + const Array *array(StringRef I) const { + if (const Value *V = get(I)) return V->array(); + return nullptr; + } + Object *object(StringRef I) { + if (Value *V = get(I)) return V->object(); + return nullptr; + } + const Object *object(StringRef I) const { + if (const Value *V = get(I)) return V->object(); + return nullptr; + } + + class iterator + : public iterator_adaptor_base { + Optional Current; + + public: + iterator() = default; + explicit iterator(Rep::iterator I) : iterator_adaptor_base(std::move(I)) {} + value_type &operator*() { + Current.emplace(wrapped()->getKey(), wrapped()->second); + return *Current; + } + }; + + class const_iterator + : public iterator_adaptor_base { + Optional Current; + + public: + const_iterator() = default; + explicit const_iterator(Rep::const_iterator I) + : iterator_adaptor_base(std::move(I)) {} + + const value_type &operator*() { + Current.emplace(wrapped()->getKey(), + const_cast((const Value &)(wrapped()->second))); + return *Current; + } + }; +}; + +bool operator==(const Array&, const Array&); +inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } +bool operator==(const Object&, const Object&); +inline bool operator!=(const Object& L, const Object& R) { return !(L == R); } +bool operator==(const Value&, const Value&); +inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } +raw_ostream &operator<<(raw_ostream &OS, const Array &V); +raw_ostream &operator<<(raw_ostream &OS, const Object &V); +raw_ostream &operator<<(raw_ostream &OS, const Value &V); + +// For gtest. The default operator<< adapter doesn't work, as gtest needs ADL. +inline void PrintTo(const Array &V, std::ostream *S) { + raw_os_ostream OS (*S); + OS << V; +} +inline void PrintTo(const Object &V, std::ostream* S) { + raw_os_ostream OS (*S); + OS << V; +} +inline void PrintTo(const Value &V, std::ostream* S) { + raw_os_ostream OS (*S); + OS << V; +} +inline void PrintTo(const Document &V, std::ostream* S) { + raw_os_ostream OS (*S); + OS << V; +} + +} // namespace json +} // namespace llvm + +#endif Index: include/llvm/Support/JSONDetail.h =================================================================== --- /dev/null +++ include/llvm/Support/JSONDetail.h @@ -0,0 +1,218 @@ +//===--- JSONDetail.h - implementation details for JSON.h -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerSumType.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include "llvm/Support/TrailingObjects.h" + +namespace llvm { +namespace json { +class Object; +class Array; +class Value; +namespace detail { + +constexpr size_t FirstSlabSize = 1024; +using Arena = BumpPtrAllocatorImpl; + +// The Object and Array types are used in a the Value::Rep PointerSumType. +// This requires them to be complete, but they have Values as indirect members! +// We get around this circularity by explicitly specifying that they are at +// least as aligned as an int64. (Both are large objects of at least 24 bytes). +} // namespace detail +} // namespace json +template <> struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(json::Object *P) { return P; } + static inline json::Object *getFromVoidPointer(void *P) { + return static_cast(P); + } + enum { NumLowBitsAvailable = + PointerLikeTypeTraits::NumLowBitsAvailable }; +}; +template <> struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(json::Array *P) { return P; } + static inline json::Array *getFromVoidPointer(void *P) { + return static_cast(P); + } + enum { NumLowBitsAvailable = + PointerLikeTypeTraits::NumLowBitsAvailable }; +}; +namespace json { +namespace detail { + +// PString is the internal representation of a String value. +// These are immutable, so we just allocate the right number of bytes. +struct PString final : public TrailingObjects { + size_t Size; + const char *data() const { return getTrailingObjects(); } + + operator StringRef() const { return StringRef(data(), Size); } + bool operator==(const PString &PS) const { + return Size == PS.Size && !memcmp(data(), PS.data(), Size); + } + static PString* create(StringRef S, Arena &Alloc) { + PString *PS = static_cast( + Alloc.Allocate(totalSizeToAlloc(S.size()), alignof(PString))); + PS->Size = S.size(); + memcpy(const_cast(PS->data()), S.data(), S.size()); + return PS; + } +}; + +// ValueRep is the lightweight representation of an arbitrary JSON value. +// It is a tagged pointer: [ pointer to representation | type ]. +// The representation is stored in a BumpPtrAllocator in a type-specific way. +// true/false/null/integers have no extra data outside the ValueType. +// +// ValueRep has no ownership semantics, like a raw pointer. +// Value wraps ValueRep and owns the external data's lifetime (but not storage). +// ValeWrapper wraps Value and adds movability for use in internal containers. +enum ValueType { + VT_Sentinel = 0, // true, false, null + VT_SmallInt, // 53-bit integer embedded in the ValueRep + VT_Double, + VT_Array, + VT_Object, + VT_OwnedString, + VT_StaticString, +}; +enum SentinelValue : char { + SV_Null = 0, + SV_True = 1, + SV_False = 2, +}; +using SmallInt = PointerEmbeddedInt; +using ValueRep = PointerSumType< + ValueType, + PointerSumTypeMember>, + PointerSumTypeMember, + PointerSumTypeMember, + PointerSumTypeMember, + PointerSumTypeMember, + PointerSumTypeMember, + PointerSumTypeMember>; +const ValueRep TrueRep = ValueRep::create(SV_True); +const ValueRep FalseRep = ValueRep::create(SV_False); +const ValueRep NullRep = ValueRep::create(SV_Null); + +class ValueWrapper; +ValueRep deepCopy(const ValueRep &U, Arena &Alloc); + +class Literal { +public: + Literal(const Literal&); + Literal(Literal&&); + ~Literal() { reset(); } + + // Strings: std::string, SmallString, StringRef, Twine, char*. + // Support stringy types that Twine accepts, but not char, int etc. + Literal(const Twine &V) : Type(LT_Twine) { new (&R.Twine) Twine(V); } + Literal(const StringRef &V) : Type(LT_Twine) { new (&R.Twine) Twine(V); } + Literal(const SmallVectorImpl &V) : Type(LT_Twine) { + new (&R.Twine) Twine(V); + } + Literal(const std::string &V) : Type(LT_Twine) { new (&R.Twine) Twine(V); } + Literal(const formatv_object_base &V) : Type(LT_Twine) { + new (&R.Twine) Twine(V); + } + // String literals are tracked separately to avoid copying them later. + template Literal(const char (&V)[N]) : Type(LT_StaticString) { + assert(strlen(V) == N - 1 && + "Detected string literal but not null-terminated!"); + R.StaticString = V; + } + // Don't treat const char* as a string literal unless it matched above. + // Use a template to reduce overload match strength of string literals. + template ::value>::type, + int = 0> + Literal(T V) : Type(LT_Twine) { + new (&R.Twine) Twine(V); + } + // Mutable char arrays are also not string literals. + template Literal(char (&V)[N]) : Literal((const char *)V) {} + + // Booleans: bool. + // The default conversion rules will convert things to bool way too easily. + template < + typename T, + typename = typename std::enable_if::value>::type, + char Dummy = 0> + Literal(T V) : Type(LT_Immediate) { + R.Immediate = V ? TrueRep : FalseRep; + } + + // Numbers: built-in numeric types. + // is_arithmetic is almost the right criterion, but exclude bool. + template < + typename T, + typename = typename std::enable_if::value>::type, + typename = typename std::enable_if::value>::value>::type> + Literal(T V) { + if (V == SmallInt(V)) { + Type = LT_Immediate; + R.Immediate = ValueRep::create(SmallInt(V)); + } else { + Type = LT_Double; + R.Double = V; + } + } + + // Null: nullptr; + Literal(std::nullptr_t) : Type(LT_Immediate) { R.Immediate = NullRep; } + + // Objects: initializer_list where each element is a two-element + // array and the first element is a string. + // Arrays: initializer_list. + // + // Where ambiguous, we choose Object unless ForceArray is passed. + Literal(std::initializer_list V, bool ForceArray = false); + + // References to JSON values: Value&. + // The value must outlive this Literal. + Literal(const Value &V) : Type(LT_Value) { R.Value = &V; } + + // Materializes this JSON value, allocating into the arena. + detail::ValueRep construct(Arena&) const; + +private: + static bool IsObjectCompatible(std::initializer_list V); + void reset(); + + enum LiteralType { + LT_Twine, + LT_StaticString, + LT_Array, + LT_Object, + LT_Double, + LT_Immediate, + LT_Value, + } Type; + union Rep { + Rep() {} + ~Rep() {} + Twine Twine; + const char* StaticString; + std::vector Array; + StringMap Object; + double Double; + ValueRep Immediate; + const Value *Value; + }; + Rep R; +}; + +} // namespace detail +} // namespace json +} // namespace llvm Index: lib/Support/CMakeLists.txt =================================================================== --- lib/Support/CMakeLists.txt +++ lib/Support/CMakeLists.txt @@ -71,6 +71,7 @@ IntEqClasses.cpp IntervalMap.cpp JamCRC.cpp + JSON.cpp KnownBits.cpp LEB128.cpp LineIterator.cpp Index: lib/Support/JSON.cpp =================================================================== --- /dev/null +++ lib/Support/JSON.cpp @@ -0,0 +1,613 @@ +//===--- JSON.cpp - JSON representation, parser, serializer -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/JSON.h" +#include "llvm/Support/NativeFormatting.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" + +namespace llvm { +namespace json { +using namespace llvm::json::detail; + +ValueRep detail::deepCopy(const ValueRep &U, Arena &Alloc) { + switch (U.getTag()) { + case VT_Sentinel: + case VT_SmallInt: + return U; + case VT_OwnedString: + return ValueRep::create( + PString::create(*U.cast(), Alloc)); + case VT_StaticString: + return ValueRep::create( + new (Alloc) const char *(*U.cast())); + case VT_Double: + return ValueRep::create( + new (Alloc) double{*U.cast()}); + case VT_Array: { + Array *Old = U.cast(); + Array *A = new (Alloc) Array(Old->size(), Alloc); + for (const Value &E : *Old) + A->Elems.emplace_back(deepCopy(ValueRep(E), Alloc)); + return ValueRep::create(A); + } + case VT_Object: { + Object *Old = U.cast(); + Object *O = new (Alloc) Object(Old->size(), Alloc); + for (const auto &E : *Old) + // TODO: remove friends with more naive code if it optimizes OK. + O->Props.try_emplace(E.first, deepCopy(ValueRep(E.second), Alloc)); + return ValueRep::create(O); + } + } +} + +ValueRep Literal::construct(Arena &Alloc) const { + switch (Type) { + case LT_Twine: { + return ValueRep::create(PString::create( + R.Twine.isSingleStringRef() ? R.Twine.getSingleStringRef() + : StringRef(R.Twine.str()), + Alloc)); + } + case LT_StaticString: + return ValueRep::create( + new (Alloc) const char *(R.StaticString)); + case LT_Array: { + Array *A = new (Alloc) Array(R.Array.size(), Alloc); + for (const Literal &E : R.Array) A->push_back(E); + return ValueRep::create(A); + } + case LT_Object: { + Object *O = new (Alloc) Object(R.Object.size(), Alloc); + for (const auto &E : R.Object) O->set(E.first(), E.second); + return ValueRep::create(O); + } + case LT_Double: + return ValueRep::create(new (Alloc) double{R.Double}); + case LT_Immediate: + return R.Immediate; + case LT_Value: // Deep copy. + return deepCopy(ValueRep(*R.Value), Alloc); + } +} + +Value::~Value() { + static_assert(alignof(Array) >= alignof(int64_t), ""); + static_assert(alignof(Object) >= alignof(int64_t), ""); + + switch (U.getTag()) { + case VT_Array: + U.cast()->~Array(); + break; + case VT_Object: + U.cast()->~Object(); + break; + case VT_StaticString: + case VT_OwnedString: + case VT_Double: + case VT_SmallInt: + case VT_Sentinel: + // POD, nothing to do. + break; + } +} + +bool operator==(const Value& L, const Value& R) { + if (L.U == R.U) return true; + auto FoldedType = [](const ValueRep& V) -> ValueType { + switch (V.getTag()) { + case VT_StaticString: + return VT_OwnedString; + case VT_SmallInt: + return VT_Double; + default: + return V.getTag(); + } + }; + auto FT = FoldedType(L.U); + if (FT != FoldedType(R.U)) return false; + switch (FT) { + case VT_OwnedString: + return L.string() == R.string(); + case VT_Array: + return *L.U.cast() == *R.U.cast(); + case VT_Object: + return *L.U.cast() == *R.U.cast(); + case VT_Double: + return L.number() == R.number(); + case VT_Sentinel: + return false; // rep equality checked above. + case VT_SmallInt: + case VT_StaticString: + llvm_unreachable("Types folded away"); + } +} + +bool operator==(const Array &L, const Array &R) { + if (L.size() != R.size()) return false; + for (size_t I = 0; I < L.size(); ++I) + if (L[I] != R[I]) return false; + return true; +} + +bool operator==(const Object& L, const Object& R) { + if (L.size() != R.size()) return false; + for (const auto& E : L) { + const Value* Other = R.get(E.first); + if (!Other || E.second != *Other) return false; + } + return true; +} + +bool Literal::IsObjectCompatible(std::initializer_list V) { + return std::all_of(V.begin(), V.end(), [](const Literal &L) { + return L.Type == LT_Array && L.R.Array.size() == 2 && + (L.R.Array[0].Type == LT_Twine || + L.R.Array[0].Type == LT_StaticString); + // We don't consider LT_Value as a possible object key. + // The object/array interpretation shouldn't depend on the dynamic data. + }); +} + +Literal::Literal(std::initializer_list V, bool ForceArray) { + if (ForceArray || !IsObjectCompatible(V)) { + Type = LT_Array; + new (&R.Array) std::vector(); + R.Array.reserve(V.size()); + for (const auto &E : V) + R.Array.push_back(E); + } else { + Type = LT_Object; + new (&R.Object) StringMap(); + for (const auto &E : V) { + const auto& K = E.R.Array[0]; + StringRef KS; + switch (K.Type) { + case LT_Twine: + KS = K.R.Twine.isSingleStringRef() ? K.R.Twine.getSingleStringRef() + : StringRef(K.R.Twine.str()); + break; + case LT_StaticString: + KS = StringRef(K.R.StaticString); + break; + default: + llvm_unreachable("Only strings are allowed as object keys"); + } + R.Object.try_emplace(KS, E.R.Array[1]); + } + } +} + +Literal::Literal(const Literal &Copy) { + switch (Copy.Type) { + case LT_Twine: + new (&R.Twine) Twine(Copy.R.Twine); + break; + case LT_Object: + new (&R.Object) StringMap(Copy.R.Object); + break; + case LT_Array: + new (&R.Array) std::vector(Copy.R.Array); + break; + case LT_Immediate: + case LT_StaticString: + case LT_Double: + case LT_Value: + memcpy(&R, &Copy.R, sizeof(Rep)); // POD + } + Type = Copy.Type; +} + +Literal::Literal(Literal &&Move) { + switch (Move.Type) { + case LT_Twine: + new (&R.Twine) Twine(std::move(Move.R.Twine)); + break; + case LT_Object: + new (&R.Object) StringMap(std::move(Move.R.Object)); + break; + case LT_Array: + new (&R.Array) std::vector(std::move(Move.R.Array)); + break; + case LT_Immediate: + case LT_StaticString: + case LT_Double: + case LT_Value: + memcpy(&R, &Move.R, sizeof(Rep)); // POD + } + Type = Move.Type; + Move.reset(); +} + +void Literal::reset() { + switch (Type) { + case LT_Twine: + R.Twine.~Twine(); + break; + case LT_Object: + R.Object.~StringMap(); + break; + case LT_Array: + R.Array.~vector(); + break; + case LT_Immediate: + case LT_StaticString: + case LT_Double: + case LT_Value: + break; // POD, nothing to do. + } + Type = LT_Immediate; + R.Immediate = NullRep; +} + +static void quote(StringRef S, raw_ostream &OS) { + OS << "\""; + for (unsigned char C : S) { + if (C == 0x22 || C == 0x5C) OS << '\\'; + if (C >= 0x20) { + OS << C; + continue; + } + OS << '\\'; + switch (C) { + // Some control characters have shorter escape sequences than \uXXXX. + case '\b': + OS << 'b'; + break; + case '\t': + OS << 't'; + break; + case '\n': + OS << 'n'; + break; + case '\f': + OS << 'f'; + break; + case '\r': + OS << 'r'; + break; + default: + OS << 'u'; + llvm::write_hex(OS, C, HexPrintStyle::Lower, 4); + break; + } + } + OS << "\""; +} + +raw_ostream &operator<<(raw_ostream &OS, const Array &V) { + const char *Sep = ""; + OS << "["; + for (const auto &E : V) { + OS << Sep << E; + Sep = ","; + } + OS << "]"; + return OS; +} + +raw_ostream &operator<<(raw_ostream &OS, const Object &V) { + const char *Sep = ""; + OS << "{"; + for (const auto &P : V) { + OS << Sep; + quote(P.first, OS); + OS << ":" << P.second; + Sep = ","; + } + OS << "}"; + return OS; +} + +raw_ostream &operator<<(raw_ostream &OS, const Value &V) { + switch (V.U.getTag()) { + case VT_Array: + OS << *V.U.cast(); + break; + case VT_Object: + OS << *V.U.cast(); + break; + case VT_OwnedString: + quote(*V.U.cast(), OS); + break; + case VT_StaticString: + quote(*V.U.cast(), OS); + break; + case VT_Double: + OS << format("%g", *V.U.cast()); + break; + case VT_SmallInt: + OS << V.U.cast(); + break; + case VT_Sentinel: + switch (V.U.cast()) { + case SV_Null: + OS << "null"; + break; + case SV_True: + OS << "true"; + break; + case SV_False: + OS << "false"; + break; + } + break; + } + return OS; +} + +class Document::Parser { + public: + Parser(StringRef Text) + : Start(Text.data()), P(Text.data()), End(Text.data() + Text.size()), + Alloc(new Arena()) {} + + Expected parseDocument() { + auto Root = parseObject(); + if (!Root) + return std::move(*Err); + eatWhitespace(); + if (P != End) { + error("Text after end of document"); + return std::move(*Err); + } + if (Err) llvm_unreachable("Error should have been reported"); + return Document(ValueWrapper(*Root), std::move(Alloc)); + } + +private: + Optional Err; + const char *Start, *P, *End; + std::unique_ptr Alloc; + + Optional parseObject() { + eatWhitespace(); + if (P == End) return error("Unexpected EOF"); + switch (char C = next()) { + case 't': { + if (!(consume('r') && consume('u') && consume('e'))) + return error("Invalid bareword"); + return TrueRep; + } + case 'f': { + if (!(consume('a') && consume('l') && consume('s') && consume('e'))) + return error("Invalid bareword"); + return FalseRep; + } + case 'n': { + if (!(consume('u') && consume('l') && consume('l'))) + return error("Invalid bareword"); + return NullRep; + } + case '"': { + auto S = parseString(); + if (!S) return None; + return ValueRep::create( + PString::create(*S, *Alloc)); + } + case '[': { + Array *A = new (*Alloc) Array(*Alloc); + eatWhitespace(); + if (peek() == ']') + return ValueRep::create(A); + for (;;) { + auto Elem = parseObject(); + if (!Elem) + return None; + A->Elems.emplace_back(*Elem); + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + continue; + case ']': + return ValueRep::create(A); + default: + return error("Expected , or ] after array element"); + } + } + } + case '{': { + Object *O = new (*Alloc) Object(*Alloc); + eatWhitespace(); + if (peek() == '}') + return ValueRep::create(O); + for (;;) { + if (next() != '"') + return error("Expected object key"); + auto K = parseString(); + if (!K) + return None; + eatWhitespace(); + if (next() != ':') + return error("Expected : after object key"); + eatWhitespace(); + auto V = parseObject(); + if (!V) + return None; + O->Props.try_emplace(*K, *V); + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + continue; + case ']': + return ValueRep::create(O); + default: + return error("Expected , or } after object value"); + } + } + } + default: + if (isNumber(C)) { + auto V = parseNumber(C); + if (!V) return None; + if (*V == SmallInt(*V)) + return ValueRep::create(SmallInt(*V)); + return ValueRep::create(new (*Alloc) double{*V}); + } + return error("Expected JSON value"); + } + } + + Optional> parseString() { + SmallString<16> Result; + for (char C = next(); C != '"'; C = next()) { + switch (C) { + default: + Result.push_back(C); + break; + case 0: + return error("Unterminated string"); + case '\\': switch (C = next()) { + case '"': + case '\\': + case '/': + Result.push_back(C); + break; + case 'b': + Result.push_back('\b'); + break; + case 'f': + Result.push_back('\f'); + break; + case 'n': + Result.push_back('\n'); + break; + case 'r': + Result.push_back('\r'); + break; + case 't': + Result.push_back('\t'); + break; + case 'u': + if (!parseUnicode(Result)) + return None; + break; + default: + return error("Invalid escape sequence"); + } + } + } + return Result; + } + + // Parse a \uNNNN escape sequence, the \u have already been consumed. + bool parseUnicode(SmallVectorImpl &Out) { + auto Parse2 = [this](uint16_t &V) { + V = 0; + char Bytes[] = {next(), next(), next(), next()}; + for (unsigned char C : reverse(Bytes)) { + if (!std::isxdigit(C)) + return false; + V <<= 4; + V |= (C > '9')? (C &~ 0x20) - 'A' + 10: (C - '0'); + } + return true; + }; + uint32_t Rune; + uint16_t First; + if (!Parse2(First)) return false; + if (First < 0xD800 || First >= 0xE000) { + Rune = First; + } else if (First >= 0xDC00) { + error("Found lone trailing surrogate"); + return false; + } else { + uint16_t Second; + if (!(consume('\\') && consume('u') && Parse2(Second) && + Second >= 0xDC00 && Second < 0xE000)) { + error("Expected trailing surrogate after leading surrogate"); + return false; + } + Rune = 0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00); + } + // UTF-8 encode rune + if (Rune <= 0x7F) { + Out.push_back(Rune & 0x7F); + return true; + } else if (Rune <= 0x7FF) { + uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + return true; + } else if (Rune <= 0xFFFF) { + uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + return true; + } else if (Rune <= 0x10FFFF) { + uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + Out.push_back(FourthByte); + return true; + } else { + error("Invalid unicode codepoint"); + return false; + } + } + + bool isNumber(char C) { + return C == '-' || C == '+' || C == '.' || C == 'e' || C == 'E' || + (C >= '0' && C <= '9'); + } + + llvm::Optional parseNumber(char First) { + SmallString<24> S; + S.push_back(First); + while (isNumber(peek())) + S.push_back(next()); + char *End; + double D = std::strtod(S.c_str(), &End); + if (End != S.end()) return error("Invalid number"); + return D; + } + + void eatWhitespace() { + while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t')) + ++P; + } + bool consume(char C) { return P != End && *P++ == C; } + char next() { return P == End ? 0 : *P++; } + char peek() { return P == End ? 0 : *P; } + + NoneType error(StringRef Msg) { + int Line = 1; + const char *StartOfLine = Start; + for (const char *X = Start; X < P; ++X) { + if (*X == 0x0A) { + ++Line; + StartOfLine = X; + } + } + // TODO: actual error type. + Err = make_error(formatv("[{0}:{1}, byte={2}]: {3}", Line, + P - StartOfLine, P - Start, Msg), + inconvertibleErrorCode()); + return None; + } +}; + +Expected Document::parse(StringRef Text) { + return Parser(Text).parseDocument(); +} + +} // namespace json +} // namespace llvm + Index: unittests/Support/CMakeLists.txt =================================================================== --- unittests/Support/CMakeLists.txt +++ unittests/Support/CMakeLists.txt @@ -28,6 +28,7 @@ FormatVariadicTest.cpp GlobPatternTest.cpp Host.cpp + JSONTest.cpp LEB128Test.cpp LineIteratorTest.cpp LockFileManagerTest.cpp Index: unittests/Support/JSONTest.cpp =================================================================== --- /dev/null +++ unittests/Support/JSONTest.cpp @@ -0,0 +1,51 @@ +#include "gtest/gtest.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/JSON.h" + +namespace llvm { +namespace json { +namespace { + +// These are not really even worth looking at. +// TODO: write proper tests. And a benchmark. +TEST(JSONTest, All) { + EXPECT_EQ(R"("a\\b\"c\nd\u0007e")", Document("a\\b\"c\nd\ae").str()); + EXPECT_EQ(R"("foo")", Document("foo").str()); + EXPECT_EQ(R"("foo")", Document(Twine("foo")).str()); + EXPECT_EQ(R"("foo")", Document(StringRef("foo")).str()); + EXPECT_EQ(R"("foo")", Document(SmallString<12>("foo")).str()); + EXPECT_EQ(R"("foo")", Document(std::string("foo")).str()); + EXPECT_EQ(R"(["foo"])", Document({"foo"}).str()); + EXPECT_EQ(R"(["foo"])", Document(Literal{"foo"}).str()); + EXPECT_EQ(R"([["foo"],"bar"])", Document({{"foo"}, "bar"}).str()); + EXPECT_EQ(R"(["one",true,3.5])", Document({"one", true, 3.5}).str()); + EXPECT_EQ(R"({})", Document({}).str()); + EXPECT_EQ(R"({"foo":42})", Document({{"foo",42}}).str()); + EXPECT_EQ(R"({"foo":42})", Document(Literal{{"foo",42}}).str()); + EXPECT_EQ(R"([["foo",42]])", Document(array({{"foo",42}})).str()); + + Document X = {"foo", {{"bar", 42}}}; + Value& Root = X; + EXPECT_EQ(42, *Root.array()->object(1)->number("bar")); + + X = "bar"; + EXPECT_EQ(R"("bar")", X.str()); + + X = {"foo", 42, "bar", false}; + EXPECT_EQ(sizeof(Array) + 2 * sizeof(char*), X.usedBytes()); + EXPECT_EQ(detail::FirstSlabSize, X.allocatedBytes()); + + Expected D = Document::parse(R"([["foo"], 42])"); + ASSERT_FALSE(!D); + EXPECT_EQ(Document({{"foo"}, 42}), D.get()); + + D = Document::parse(R"([["foo",], 42])"); + ASSERT_TRUE(!D); + handleAllErrors(D.takeError(), [](const ErrorInfoBase &EI) { + EXPECT_EQ("[1:9, byte=9]: Expected JSON value", EI.message()); + }); +} + +} // namespace +} // namespace json +} // namespace llvm