Index: include/llvm/Support/JSON.h =================================================================== --- /dev/null +++ include/llvm/Support/JSON.h @@ -0,0 +1,551 @@ +//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file supports working with JSON data. +/// +/// It comprises: +/// +/// - classes which hold dynamically-typed parsed JSON structures +/// These are value types that can be composed, inspected, and modified. +/// See json::Value, and the related types json::Object and json::Array. +/// +/// - functions to parse JSON text into Values, and to serialize Values to text. +/// See parse(), operator<<, and format_provider. +/// +/// - a convention and helpers for mapping between json::Value and user-defined +/// types. See fromJSON(), ObjectMapper, and the class comment on Value. +/// +/// Typically, JSON data would be read from an external source, parsed into +/// a Value, and then converted into some native data structure before doing +/// real work on it. (And vice versa when writing). +/// +/// Other serialization mechanisms you may consider: +/// +/// - YAML is also text-based, and more human-readable than JSON. It's a more +/// complex format and data model, and YAML parsers aren't ubiquitous. +/// YAMLParser.h is a streaming parser suitable for parsing large documents +/// (including JSON, as YAML is a superset). It can be awkward to use directly. +/// YAML I/O (YAMLTraits.h) provides data mapping that is more declarative +/// than the toJSON/fromJSON conventions here. +/// +/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it +/// encodes LLVM IR ("bitcode"), but it can be a container for other data. 
+/// Low-level reader/writer libraries are in Bitcode/Bitstream*.h +/// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JSON_H +#define LLVM_SUPPORT_JSON_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +namespace json { +class Array; +class ObjectKey; +class Value; + +/// An Object is a JSON object, which maps strings to heterogeneous JSON values. +/// The string keys may be owned or references. +class Object : public std::map { +public: + explicit Object() {} + // We use a custom struct for list-init, because pair forces extra copies. + struct KV; + explicit Object(std::initializer_list Properties); + + // Allow [] as if Value was default-constructible as null. + Value &operator[](const ObjectKey &K); + Value &operator[](ObjectKey &&K); + + // Look up a property, returning nullptr if it doesn't exist. + Value *get(const ObjectKey &K); + const Value *get(const ObjectKey &K) const; + // Typed accessors return None/nullptr if + // - the property doesn't exist + // - or it has the wrong type + llvm::Optional getNull(const ObjectKey &K) const; + llvm::Optional getBoolean(const ObjectKey &K) const; + llvm::Optional getNumber(const ObjectKey &K) const; + llvm::Optional getInteger(const ObjectKey &K) const; + llvm::Optional getString(const ObjectKey &K) const; + const json::Object *getObject(const ObjectKey &K) const; + json::Object *getObject(const ObjectKey &K); + const json::Array *getArray(const ObjectKey &K) const; + json::Array *getArray(const ObjectKey &K); +}; + +/// An Array is a JSON array, which contains heterogeneous JSON values.
+class Array : public std::vector { +public: + explicit Array() {} + explicit Array(std::initializer_list Elements); + // Builds an array by converting each element of C to a Value. + template explicit Array(const Collection &C) { + for (const auto &V : C) + emplace_back(V); + } + + // Typed accessors return None/nullptr if the element has the wrong type. + // The index is not range-checked: I must be a valid index into the array. + llvm::Optional getNull(size_t I) const; + llvm::Optional getBoolean(size_t I) const; + llvm::Optional getNumber(size_t I) const; + llvm::Optional getInteger(size_t I) const; + llvm::Optional getString(size_t I) const; + const Object *getObject(size_t I) const; + Object *getObject(size_t I); + const Array *getArray(size_t I) const; + Array *getArray(size_t I); +}; + +/// A Value is a JSON value of unknown type. +/// They can be copied, but should generally be moved. +/// +/// === Composing values === +/// +/// You can implicitly construct Values from: +/// - strings: std::string, SmallString, formatv, StringRef, char* +/// (char*, and StringRef are references, not copies!) +/// - numbers +/// - booleans +/// - null: nullptr +/// - arrays: {"foo", 42.0, false} +/// - serializable things: types with toJSON(const T&)->Value, found by ADL +/// +/// They can also be constructed from object/array helpers: +/// - json::Object is a type like map +/// - json::Array is a type like vector +/// These can be list-initialized, or used to build up collections in a loop. +/// json::Array(Collection) converts all items in a collection to Values.
+/// +/// === Inspecting values === +/// +/// Each Value is one of the JSON kinds: +/// null (nullptr_t) +/// boolean (bool) +/// number (double) +/// string (StringRef) +/// array (json::Array) +/// object (json::Object) +/// +/// The kind can be queried directly, or implicitly via the typed accessors: +/// if (Optional S = E.asString() +/// assert(E.kind() == Value::String); +/// +/// Array and Object also have typed indexing accessors for easy traversal: +/// Expected E = parse(R"( {"options": {"font": "sans-serif"}} )"); +/// if (Object* O = E->asObject()) +/// if (Object* Opts = O->getObject("options")) +/// if (Optional Font = Opts->getString("font")) +/// assert(Opts->at("font").kind() == Value::String); +/// +/// === Converting JSON values to C++ types === +/// +/// The convention is to have a deserializer function findable via ADL: +/// fromJSON(const json::Value&, T&)->bool +/// Deserializers are provided for: +/// - bool +/// - int +/// - double +/// - std::string +/// - vector, where T is deserializable +/// - map, where T is deserializable +/// - Optional, where T is deserializable +/// ObjectMapper can help writing fromJSON() functions for object types. +/// +/// For conversion in the other direction, the serializer function is: +/// toJSON(const T&) -> json::Value +/// If this exists, then it also allows constructing Value from T, and can +/// be used to serialize vector, map, and Optional. +/// +/// === Serialization === +/// +/// Values can be serialized to JSON: +/// 1) raw_ostream << Value // Basic formatting. +/// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. +/// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. +/// +/// And parsed: +/// Expected E = json::parse("[1, 2, null]"); +/// assert(E && E->kind() == Value::Array); +class Value { +public: + enum Kind { + Null, + Boolean, + Number, + String, + Array, + Object, + }; + + // It would be nice to have Value() be null. But that would make {} null too. 
+ Value(const Value &M) { copyFrom(M); } + Value(Value &&M) { moveFrom(std::move(M)); } + Value(std::initializer_list Elements); + Value(json::Array &&Elements) : Type(T_Array) { + create(std::move(Elements)); + } + Value(json::Object &&Properties) : Type(T_Object) { + create(std::move(Properties)); + } + // Strings: types with value semantics. + Value(std::string &&V) : Type(T_String) { create(std::move(V)); } + Value(const std::string &V) : Type(T_String) { create(V); } + Value(const llvm::SmallVectorImpl &V) : Type(T_String) { + create(V.begin(), V.end()); + } + Value(const llvm::formatv_object_base &V) : Value(V.str()){}; + // Strings: types with reference semantics. + Value(llvm::StringRef V) : Type(T_StringRef) { create(V); } + Value(const char *V) : Type(T_StringRef) { create(V); } + Value(std::nullptr_t) : Type(T_Null) {} + // Prevent implicit conversions to boolean. + template ::value>::type> + Value(T B) : Type(T_Boolean) { + create(B); + } + // Numbers: arithmetic types that are not boolean. + template < + typename T, + typename = typename std::enable_if::value>::type, + typename = typename std::enable_if::value>::value>::type> + Value(T D) : Type(T_Number) { + create(D); + } + // Serializable types: with a toJSON(const T&)->Value function, found by ADL. + template ::value>> + Value(const T &V) : Value(toJSON(V)) {} + + // Assignment first copies/moves the source into a temporary, and only then + // destroys the current contents. Destroying first would read a dead object + // on self-assignment, or when M is nested inside *this (e.g. an element of + // our own array). + Value &operator=(const Value &M) { + Value Tmp(M); + destroy(); + moveFrom(std::move(Tmp)); + return *this; + } + Value &operator=(Value &&M) { + Value Tmp(std::move(M)); + destroy(); + moveFrom(std::move(Tmp)); + return *this; + } + ~Value() { destroy(); } + + // Returns the JSON kind of this value. Owned and referenced strings both + // report String. + Kind kind() const { + switch (Type) { + case T_Null: + return Null; + case T_Boolean: + return Boolean; + case T_Number: + return Number; + case T_String: + case T_StringRef: + return String; + case T_Object: + return Object; + case T_Array: + return Array; + } + llvm_unreachable("Unknown kind"); + } + + // Typed accessors return None/nullptr if the Value is not of this type.
+ llvm::Optional asNull() const { + if (LLVM_LIKELY(Type == T_Null)) + return nullptr; + return llvm::None; + } + llvm::Optional asBoolean() const { + if (LLVM_LIKELY(Type == T_Boolean)) + return as(); + return llvm::None; + } + llvm::Optional asNumber() const { + if (LLVM_LIKELY(Type == T_Number)) + return as(); + return llvm::None; + } + llvm::Optional asInteger() const { + if (LLVM_LIKELY(Type == T_Number)) { + double D = as(); + if (LLVM_LIKELY(std::modf(D, &D) == 0 && + D >= std::numeric_limits::min() && + D <= std::numeric_limits::max())) + return D; + } + return llvm::None; + } + llvm::Optional asString() const { + if (Type == T_String) + return llvm::StringRef(as()); + if (LLVM_LIKELY(Type == T_StringRef)) + return as(); + return llvm::None; + } + const json::Object *asObject() const { + return LLVM_LIKELY(Type == T_Object) ? &as() : nullptr; + } + json::Object *asObject() { + return LLVM_LIKELY(Type == T_Object) ? &as() : nullptr; + } + const json::Array *asArray() const { + return LLVM_LIKELY(Type == T_Array) ? &as() : nullptr; + } + json::Array *asArray() { + return LLVM_LIKELY(Type == T_Array) ? &as() : nullptr; + } + + /// Serializes this Value to JSON, writing it to the provided stream. + /// The formatting is compact (no extra whitespace) and deterministic. + /// For pretty-printing, use the formatv() format_provider below. + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &); + +private: + void destroy(); + void copyFrom(const Value &M); + // We allow moving from *const* Values, by marking all members as mutable! + // This hack is needed to support initializer-list syntax efficiently. + // (std::initializer_list is a container of const T). + void moveFrom(const Value &&M); + friend class Array; + friend class Object; + + template void create(U &&... 
V) { + new (&as()) T(std::forward(V)...); + } + template T &as() const { + return *reinterpret_cast(Union.buffer); + } + + template + void print(llvm::raw_ostream &, const Indenter &) const; + friend struct llvm::format_provider; + + enum ValueType : char { + T_Null, + T_Boolean, + // FIXME: splitting Number into Double and Integer would allow us to + // round-trip 64-bit integers. + T_Number, + T_StringRef, + T_String, + T_Object, + T_Array, + }; + // All members mutable, see moveFrom(). + mutable ValueType Type; + mutable llvm::AlignedCharArrayUnion + Union; +}; + +bool operator==(const Value &, const Value &); +inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &); + +/// ObjectKey is a used to capture keys in Object. Like Value but: +/// - only strings are allowed +/// - it's optimized for the string literal case (Owned == nullptr) +class ObjectKey { +public: + ObjectKey(const char *S) : Data(S) {} + ObjectKey(llvm::StringRef S) : Data(S) {} + ObjectKey(std::string &&V) + : Owned(new std::string(std::move(V))), Data(*Owned) {} + ObjectKey(const std::string &V) : Owned(new std::string(V)), Data(*Owned) {} + ObjectKey(const llvm::SmallVectorImpl &V) + : ObjectKey(std::string(V.begin(), V.end())) {} + ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {} + + ObjectKey(const ObjectKey &C) { *this = C; } + ObjectKey(ObjectKey &&C) : ObjectKey(static_cast(C)) {} + ObjectKey &operator=(const ObjectKey &C) { + if (C.Owned) { + Owned.reset(new std::string(*C.Owned)); + Data = *Owned; + } else { + Data = C.Data; + } + return *this; + } + ObjectKey &operator=(ObjectKey &&) = default; + + operator llvm::StringRef() const { return Data; } + + friend bool operator<(const ObjectKey &L, const ObjectKey &R) { + return L.Data < R.Data; + } + +private: + std::unique_ptr Owned; + llvm::StringRef Data; +}; + +inline bool operator==(const ObjectKey &L, const ObjectKey &R) { + 
return llvm::StringRef(L) == llvm::StringRef(R); +} +inline bool operator!=(const ObjectKey &L, const ObjectKey &R) { + return !(L == R); +} + +struct Object::KV { + ObjectKey K; + Value V; +}; + +inline Object::Object(std::initializer_list Properties) { + for (const auto &P : Properties) + emplace(P.K, nullptr).first->second.moveFrom(std::move(P.V)); +} + +// Standard deserializers are provided for primitive types. +// See comments on Value. +inline bool fromJSON(const Value &E, std::string &Out) { + if (auto S = E.asString()) { + Out = *S; + return true; + } + return false; +} +inline bool fromJSON(const Value &E, int &Out) { + if (auto S = E.asInteger()) { + Out = *S; + return true; + } + return false; +} +inline bool fromJSON(const Value &E, double &Out) { + if (auto S = E.asNumber()) { + Out = *S; + return true; + } + return false; +} +inline bool fromJSON(const Value &E, bool &Out) { + if (auto S = E.asBoolean()) { + Out = *S; + return true; + } + return false; +} +template bool fromJSON(const Value &E, llvm::Optional &Out) { + if (E.asNull()) { + Out = llvm::None; + return true; + } + T Result; + if (!fromJSON(E, Result)) + return false; + Out = std::move(Result); + return true; +} +template bool fromJSON(const Value &E, std::vector &Out) { + if (auto *A = E.asArray()) { + Out.clear(); + Out.resize(A->size()); + for (size_t I = 0; I < A->size(); ++I) + if (!fromJSON((*A)[I], Out[I])) + return false; + return true; + } + return false; +} +template +bool fromJSON(const Value &E, std::map &Out) { + if (auto *O = E.asObject()) { + Out.clear(); + for (const auto &KV : *O) + if (!fromJSON(KV.second, Out[llvm::StringRef(KV.first)])) + return false; + return true; + } + return false; +} + +/// Helper for mapping JSON objects onto protocol structs. 
+/// +/// Example: +/// \code +/// bool fromJSON(const Value &E, MyStruct &R) { +/// ObjectMapper O(E); +/// if (!O || !O.map("mandatory_field", R.MandatoryField)) +/// return false; +/// O.map("optional_field", R.OptionalField); +/// return true; +/// } +/// \endcode +class ObjectMapper { +public: + ObjectMapper(const Value &E) : O(E.asObject()) {} + + /// True if the expression is an object. + /// Must be checked before calling map(). + operator bool() { return O; } + + /// Maps a property to a field, if it exists. + template bool map(StringRef Prop, T &Out) { + assert(*this && "Must check this is an object before calling map()"); + if (const Value *E = O->get(Prop)) + return fromJSON(*E, Out); + return false; + } + + /// Maps a property to a field, if it exists. + /// (Optional requires special handling, because missing keys are OK). + template bool map(StringRef Prop, llvm::Optional &Out) { + assert(*this && "Must check this is an object before calling map()"); + if (const Value *E = O->get(Prop)) + return fromJSON(*E, Out); + Out = llvm::None; + return true; + } + +private: + const Object *O; +}; + +/// Parses the provided JSON source, or returns a ParseError. +/// The returned Value is self-contained and owns its strings (they do not refer +/// to the original source). +llvm::Expected parse(llvm::StringRef JSON); + +class ParseError : public llvm::ErrorInfo { + const char *Msg; + unsigned Line, Column, Offset; + + public: + static char ID; + ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) + : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} + void log(llvm::raw_ostream &OS) const override { + OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg); + } + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; +} // namespace json + +/// Allow printing json::Value with formatv(). +/// The default style is basic/compact formatting, like operator<<. 
+/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. +template <> struct format_provider { + static void format(const llvm::json::Value &, raw_ostream &, StringRef); +}; +} // namespace llvm + +#endif Index: lib/Support/CMakeLists.txt =================================================================== --- lib/Support/CMakeLists.txt +++ lib/Support/CMakeLists.txt @@ -80,6 +80,7 @@ IntEqClasses.cpp IntervalMap.cpp JamCRC.cpp + JSON.cpp KnownBits.cpp LEB128.cpp LineIterator.cpp Index: lib/Support/JSON.cpp =================================================================== --- /dev/null +++ lib/Support/JSON.cpp @@ -0,0 +1,640 @@ +//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "llvm/Support/JSON.h" +#include "llvm/Support/Format.h" +#include + +namespace llvm { +namespace json { + +Value &Object::operator[](const ObjectKey &K) { + return emplace(K, Value(nullptr)).first->second; +} +Value &Object::operator[](ObjectKey &&K) { + return emplace(std::move(K), Value(nullptr)).first->second; +} +Value *Object::get(const ObjectKey &K) { + auto I = find(K); + if (I == end()) + return nullptr; + return &I->second; +} +const Value *Object::get(const ObjectKey &K) const { + auto I = find(K); + if (I == end()) + return nullptr; + return &I->second; +} +llvm::Optional Object::getNull(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asNull(); + return llvm::None; +} +llvm::Optional Object::getBoolean(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asBoolean(); + return llvm::None; +} +llvm::Optional Object::getNumber(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asNumber(); + return llvm::None; +} +llvm::Optional 
Object::getInteger(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asInteger(); + return llvm::None; +} +llvm::Optional Object::getString(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asString(); + return llvm::None; +} +const json::Object *Object::getObject(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asObject(); + return nullptr; +} +json::Object *Object::getObject(const ObjectKey &K) { + if (auto *V = get(K)) + return V->asObject(); + return nullptr; +} +const json::Array *Object::getArray(const ObjectKey &K) const { + if (auto *V = get(K)) + return V->asArray(); + return nullptr; +} +json::Array *Object::getArray(const ObjectKey &K) { + if (auto *V = get(K)) + return V->asArray(); + return nullptr; +} + +Array::Array(std::initializer_list Elements) { + reserve(Elements.size()); + for (const Value &V : Elements) { + emplace_back(nullptr); + back().moveFrom(std::move(V)); + } +} +llvm::Optional Array::getNull(size_t I) const { + return (*this)[I].asNull(); +} +llvm::Optional Array::getBoolean(size_t I) const { + return (*this)[I].asBoolean(); +} +llvm::Optional Array::getNumber(size_t I) const { + return (*this)[I].asNumber(); +} +llvm::Optional Array::getInteger(size_t I) const { + return (*this)[I].asInteger(); +} +llvm::Optional Array::getString(size_t I) const { + return (*this)[I].asString(); +} +const Object *Array::getObject(size_t I) const { return (*this)[I].asObject(); } +Object *Array::getObject(size_t I) { return (*this)[I].asObject(); } +const Array *Array::getArray(size_t I) const { return (*this)[I].asArray(); } +Array *Array::getArray(size_t I) { return (*this)[I].asArray(); } + +Value::Value(std::initializer_list Elements) + : Value(json::Array(Elements)) {} + + void Value::copyFrom(const Value &M) { + Type = M.Type; + switch (Type) { + case T_Null: + case T_Boolean: + case T_Number: + memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); + break; + case T_StringRef: + create(M.as()); + 
break; + case T_String: + create(M.as()); + break; + case T_Object: + create(M.as()); + break; + case T_Array: + create(M.as()); + break; + } + } + +void Value::moveFrom(const Value &&M) { + Type = M.Type; + switch (Type) { + case T_Null: + case T_Boolean: + case T_Number: + memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); + break; + case T_StringRef: + create(M.as()); + break; + case T_String: + create(std::move(M.as())); + M.Type = T_Null; + break; + case T_Object: + create(std::move(M.as())); + M.Type = T_Null; + break; + case T_Array: + create(std::move(M.as())); + M.Type = T_Null; + break; + } +} + +void Value::destroy() { + switch (Type) { + case T_Null: + case T_Boolean: + case T_Number: + break; + case T_StringRef: + as().~StringRef(); + break; + case T_String: + as().~basic_string(); + break; + case T_Object: + as().~Object(); + break; + case T_Array: + as().~Array(); + break; + } +} + +bool operator==(const Value &L, const Value &R) { + if (L.kind() != R.kind()) + return false; + switch (L.kind()) { + case Value::Null: + return *L.asNull() == *R.asNull(); + case Value::Boolean: + return *L.asBoolean() == *R.asBoolean(); + case Value::Number: + return *L.asNumber() == *R.asNumber(); + case Value::String: + return *L.asString() == *R.asString(); + case Value::Array: + return *L.asArray() == *R.asArray(); + case Value::Object: + return *L.asObject() == *R.asObject(); + } + llvm_unreachable("Unknown value kind"); +} + +namespace { +// Simple recursive-descent JSON parser. 
+ class Parser { + public: + Parser(StringRef JSON) + : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {} + + bool parseValue(Value &Out); + + bool assertEnd() { + eatWhitespace(); + if (P == End) + return true; + return parseError("Text after end of document"); + } + + Error takeError() { + assert(Err); + return std::move(*Err); + } + + private: + void eatWhitespace() { + while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t')) + ++P; + } + + // On invalid syntax, parseX() functions return false and set Err. + bool parseNumber(char First, double &Out); + bool parseString(std::string &Out); + bool parseUnicode(std::string &Out); + bool parseError(const char *Msg); // always returns false + + char next() { return P == End ? 0 : *P++; } + char peek() { return P == End ? 0 : *P; } + static bool isNumber(char C) { + return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' || + C == '5' || C == '6' || C == '7' || C == '8' || C == '9' || + C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.'; + } + + Optional Err; + const char *Start, *P, *End; + }; + + bool Parser::parseValue(Value &Out) { + eatWhitespace(); + if (P == End) + return parseError("Unexpected EOF"); + switch (char C = next()) { + // Bare null/true/false are easy - first char identifies them. 
+ case 'n': + Out = nullptr; + return (next() == 'u' && next() == 'l' && next() == 'l') || + parseError("Invalid JSON value (null?)"); + case 't': + Out = true; + return (next() == 'r' && next() == 'u' && next() == 'e') || + parseError("Invalid JSON value (true?)"); + case 'f': + Out = false; + return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') || + parseError("Invalid JSON value (false?)"); + case '"': { + std::string S; + if (parseString(S)) { + Out = std::move(S); + return true; + } + return false; + } + case '[': { + Out = Array{}; + Array &A = *Out.asArray(); + eatWhitespace(); + if (peek() == ']') { + ++P; + return true; + } + for (;;) { + A.emplace_back(nullptr); + if (!parseValue(A.back())) + return false; + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + continue; + case ']': + return true; + default: + return parseError("Expected , or ] after array element"); + } + } + } + case '{': { + Out = Object{}; + Object &O = *Out.asObject(); + eatWhitespace(); + if (peek() == '}') { + ++P; + return true; + } + for (;;) { + if (next() != '"') + return parseError("Expected object key"); + std::string K; + if (!parseString(K)) + return false; + eatWhitespace(); + if (next() != ':') + return parseError("Expected : after object key"); + eatWhitespace(); + if (!parseValue(O[std::move(K)])) + return false; + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + continue; + case '}': + return true; + default: + return parseError("Expected , or } after object property"); + } + } + } + default: + if (isNumber(C)) { + double Num; + if (parseNumber(C, Num)) { + Out = Num; + return true; + } else { + return false; + } + } + return parseError("Invalid JSON value"); + } + } + + bool Parser::parseNumber(char First, double &Out) { + SmallString<24> S; + S.push_back(First); + while (isNumber(peek())) + S.push_back(next()); + char *End; + Out = std::strtod(S.c_str(), &End); + return End == S.end() || parseError("Invalid 
JSON value (number?)"); + } + + bool Parser::parseString(std::string &Out) { + // leading quote was already consumed. + for (char C = next(); C != '"'; C = next()) { + if (LLVM_UNLIKELY(P == End)) + return parseError("Unterminated string"); + if (LLVM_UNLIKELY((C & 0x1f) == C)) + return parseError("Control character in string"); + if (LLVM_LIKELY(C != '\\')) { + Out.push_back(C); + continue; + } + // Handle escape sequence. + switch (C = next()) { + case '"': + case '\\': + case '/': + Out.push_back(C); + break; + case 'b': + Out.push_back('\b'); + break; + case 'f': + Out.push_back('\f'); + break; + case 'n': + Out.push_back('\n'); + break; + case 'r': + Out.push_back('\r'); + break; + case 't': + Out.push_back('\t'); + break; + case 'u': + if (!parseUnicode(Out)) + return false; + break; + default: + return parseError("Invalid escape sequence"); + } + } + return true; + } + + static void encodeUtf8(uint32_t Rune, std::string &Out) { + if (Rune < 0x80) { + Out.push_back(Rune & 0x7F); + } else if (Rune < 0x800) { + uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + } else if (Rune < 0x10000) { + uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + } else if (Rune < 0x110000) { + uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + Out.push_back(FourthByte); + } else { + llvm_unreachable("Invalid codepoint"); + } + } + + // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed. 
+ // May parse several sequential escapes to ensure proper surrogate handling. + // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates. + // These are invalid Unicode but valid JSON (RFC 8259, section 8.2). + bool Parser::parseUnicode(std::string &Out) { + // Invalid UTF is not a JSON error (RFC 8259, section 8.2). It gets replaced by U+FFFD. + auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); }; + // Decodes 4 hex digits from the stream into Out, returns false on error. + auto Parse4Hex = [this](uint16_t &Out) -> bool { + Out = 0; + char Bytes[] = {next(), next(), next(), next()}; + for (unsigned char C : Bytes) { + if (!std::isxdigit(C)) + return parseError("Invalid \\u escape sequence"); + Out <<= 4; + Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0'); + } + return true; + }; + uint16_t First; // UTF-16 code unit from the first \u escape. + if (!Parse4Hex(First)) + return false; + + // We loop to allow proper surrogate-pair error handling. + while (true) { + // Case 1: the UTF-16 code unit is already a codepoint in the BMP. + if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) { + encodeUtf8(First, Out); + return true; + } + + // Case 2: it's an (unpaired) trailing surrogate. + if (LLVM_UNLIKELY(First >= 0xDC00)) { + Invalid(); + return true; + } + + // Case 3: it's a leading surrogate. We expect a trailing one next. + // Case 3a: there's no trailing \u escape. Don't advance in the stream. + if (!LLVM_LIKELY(P + 2 <= End && *P == '\\' && *(P + 1) == 'u')) { + Invalid(); // Leading surrogate was unpaired. + return true; + } + P += 2; + uint16_t Second; + if (!Parse4Hex(Second)) + return false; + // Case 3b: there was another \u escape, but it wasn't a trailing surrogate. + if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) { + Invalid(); // Leading surrogate was unpaired. + First = Second; // Second escape still needs to be processed. + continue; + } + // Case 3c: a valid surrogate pair encoding an astral codepoint.
+ // The pair's 20 payload bits are an offset that must be *added* to 0x10000. + // OR-ing loses the carry for codepoints >= U+20000 (the pair D840 DC00 + // would decode as U+10000 instead of U+20000). + encodeUtf8(0x10000 + (((First - 0xD800) << 10) | (Second - 0xDC00)), Out); + return true; + } + } + + // Records a ParseError at the current position and returns false. + // Line is 1-based; the column is the byte offset of P within its line. + bool Parser::parseError(const char *Msg) { + int Line = 1; + const char *StartOfLine = Start; + for (const char *X = Start; X < P; ++X) { + if (*X == 0x0A) { + ++Line; + StartOfLine = X + 1; + } + } + Err.emplace( + llvm::make_unique(Msg, Line, P - StartOfLine, P - Start)); + return false; + } +} // namespace + +Expected parse(StringRef JSON) { + Parser P(JSON); + Value E = nullptr; + if (P.parseValue(E)) + if (P.assertEnd()) + return std::move(E); + return P.takeError(); +} +char ParseError::ID = 0; + +} // namespace json +} // namespace llvm + +// Writes S as a double-quoted JSON string. Quote and backslash are +// backslash-escaped, bytes below 0x20 are escaped (\t, \n, \r or \u00XX), +// and all other bytes are written through unchanged. +static void quote(llvm::raw_ostream &OS, llvm::StringRef S) { + OS << '\"'; + for (unsigned char C : S) { + if (C == 0x22 || C == 0x5C) + OS << '\\'; + if (C >= 0x20) { + OS << C; + continue; + } + OS << '\\'; + switch (C) { + // A few characters are common enough to make short escapes worthwhile. + case '\t': + OS << 't'; + break; + case '\n': + OS << 'n'; + break; + case '\r': + OS << 'r'; + break; + default: + OS << 'u'; + llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4); + break; + } + } + OS << '\"'; +} + +enum IndenterAction { + Indent, + Outdent, + Newline, + Space, +}; + +// Prints JSON. The indenter can be used to control formatting. +template +void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const { + switch (Type) { + case T_Null: + OS << "null"; + break; + case T_Boolean: + OS << (as() ? 
"true" : "false"); + break; + case T_Number: + // Print 17 significant digits, enough for an IEEE-754 double to + // round-trip. Plain %g keeps only 6, which corrupts values such as + // 1234567 (printed as 1.23457e+06). + OS << format("%.17g", as()); + break; + case T_StringRef: + quote(OS, as()); + break; + case T_String: + quote(OS, as()); + break; + case T_Object: { + bool Comma = false; + OS << '{'; + I(Indent); + for (const auto &P : as()) { + if (Comma) + OS << ','; + Comma = true; + I(Newline); + quote(OS, P.first); + OS << ':'; + I(Space); + P.second.print(OS, I); + } + I(Outdent); + if (Comma) + I(Newline); + OS << '}'; + break; + } + case T_Array: { + bool Comma = false; + OS << '['; + I(Indent); + for (const auto &E : as()) { + if (Comma) + OS << ','; + Comma = true; + I(Newline); + E.print(OS, I); + } + I(Outdent); + if (Comma) + I(Newline); + OS << ']'; + break; + } + } +} + +// An empty format string prints compactly via operator<<. Otherwise the +// options must parse as a decimal integer, used as the indent width. +void llvm::format_provider::format( + const llvm::json::Value &E, raw_ostream &OS, StringRef Options) { + if (Options.empty()) { + OS << E; + return; + } + unsigned IndentAmount = 0; + if (Options.getAsInteger(/*Radix=*/10, IndentAmount)) + llvm_unreachable("json::Value format options should be an integer"); + unsigned IndentLevel = 0; + E.print(OS, [&](IndenterAction A) { + switch (A) { + case Newline: + OS << '\n'; + OS.indent(IndentLevel); + break; + case Space: + OS << ' '; + break; + case Indent: + IndentLevel += IndentAmount; + break; + case Outdent: + IndentLevel -= IndentAmount; + break; + }; + }); +} + +llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) { + E.print(OS, [](IndenterAction A) { /*ignore*/ }); + return OS; +} Index: unittests/Support/CMakeLists.txt =================================================================== --- unittests/Support/CMakeLists.txt +++ unittests/Support/CMakeLists.txt @@ -30,6 +30,7 @@ FormatVariadicTest.cpp GlobPatternTest.cpp Host.cpp + JSONTest.cpp LEB128Test.cpp LineIteratorTest.cpp LockFileManagerTest.cpp Index: unittests/Support/JSONTest.cpp =================================================================== --- /dev/null +++ unittests/Support/JSONTest.cpp @@ -0,0 +1,291 @@ +//===-- 
+// JSONTest.cpp - JSON unit tests --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/JSON.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace json {
+
+namespace {
+
+// NOTE(review): template arguments in this file (for example
+// llvm::Optional<double>, std::numeric_limits<double>) and the whitespace
+// inside multi-line raw strings were missing from the reviewed text and are
+// restored from context - confirm against the original patch.
+
+// Serializes E compactly, via formatv with no format options.
+std::string s(const Value &E) { return llvm::formatv("{0}", E).str(); }
+// Serializes E pretty-printed with an indent of 2 spaces per nesting level.
+std::string sp(const Value &E) { return llvm::formatv("{0:2}", E).str(); }
+
+// Each kind of value can be built implicitly and serializes as expected.
+TEST(JSONTest, Types) {
+  EXPECT_EQ("true", s(true));
+  EXPECT_EQ("null", s(nullptr));
+  EXPECT_EQ("2.5", s(2.5));
+  EXPECT_EQ(R"("foo")", s("foo"));
+  EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
+  EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));
+}
+
+TEST(JSONTest, Constructors) {
+  // Lots of edge cases around empty and singleton init lists.
+  EXPECT_EQ("[[[3]]]", s({{{3}}}));
+  EXPECT_EQ("[[[]]]", s({{{}}}));
+  EXPECT_EQ("[[{}]]", s({{Object{}}}));
+  EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}}));
+  EXPECT_EQ(R"({"A":{"B":{"X":"Y"}}})",
+            s(Object{{"A", Object{{"B", Object{{"X", "Y"}}}}}}));
+}
+
+// A Value built from a const char* refers to the caller's buffer, while one
+// built from a std::string holds its own copy.
+TEST(JSONTest, StringOwnership) {
+  char X[] = "Hello";
+  Value Alias = static_cast<const char *>(X);
+  X[1] = 'a';
+  EXPECT_EQ(R"("Hallo")", s(Alias));
+
+  std::string Y = "Hello";
+  Value Copy = Y;
+  Y[1] = 'a';
+  EXPECT_EQ(R"("Hello")", s(Copy));
+}
+
+TEST(JSONTest, CanonicalOutput) {
+  // Objects are sorted (but arrays aren't)!
+  EXPECT_EQ(R"({"a":1,"b":2,"c":3})", s(Object{{"a", 1}, {"c", 3}, {"b", 2}}));
+  EXPECT_EQ(R"(["a","c","b"])", s({"a", "c", "b"}));
+  EXPECT_EQ("3", s(3.0));
+}
+
+// Checks which bytes the serializer escapes, and how.
+TEST(JSONTest, Escaping) {
+  std::string test = {
+      0,                // Strings may contain nulls.
+      '\b', '\f',       // Have mnemonics, but we escape numerically.
+      '\r', '\n', '\t', // Escaped with mnemonics.
+      'S', '\"', '\\',  // Printable ASCII characters.
+      '\x7f',           // Delete is not escaped.
+      '\xce', '\x94',   // Non-ASCII UTF-8 is not escaped.
+  };
+
+  std::string teststring = R"("\u0000\u0008\u000c\r\n\tS\"\\)"
+                           "\x7f\xCE\x94\"";
+
+  EXPECT_EQ(teststring, s(test));
+
+  EXPECT_EQ(R"({"object keys are\nescaped":true})",
+            s(Object{{"object keys are\nescaped", true}}));
+}
+
+// With an indent of 2, non-empty containers are spread over several lines and
+// empty ones stay on a single line.
+TEST(JSONTest, PrettyPrinting) {
+  const char str[] = R"({
+  "empty_array": [],
+  "empty_object": {},
+  "full_array": [
+    1,
+    null
+  ],
+  "full_object": {
+    "nested_array": [
+      {
+        "property": "value"
+      }
+    ]
+  }
+})";
+
+  EXPECT_EQ(str, sp(Object{
+                     {"empty_object", Object{}},
+                     {"empty_array", {}},
+                     {"full_array", {1, nullptr}},
+                     {"full_object",
+                      Object{
+                          {"nested_array",
+                           {Object{
+                               {"property", "value"},
+                           }}},
+                      }},
+                 }));
+}
+
+// Well-formed documents parse to the expected values.
+TEST(JSONTest, Parse) {
+  auto Compare = [](llvm::StringRef S, Value Expected) {
+    if (auto E = parse(S)) {
+      // Compare both string forms and with operator==, in case we have bugs.
+      EXPECT_EQ(*E, Expected);
+      EXPECT_EQ(sp(*E), sp(Expected));
+    } else {
+      handleAllErrors(E.takeError(), [S](const llvm::ErrorInfoBase &E) {
+        FAIL() << "Failed to parse JSON >>> " << S << " <<<: " << E.message();
+      });
+    }
+  };
+
+  Compare(R"(true)", true);
+  Compare(R"(false)", false);
+  Compare(R"(null)", nullptr);
+
+  Compare(R"(42)", 42);
+  Compare(R"(2.5)", 2.5);
+  Compare(R"(2e50)", 2e50);
+  Compare(R"(1.2e3456789)", std::numeric_limits<double>::infinity());
+
+  Compare(R"("foo")", "foo");
+  Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t");
+  Compare(R"("\u0000")", llvm::StringRef("\0", 1));
+  Compare("\"\x7f\"", "\x7f");
+  Compare(R"("\ud801\udc37")", u8"\U00010437"); // UTF16 surrogate pair escape.
+  Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", u8"\u20ac\U0001d11e"); // UTF8
+  Compare(
+      R"("LoneLeading=\ud801, LoneTrailing=\udc01, LeadingLeadingTrailing=\ud801\ud801\udc37")",
+      u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, "
+      u8"LeadingLeadingTrailing=\ufffd\U00010437"); // Invalid unicode.
+
+  Compare(R"({"":0,"":0})", Object{{"", 0}});
+  Compare(R"({"obj":{},"arr":[]})", Object{{"obj", Object{}}, {"arr", {}}});
+  Compare(R"({"\n":{"\u0000":[[[[]]]]}})",
+          Object{{"\n", Object{
+                            {llvm::StringRef("\0", 1), {{{{}}}}},
+                        }}});
+  Compare("\r[\n\t] ", {});
+}
+
+// Malformed documents are rejected with a message containing the expected
+// text.
+TEST(JSONTest, ParseErrors) {
+  auto ExpectErr = [](llvm::StringRef Msg, llvm::StringRef S) {
+    if (auto E = parse(S)) {
+      // Parsing was supposed to fail: report the input and the wanted error.
+      FAIL() << "Parsed JSON >>> " << S << " <<< but wanted error: " << Msg;
+    } else {
+      handleAllErrors(E.takeError(), [S, Msg](const llvm::ErrorInfoBase &E) {
+        EXPECT_THAT(E.message(), testing::HasSubstr(Msg)) << S;
+      });
+    }
+  };
+  ExpectErr("Unexpected EOF", "");
+  ExpectErr("Unexpected EOF", "[");
+  ExpectErr("Text after end of document", "[][]");
+  ExpectErr("Invalid JSON value (false?)", "fuzzy");
+  ExpectErr("Expected , or ]", "[2?]");
+  ExpectErr("Expected object key", "{a:2}");
+  ExpectErr("Expected : after object key", R"({"a",2})");
+  ExpectErr("Expected , or } after object property", R"({"a":2 "b":3})");
+  ExpectErr("Invalid JSON value", R"([&%!])");
+  ExpectErr("Invalid JSON value (number?)", "1e1.0");
+  ExpectErr("Unterminated string", R"("abc\"def)");
+  ExpectErr("Control character in string", "\"abc\ndef\"");
+  ExpectErr("Invalid escape sequence", R"("\030")");
+  ExpectErr("Invalid \\u escape sequence", R"("\usuck")");
+  // The error message carries the position as [line:column, byte=offset].
+  ExpectErr("[3:3, byte=19]", R"({
+  "valid": 1,
+  invalid: 2
+})");
+}
+
+// Exercises the typed accessors on Object and Array.
+TEST(JSONTest, Inspection) {
+  llvm::Expected<Value> Doc = parse(R"(
+    {
+      "null": null,
+      "boolean": false,
+      "number": 2.78,
+      "string": "json",
+      "array": [null, true, 3.14, "hello", [1,2,3], {"time": "arrow"}],
+      "object": {"fruit": "banana"}
+    }
+  )");
+  // Stop here on a parse failure: Doc is dereferenced below, and the error
+  // has to be consumed before Doc is destroyed.
+  if (!Doc) {
+    FAIL() << "Failed to parse JSON: " << llvm::toString(Doc.takeError());
+  }
+
+  Object *O = Doc->asObject();
+  ASSERT_TRUE(O);
+
+  EXPECT_FALSE(O->getNull("missing"));
+  EXPECT_FALSE(O->getNull("boolean"));
+  EXPECT_TRUE(O->getNull("null"));
+
+  EXPECT_EQ(O->getNumber("number"), llvm::Optional<double>(2.78));
+  EXPECT_FALSE(O->getInteger("number"));
+  EXPECT_EQ(O->getString("string"), llvm::Optional<llvm::StringRef>("json"));
+  ASSERT_FALSE(O->getObject("missing"));
+  ASSERT_FALSE(O->getObject("array"));
+  ASSERT_TRUE(O->getObject("object"));
+  EXPECT_EQ(*O->getObject("object"), (Object{{"fruit", "banana"}}));
+
+  Array *A = O->getArray("array");
+  ASSERT_TRUE(A);
+  EXPECT_EQ(A->getBoolean(1), llvm::Optional<bool>(true));
+  ASSERT_TRUE(A->getArray(4));
+  EXPECT_EQ(*A->getArray(4), (Array{1, 2, 3}));
+  EXPECT_EQ(A->getArray(4)->getInteger(1), llvm::Optional<int64_t>(2));
+  // Only the last element (index 5) of the array is an object.
+  int I = 0;
+  for (Value &E : *A) {
+    if (I++ == 5) {
+      ASSERT_TRUE(E.asObject());
+      EXPECT_EQ(E.asObject()->getString("time"),
+                llvm::Optional<llvm::StringRef>("arrow"));
+    } else
+      EXPECT_FALSE(E.asObject());
+  }
+}
+
+// Sample struct with typical JSON-mapping rules.
+struct CustomStruct {
+  CustomStruct() : B(false) {}
+  CustomStruct(std::string S, llvm::Optional<int> I, bool B)
+      : S(S), I(I), B(B) {}
+  std::string S;
+  llvm::Optional<int> I;
+  bool B;
+};
+inline bool operator==(const CustomStruct &L, const CustomStruct &R) {
+  return L.S == R.S && L.I == R.I && L.B == R.B;
+}
+// Prints a CustomStruct as (S, I-or-None, B) for test failure messages.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     const CustomStruct &S) {
+  return OS << "(" << S.S << ", " << (S.I ? std::to_string(*S.I) : "None")
+            << ", " << S.B << ")";
+}
+// Maps a JSON object onto a CustomStruct.
+// Fails if E cannot be mapped as an object or if mapping "str" or "int"
+// fails. The result of mapping "bool" is deliberately ignored, so a missing
+// or unusable "bool" does not cause a failure.
+bool fromJSON(const Value &E, CustomStruct &R) {
+  ObjectMapper O(E);
+  if (!O || !O.map("str", R.S) || !O.map("int", R.I))
+    return false;
+  O.map("bool", R.B);
+  return true;
+}
+
+// Deserializes nested containers of CustomStruct and checks which inputs are
+// rejected.
+TEST(JSONTest, Deserialize) {
+  std::map<std::string, std::vector<CustomStruct>> R;
+  std::map<std::string, std::vector<CustomStruct>> Expected;
+  Value J = Object{
+      {"foo",
+       Array{
+           Object{
+               {"str", "foo"},
+               {"int", 42},
+               {"bool", true},
+               {"unknown", "ignored"},
+           },
+           Object{{"str", "bar"}},
+           Object{
+               {"str", "baz"}, {"bool", "string"}, // OK, deserialize ignores.
+           },
+       }}};
+  Expected["foo"] = {
+      CustomStruct("foo", 42, true),
+      CustomStruct("bar", llvm::None, false),
+      CustomStruct("baz", llvm::None, false),
+  };
+  ASSERT_TRUE(fromJSON(J, R));
+  EXPECT_EQ(R, Expected);
+
+  CustomStruct V;
+  EXPECT_FALSE(fromJSON(nullptr, V)) << "Not an object " << V;
+  EXPECT_FALSE(fromJSON(Object{}, V)) << "Missing required field " << V;
+  EXPECT_FALSE(fromJSON(Object{{"str", 1}}, V)) << "Wrong type " << V;
+  // Optional must parse as the correct type if present.
+  // "str" is valid here so that the failure can only come from "int".
+  EXPECT_FALSE(fromJSON(Object{{"str", "1"}, {"int", "string"}}, V))
+      << "Wrong type for Optional " << V;
+}
+
+} // namespace
+} // namespace json
+} // namespace llvm