Index: include/llvm/Support/JSON.h
===================================================================
--- /dev/null
+++ include/llvm/Support/JSON.h
@@ -0,0 +1,325 @@
+//===---------------------JSON.h --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// A small JSON document model (JSONValue and its subclasses) and a
+/// recursive-descent parser (JSONParser) that builds such a document from text.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_JSON_H
+#define LLVM_SUPPORT_JSON_H
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Abstract base class of every node in a JSON document.
+///
+/// Each node records its concrete type in a Kind tag, which the subclasses'
+/// classof() functions use to support isa<>, cast<> and dyn_cast<>.
+class JSONValue {
+public:
+  enum class Kind { String, Number, True, False, Null, Object, Array };
+
+  explicit JSONValue(Kind K) : TheKind(K) {}
+  virtual ~JSONValue() = default;
+
+  /// Serialize this value as JSON text to \p S.
+  virtual void write(raw_ostream &S) const = 0;
+
+  Kind kind() const { return TheKind; }
+
+private:
+  const Kind TheKind;
+};
+
+/// A JSON string. The stored data is the unescaped text; write() adds the
+/// surrounding quotes and the required escapes.
+class JSONString : public JSONValue {
+public:
+  JSONString(StringRef S);
+
+  JSONString(const JSONString &S) = delete;
+  JSONString &operator=(const JSONString &S) = delete;
+
+  ~JSONString() override = default;
+
+  void write(raw_ostream &S) const override;
+
+  StringRef getData() const { return Data; }
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::String;
+  }
+
+private:
+  std::string Data;
+};
+
+/// A JSON number, stored as an unsigned integer, a signed integer or a double
+/// depending on the type it was constructed from.
+class JSONNumber : public JSONValue {
+public:
+  // The constructors are templates constrained with SFINAE rather than plain
+  // overloads on uint64_t, int64_t and double: with plain overloads,
+  // constructing a JSONNumber from an int32_t (or any other type that needs a
+  // promotion) would be ambiguous and fail to compile.
+
+  template <typename T,
+            typename std::enable_if<std::is_integral<T>::value &&
+                                    std::is_unsigned<T>::value>::type * =
+                nullptr>
+  explicit JSONNumber(T U)
+      : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Unsigned) {
+    Data.Unsigned = U;
+  }
+
+  template <typename T,
+            typename std::enable_if<std::is_integral<T>::value &&
+                                    std::is_signed<T>::value>::type * = nullptr>
+  explicit JSONNumber(T S)
+      : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Signed) {
+    Data.Signed = S;
+  }
+
+  template <typename T,
+            typename std::enable_if<std::is_floating_point<T>::value>::type * =
+                nullptr>
+  explicit JSONNumber(T D)
+      : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Double) {
+    Data.Double = D;
+  }
+
+  ~JSONNumber() override = default;
+
+  JSONNumber(const JSONNumber &S) = delete;
+  JSONNumber &operator=(const JSONNumber &S) = delete;
+
+  void write(raw_ostream &S) const override;
+
+  /// Return the stored number converted to \p T with static_cast.
+  template <typename T> T get() const {
+    switch (TheDataType) {
+    case DataType::Unsigned:
+      return static_cast<T>(Data.Unsigned);
+    case DataType::Signed:
+      return static_cast<T>(Data.Signed);
+    case DataType::Double:
+      return static_cast<T>(Data.Double);
+    }
+    llvm_unreachable("Unhandled data type");
+  }
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::Number;
+  }
+
+private:
+  enum class DataType : uint8_t { Unsigned, Signed, Double } TheDataType;
+
+  union {
+    uint64_t Unsigned;
+    int64_t Signed;
+    double Double;
+  } Data;
+};
+
+/// The JSON literal true.
+class JSONTrue : public JSONValue {
+public:
+  JSONTrue();
+
+  JSONTrue(const JSONTrue &S) = delete;
+  JSONTrue &operator=(const JSONTrue &S) = delete;
+
+  void write(raw_ostream &S) const override;
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::True;
+  }
+
+  ~JSONTrue() override = default;
+};
+
+/// The JSON literal false.
+class JSONFalse : public JSONValue {
+public:
+  JSONFalse();
+
+  JSONFalse(const JSONFalse &S) = delete;
+  JSONFalse &operator=(const JSONFalse &S) = delete;
+
+  void write(raw_ostream &S) const override;
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::False;
+  }
+
+  ~JSONFalse() override = default;
+};
+
+/// The JSON literal null.
+class JSONNull : public JSONValue {
+public:
+  JSONNull();
+
+  JSONNull(const JSONNull &S) = delete;
+  JSONNull &operator=(const JSONNull &S) = delete;
+
+  void write(raw_ostream &S) const override;
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::Null;
+  }
+
+  ~JSONNull() override = default;
+};
+
+/// A JSON object: a set of members, each a string key with a value. Members
+/// are kept in a std::map, so write() emits them sorted by key.
+class JSONObject : public JSONValue {
+public:
+  JSONObject();
+  ~JSONObject() override = default;
+
+  JSONObject(const JSONObject &S) = delete;
+  JSONObject &operator=(const JSONObject &S) = delete;
+
+  void write(raw_ostream &S) const override;
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::Object;
+  }
+
+  /// Store \p Value under \p Key, replacing any existing member with that key.
+  /// \returns false, storing nothing, if \p Key is empty or \p Value is null.
+  bool set(StringRef Key, std::unique_ptr<JSONValue> Value);
+
+  /// Look up the member named \p Key.
+  /// \returns true if it exists; in that case \p *ValuePtr (when \p ValuePtr
+  /// is non-null) is set to the value, which remains owned by this object.
+  bool get(StringRef Key, JSONValue const **ValuePtr) const;
+
+private:
+  std::map<std::string, std::unique_ptr<JSONValue>> Elements;
+};
+
+/// A JSON array: an ordered sequence of values.
+class JSONArray : public JSONValue {
+public:
+  JSONArray();
+
+  JSONArray(const JSONArray &S) = delete;
+  JSONArray &operator=(const JSONArray &S) = delete;
+
+  ~JSONArray() override = default;
+
+  void write(raw_ostream &S) const override;
+
+  static bool classof(const JSONValue *V) {
+    return V->kind() == JSONValue::Kind::Array;
+  }
+
+  /// Replace element \p I, or append when \p I == size().
+  /// \returns false if \p Value is null or \p I is greater than size().
+  bool set(size_t I, std::unique_ptr<JSONValue> Value);
+
+  /// Append \p Value. \returns false if \p Value is null.
+  bool push_back(std::unique_ptr<JSONValue> Value);
+
+  size_t size() const { return Elements.size(); }
+
+  /// Access element \p I, which must be less than size().
+  const JSONValue &operator[](size_t I) const;
+
+private:
+  std::vector<std::unique_ptr<JSONValue>> Elements;
+};
+
+/// Parses JSON text into a tree of JSONValue nodes.
+///
+/// The parser keeps a StringRef to the text it was given, so that text must
+/// outlive the parser.
+class JSONParser {
+public:
+  enum Token {
+    Invalid,
+    Error,
+    ObjectStart,
+    ObjectEnd,
+    ArrayStart,
+    ArrayEnd,
+    Comma,
+    Colon,
+    String,
+    Integer,
+    Float,
+    True,
+    False,
+    Null,
+    EndOfFile
+  };
+
+  explicit JSONParser(StringRef S) : Buffer(S), Index(0) {}
+
+  /// Parse one JSON value starting at the current position.
+  /// \returns the parsed value, or null if the text is malformed.
+  std::unique_ptr<JSONValue> parseJSONValue();
+
+protected:
+  /// Read one character of a string body, decoding a backslash escape.
+  /// \returns the character, or -1 if a unicode escape is malformed;
+  /// \p WasEscaped is set to whether the character was written as an escape.
+  int getEscapedChar(bool &WasEscaped);
+
+  /// Read the next token. For String, Integer and Float tokens \p Value
+  /// receives the token text; for Error it receives a message.
+  Token getToken(std::string &Value);
+
+  std::unique_ptr<JSONValue> parseJSONObject();
+  std::unique_ptr<JSONValue> parseJSONArray();
+
+  void skipSpaces();
+
+  /// Decode two hex digits at the current position and step over them.
+  /// \returns the byte value (0-255), or -1, consuming nothing, if fewer than
+  /// two hex digits are available.
+  int decodeHexU8();
+
+  /// Return the next character and advance, or return 0 at end of input.
+  char getChar();
+
+  /// Return the next character without advancing, or 0 at end of input.
+  char peekChar() const;
+
+  size_t getBytesLeft() const { return Buffer.size() - Index; }
+
+  StringRef Buffer;
+  size_t Index;
+
+private:
+  bool consumeLiteral(StringRef Rest);
+  Token lexString(size_t StartIndex, std::string &Value);
+  Token lexNumber(char First, size_t StartIndex, std::string &Value);
+};
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_JSON_H
Index: lib/Support/CMakeLists.txt
===================================================================
--- lib/Support/CMakeLists.txt
+++ lib/Support/CMakeLists.txt
@@ -61,6 +61,7 @@
   IntervalMap.cpp
   IntrusiveRefCntPtr.cpp
   JamCRC.cpp
+  JSON.cpp
   LEB128.cpp
   LineIterator.cpp
   Locale.cpp
Index: lib/Support/JSON.cpp
===================================================================
--- /dev/null
+++ lib/Support/JSON.cpp
@@ -0,0 +1,559 @@
+//===--------------------- JSON.cpp -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/JSON.h"
+
+#include <cassert>
+#include <cctype>
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <string>
+#include <utility>
+
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// Escape \p S for use between the quotes of a JSON string: the quote and the
+/// backslash are backslash-escaped, and control characters (below 0x20) are
+/// written in their short form or as a six-character unicode escape.
+static std::string quoteJsonString(StringRef S) {
+  static const char HexDigits[] = "0123456789abcdef";
+
+  std::string Output;
+  Output.reserve(S.size());
+  for (char Ch : S) {
+    switch (Ch) {
+    case '"':
+      Output += "\\\"";
+      break;
+    case '\\':
+      Output += "\\\\";
+      break;
+    case '\b':
+      Output += "\\b";
+      break;
+    case '\f':
+      Output += "\\f";
+      break;
+    case '\n':
+      Output += "\\n";
+      break;
+    case '\r':
+      Output += "\\r";
+      break;
+    case '\t':
+      Output += "\\t";
+      break;
+    default:
+      if (static_cast<unsigned char>(Ch) < 0x20) {
+        Output += "\\u00";
+        Output.push_back(HexDigits[(Ch >> 4) & 0xF]);
+        Output.push_back(HexDigits[Ch & 0xF]);
+      } else {
+        Output.push_back(Ch);
+      }
+      break;
+    }
+  }
+  return Output;
+}
+
+/// Store \p Message in \p Value and return the Error token.
+static JSONParser::Token errorToken(std::string &Value, std::string Message) {
+  Value = std::move(Message);
+  return JSONParser::Error;
+}
+
+/// Format \p V in lowercase hexadecimal without a prefix.
+static std::string toHex(unsigned V) {
+  std::string Text;
+  raw_string_ostream OS(Text);
+  OS.write_hex(V);
+  return OS.str();
+}
+
+/// Return the value of hex digit \p Ch, or -1 if it is not a hex digit.
+static int hexDigitToSInt(char Ch) {
+  if (Ch >= 'a' && Ch <= 'f')
+    return 10 + Ch - 'a';
+  if (Ch >= 'A' && Ch <= 'F')
+    return 10 + Ch - 'A';
+  if (Ch >= '0' && Ch <= '9')
+    return Ch - '0';
+  return -1;
+}
+
+/// Convert the text of an Integer token to a number node.
+/// \returns null if the value does not fit in 64 bits.
+static std::unique_ptr<JSONValue> makeInteger(const std::string &Text) {
+  char *End = nullptr;
+  errno = 0;
+  if (Text.front() == '-') {
+    const long long SVal = std::strtoll(Text.c_str(), &End, 10);
+    if (errno != ERANGE && *End == '\0') // in range, all characters used
+      return std::unique_ptr<JSONValue>(new JSONNumber(SVal));
+  } else {
+    // strtoull rather than strtoul: unsigned long is only 32 bits wide on
+    // some platforms.
+    const unsigned long long UVal = std::strtoull(Text.c_str(), &End, 10);
+    if (errno != ERANGE && *End == '\0')
+      return std::unique_ptr<JSONValue>(new JSONNumber(UVal));
+  }
+  return nullptr;
+}
+
+/// Convert the text of a number token to a double-valued number node.
+/// \returns null if strtod does not accept the whole text.
+static std::unique_ptr<JSONValue> makeDouble(const std::string &Text) {
+  char *End = nullptr;
+  const double Val = std::strtod(Text.c_str(), &End);
+  if (End == Text.c_str() || *End != '\0')
+    return nullptr;
+  return std::unique_ptr<JSONValue>(new JSONNumber(Val));
+}
+
+JSONString::JSONString(StringRef S)
+    : JSONValue(JSONValue::Kind::String), Data(S.str()) {}
+
+void JSONString::write(raw_ostream &OS) const {
+  OS << '"' << quoteJsonString(Data) << '"';
+}
+
+void JSONNumber::write(raw_ostream &OS) const {
+  switch (TheDataType) {
+  case DataType::Unsigned:
+    OS << Data.Unsigned;
+    break;
+  case DataType::Signed:
+    OS << Data.Signed;
+    break;
+  case DataType::Double:
+    OS << Data.Double;
+    break;
+  }
+}
+
+JSONTrue::JSONTrue() : JSONValue(JSONValue::Kind::True) {}
+
+void JSONTrue::write(raw_ostream &OS) const { OS << "true"; }
+
+JSONFalse::JSONFalse() : JSONValue(JSONValue::Kind::False) {}
+
+void JSONFalse::write(raw_ostream &OS) const { OS << "false"; }
+
+JSONNull::JSONNull() : JSONValue(JSONValue::Kind::Null) {}
+
+void JSONNull::write(raw_ostream &OS) const { OS << "null"; }
+
+JSONObject::JSONObject() : JSONValue(JSONValue::Kind::Object) {}
+
+void JSONObject::write(raw_ostream &OS) const {
+  bool First = true;
+  OS << '{';
+  for (const auto &KV : Elements) {
+    if (!First)
+      OS << ',';
+    First = false;
+    OS << '"' << quoteJsonString(KV.first) << "\":";
+    KV.second->write(OS);
+  }
+  OS << '}';
+}
+
+bool JSONObject::set(StringRef Key, std::unique_ptr<JSONValue> Value) {
+  if (Key.empty() || !Value)
+    return false;
+  Elements[Key.str()] = std::move(Value);
+  return true;
+}
+
+bool JSONObject::get(StringRef Key, JSONValue const **ValuePtr) const {
+  const auto Iter = Elements.find(Key.str());
+  if (Iter == Elements.end())
+    return false;
+  if (ValuePtr)
+    *ValuePtr = Iter->second.get();
+  return true;
+}
+
+JSONArray::JSONArray() : JSONValue(JSONValue::Kind::Array) {}
+
+void JSONArray::write(raw_ostream &OS) const {
+  bool First = true;
+  OS << '[';
+  for (const auto &Element : Elements) {
+    if (!First)
+      OS << ',';
+    First = false;
+    Element->write(OS);
+  }
+  OS << ']';
+}
+
+bool JSONArray::set(size_t I, std::unique_ptr<JSONValue> Value) {
+  if (!Value || I > Elements.size())
+    return false;
+  if (I == Elements.size())
+    Elements.push_back(std::move(Value));
+  else
+    Elements[I] = std::move(Value);
+  return true;
+}
+
+bool JSONArray::push_back(std::unique_ptr<JSONValue> Value) {
+  if (!Value)
+    return false;
+  Elements.push_back(std::move(Value));
+  return true;
+}
+
+const JSONValue &JSONArray::operator[](size_t I) const {
+  assert(I < Elements.size() && "JSONArray index out of range");
+  return *Elements[I];
+}
+
+/// Consume the characters of \p Rest one by one.
+/// \returns false at the first character of the input that does not match.
+bool JSONParser::consumeLiteral(StringRef Rest) {
+  for (char Expected : Rest)
+    if (getChar() != Expected)
+      return false;
+  return true;
+}
+
+JSONParser::Token JSONParser::getToken(std::string &Value) {
+  Value.clear();
+  skipSpaces();
+  const size_t StartIndex = Index;
+  const char Ch = getChar();
+  switch (Ch) {
+  case '{':
+    return Token::ObjectStart;
+  case '}':
+    return Token::ObjectEnd;
+  case '[':
+    return Token::ArrayStart;
+  case ']':
+    return Token::ArrayEnd;
+  case ',':
+    return Token::Comma;
+  case ':':
+    return Token::Colon;
+  case '\0':
+    // getChar() returns 0 once the buffer is exhausted.
+    return Token::EndOfFile;
+  case 't':
+    if (consumeLiteral("rue"))
+      return Token::True;
+    break;
+  case 'f':
+    if (consumeLiteral("alse"))
+      return Token::False;
+    break;
+  case 'n':
+    if (consumeLiteral("ull"))
+      return Token::Null;
+    break;
+  case '"':
+    return lexString(StartIndex, Value);
+  case '-':
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+    return lexNumber(Ch, StartIndex, Value);
+  default:
+    break;
+  }
+  return errorToken(Value, "error: failed to parse token at offset " +
+                               std::to_string(StartIndex) +
+                               " (around character '" + Ch + "')");
+}
+
+/// Lex the rest of a string whose opening quote, at offset \p StartIndex, has
+/// already been consumed. On success \p Value holds the unescaped contents.
+JSONParser::Token JSONParser::lexString(size_t StartIndex,
+                                        std::string &Value) {
+  while (true) {
+    bool WasEscaped = false;
+    const int EscapedCh = getEscapedChar(WasEscaped);
+    if (EscapedCh == -1)
+      return errorToken(
+          Value, "error: an error occurred getting a character from offset " +
+                     std::to_string(StartIndex));
+
+    // An unescaped quote ends the string and an unescaped NUL means the input
+    // ran out first; written as escapes, both are ordinary characters.
+    if (!WasEscaped && EscapedCh == '"')
+      return Token::String;
+    if (!WasEscaped && EscapedCh == 0)
+      return errorToken(Value, "error: missing end quote for string");
+
+    // A unicode escape can name a character that does not fit in a char.
+    if (EscapedCh < CHAR_MIN || EscapedCh > CHAR_MAX)
+      return errorToken(
+          Value,
+          "error: wide character support is needed for unicode character 0x" +
+              toHex(EscapedCh) + " at offset " + std::to_string(StartIndex));
+
+    Value.push_back(static_cast<char>(EscapedCh));
+  }
+}
+
+/// Lex the rest of a number whose first character \p First, at offset
+/// \p StartIndex, has already been consumed. On success \p Value holds the
+/// text of the number.
+JSONParser::Token JSONParser::lexNumber(char First, size_t StartIndex,
+                                        std::string &Value) {
+  // Offset of the exponent character; 0 means that none has been seen, which
+  // is unambiguous because the exponent can never be the first character.
+  size_t ExpIndex = 0;
+  bool GotDecimalPoint = false;
+  bool GotIntDigits = First >= '0' && First <= '9';
+  bool GotFracDigits = false;
+  bool GotExpDigits = false;
+  for (bool Done = false; !Done;) {
+    const char NextCh = peekChar();
+    switch (NextCh) {
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      if (ExpIndex != 0)
+        GotExpDigits = true;
+      else if (GotDecimalPoint)
+        GotFracDigits = true;
+      else
+        GotIntDigits = true;
+      ++Index;
+      break;
+
+    case '.':
+      if (GotDecimalPoint)
+        return errorToken(Value,
+                          "error: extra decimal point found at offset " +
+                              std::to_string(StartIndex));
+      GotDecimalPoint = true;
+      ++Index;
+      break;
+
+    case 'e':
+    case 'E':
+      if (ExpIndex != 0)
+        return errorToken(Value,
+                          "error: extra exponent character found at offset " +
+                              std::to_string(StartIndex));
+      ExpIndex = Index;
+      ++Index;
+      break;
+
+    case '+':
+    case '-':
+      // A sign is only valid directly after the exponent character. The
+      // explicit ExpIndex check matters when the number starts at offset 0,
+      // where Index - 1 is 0 as well.
+      if (ExpIndex == 0 || ExpIndex != Index - 1)
+        return errorToken(Value, std::string("error: unexpected ") + NextCh +
+                                     " character at offset " +
+                                     std::to_string(StartIndex));
+      ++Index;
+      break;
+
+    default:
+      Done = true;
+      break;
+    }
+  }
+
+  Value = Buffer.substr(StartIndex, Index - StartIndex).str();
+  const bool GotExponent = ExpIndex != 0;
+  if (GotExponent && !GotExpDigits)
+    return errorToken(Value, "error: got exponent character but no exponent "
+                             "digits in number \"" +
+                                 Value + "\"");
+  if (GotDecimalPoint) {
+    if (GotExponent || GotFracDigits)
+      return Token::Float;
+    return errorToken(Value, "error: no digits after decimal point \"" +
+                                 Value + "\"");
+  }
+  if (!GotIntDigits)
+    return errorToken(Value,
+                      "error: no digits after negative sign \"" + Value + "\"");
+  // An exponent without a decimal point, as in "1e5", is still a float.
+  return GotExponent ? Token::Float : Token::Integer;
+}
+
+int JSONParser::getEscapedChar(bool &WasEscaped) {
+  WasEscaped = false;
+  const char Ch = getChar();
+  if (Ch != '\\')
+    return Ch;
+
+  WasEscaped = true;
+  const char Ch2 = getChar();
+  switch (Ch2) {
+  case 'b':
+    return '\b';
+  case 'f':
+    return '\f';
+  case 'n':
+    return '\n';
+  case 'r':
+    return '\r';
+  case 't':
+    return '\t';
+  case 'u': {
+    // Four hex digits, decoded as two bytes. Both halves must be checked:
+    // a failed decode must not turn into part of a character value.
+    const int HiByte = decodeHexU8();
+    const int LoByte = decodeHexU8();
+    if (HiByte < 0 || LoByte < 0)
+      return -1;
+    return (HiByte << 8) | LoByte;
+  }
+  default:
+    // '"', '\\' and '/' stand for themselves, and so does any character
+    // that has no special meaning as an escape.
+    return Ch2;
+  }
+}
+
+std::unique_ptr<JSONValue> JSONParser::parseJSONObject() {
+  // The ObjectStart token has already been consumed by the time this function
+  // is called. Stray, repeated and missing commas are tolerated.
+  std::unique_ptr<JSONObject> Object(new JSONObject());
+
+  std::string Text;
+  std::string Key;
+  while (true) {
+    Token Tok = getToken(Text);
+    if (Tok == Token::ObjectEnd) {
+      // std::move keeps this valid for compilers that do not implicitly move
+      // a local whose type differs from the return type.
+      return std::move(Object);
+    }
+    if (Tok == Token::Comma)
+      continue;
+    if (Tok != Token::String)
+      break;
+
+    // A key must be followed by a colon and a value.
+    Key.swap(Text);
+    Tok = getToken(Text);
+    if (Tok != Token::Colon)
+      break;
+    auto Member = parseJSONValue();
+    if (!Member)
+      break;
+    Object->set(Key, std::move(Member));
+  }
+  return nullptr;
+}
+
+std::unique_ptr<JSONValue> JSONParser::parseJSONArray() {
+  // The ArrayStart token has already been consumed by the time this function
+  // is called. Stray, repeated and missing commas are tolerated.
+  std::unique_ptr<JSONArray> Array(new JSONArray());
+
+  std::string Text;
+  while (true) {
+    skipSpaces();
+
+    const char Peek = peekChar();
+    if (Peek == ']' || Peek == ',') {
+      const Token Tok = getToken(Text);
+      if (Tok == Token::Comma)
+        continue;
+      if (Tok == Token::ArrayEnd)
+        return std::move(Array);
+    }
+
+    auto Element = parseJSONValue();
+    if (!Element)
+      break;
+    Array->push_back(std::move(Element));
+  }
+  return nullptr;
+}
+
+std::unique_ptr<JSONValue> JSONParser::parseJSONValue() {
+  std::string Text;
+  switch (getToken(Text)) {
+  case Token::ObjectStart:
+    return parseJSONObject();
+  case Token::ArrayStart:
+    return parseJSONArray();
+  case Token::Integer:
+    if (auto Number = makeInteger(Text))
+      return Number;
+    // Too large for a 64-bit integer: keep the magnitude as a double.
+    return makeDouble(Text);
+  case Token::Float:
+    return makeDouble(Text);
+  case Token::String:
+    return std::unique_ptr<JSONValue>(new JSONString(Text));
+  case Token::True:
+    return std::unique_ptr<JSONValue>(new JSONTrue());
+  case Token::False:
+    return std::unique_ptr<JSONValue>(new JSONFalse());
+  case Token::Null:
+    return std::unique_ptr<JSONValue>(new JSONNull());
+  default:
+    break;
+  }
+  return nullptr;
+}
+
+void JSONParser::skipSpaces() {
+  const size_t Size = Buffer.size();
+  // The cast avoids undefined behavior in isspace for negative char values.
+  while (Index < Size &&
+         std::isspace(static_cast<unsigned char>(Buffer[Index])))
+    ++Index;
+}
+
+char JSONParser::getChar() {
+  if (getBytesLeft() < 1)
+    return 0;
+  return Buffer[Index++];
+}
+
+char JSONParser::peekChar() const {
+  if (getBytesLeft() == 0)
+    return 0;
+  return Buffer[Index];
+}
+
+int JSONParser::decodeHexU8() {
+  // No whitespace is skipped here: this runs in the middle of a string
+  // escape, where a space is not part of the hex digits.
+  if (getBytesLeft() < 2)
+    return -1;
+  const int HiNibble = hexDigitToSInt(Buffer[Index]);
+  const int LoNibble = hexDigitToSInt(Buffer[Index + 1]);
+  if (HiNibble < 0 || LoNibble < 0)
+    return -1;
+  Index += 2;
+  return (HiNibble << 4) | LoNibble;
+}
Index: unittests/Support/CMakeLists.txt
===================================================================
--- unittests/Support/CMakeLists.txt
+++ unittests/Support/CMakeLists.txt
@@ -19,6 +19,7 @@
   ErrorTest.cpp
   ErrorOrTest.cpp
   FileOutputBufferTest.cpp
+  JSONTest.cpp
   LEB128Test.cpp
   LineIteratorTest.cpp
   LockFileManagerTest.cpp
Index: unittests/Support/JSONTest.cpp
===================================================================
--- /dev/null
+++ unittests/Support/JSONTest.cpp
@@ -0,0 +1,72 @@
+//===- llvm/unittest/Support/JSONTest.cpp - JSON.cpp tests ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+#include "llvm/Support/JSON.h"
+
+using namespace llvm;
+
+/// Parse \p Text as a single JSON value; null means that parsing failed.
+static std::unique_ptr<JSONValue> Parse(const std::string &Text) {
+  JSONParser Parser(Text);
+  return Parser.parseJSONValue();
+}
+
+/// Check that parsing \p Text and writing the result reproduces \p Text.
+static ::testing::AssertionResult MatchRoundtrip(const std::string &Text) {
+  auto Obj = Parse(Text);
+  if (!Obj)
+    return ::testing::AssertionFailure() << "can't parse input: " << Text;
+
+  std::string S;
+  raw_string_ostream Out(S);
+  Obj->write(Out);
+
+  const std::string Actual = Out.str();
+  if (Actual != Text)
+    return ::testing::AssertionFailure() << "expected: " << Text
+                                         << " actual: " << Actual;
+  return ::testing::AssertionSuccess();
+}
+
+TEST(JSON, Roundtrip) {
+  EXPECT_TRUE(MatchRoundtrip("0"));
+  EXPECT_TRUE(MatchRoundtrip("-1"));
+  EXPECT_TRUE(MatchRoundtrip("3.145150e+00"));
+  EXPECT_TRUE(MatchRoundtrip("true"));
+  EXPECT_TRUE(MatchRoundtrip("false"));
+  EXPECT_TRUE(MatchRoundtrip("null"));
+  EXPECT_TRUE(MatchRoundtrip("{}"));
+  EXPECT_TRUE(MatchRoundtrip("{\"a\":1,\"b\":2}"));
+  EXPECT_TRUE(MatchRoundtrip("[]"));
+  EXPECT_TRUE(MatchRoundtrip("[0]"));
+  EXPECT_TRUE(MatchRoundtrip("[1,\"two\",3]"));
+}
+
+TEST(JSON, StringEscapes) {
+  EXPECT_TRUE(MatchRoundtrip("\"say \\\"hi\\\"\""));
+  EXPECT_TRUE(MatchRoundtrip("\"back\\\\slash\""));
+  EXPECT_TRUE(MatchRoundtrip("\"line\\nbreak\""));
+  EXPECT_TRUE(MatchRoundtrip("\"\\u0001\""));
+}
+
+TEST(JSON, ExponentWithoutFraction) {
+  auto Value = Parse("1e2");
+  ASSERT_TRUE(Value != nullptr);
+  ASSERT_TRUE(isa<JSONNumber>(Value.get()));
+  EXPECT_EQ(100.0, cast<JSONNumber>(Value.get())->get<double>());
+}
+
+TEST(JSON, Malformed) {
+  EXPECT_TRUE(Parse("{\"a\",\"b\":1}") == nullptr); // key without a colon
+  EXPECT_TRUE(Parse("\"\\u00zz\"") == nullptr);     // bad hex digits in escape
+  EXPECT_TRUE(Parse("1-2") == nullptr);             // sign outside an exponent
+  EXPECT_TRUE(Parse("\"unterminated") == nullptr);  // missing end quote
+}