Diff 154775

include/llvm/ADT/StringExtras.h

	Show First 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	inline bool isAlpha(char C) {			inline bool isAlpha(char C) {
	return ('a' <= C && C <= 'z') \|\| ('A' <= C && C <= 'Z');			return ('a' <= C && C <= 'z') \|\| ('A' <= C && C <= 'Z');
	}			}

	/// Checks whether character \p C is either a decimal digit or an uppercase or			/// Checks whether character \p C is either a decimal digit or an uppercase or
	/// lowercase letter as classified by "C" locale.			/// lowercase letter as classified by "C" locale.
	inline bool isAlnum(char C) { return isAlpha(C) \|\| isDigit(C); }			inline bool isAlnum(char C) { return isAlpha(C) \|\| isDigit(C); }

				/// Returns true when \p C falls in the 7-bit ASCII range, i.e. its value
				/// reinterpreted as an unsigned char is below 0x80.
				inline bool isASCII(char C) {
				  const unsigned char Byte = static_cast<unsigned char>(C);
				  return Byte < 0x80;
				}

				/// Reports whether every character of \p S is ASCII. Scanning stops at the
				/// first character that is not; an empty string yields true.
				inline bool isASCII(llvm::StringRef S) {
				  for (auto It = S.begin(), Last = S.end(); It != Last; ++It) {
				    if (LLVM_UNLIKELY(!isASCII(*It)))
				      return false;
				  }
				  return true;
				}

	/// Returns the corresponding lowercase character if \p x is uppercase.			/// Returns the corresponding lowercase character if \p x is uppercase.
	inline char toLower(char x) {			inline char toLower(char x) {
	if (x >= 'A' && x <= 'Z')			if (x >= 'A' && x <= 'Z')
	return x - 'A' + 'a';			return x - 'A' + 'a';
	return x;			return x;
	}			}

	/// Returns the corresponding uppercase character if \p x is lowercase.			/// Returns the corresponding uppercase character if \p x is lowercase.
	▲ Show 20 Lines • Show All 283 Lines • Show Last 20 Lines

include/llvm/Support/JSON.h

Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"		#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include <map>		#include <map>

namespace llvm {		namespace llvm {
namespace json {		namespace json {

		// === String encodings ===
		//
		// JSON strings are character sequences (not byte sequences like std::string).
		// We need to know the encoding, and for simplicity only support UTF-8.
		//
		// - When parsing, invalid UTF-8 is a syntax error like any other
		//
		// - When creating Values from strings, callers must ensure they are UTF-8.
		//   With asserts on, invalid UTF-8 will crash the program.
		//   With asserts off, we'll substitute the replacement character (U+FFFD).
		//   Callers can use json::isUTF8() and json::fixUTF8() for validation.
		//
		// - When retrieving strings from Values (e.g. asString()), the result will
		// always be valid UTF-8.

		/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
		/// If it returns false and \p ErrOffset is non-null, *ErrOffset is set to a
		/// byte offset near the first error.
		bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
		/// Replaces invalid UTF-8 sequences in \p S with the replacement character
		/// (U+FFFD). The returned string is valid UTF-8.
		/// This is much slower than isUTF8, so test that first.
		std::string fixUTF8(llvm::StringRef S);

class Array;		class Array;
class ObjectKey;		class ObjectKey;
class Value;		class Value;
template <typename T> Value toJSON(const llvm::Optional<T> &Opt);		template <typename T> Value toJSON(const llvm::Optional<T> &Opt);

/// An Object is a JSON object, which maps strings to heterogenous JSON values.		/// An Object is a JSON object, which maps strings to heterogenous JSON values.
/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.		/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {		class Object {
▲ Show 20 Lines • Show All 203 Lines • ▼ Show 20 Lines	public:
Value(Value &&M) { moveFrom(std::move(M)); }		Value(Value &&M) { moveFrom(std::move(M)); }
Value(std::initializer_list<Value> Elements);		Value(std::initializer_list<Value> Elements);
Value(json::Array &&Elements) : Type(T_Array) {		Value(json::Array &&Elements) : Type(T_Array) {
create<json::Array>(std::move(Elements));		create<json::Array>(std::move(Elements));
}		}
Value(json::Object &&Properties) : Type(T_Object) {		Value(json::Object &&Properties) : Type(T_Object) {
create<json::Object>(std::move(Properties));		create<json::Object>(std::move(Properties));
}		}
// Strings: types with value semantics.		// Strings: types with value semantics. Must be valid UTF-8.
Value(std::string &&V) : Type(T_String) { create<std::string>(std::move(V)); }		Value(std::string V) : Type(T_String) {
Value(const std::string &V) : Type(T_String) { create<std::string>(V); }		if (LLVM_UNLIKELY(!isUTF8(V))) {
		benhamiltonUnsubmitted Done Reply Inline Actions Seems like flipping the check is clearer: if (LLVM_UNLIKELY(!isUTF8(V)) { benhamilton: Seems like flipping the check is clearer: ``` if (LLVM_UNLIKELY(!isUTF8(V)) { ```
		dexonsmithUnsubmitted Done Reply Inline Actions Moreover, `__builtin_expect` doesn't work if it's not the top-level condition, and `LLVM_LIKELY` and `LLVM_UNLIKELY` are both using it. dexonsmith: Moreover, `__builtin_expect` doesn't work if it's not the top-level condition, and…
		sammccallAuthorUnsubmitted Not Done Reply Inline Actions Interesting! Naive test suggests this isn't always true for clang, though probably is for GCC: https://godbolt.org/g/vek4jo Done in any case. sammccall: Interesting! Naive test suggests this isn't always true for clang, though probably is for GCC…
Value(const llvm::SmallVectorImpl<char> &V) : Type(T_String) {		assert(false && "Invalid UTF-8 in value used as JSON");
create<std::string>(V.begin(), V.end());		V = fixUTF8(std::move(V));
		}
		create<std::string>(std::move(V));
}		}
		Value(const llvm::SmallVectorImpl<char> &V)
		: Value(std::string(V.begin(), V.end())){};
Value(const llvm::formatv_object_base &V) : Value(V.str()){};		Value(const llvm::formatv_object_base &V) : Value(V.str()){};
// Strings: types with reference semantics.		// Strings: types with reference semantics. Must be valid UTF-8.
Value(llvm::StringRef V) : Type(T_StringRef) { create<llvm::StringRef>(V); }		Value(StringRef V) : Type(T_StringRef) {
Value(const char *V) : Type(T_StringRef) { create<llvm::StringRef>(V); }		create<llvm::StringRef>(V);
		if (LLVM_UNLIKELY(!isUTF8(V))) {
		benhamiltonUnsubmitted Done Reply Inline Actions Ditto on LLVM_UNLIKELY. benhamilton: Ditto on LLVM_UNLIKELY.
		assert(false && "Invalid UTF-8 in value used as JSON");
		*this = Value(fixUTF8(V));
		}
		}
		Value(const char *V) : Value(StringRef(V)) {}
Value(std::nullptr_t) : Type(T_Null) {}		Value(std::nullptr_t) : Type(T_Null) {}
// Boolean (disallow implicit conversions).		// Boolean (disallow implicit conversions).
// (The last template parameter is a dummy to keep templates distinct.)		// (The last template parameter is a dummy to keep templates distinct.)
template <		template <
typename T,		typename T,
typename = typename std::enable_if<std::is_same<T, bool>::value>::type,		typename = typename std::enable_if<std::is_same<T, bool>::value>::type,
bool = false>		bool = false>
Value(T B) : Type(T_Boolean) {		Value(T B) : Type(T_Boolean) {
▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines

bool operator==(const Value &, const Value &);		bool operator==(const Value &, const Value &);
inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }		inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);		llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);

/// ObjectKey is a used to capture keys in Object. Like Value but:		/// ObjectKey is a used to capture keys in Object. Like Value but:
/// - only strings are allowed		/// - only strings are allowed
/// - it's optimized for the string literal case (Owned == nullptr)		/// - it's optimized for the string literal case (Owned == nullptr)
		/// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
class ObjectKey {		class ObjectKey {
public:		public:
ObjectKey(const char *S) : Data(S) {}		ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
ObjectKey(llvm::StringRef S) : Data(S) {}		ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
ObjectKey(std::string &&V)		if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
		benhamiltonUnsubmitted Done Reply Inline Actions Ditto ditto. benhamilton: Ditto ditto.
: Owned(new std::string(std::move(V))), Data(*Owned) {}		assert(false && "Invalid UTF-8 in value used as JSON");
ObjectKey(const std::string &V) : Owned(new std::string(V)), Data(*Owned) {}		Owned = fixUTF8(std::move(Owned));
		}
		Data = *Owned;
		}
		ObjectKey(llvm::StringRef S) : Data(S) {
		if (LLVM_UNLIKELY(!isUTF8(Data))) {
		benhamiltonUnsubmitted Done Reply Inline Actions Ditto ditto ditto. benhamilton: Ditto ditto ditto.
		assert(false && "Invalid UTF-8 in value used as JSON");
		*this = ObjectKey(fixUTF8(S));
		}
		}
ObjectKey(const llvm::SmallVectorImpl<char> &V)		ObjectKey(const llvm::SmallVectorImpl<char> &V)
: ObjectKey(std::string(V.begin(), V.end())) {}		: ObjectKey(std::string(V.begin(), V.end())) {}
ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}		ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}

ObjectKey(const ObjectKey &C) { *this = C; }		ObjectKey(const ObjectKey &C) { *this = C; }
ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}		ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
ObjectKey &operator=(const ObjectKey &C) {		ObjectKey &operator=(const ObjectKey &C) {
if (C.Owned) {		if (C.Owned) {
▲ Show 20 Lines • Show All 194 Lines • Show Last 20 Lines

lib/Support/JSON.cpp

//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//		//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
//		//
//===---------------------------------------------------------------------===//		//===---------------------------------------------------------------------===//

#include "llvm/Support/JSON.h"		#include "llvm/Support/JSON.h"
		#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Format.h"		#include "llvm/Support/Format.h"
#include <cctype>		#include <cctype>

namespace llvm {		namespace llvm {
namespace json {		namespace json {

Value &Object::operator[](const ObjectKey &K) {		Value &Object::operator[](const ObjectKey &K) {
return try_emplace(K, nullptr).first->getSecond();		return try_emplace(K, nullptr).first->getSecond();
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines

namespace {		namespace {
// Simple recursive-descent JSON parser.		// Simple recursive-descent JSON parser.
class Parser {		class Parser {
public:		public:
Parser(StringRef JSON)		Parser(StringRef JSON)
: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}		: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}

		// Validates the whole input buffer as UTF-8 before parsing. On failure,
		// repositions P at the reported error offset and returns parseError()'s
		// result.
		bool checkUTF8() {
		  size_t ErrOffset;
		  const bool Valid = isUTF8(StringRef(Start, End - Start), &ErrOffset);
		  if (!Valid) {
		    // Move P near the error so the line/column calculation reflects it.
		    P = Start + ErrOffset;
		    return parseError("Invalid UTF-8 sequence");
		  }
		  return true;
		}

bool parseValue(Value &Out);		bool parseValue(Value &Out);

bool assertEnd() {		bool assertEnd() {
eatWhitespace();		eatWhitespace();
if (P == End)		if (P == End)
return true;		return true;
return parseError("Text after end of document");		return parseError("Text after end of document");
}		}
▲ Show 20 Lines • Show All 243 Lines • ▼ Show 20 Lines	while (true) {
// Case 2: it's an (unpaired) trailing surrogate.		// Case 2: it's an (unpaired) trailing surrogate.
if (LLVM_UNLIKELY(First >= 0xDC00)) {		if (LLVM_UNLIKELY(First >= 0xDC00)) {
Invalid();		Invalid();
return true;		return true;
}		}

// Case 3: it's a leading surrogate. We expect a trailing one next.		// Case 3: it's a leading surrogate. We expect a trailing one next.
// Case 3a: there's no trailing \u escape. Don't advance in the stream.		// Case 3a: there's no trailing \u escape. Don't advance in the stream.
if (!LLVM_LIKELY(P + 2 <= End && *P == '\\' && *(P + 1) == 'u')) {		if (LLVM_UNLIKELY(P + 2 > End \|\| *P != '\\' \|\| *(P + 1) != 'u')) {
Invalid(); // Leading surrogate was unpaired.		Invalid(); // Leading surrogate was unpaired.
return true;		return true;
}		}
P += 2;		P += 2;
uint16_t Second;		uint16_t Second;
if (!Parse4Hex(Second))		if (!Parse4Hex(Second))
return false;		return false;
// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.		// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
Show All 21 Lines	Err.emplace(
llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));		llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
return false;		return false;
}		}
} // namespace		} // namespace

Expected<Value> parse(StringRef JSON) {		Expected<Value> parse(StringRef JSON) {
Parser P(JSON);		Parser P(JSON);
Value E = nullptr;		Value E = nullptr;
		if (P.checkUTF8())
if (P.parseValue(E))		if (P.parseValue(E))
if (P.assertEnd())		if (P.assertEnd())
return std::move(E);		return std::move(E);
return P.takeError();		return P.takeError();
}		}
char ParseError::ID = 0;		char ParseError::ID = 0;

static std::vector<const Object::value_type *> sortedElements(const Object &O) {		static std::vector<const Object::value_type *> sortedElements(const Object &O) {
std::vector<const Object::value_type *> Elements;		std::vector<const Object::value_type *> Elements;
		benhamiltonUnsubmitted Not Done Reply Inline Actions Wouldn't it make sense to move this to `isLegalUTF8String()`? benhamilton: Wouldn't it make sense to move this to `isLegalUTF8String()`?
		sammccallAuthorUnsubmitted Not Done Reply Inline Actions Hmm.. maybe. I'm slightly leery about this, as these are common Unicode reference functions that are largely unmodified, which people may expect. Also I think we'd still want to split it into two functions so the ASCII check can be inlined and the utf-8 wrangling outlined. It seems harmless enough here, but maybe I'm just lazy. WDYT? sammccall: Hmm.. maybe. I'm slightly leery about this, as these are common Unicode reference functions…
		benhamiltonUnsubmitted Not Done Reply Inline Actions I'm supportive of an inline-able ASCII check if we don't already have one. benhamilton: I'm supportive of an inline-able ASCII check if we don't already have one.
		sammccallAuthorUnsubmitted Not Done Reply Inline Actions Added one to `StringExtras.h` (Unicode.h and ConvertUTF.h have weird style and no dependencies, it's hard to work out how to make it fit). sammccall: Added one to `StringExtras.h` (Unicode.h and ConvertUTF.h have weird style and no dependencies…
for (const auto &E : O)		for (const auto &E : O)
Elements.push_back(&E);		Elements.push_back(&E);
llvm::sort(Elements.begin(), Elements.end(),		llvm::sort(Elements.begin(), Elements.end(),
[](const Object::value_type *L, const Object::value_type *R) {		[](const Object::value_type *L, const Object::value_type *R) {
return L->first < R->first;		return L->first < R->first;
});		});
return Elements;		return Elements;
		benhamiltonUnsubmitted Done Reply Inline Actions Style: Space between `UTF8` and `*`? benhamilton: Style: Space between `UTF8` and `*`?
}		}

		bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
		// Fast-path for ASCII, which is valid UTF-8.
		if (LLVM_LIKELY(isASCII(S)))
		return true;

		const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
		if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
		return true;
		benhamiltonUnsubmitted Done Reply Inline Actions Style: Space between `UTF8` and `*`? benhamilton: Style: Space between `UTF8` and `*`?

		benhamiltonUnsubmitted Done Reply Inline Actions Can we clarify the comment that this logic results in checking for the shortest possible encoding? benhamilton: Can we clarify the comment that this logic results in checking for the shortest possible…
		if (ErrOffset)
		*ErrOffset = Rest - Data;
		return false;
		}
		benhamiltonUnsubmitted Done Reply Inline Actions Also need to check for and reject so-called CESU-8 encoding (where UTF-16 surrogate pairs are "encoded" as separate 3-byte UTF-8 sequences): https://www.unicode.org/reports/tr26/#definitions benhamilton: Also need to check for and reject so-called CESU-8 encoding (where UTF-16 surrogate pairs are…

		std::string fixUTF8(llvm::StringRef S) {
		// This isn't particularly efficient, but is only for error-recovery.
		std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
		const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
		UTF32 *Out32 = Codepoints.data();
		ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
		lenientConversion);
		Codepoints.resize(Out32 - Codepoints.data());
		std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
		const UTF32 *In32 = Codepoints.data();
		benhamiltonUnsubmitted Done Reply Inline Actions Also need to handle two more cases which would encode a code point > `U+10FFFF`, which is not allowed: First byte `== 0xF4` and second byte `> 0x8F` First byte `> 0xF4` benhamilton: Also need to handle two more cases which would encode a code point > `U+10FFFF`, which is not…
		UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
		ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
		strictConversion);
		Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
		return Res;
		}

} // namespace json		} // namespace json
} // namespace llvm		} // namespace llvm

static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {		static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
OS << '\"';		OS << '\"';
for (unsigned char C : S) {		for (unsigned char C : S) {
if (C == 0x22 \|\| C == 0x5C)		if (C == 0x22 \|\| C == 0x5C)
OS << '\\';		OS << '\\';
if (C >= 0x20) {		if (C >= 0x20) {
OS << C;		OS << C;
continue;		continue;
}		}
OS << '\\';		OS << '\\';
		benhamiltonUnsubmitted Done Reply Inline Actions if (!LLVM_LIKELY(measureChar(S, I)) { benhamilton: ``` if (!LLVM_LIKELY(measureChar(S, I)) { ```
switch (C) {		switch (C) {
// A few characters are common enough to make short escapes worthwhile.		// A few characters are common enough to make short escapes worthwhile.
case '\t':		case '\t':
OS << 't';		OS << 't';
break;		break;
case '\n':		case '\n':
OS << 'n';		OS << 'n';
break;		break;
▲ Show 20 Lines • Show All 115 Lines • Show Last 20 Lines

unittests/Support/JSONTest.cpp

Show All 21 Lines

TEST(JSONTest, Types) {		TEST(JSONTest, Types) {
EXPECT_EQ("true", s(true));		EXPECT_EQ("true", s(true));
EXPECT_EQ("null", s(nullptr));		EXPECT_EQ("null", s(nullptr));
EXPECT_EQ("2.5", s(2.5));		EXPECT_EQ("2.5", s(2.5));
EXPECT_EQ(R"("foo")", s("foo"));		EXPECT_EQ(R"("foo")", s("foo"));
EXPECT_EQ("[1,2,3]", s({1, 2, 3}));		EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));		EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));

		#ifdef NDEBUG
		EXPECT_EQ(R"("��")", s("\xC0\x80"));
		EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}}));
		#else
		EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8");
		EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8");
		#endif
}		}

TEST(JSONTest, Constructors) {		TEST(JSONTest, Constructors) {
// Lots of edge cases around empty and singleton init lists.		// Lots of edge cases around empty and singleton init lists.
EXPECT_EQ("[[[3]]]", s({{{3}}}));		EXPECT_EQ("[[[3]]]", s({{{3}}}));
EXPECT_EQ("[[[]]]", s({{{}}}));		EXPECT_EQ("[[[]]]", s({{{}}}));
EXPECT_EQ("[[{}]]", s({{Object{}}}));		EXPECT_EQ("[[{}]]", s({{Object{}}}));
EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}}));		EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}}));
▲ Show 20 Lines • Show All 138 Lines • ▼ Show 20 Lines	TEST(JSONTest, ParseErrors) {
ExpectErr("Unterminated string", R"("abc\"def)");		ExpectErr("Unterminated string", R"("abc\"def)");
ExpectErr("Control character in string", "\"abc\ndef\"");		ExpectErr("Control character in string", "\"abc\ndef\"");
ExpectErr("Invalid escape sequence", R"("\030")");		ExpectErr("Invalid escape sequence", R"("\030")");
ExpectErr("Invalid \\u escape sequence", R"("\usuck")");		ExpectErr("Invalid \\u escape sequence", R"("\usuck")");
ExpectErr("[3:3, byte=19]", R"({		ExpectErr("[3:3, byte=19]", R"({
"valid": 1,		"valid": 1,
invalid: 2		invalid: 2
})");		})");
		ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null
		}

		// Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere.
		TEST(JSONTest, UTF8) {
		for (const char *Valid : {
		"this is ASCII text",
		"thïs tëxt häs BMP chäräctërs",
		"𐌶𐌰L𐌾𐍈 C𐍈𐌼𐌴𐍃",
		}) {
		EXPECT_TRUE(isUTF8(Valid)) << Valid;
		EXPECT_EQ(fixUTF8(Valid), Valid);
		}
		for (auto Invalid : std::vector<std::pair<const char *, const char *>>{
		{"lone trailing \x81\x82 bytes", "lone trailing �� bytes"},
		{"missing trailing \xD0 bytes", "missing trailing � bytes"},
		{"truncated character \xD0", "truncated character �"},
		{"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding",
		"not �� the �� shortest �� encoding"},
		{"too \xF9\x80\x80\x80\x80 long", "too �� long"},
		{"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80",
		"surrogate �� invalid ��"}}) {
		EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first;
		EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second);
		}
}		}

TEST(JSONTest, Inspection) {		TEST(JSONTest, Inspection) {
llvm::Expected<Value> Doc = parse(R"(		llvm::Expected<Value> Doc = parse(R"(
{		{
"null": null,		"null": null,
"boolean": false,		"boolean": false,
"number": 2.78,		"number": 2.78,
▲ Show 20 Lines • Show All 163 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Support] Harden JSON against invalid UTF-8.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 154775

include/llvm/ADT/StringExtras.h

include/llvm/Support/JSON.h

lib/Support/JSON.cpp

unittests/Support/JSONTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[Support] Harden JSON against invalid UTF-8.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 154775

include/llvm/ADT/StringExtras.h

include/llvm/Support/JSON.h

lib/Support/JSON.cpp

unittests/Support/JSONTest.cpp

[Support] Harden JSON against invalid UTF-8.
ClosedPublic