Index: include/llvm/Support/JSON.h =================================================================== --- /dev/null +++ include/llvm/Support/JSON.h @@ -0,0 +1,281 @@ +//===---------------------JSON.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JSON_H +#define LLVM_SUPPORT_JSON_H + +#include +#include +#include +#include +#include +#include + +#include "llvm/Support/Casting.h" +#include "llvm/Support/StringExtractor.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class JSONValue { +public: + virtual void Write(raw_ostream &S) = 0; + + typedef std::shared_ptr SP; + + enum class Kind { String, Number, True, False, Null, Object, Array }; + + JSONValue(Kind K) : TheKind(K) {} + + Kind GetKind() const { return TheKind; } + + virtual ~JSONValue() = default; + +private: + const Kind TheKind; +}; + +class JSONString : public JSONValue { +public: + JSONString(); + JSONString(const char *S); + JSONString(const std::string &S); + + JSONString(const JSONString &S) = delete; + JSONString &operator=(const JSONString &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + std::string GetData() { return Data; } + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::String; + } + + ~JSONString() override = default; + +private: + static std::string json_string_quote_metachars(const std::string &); + + std::string Data; +}; + +class JSONNumber : public JSONValue { +public: + typedef std::shared_ptr SP; + + // We cretae a constructor for all integer and floating point type with using + // templates and + // SFINAE to avoid having ambiguous overloads because of the implicit type + // promotion. 
If we + // would have constructors only with int64_t, uint64_t and double types then + // constructing a + // JSONNumber from an int32_t (or any other similar type) would fail to + // compile. + + template ::value && + std::is_unsigned::value>::type * = nullptr> + explicit JSONNumber(T U) + : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Unsigned) { + Data.Unsigned = U; + } + + template ::value && + std::is_signed::value>::type * = nullptr> + explicit JSONNumber(T S) + : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Signed) { + Data.Signed = S; + } + + template ::value>::type * = nullptr> + explicit JSONNumber(T D) + : JSONValue(JSONValue::Kind::Number), TheDataType(DataType::Double) { + Data.Double = D; + } + + ~JSONNumber() override = default; + + JSONNumber(const JSONNumber &S) = delete; + JSONNumber &operator=(const JSONNumber &S) = delete; + + void Write(raw_ostream &S) override; + + uint64_t GetAsUnsigned() const; + + int64_t GetAsSigned() const; + + double GetAsDouble() const; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::Number; + } + +private: + enum class DataType : uint8_t { Unsigned, Signed, Double } TheDataType; + + union { + uint64_t Unsigned; + int64_t Signed; + double Double; + } Data; +}; + +class JSONTrue : public JSONValue { +public: + JSONTrue(); + + JSONTrue(const JSONTrue &S) = delete; + JSONTrue &operator=(const JSONTrue &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::True; + } + + ~JSONTrue() override = default; +}; + +class JSONFalse : public JSONValue { +public: + JSONFalse(); + + JSONFalse(const JSONFalse &S) = delete; + JSONFalse &operator=(const JSONFalse &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::False; + } + + 
~JSONFalse() override = default; +}; + +class JSONNull : public JSONValue { +public: + JSONNull(); + + JSONNull(const JSONNull &S) = delete; + JSONNull &operator=(const JSONNull &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::Null; + } + + ~JSONNull() override = default; +}; + +class JSONObject : public JSONValue { +public: + JSONObject(); + + JSONObject(const JSONObject &S) = delete; + JSONObject &operator=(const JSONObject &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::Object; + } + + bool SetObject(const std::string &Key, JSONValue::SP Value); + + JSONValue::SP GetObject(const std::string &Key); + + ~JSONObject() override = default; + +private: + typedef std::map Map; + typedef Map::iterator Iterator; + Map Elements; +}; + +class JSONArray : public JSONValue { +public: + JSONArray(); + + JSONArray(const JSONArray &S) = delete; + JSONArray &operator=(const JSONArray &S) = delete; + + void Write(raw_ostream &S) override; + + typedef std::shared_ptr SP; + + static bool classof(const JSONValue *V) { + return V->GetKind() == JSONValue::Kind::Array; + } + +private: + typedef std::vector Vector; + typedef Vector::iterator Iterator; + typedef Vector::size_type Index; + typedef Vector::size_type Size; + +public: + bool SetObject(Index I, JSONValue::SP Value); + + bool AppendObject(JSONValue::SP Value); + + JSONValue::SP GetObject(Index I); + + Size GetNumElements(); + + ~JSONArray() override = default; + + Vector Elements; +}; + +class JSONParser : public StringExtractor { +public: + enum Token { + Invalid, + Error, + ObjectStart, + ObjectEnd, + ArrayStart, + ArrayEnd, + Comma, + Colon, + String, + Integer, + Float, + True, + False, + Null, + EndOfFile + }; + + JSONParser(const char *Cstr); + JSONValue::SP 
ParseJSONValue(); + +protected: + int GetEscapedChar(bool &WasEscaped); + Token GetToken(std::string &Value); + JSONValue::SP ParseJSONObject(); + JSONValue::SP ParseJSONArray(); +}; +} // namespace llvm + +#endif // LLVM_SUPPORT_JSON_H Index: include/llvm/Support/StringExtractor.h =================================================================== --- /dev/null +++ include/llvm/Support/StringExtractor.h @@ -0,0 +1,135 @@ +//===-- StringExtractor.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_STRINGEXTRACTOR_H +#define LLVM_SUPPORT_STRINGEXTRACTOR_H + +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +class StringExtractor { +public: + enum { BigEndian = 0, LittleEndian = 1 }; + //------------------------------------------------------------------ + // Constructors and Destructors + //------------------------------------------------------------------ + StringExtractor(); + StringExtractor(llvm::StringRef PacketStr); + StringExtractor(const char *PacketCstr); + StringExtractor(const StringExtractor &Rhs); + virtual ~StringExtractor(); + + //------------------------------------------------------------------ + // Operators + //------------------------------------------------------------------ + StringExtractor &operator=(const StringExtractor &Rhs); + + void Reset(llvm::StringRef Str) { + Packet = Str; + Index = 0; + } + + // Returns true if the file position is still valid for the data + // contained in this string extractor object. 
+ bool IsGood() const { return Index != UINT64_MAX; } + + uint64_t GetFilePos() const { return Index; } + + void SetFilePos(uint32_t Idx) { Index = Idx; } + + void Clear() { + Packet.clear(); + Index = 0; + } + + void SkipSpaces(); + + std::string &GetStringRef() { return Packet; } + + const std::string &GetStringRef() const { return Packet; } + + bool Empty() { return Packet.empty(); } + + size_t GetBytesLeft() { + if (Index < Packet.size()) + return Packet.size() - Index; + return 0; + } + + char GetChar(char FailValue = '\0'); + + char PeekChar(char FailValue = '\0') { + const char *Cstr = Peek(); + if (Cstr) + return Cstr[0]; + return FailValue; + } + + int DecodeHexU8(); + + uint8_t GetHexU8(uint8_t FailValue = 0, bool SetEofOnFail = true); + + bool GetHexU8Ex(uint8_t &Ch, bool SetEofOnFail = true); + + bool GetNameColonValue(llvm::StringRef &Name, llvm::StringRef &Value); + + int32_t GetS32(int32_t FailValue, int Base = 0); + + uint32_t GetU32(uint32_t FailValue, int Base = 0); + + int64_t GetS64(int64_t FailValue, int Base = 0); + + uint64_t GetU64(uint64_t FailValue, int Base = 0); + + uint32_t GetHexMaxU32(bool LittleEndian, uint32_t FailValue); + + uint64_t GetHexMaxU64(bool LittleEndian, uint64_t FailValue); + + size_t GetHexBytes(llvm::MutableArrayRef Dest, + uint8_t FailFillValue); + + size_t GetHexBytesAvail(llvm::MutableArrayRef Dest); + + uint64_t GetHexWithFixedSize(size_t ByteSize, bool LittleEndian, + uint64_t FailValue); + + size_t GetHexByteString(std::string &Str); + + size_t GetHexByteStringFixedLength(std::string &Str, uint32_t NibbleLength); + + size_t GetHexByteStringTerminatedBy(std::string &Str, char Terminator); + + const char *Peek() { + if (Index < Packet.size()) + return Packet.c_str() + Index; + return nullptr; + } + +protected: + bool fail() { + Index = UINT64_MAX; + return false; + } + //------------------------------------------------------------------ + // For StringExtractor only + 
//------------------------------------------------------------------ + std::string Packet; // The string in which to extract data. + uint64_t Index; // When extracting data from a packet, this index + // will march along as things get extracted. If set + // to UINT64_MAX the end of the packet data was + // reached when decoding information +}; +} // namespace llvm + +#endif // LLVM_SUPPORT_STRINGEXTRACTOR_H Index: lib/Support/CMakeLists.txt =================================================================== --- lib/Support/CMakeLists.txt +++ lib/Support/CMakeLists.txt @@ -61,6 +61,7 @@ IntervalMap.cpp IntrusiveRefCntPtr.cpp JamCRC.cpp + JSON.cpp LEB128.cpp LineIterator.cpp Locale.cpp @@ -84,6 +85,7 @@ SpecialCaseList.cpp Statistic.cpp StreamingMemoryObject.cpp + StringExtractor.cpp StringExtras.cpp StringMap.cpp StringPool.cpp Index: lib/Support/JSON.cpp =================================================================== --- /dev/null +++ lib/Support/JSON.cpp @@ -0,0 +1,546 @@ +//===--------------------- JSON.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/JSON.h" + +#include +#include + +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +std::string JSONString::json_string_quote_metachars(const std::string &S) { + if (S.find('"') == std::string::npos) + return S; + + std::string Output; + const size_t Size = S.size(); + const char *Chars = S.c_str(); + for (size_t I = 0; I < Size; I++) { + unsigned char Ch = *(Chars + I); + if (Ch == '"') { + Output.push_back('\\'); + } + Output.push_back(Ch); + } + return Output; +} + +JSONString::JSONString() : JSONValue(JSONValue::Kind::String), Data() {} + +JSONString::JSONString(const char *S) + : JSONValue(JSONValue::Kind::String), Data(S ? S : "") {} + +JSONString::JSONString(const std::string &S) + : JSONValue(JSONValue::Kind::String), Data(S) {} + +void JSONString::Write(raw_ostream &OS) { + OS << "\"" << json_string_quote_metachars(Data) << "\""; +} + +uint64_t JSONNumber::GetAsUnsigned() const { + switch (TheDataType) { + case DataType::Unsigned: + return Data.Unsigned; + case DataType::Signed: + return (uint64_t)Data.Signed; + case DataType::Double: + return (uint64_t)Data.Double; + } + llvm_unreachable("Unhandled data type"); +} + +int64_t JSONNumber::GetAsSigned() const { + switch (TheDataType) { + case DataType::Unsigned: + return (int64_t)Data.Unsigned; + case DataType::Signed: + return Data.Signed; + case DataType::Double: + return (int64_t)Data.Double; + } + llvm_unreachable("Unhandled data type"); +} + +double JSONNumber::GetAsDouble() const { + switch (TheDataType) { + case DataType::Unsigned: + return (double)Data.Unsigned; + case DataType::Signed: + return (double)Data.Signed; + case DataType::Double: + return Data.Double; + } + llvm_unreachable("Unhandled data type"); +} + +void JSONNumber::Write(raw_ostream &OS) { + switch (TheDataType) { + case DataType::Unsigned: + OS << Data.Unsigned; + break; + case DataType::Signed: + OS << 
Data.Signed; + break; + case DataType::Double: + OS << Data.Double; + break; + } +} + +JSONTrue::JSONTrue() : JSONValue(JSONValue::Kind::True) {} + +void JSONTrue::Write(raw_ostream &OS) { OS << "true"; } + +JSONFalse::JSONFalse() : JSONValue(JSONValue::Kind::False) {} + +void JSONFalse::Write(raw_ostream &OS) { OS << "false"; } + +JSONNull::JSONNull() : JSONValue(JSONValue::Kind::Null) {} + +void JSONNull::Write(raw_ostream &OS) { OS << "null"; } + +JSONObject::JSONObject() : JSONValue(JSONValue::Kind::Object) {} + +void JSONObject::Write(raw_ostream &OS) { + bool First = true; + OS << '{'; + auto Iter = Elements.begin(), End = Elements.end(); + for (; Iter != End; Iter++) { + if (First) + First = false; + else + OS << ','; + JSONString Key(Iter->first); + JSONValue::SP Value(Iter->second); + Key.Write(OS); + OS << ':'; + Value->Write(OS); + } + OS << '}'; +} + +bool JSONObject::SetObject(const std::string &Key, JSONValue::SP Value) { + if (Key.empty() || nullptr == Value.get()) + return false; + Elements[Key] = Value; + return true; +} + +JSONValue::SP JSONObject::GetObject(const std::string &Key) { + auto Iter = Elements.find(Key), End = Elements.end(); + if (Iter == End) + return JSONValue::SP(); + return Iter->second; +} + +JSONArray::JSONArray() : JSONValue(JSONValue::Kind::Array) {} + +void JSONArray::Write(raw_ostream &OS) { + bool First = true; + OS << '['; + auto Iter = Elements.begin(), End = Elements.end(); + for (; Iter != End; Iter++) { + if (First) + First = false; + else + OS << ','; + (*Iter)->Write(OS); + } + OS << ']'; +} + +bool JSONArray::SetObject(Index I, JSONValue::SP Value) { + if (Value.get() == nullptr) + return false; + if (I < Elements.size()) { + Elements[I] = Value; + return true; + } + if (I == Elements.size()) { + Elements.push_back(Value); + return true; + } + return false; +} + +bool JSONArray::AppendObject(JSONValue::SP Value) { + if (Value.get() == nullptr) + return false; + Elements.push_back(Value); + return true; +} + 
+JSONValue::SP JSONArray::GetObject(Index I) { + if (I < Elements.size()) + return Elements[I]; + return JSONValue::SP(); +} + +JSONArray::Size JSONArray::GetNumElements() { return Elements.size(); } + +JSONParser::JSONParser(const char *CStr) : StringExtractor(CStr) {} + +JSONParser::Token JSONParser::GetToken(std::string &Value) { + std::stringstream Error; + + Value.clear(); + SkipSpaces(); + const uint64_t StartIndex = Index; + const char Ch = GetChar(); + switch (Ch) { + case '{': + return Token::ObjectStart; + case '}': + return Token::ObjectEnd; + case '[': + return Token::ArrayStart; + case ']': + return Token::ArrayEnd; + case ',': + return Token::Comma; + case ':': + return Token::Colon; + case '\0': + return Token::EndOfFile; + case 't': + if (GetChar() == 'r') + if (GetChar() == 'u') + if (GetChar() == 'e') + return Token::True; + break; + + case 'f': + if (GetChar() == 'a') + if (GetChar() == 'l') + if (GetChar() == 's') + if (GetChar() == 'e') + return Token::False; + break; + + case 'n': + if (GetChar() == 'u') + if (GetChar() == 'l') + if (GetChar() == 'l') + return Token::Null; + break; + + case '"': { + while (1) { + bool WasEscaped = false; + int EscapedCh = GetEscapedChar(WasEscaped); + if (EscapedCh == -1) { + Error << "error: an error occurred getting a character from offset " + << StartIndex; + Value = Error.str(); + return Token::Error; + + } else { + const bool IsEndQuote = EscapedCh == '"'; + const bool IsNull = EscapedCh == 0; + if (WasEscaped || (!IsEndQuote && !IsNull)) { + if (CHAR_MIN <= EscapedCh && EscapedCh <= CHAR_MAX) { + Value.append(1, (char)EscapedCh); + } else { + Error << "error: wide character support is needed for unicode " + "character 0x" + << std::hex << EscapedCh << std::dec << " at offset " + << StartIndex; + Value = Error.str(); + return Token::Error; + } + } else if (IsEndQuote) { + return Token::String; + } else if (IsNull) { + Value = "error: missing end quote for string"; + return Token::Error; + } + } + } + } 
break; + + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + uint64_t ExpIndex = 0; + bool Done = false; + bool GotDecimalPoint = false; + bool GotIntDigits = (Ch >= '0') && (Ch <= '9'); + bool GotFracDigits = false; + bool GotExpDigits = false; + while (!Done) { + const char NextCh = PeekChar(); + switch (NextCh) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (ExpIndex != 0) { + GotExpDigits = true; + } else if (GotDecimalPoint) { + GotFracDigits = true; + } else { + GotIntDigits = true; + } + ++Index; // Skip this character + break; + + case '.': + if (GotDecimalPoint) { + Error << "error: extra decimal point found at offset " << StartIndex; + Value = Error.str(); + return Token::Error; + } else { + GotDecimalPoint = true; + ++Index; // Skip this character + } + break; + + case 'e': + case 'E': + if (ExpIndex != 0) { + Error << "error: extra exponent character found at offset " + << StartIndex; + Value = Error.str(); + return Token::Error; + } else { + ExpIndex = Index; + ++Index; // Skip this character + } + break; + + case '+': + case '-': + // The '+' and '-' can only come after an exponent character... 
+ if (ExpIndex == Index - 1) { + ++Index; // Skip the exponent sign character + } else { + Error << "error: unexpected " << NextCh << " character at offset " + << StartIndex; + Value = Error.str(); + return Token::Error; + } + break; + + default: + Done = true; + break; + } + } + + if (Index > StartIndex) { + Value = Packet.substr(StartIndex, Index - StartIndex); + if (GotDecimalPoint) { + if (ExpIndex != 0) { + // We have an exponent, make sure we got exponent digits + if (GotExpDigits) { + return Token::Float; + } else { + Error << "error: got exponent character but no exponent digits at " + "offset in float value \"" + << Value << "\""; + Value = Error.str(); + return Token::Error; + } + } else { + // No exponent, but we need at least one decimal after the decimal + // point + if (GotFracDigits) { + return Token::Float; + } else { + Error << "error: no digits after decimal point \"" << Value << "\""; + Value = Error.str(); + return Token::Error; + } + } + } else { + // No decimal point + if (GotIntDigits) { + // We need at least some integer digits to make an integer + return Token::Integer; + } else { + Error << "error: no digits negate sign \"" << Value << "%s\""; + Value = Error.str(); + return Token::Error; + } + } + } else { + Error << "error: invalid number found at offset " << StartIndex; + Value = Error.str(); + return Token::Error; + } + } break; + default: + break; + } + Error << "error: failed to parse token at offset " << StartIndex + << " (around character '" << Ch << "')"; + Value = Error.str(); + return Token::Error; +} + +int JSONParser::GetEscapedChar(bool &WasEscaped) { + WasEscaped = false; + const char Ch = GetChar(); + if (Ch == '\\') { + WasEscaped = true; + const char Ch2 = GetChar(); + switch (Ch2) { + case '"': + case '\\': + case '/': + default: + break; + + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'u': { + const int HiByte = 
DecodeHexU8(); + const int LoByte = DecodeHexU8(); + if (HiByte >= 0 && LoByte >= 0) + return HiByte << 8 | LoByte; + return -1; + } break; + } + return Ch2; + } + return Ch; +} + +JSONValue::SP JSONParser::ParseJSONObject() { + // The "JSONParser::Token::ObjectStart" token should have already been + // consumed + // by the time this function is called + std::unique_ptr DictUp(new JSONObject()); + + std::string Value; + std::string Key; + while (1) { + JSONParser::Token Token = GetToken(Value); + + if (Token == JSONParser::Token::String) { + Key.swap(Value); + Token = GetToken(Value); + if (Token == JSONParser::Token::Colon) { + JSONValue::SP ValueSP = ParseJSONValue(); + if (ValueSP) + DictUp->SetObject(Key, ValueSP); + else + break; + } + } else if (Token == JSONParser::Token::ObjectEnd) { + return JSONValue::SP(DictUp.release()); + } else if (Token == JSONParser::Token::Comma) { + continue; + } else { + break; + } + } + return JSONValue::SP(); +} + +JSONValue::SP JSONParser::ParseJSONArray() { + // The "JSONParser::Token::ObjectStart" token should have already been + // consumed + // by the time this function is called + std::unique_ptr ArrayUp(new JSONArray()); + + std::string Value; + while (1) { + JSONValue::SP ValueSP = ParseJSONValue(); + if (ValueSP) + ArrayUp->AppendObject(ValueSP); + else + break; + + JSONParser::Token Token = GetToken(Value); + if (Token == JSONParser::Token::Comma) { + continue; + } else if (Token == JSONParser::Token::ArrayEnd) { + return JSONValue::SP(ArrayUp.release()); + } else { + break; + } + } + return JSONValue::SP(); +} + +JSONValue::SP JSONParser::ParseJSONValue() { + std::string Value; + const JSONParser::Token Token = GetToken(Value); + switch (Token) { + case JSONParser::Token::ObjectStart: + return ParseJSONObject(); + + case JSONParser::Token::ArrayStart: + return ParseJSONArray(); + + case JSONParser::Token::Integer: { + char *End = nullptr; + if (Value.front() == '-') { + int64_t SVal = ::strtoll(Value.c_str(), &End, 
10); + bool Success = *End == '\0'; // all characters were used. + if (Success) + return JSONValue::SP(new JSONNumber(SVal)); + } else { + uint64_t UVal = ::strtoul(Value.c_str(), &End, 10); + bool Success = *End == '\0'; // all characters were used. + if (Success) + return JSONValue::SP(new JSONNumber(UVal)); + } + } break; + + case JSONParser::Token::Float: { + char *End = nullptr; + double Val = ::strtod(Value.c_str(), &End); + bool Success = *End == '\0'; // all characters were used. + if (Success) + return JSONValue::SP(new JSONNumber(Val)); + } break; + + case JSONParser::Token::String: + return JSONValue::SP(new JSONString(Value)); + + case JSONParser::Token::True: + return JSONValue::SP(new JSONTrue()); + + case JSONParser::Token::False: + return JSONValue::SP(new JSONFalse()); + + case JSONParser::Token::Null: + return JSONValue::SP(new JSONNull()); + + default: + break; + } + return JSONValue::SP(); +} Index: lib/Support/StringExtractor.cpp =================================================================== --- /dev/null +++ lib/Support/StringExtractor.cpp @@ -0,0 +1,412 @@ +//===-- StringExtractor.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringExtractor.h" + +#include +#include + +using namespace llvm; + +static inline int xdigit_to_sint(char Ch) { + if (Ch >= 'a' && Ch <= 'f') + return 10 + Ch - 'a'; + if (Ch >= 'A' && Ch <= 'F') + return 10 + Ch - 'A'; + if (Ch >= '0' && Ch <= '9') + return Ch - '0'; + return -1; +} + +//---------------------------------------------------------------------- +// StringExtractor constructor +//---------------------------------------------------------------------- +StringExtractor::StringExtractor() : Packet(), Index(0) {} + +StringExtractor::StringExtractor(StringRef PacketStr) : Packet(), Index(0) { + Packet.assign(PacketStr.begin(), PacketStr.end()); +} + +StringExtractor::StringExtractor(const char *PacketCStr) : Packet(), Index(0) { + if (PacketCStr) + Packet.assign(PacketCStr); +} + +//---------------------------------------------------------------------- +// StringExtractor copy constructor +//---------------------------------------------------------------------- +StringExtractor::StringExtractor(const StringExtractor &Rhs) + : Packet(Rhs.Packet), Index(Rhs.Index) {} + +//---------------------------------------------------------------------- +// StringExtractor assignment operator +//---------------------------------------------------------------------- +StringExtractor &StringExtractor::operator=(const StringExtractor &Rhs) { + if (this != &Rhs) { + Packet = Rhs.Packet; + Index = Rhs.Index; + } + return *this; +} + +//---------------------------------------------------------------------- +// Destructor +//---------------------------------------------------------------------- +StringExtractor::~StringExtractor() {} + +char StringExtractor::GetChar(char FailValue) { + if (Index < Packet.size()) { + char Ch = Packet[Index]; + ++Index; + return Ch; + } + Index = UINT64_MAX; + return FailValue; +} + 
+//----------------------------------------------------------------------
+// If a pair of valid hex digits exist at the head of the
+// StringExtractor they are decoded into an unsigned byte and returned
+// by this function
+//
+// If there is not a pair of valid hex digits at the head of the
+// StringExtractor, -1 is returned and no hex digits are consumed. Note
+// that leading whitespace is skipped (and stays skipped) in either case.
+//----------------------------------------------------------------------
+int StringExtractor::DecodeHexU8() {
+  SkipSpaces();
+  if (GetBytesLeft() < 2)
+    return -1;
+
+  const int HiNibble = xdigit_to_sint(Packet[Index]);
+  const int LoNibble = xdigit_to_sint(Packet[Index + 1]);
+  if (HiNibble == -1 || LoNibble == -1)
+    return -1;
+
+  Index += 2;
+  return static_cast<uint8_t>((HiNibble << 4) + LoNibble);
+}
+
+//----------------------------------------------------------------------
+// Extract an unsigned character from two hex ASCII chars in the packet
+// string, or return FailValue on failure
+//----------------------------------------------------------------------
+uint8_t StringExtractor::GetHexU8(uint8_t FailValue, bool SetEOFOnFail) {
+  // On success, FailValue will be overwritten with the next
+  // character in the stream
+  GetHexU8Ex(FailValue, SetEOFOnFail);
+  return FailValue;
+}
+
+// Decodes one hex-encoded byte into Ch and returns true. On failure returns
+// false and leaves Ch untouched; the extractor is marked failed if
+// SetEOFOnFail is set or the end of the packet was reached.
+bool StringExtractor::GetHexU8Ex(uint8_t &Ch, bool SetEOFOnFail) {
+  const int Byte = DecodeHexU8();
+  if (Byte == -1) {
+    if (SetEOFOnFail || Index >= Packet.size())
+      Index = UINT64_MAX;
+    // Ch should not be changed in case of failure
+    return false;
+  }
+  Ch = static_cast<uint8_t>(Byte);
+  return true;
+}
+
+// The four Get{U,S}{32,64} functions parse a number at the cursor with the
+// matching C strto* function. On success the cursor moves past the parsed
+// text; if no characters could be converted FailValue is returned and the
+// cursor is unchanged. Out-of-range results are not detected.
+uint32_t StringExtractor::GetU32(uint32_t FailValue, int Base) {
+  if (Index < Packet.size()) {
+    char *End = nullptr;
+    const char *Start = Packet.c_str();
+    const char *CStr = Start + Index;
+    const uint32_t Result = static_cast<uint32_t>(::strtoul(CStr, &End, Base));
+
+    if (End && End != CStr) {
+      Index = End - Start;
+      return Result;
+    }
+  }
+  return FailValue;
+}
+
+int32_t StringExtractor::GetS32(int32_t FailValue, int Base) {
+  if (Index < Packet.size()) {
+    char *End = nullptr;
+    const char *Start = Packet.c_str();
+    const char *CStr = Start + Index;
+    const int32_t Result = static_cast<int32_t>(::strtol(CStr, &End, Base));
+
+    if (End && End != CStr) {
+      Index = End - Start;
+      return Result;
+    }
+  }
+  return FailValue;
+}
+
+uint64_t StringExtractor::GetU64(uint64_t FailValue, int Base) {
+  if (Index < Packet.size()) {
+    char *End = nullptr;
+    const char *Start = Packet.c_str();
+    const char *CStr = Start + Index;
+    const uint64_t Result = ::strtoull(CStr, &End, Base);
+
+    if (End && End != CStr) {
+      Index = End - Start;
+      return Result;
+    }
+  }
+  return FailValue;
+}
+
+int64_t StringExtractor::GetS64(int64_t FailValue, int Base) {
+  if (Index < Packet.size()) {
+    char *End = nullptr;
+    const char *Start = Packet.c_str();
+    const char *CStr = Start + Index;
+    const int64_t Result = ::strtoll(CStr, &End, Base);
+
+    if (End && End != CStr) {
+      Index = End - Start;
+      return Result;
+    }
+  }
+  return FailValue;
+}
+
+// Shared implementation of GetHexMaxU32 / GetHexMaxU64: decodes the run of
+// hex digits starting at Index into a T, advancing Index past it. If the run
+// holds more nibbles than fit in T, Index is set to UINT64_MAX and FailValue
+// is returned.
+template <typename T>
+static T decodeHexMax(const std::string &Packet, uint64_t &Index,
+                      bool LittleEndian, T FailValue) {
+  const uint32_t MaxNibbles = sizeof(T) * 2;
+
+  // isxdigit() has undefined behavior for negative values other than EOF,
+  // so plain char must go through unsigned char first.
+  auto IsHexAt = [&Packet](uint64_t I) {
+    return I < Packet.size() &&
+           ::isxdigit(static_cast<unsigned char>(Packet[I])) != 0;
+  };
+
+  T Result = 0;
+  uint32_t NibbleCount = 0;
+  uint32_t ShiftAmount = 0; // Only used for little endian.
+  while (IsHexAt(Index)) {
+    // Make sure we don't exceed the size of T...
+    if (NibbleCount >= MaxNibbles) {
+      Index = UINT64_MAX;
+      return FailValue;
+    }
+
+    if (LittleEndian) {
+      // Bytes arrive least significant first, each as a hi/lo nibble pair.
+      const T NibbleHi = static_cast<T>(xdigit_to_sint(Packet[Index]));
+      ++Index;
+      if (IsHexAt(Index)) {
+        const T NibbleLo = static_cast<T>(xdigit_to_sint(Packet[Index]));
+        ++Index;
+        Result |= NibbleHi << (ShiftAmount + 4);
+        Result |= NibbleLo << ShiftAmount;
+        NibbleCount += 2;
+        ShiftAmount += 8;
+      } else {
+        // Trailing unpaired nibble.
+        Result |= NibbleHi << ShiftAmount;
+        NibbleCount += 1;
+        ShiftAmount += 4;
+      }
+    } else {
+      // Big Endian
+      Result <<= 4;
+      Result |= static_cast<T>(xdigit_to_sint(Packet[Index]));
+      ++Index;
+      ++NibbleCount;
+    }
+  }
+  return Result;
+}
+
+// Skips whitespace, then decodes up to 8 hex nibbles into a uint32_t.
+uint32_t StringExtractor::GetHexMaxU32(bool LittleEndian, uint32_t FailValue) {
+  SkipSpaces();
+  return decodeHexMax<uint32_t>(Packet, Index, LittleEndian, FailValue);
+}
+
+// Skips whitespace, then decodes up to 16 hex nibbles into a uint64_t.
+uint64_t StringExtractor::GetHexMaxU64(bool LittleEndian, uint64_t FailValue) {
+  SkipSpaces();
+  return decodeHexMax<uint64_t>(Packet, Index, LittleEndian, FailValue);
+}
+
+// Decodes hex-encoded bytes into Dest until Dest is full, the packet is
+// exhausted or a decode fails. Every byte of Dest that was not decoded is
+// set to FailFillValue. Returns the number of bytes decoded.
+size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> Dest,
+                                    uint8_t FailFillValue) {
+  size_t BytesExtracted = 0;
+  while (!Dest.empty() && GetBytesLeft() > 0) {
+    Dest[0] = GetHexU8(FailFillValue);
+    if (!IsGood())
+      break;
+    ++BytesExtracted;
+    Dest = Dest.drop_front();
+  }
+
+  if (!Dest.empty())
+    ::memset(Dest.data(), FailFillValue, Dest.size());
+
+  return BytesExtracted;
+}
+
+//----------------------------------------------------------------------
+// Decodes all valid hex encoded bytes at the head of the
+// StringExtractor, limited by the size of Dest.
+//
+// Returns the number of bytes successfully decoded
+//----------------------------------------------------------------------
+size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> Dest) {
+  size_t BytesExtracted = 0;
+  while (!Dest.empty()) {
+    const int Decode = DecodeHexU8();
+    if (Decode == -1)
+      break;
+    Dest[0] = static_cast<uint8_t>(Decode);
+    Dest = Dest.drop_front();
+    ++BytesExtracted;
+  }
+  return BytesExtracted;
+}
+
+// Consume ASCII hex nibble character pairs until we have decoded ByteSize
+// bytes of data.
+
+uint64_t StringExtractor::GetHexWithFixedSize(size_t ByteSize,
+                                              bool LittleEndian,
+                                              uint64_t FailValue) {
+  // At most 8 bytes fit in the result, and every byte needs two hex digits.
+  if (ByteSize <= 8 && GetBytesLeft() >= ByteSize * 2) {
+    uint64_t Result = 0;
+    uint32_t I;
+    if (LittleEndian) {
+      // Little Endian
+      uint32_t ShiftAmount;
+      for (I = 0, ShiftAmount = 0; I < ByteSize && IsGood();
+           ++I, ShiftAmount += 8) {
+        Result |= ((uint64_t)GetHexU8() << ShiftAmount);
+      }
+    } else {
+      // Big Endian
+      for (I = 0; I < ByteSize && IsGood(); ++I) {
+        Result <<= 8;
+        Result |= GetHexU8();
+      }
+    }
+
+    // Success only if every requested byte was decoded and the extractor is
+    // still in a good state. Without this return the decoded value was
+    // discarded and every call fell through to the failure path below.
+    if (I == ByteSize && IsGood())
+      return Result;
+  }
+  // Failure: mark the extractor as failed and hand back the caller's value.
+  Index = UINT64_MAX;
+  return FailValue;
+}
+
+// Decodes hex byte pairs into Str until GetHexU8() yields a zero byte.
+// Returns the resulting length of Str.
+size_t StringExtractor::GetHexByteString(std::string &Str) {
+  Str.clear();
+  Str.reserve(GetBytesLeft() / 2);
+  char Ch;
+  while ((Ch = GetHexU8()) != '\0')
+    Str.append(1, Ch);
+  return Str.size();
+}
+
+// Decodes hex byte pairs into Str, stopping after NibbleLen hex digits have
+// been accounted for or when no input is left. Returns the resulting length
+// of Str.
+size_t StringExtractor::GetHexByteStringFixedLength(std::string &Str,
+                                                    uint32_t NibbleLen) {
+  Str.clear();
+
+  uint32_t NibbleCount = 0;
+  // All the work happens in the loop header: each iteration appends one
+  // decoded byte and advances the nibble count by two.
+  for (const char *Pch = Peek(); (NibbleCount < NibbleLen) && (Pch != nullptr);
+       Str.append(1, GetHexU8(0, false)), Pch = Peek(), NibbleCount += 2) {
+  }
+
+  return Str.size();
+}
+
+// Decodes hex byte pairs into Str until GetHexU8() yields a zero byte, then
+// requires the next input character to be Terminator. If it is not, Str is
+// cleared. Returns the resulting length of Str.
+size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &Str,
+                                                     char Terminator) {
+  Str.clear();
+  char Ch;
+  while ((Ch = GetHexU8(0, false)) != '\0')
+    Str.append(1, Ch);
+  if (Peek() && *Peek() == Terminator)
+    return Str.size();
+
+  Str.clear();
+  return Str.size();
+}
+
+bool StringExtractor::GetNameColonValue(llvm::StringRef &Name,
+                                        llvm::StringRef &Value) {
+  // Read something in the form of NNNN:VVVV; where NNNN is one or more
+  // characters other than ':', followed by a ':' character, then a value
+  // VVVV made of characters other than ';' (it may contain ':'), followed
+  // by a ';'
+  if (Index >= Packet.size())
+    return fail();
+
+  llvm::StringRef View(Packet);
+  if (View.empty())
+    return fail();
+
+  llvm::StringRef A, B, C, D;
+  View = View.substr(Index);
+  std::tie(A, B) = View.split(':');
+  if (A.empty() || B.empty())
+    return fail();
+  std::tie(C, D) = B.split(';');
+  // If the split left B unchanged and produced no remainder, there was no
+  // terminating ';'.
+  if (B == C && D.empty())
+    return fail();
+
+  Name = A;
+  Value = C;
+  if (D.empty())
+    Index = Packet.size();
+  else {
+    // D begins right after the ';', so its offset within View is the number
+    // of characters consumed.
+    size_t BytesConsumed = D.data() - View.data();
+    Index += BytesConsumed;
+  }
+  return true;
+}
+
+// Advances Index past any whitespace at the current position.
+void StringExtractor::SkipSpaces() {
+  const size_t N = Packet.size();
+  // Cast through unsigned char: isspace() on a negative plain char is
+  // undefined behaviour.
+  while (Index < N && isspace(static_cast<unsigned char>(Packet[Index])))
+    ++Index;
+}
Index: unittests/Support/CMakeLists.txt
===================================================================
--- unittests/Support/CMakeLists.txt
+++ unittests/Support/CMakeLists.txt
@@ -19,6 +19,7 @@
   ErrorTest.cpp
   ErrorOrTest.cpp
   FileOutputBufferTest.cpp
+  JSONTest.cpp
   LEB128Test.cpp
   LineIteratorTest.cpp
   LockFileManagerTest.cpp
@@ -36,6 +37,7 @@
   SourceMgrTest.cpp
   SpecialCaseListTest.cpp
   StreamingMemoryObjectTest.cpp
+  StringExtractorTest.cpp
   StringPool.cpp
   SwapByteOrderTest.cpp
   TargetParserTest.cpp
Index: unittests/Support/JSONTest.cpp
===================================================================
--- /dev/null
+++ unittests/Support/JSONTest.cpp
@@ -0,0 +1,45 @@
+//===- llvm/unittest/Support/JSONTest.cpp - JSON.cpp tests ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+#include "llvm/Support/JSON.h"
+
+using namespace llvm;
+
+static ::testing::AssertionResult MatchRoundtrip(std::string Text) {
+  JSONParser Parser(Text.c_str());
+  auto Obj = Parser.ParseJSONValue();
+
+  if (!Obj) {
+    return Text == "null"
+               ? ::testing::AssertionSuccess()
+               : ::testing::AssertionFailure() << "can't parse input: " << Text;
+  }
+
+  std::string S;
+  raw_string_ostream Out(S);
+  Obj->Write(Out);
+
+  std::string Actual = Out.str();
+  if (Actual != Text) {
+    return ::testing::AssertionFailure() << "expected: " << Text
+                                         << " actual: " << Actual;
+  }
+  return ::testing::AssertionSuccess();
+}
+
+TEST(JSON, Roundtrip) {
+  EXPECT_TRUE(MatchRoundtrip("0"));
+  EXPECT_TRUE(MatchRoundtrip("3.145150e+00"));
+  EXPECT_TRUE(MatchRoundtrip("{}"));
+  EXPECT_TRUE(MatchRoundtrip("{\"a\":1,\"b\":2}"));
+  EXPECT_TRUE(MatchRoundtrip("[0]"));
+  EXPECT_TRUE(MatchRoundtrip("[1,\"two\",3]"));
+}
Index: unittests/Support/StringExtractorTest.cpp
===================================================================
--- /dev/null
+++ unittests/Support/StringExtractorTest.cpp
@@ -0,0 +1,700 @@
+#include "gtest/gtest.h"
+#include // NOTE(review): header name lost in extraction; restore it (possibly <limits.h>, for UINT64_MAX) - confirm
+
+#include "llvm/Support/StringExtractor.h"
+
+using namespace llvm;
+
+namespace {
+class StringExtractorTest : public ::testing::Test {};
+} // namespace
+
+TEST_F(StringExtractorTest, InitEmpty) {
+  const char EmptyString[] = "";
+  StringExtractor SE(EmptyString);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_STREQ(EmptyString, SE.GetStringRef().c_str());
+  ASSERT_EQ(true, SE.Empty());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, InitMisc) {
+  const char InitMiscString[] = "Hello, StringExtractor!";
+  StringExtractor SE(InitMiscString);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_STREQ(InitMiscString, SE.GetStringRef().c_str());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(sizeof(InitMiscString) - 1, SE.GetBytesLeft());
+  ASSERT_EQ(InitMiscString[0], *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_Underflow) {
+  const char EmptyString[] = "";
+  StringExtractor SE(EmptyString);
+
+  ASSERT_EQ(-1, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(true, SE.Empty());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_Underflow2) {
+  const char OneNibble[] = "1";
+  StringExtractor SE(OneNibble);
+
+  ASSERT_EQ(-1, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(1u, SE.GetBytesLeft());
+  ASSERT_EQ('1', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_InvalidHex) {
+  const char InvalidHex[] = "xa";
+  StringExtractor SE(InvalidHex);
+
+  ASSERT_EQ(-1, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('x', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_InvalidHex2) {
+  const char InvalidHex[] = "ax";
+  StringExtractor SE(InvalidHex);
+
+  ASSERT_EQ(-1, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('a', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_Exact) {
+  const char ValidHexPair[] = "12";
+  StringExtractor SE(ValidHexPair);
+
+  ASSERT_EQ(0x12, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, DecodeHexU8_Extra) {
+  const char ValidHexPair[] = "1234";
+  StringExtractor SE(ValidHexPair);
+
+  ASSERT_EQ(0x12, SE.DecodeHexU8());
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('3', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Underflow) {
+  const char EmptyString[] = "";
+  StringExtractor SE(EmptyString);
+
+  ASSERT_EQ(0xab, SE.GetHexU8(0xab));
+  ASSERT_EQ(false, SE.IsGood());
+  ASSERT_EQ(UINT64_MAX, SE.GetFilePos());
+  ASSERT_EQ(true, SE.Empty());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Underflow2) {
+  const char OneNibble[] = "1";
+  StringExtractor SE(OneNibble);
+
+  ASSERT_EQ(0xbc, SE.GetHexU8(0xbc));
+  ASSERT_EQ(false, SE.IsGood());
+  ASSERT_EQ(UINT64_MAX, SE.GetFilePos());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_InvalidHex) {
+  const char InvalidHex[] = "xx";
+  StringExtractor SE(InvalidHex);
+
+  ASSERT_EQ(0xcd, SE.GetHexU8(0xcd));
+  ASSERT_EQ(false, SE.IsGood());
+  ASSERT_EQ(UINT64_MAX, SE.GetFilePos());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Exact) {
+  const char ValidHexPair[] = "12";
+  StringExtractor SE(ValidHexPair);
+
+  ASSERT_EQ(0x12, SE.GetHexU8(0x12));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Extra) {
+  const char ValidHexPair[] = "1234";
+  StringExtractor SE(ValidHexPair);
+
+  ASSERT_EQ(0x12, SE.GetHexU8(0x12));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('3', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Underflow_NoEof) {
+  const char EmptyString[] = "";
+  StringExtractor SE(EmptyString);
+  const bool SetEofOnFail = false;
+
+  ASSERT_EQ(0xab, SE.GetHexU8(0xab, SetEofOnFail));
+  ASSERT_EQ(false, SE.IsGood()); // this result seems inconsistent with
+                                 // SetEofOnFail == false
+  ASSERT_EQ(UINT64_MAX, SE.GetFilePos());
+  ASSERT_EQ(true, SE.Empty());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Underflow2_NoEof) {
+  const char OneNibble[] = "1";
+  StringExtractor SE(OneNibble);
+  const bool SetEofOnFail = false;
+
+  ASSERT_EQ(0xbc, SE.GetHexU8(0xbc, SetEofOnFail));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(1u, SE.GetBytesLeft());
+  ASSERT_EQ('1', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_InvalidHex_NoEof) {
+  const char InvalidHex[] = "xx";
+  StringExtractor SE(InvalidHex);
+  const bool SetEofOnFail = false;
+
+  ASSERT_EQ(0xcd, SE.GetHexU8(0xcd, SetEofOnFail));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(0u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('x', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Exact_NoEof) {
+  const char ValidHexPair[] = "12";
+  StringExtractor SE(ValidHexPair);
+  const bool SetEofOnFail = false;
+
+  ASSERT_EQ(0x12, SE.GetHexU8(0x12, SetEofOnFail));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexU8_Extra_NoEof) {
+  const char ValidHexPair[] = "1234";
+  StringExtractor SE(ValidHexPair);
+  const bool SetEofOnFail = false;
+
+  ASSERT_EQ(0x12, SE.GetHexU8(0x12, SetEofOnFail));
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2u, SE.GetFilePos());
+  ASSERT_EQ(2u, SE.GetBytesLeft());
+  ASSERT_EQ('3', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytes) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[ValidHexPairs];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytes(Dst, 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2 * ValidHexPairs, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(4u, SE.GetBytesLeft());
+  ASSERT_EQ('x', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytes_FullString) {
+  const char HexEncodedBytes[] = "abcdef0123456789";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[ValidHexPairs];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytes(Dst, 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytes_OddPair) {
+  const char HexEncodedBytes[] = "abcdef012345678w";
+  const size_t ValidHexPairs = 7;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[8];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytes(Dst, 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+
+  // This one should be invalid
+  EXPECT_EQ(0xde, Dst[7]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytes_OddPair2) {
+  const char HexEncodedBytes[] = "abcdef012345678";
+  const size_t ValidHexPairs = 7;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[8];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytes(Dst, 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+
+  EXPECT_EQ(0xde, Dst[7]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytes_Underflow) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[12];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytes(Dst, 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+  // these bytes should be filled with fail_fill_value 0xde
+  EXPECT_EQ(0xde, Dst[8]);
+  EXPECT_EQ(0xde, Dst[9]);
+  EXPECT_EQ(0xde, Dst[10]);
+  EXPECT_EQ(0xde, Dst[11]);
+
+  ASSERT_EQ(false, SE.IsGood());
+  ASSERT_EQ(UINT64_MAX, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(0u, SE.GetBytesLeft());
+  // Compare against nullptr, as the other tests do, rather than the integer 0.
+  ASSERT_EQ(nullptr, SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytes_Partial) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t kReadBytes = 4;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[12];
+  memset(Dst, 0xab, sizeof(Dst));
+  ASSERT_EQ(
+      kReadBytes,
+      SE.GetHexBytes(llvm::MutableArrayRef<uint8_t>(Dst, kReadBytes), 0xde));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  // these bytes should be unchanged
+  EXPECT_EQ(0xab, Dst[4]);
+  EXPECT_EQ(0xab, Dst[5]);
+  EXPECT_EQ(0xab, Dst[6]);
+  EXPECT_EQ(0xab, Dst[7]);
+  EXPECT_EQ(0xab, Dst[8]);
+  EXPECT_EQ(0xab, Dst[9]);
+  EXPECT_EQ(0xab, Dst[10]);
+  EXPECT_EQ(0xab, Dst[11]);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(kReadBytes * 2, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(12u, SE.GetBytesLeft());
+  ASSERT_EQ('2', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[ValidHexPairs];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytesAvail(Dst));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(2 * ValidHexPairs, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(4u, SE.GetBytesLeft());
+  ASSERT_EQ('x', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail_FullString) {
+  const char HexEncodedBytes[] = "abcdef0123456789";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[ValidHexPairs];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytesAvail(Dst));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail_OddPair) {
+  const char HexEncodedBytes[] = "abcdef012345678w";
+  const size_t ValidHexPairs = 7;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[8];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytesAvail(Dst));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail_OddPair2) {
+  const char HexEncodedBytes[] = "abcdef012345678";
+  const size_t ValidHexPairs = 7;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[8];
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytesAvail(Dst));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail_Underflow) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t ValidHexPairs = 8;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[12];
+  memset(Dst, 0xef, sizeof(Dst));
+  ASSERT_EQ(ValidHexPairs, SE.GetHexBytesAvail(Dst));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  EXPECT_EQ(0x23, Dst[4]);
+  EXPECT_EQ(0x45, Dst[5]);
+  EXPECT_EQ(0x67, Dst[6]);
+  EXPECT_EQ(0x89, Dst[7]);
+  // these bytes should be unchanged
+  EXPECT_EQ(0xef, Dst[8]);
+  EXPECT_EQ(0xef, Dst[9]);
+  EXPECT_EQ(0xef, Dst[10]);
+  EXPECT_EQ(0xef, Dst[11]);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(ValidHexPairs * 2, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(4u, SE.GetBytesLeft());
+  ASSERT_EQ('x', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetHexBytesAvail_Partial) {
+  const char HexEncodedBytes[] = "abcdef0123456789xyzw";
+  const size_t kReadBytes = 4;
+  StringExtractor SE(HexEncodedBytes);
+
+  uint8_t Dst[12];
+  memset(Dst, 0xab, sizeof(Dst));
+  ASSERT_EQ(kReadBytes, SE.GetHexBytesAvail(
+                            llvm::MutableArrayRef<uint8_t>(Dst, kReadBytes)));
+  EXPECT_EQ(0xab, Dst[0]);
+  EXPECT_EQ(0xcd, Dst[1]);
+  EXPECT_EQ(0xef, Dst[2]);
+  EXPECT_EQ(0x01, Dst[3]);
+  // these bytes should be unchanged
+  EXPECT_EQ(0xab, Dst[4]);
+  EXPECT_EQ(0xab, Dst[5]);
+  EXPECT_EQ(0xab, Dst[6]);
+  EXPECT_EQ(0xab, Dst[7]);
+  EXPECT_EQ(0xab, Dst[8]);
+  EXPECT_EQ(0xab, Dst[9]);
+  EXPECT_EQ(0xab, Dst[10]);
+  EXPECT_EQ(0xab, Dst[11]);
+
+  ASSERT_EQ(true, SE.IsGood());
+  ASSERT_EQ(kReadBytes * 2, SE.GetFilePos());
+  ASSERT_EQ(false, SE.Empty());
+  ASSERT_EQ(12u, SE.GetBytesLeft());
+  ASSERT_EQ('2', *SE.Peek());
+}
+
+TEST_F(StringExtractorTest, GetNameColonValueSuccess) {
+  const char NameColonPairs[] = "key1:value1;key2:value2;";
+  StringExtractor SE(NameColonPairs);
+
+  llvm::StringRef name;
+  llvm::StringRef value;
+  EXPECT_TRUE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ("key1", name);
+  EXPECT_EQ("value1", value);
+  EXPECT_TRUE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ("key2", name);
+  EXPECT_EQ("value2", value);
+  EXPECT_EQ(0u, SE.GetBytesLeft());
+}
+
+TEST_F(StringExtractorTest, GetNameColonValueContainsColon) {
+  const char NameColonPairs[] = "key1:value1:value2;key2:value3;";
+  StringExtractor SE(NameColonPairs);
+
+  llvm::StringRef name;
+  llvm::StringRef value;
+  EXPECT_TRUE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ("key1", name);
+  EXPECT_EQ("value1:value2", value);
+  EXPECT_TRUE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ("key2", name);
+  EXPECT_EQ("value3", value);
+  EXPECT_EQ(0u, SE.GetBytesLeft());
+}
+
+TEST_F(StringExtractorTest, GetNameColonValueNoSemicolon) {
+  const char NameColonPairs[] = "key1:value1";
+  StringExtractor SE(NameColonPairs);
+
+  llvm::StringRef name;
+  llvm::StringRef value;
+  EXPECT_FALSE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ(0u, SE.GetBytesLeft());
+}
+
+TEST_F(StringExtractorTest, GetNameColonValueNoColon) {
+  const char NameColonPairs[] = "key1value1;";
+  StringExtractor SE(NameColonPairs);
+
+  llvm::StringRef name;
+  llvm::StringRef value;
+  EXPECT_FALSE(SE.GetNameColonValue(name, value));
+  EXPECT_EQ(0u, SE.GetBytesLeft());
+}
+
+TEST_F(StringExtractorTest, GetU32LittleEndian) {
+  StringExtractor SE("");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("0");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU32(true, 1));
+
+  SE.Reset("1");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("01");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("001");
+  EXPECT_EQ(0x100u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("12");
+  EXPECT_EQ(0x12u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("123");
+  EXPECT_EQ(0x312u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("1203");
+  EXPECT_EQ(0x312u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("1234");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("12340");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("123400");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU32(true, 0));
+
+  SE.Reset("12345670");
+  EXPECT_EQ(0x70563412u, SE.GetHexMaxU32(true, 0));
+
+  // Nine digits exceed 32 bits, so the fail value (0) is returned.
+  SE.Reset("123456701");
+  EXPECT_EQ(0u, SE.GetHexMaxU32(true, 0));
+}
+
+TEST_F(StringExtractorTest, GetU32BigEndian) {
+  StringExtractor SE("");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("0");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU32(false, 1));
+
+  SE.Reset("1");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("01");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("001");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("12");
+  EXPECT_EQ(0x12u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("123");
+  EXPECT_EQ(0x123u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("1203");
+  EXPECT_EQ(0x1203u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("1234");
+  EXPECT_EQ(0x1234u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("12340");
+  EXPECT_EQ(0x12340u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("123400");
+  EXPECT_EQ(0x123400u, SE.GetHexMaxU32(false, 0));
+
+  SE.Reset("12345670");
+  EXPECT_EQ(0x12345670u, SE.GetHexMaxU32(false, 0));
+
+  // Nine digits exceed 32 bits, so the fail value (0) is returned.
+  SE.Reset("123456700");
+  EXPECT_EQ(0u, SE.GetHexMaxU32(false, 0));
+}
+
+TEST_F(StringExtractorTest, GetU64LittleEndian) {
+  StringExtractor SE("");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("0");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU64(true, 1));
+
+  SE.Reset("1");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("01");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("001");
+  EXPECT_EQ(0x100u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("12");
+  EXPECT_EQ(0x12u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("123");
+  EXPECT_EQ(0x312u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("1203");
+  EXPECT_EQ(0x312u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("1234");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("12340");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("123400");
+  EXPECT_EQ(0x3412u, SE.GetHexMaxU64(true, 0));
+
+  SE.Reset("123456789ABCDEF0");
+  EXPECT_EQ(0xF0DEBC9A78563412ULL, SE.GetHexMaxU64(true, 0));
+
+  // Seventeen digits exceed 64 bits, so the fail value (0) is returned.
+  SE.Reset("123456789ABCDEF01");
+  EXPECT_EQ(0u, SE.GetHexMaxU64(true, 0));
+}
+
+TEST_F(StringExtractorTest, GetU64BigEndian) {
+  StringExtractor SE("");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("0");
+  EXPECT_EQ(0x0u, SE.GetHexMaxU64(false, 1));
+
+  SE.Reset("1");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("01");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("001");
+  EXPECT_EQ(0x1u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("12");
+  EXPECT_EQ(0x12u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("123");
+  EXPECT_EQ(0x123u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("1203");
+  EXPECT_EQ(0x1203u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("1234");
+  EXPECT_EQ(0x1234u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("12340");
+  EXPECT_EQ(0x12340u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("123400");
+  EXPECT_EQ(0x123400u, SE.GetHexMaxU64(false, 0));
+
+  SE.Reset("123456789ABCDEF0");
+  EXPECT_EQ(0x123456789ABCDEF0ULL, SE.GetHexMaxU64(false, 0));
+
+  // Eighteen digits exceed 64 bits, so the fail value (0) is returned.
+  SE.Reset("123456789ABCDEF000");
+  EXPECT_EQ(0u, SE.GetHexMaxU64(false, 0));
+}