// From llvm/include/llvm/ADT/StringExtras.h

/// Interpret the character \p C as a hexadecimal digit and return its value
/// (0-15). Both upper- and lower-case letters are accepted.
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
  // Shorthand for the "not a hexadecimal digit" sentinel so that each table
  // row below covers exactly 16 character codes.
  constexpr unsigned X = -1U;
  // The table is indexed by the character reinterpreted as unsigned char, so
  // it must hold one entry for every value in [0, 255], i.e. 256 entries.
  // clang-format off
  static const unsigned LUT[256] = {
    // 0x00-0x0F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x10-0x1F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x20-0x2F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x30-0x3F: '0'-'9' occupy 0x30-0x39
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
    // 0x40-0x4F: 'A'-'F' occupy 0x41-0x46
    X, 10, 11, 12, 13, 14, 15, X, X, X, X, X, X, X, X, X,
    // 0x50-0x5F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x60-0x6F: 'a'-'f' occupy 0x61-0x66
    X, 10, 11, 12, 13, 14, 15, X, X, X, X, X, X, X, X, X,
    // 0x70-0x7F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x80-0x8F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0x90-0x9F
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xA0-0xAF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xB0-0xBF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xC0-0xCF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xD0-0xDF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xE0-0xEF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    // 0xF0-0xFF
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X
  };
  // clang-format on
  // Go through unsigned char so that a negative char value cannot produce a
  // negative (or sign-extended, huge) index.
  return LUT[static_cast<unsigned char>(C)];
}

// (Declarations located between hexDigitValue and the nibble helpers are
// outside this excerpt.)

/// Combine the hexadecimal digits \p MSB (high nibble) and \p LSB (low nibble)
/// into one byte and store it in \p Hex.
///
/// Returns true on success. Returns false, leaving \p Hex unmodified, if
/// either character is not a hexadecimal digit.
inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
  unsigned U1 = hexDigitValue(MSB);
  unsigned U2 = hexDigitValue(LSB);
  if (U1 == -1U || U2 == -1U)
    return false;

  Hex = static_cast<uint8_t>((U1 << 4) | U2);
  return true;
}

/// Combine the hexadecimal digits \p MSB and \p LSB into one byte.
///
/// Both characters must be hexadecimal digits; this is checked with an
/// assertion. If assertions are compiled out and a character is invalid, the
/// result is 0.
inline uint8_t hexFromNibbles(char MSB, char LSB) {
  uint8_t Hex = 0;
  bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex);
  // Silence the unused-variable warning when assertions are disabled.
  (void)GotHex;
  assert(GotHex && "MSB and/or LSB do not correspond to hex digits");
  return Hex;
}

/// Convert hexadecimal string \p Input to its binary representation and store
/// the result in \p Output. Returns true if the binary representation could be
/// converted from the hexadecimal string. Returns false if \p Input contains
/// non-hexadecimal digits. The output string is half the size of \p Input.
+inline bool tryGetFromHex(StringRef Input, std::string &Output) { if (Input.empty()) - return std::string(); + return true; - std::string Output; Output.reserve((Input.size() + 1) / 2); if (Input.size() % 2 == 1) { - Output.push_back(hexFromNibbles('0', Input.front())); + uint8_t Hex = 0; + if (!tryGetHexFromNibbles('0', Input.front(), Hex)) + return false; + + Output.push_back(Hex); Input = Input.drop_front(); } assert(Input.size() % 2 == 0); while (!Input.empty()) { - uint8_t Hex = hexFromNibbles(Input[0], Input[1]); + uint8_t Hex = 0; + if (!tryGetHexFromNibbles(Input[0], Input[1], Hex)) + return false; + Output.push_back(Hex); Input = Input.drop_front(2); } - return Output; + return true; +} + +/// Convert hexadecimal string \p Input to its binary representation. +/// The return string is half the size of \p Input. +inline std::string fromHex(StringRef Input) { + std::string Hex; + bool GotHex = tryGetFromHex(Input, Hex); + (void)GotHex; + assert(GotHex && "Input contains non hex digits"); + return Hex; } /// Convert the string \p S to an integer of the specified type using diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -89,6 +89,10 @@ EXPECT_EQ(EvenStr, toHex(EvenData)); EXPECT_EQ(EvenData, fromHex(EvenStr)); EXPECT_EQ(StringRef(EvenStr).lower(), toHex(EvenData, true)); + + std::string InvalidStr = "A5ZX"; + std::string IgnoredOutput; + EXPECT_FALSE(tryGetFromHex(InvalidStr, IgnoredOutput)); } TEST(StringExtrasTest, to_float) {