Index: llvm/include/llvm/Support/YAMLTraits.h
===================================================================
--- llvm/include/llvm/Support/YAMLTraits.h
+++ llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <cstring>
 #include
 #include
 #include
@@ -449,46 +450,79 @@
   static bool const value = (sizeof(test>(nullptr))==1);
 };
 
-inline bool isNumber(StringRef S) {
-  static const char OctalChars[] = "01234567";
-  if (S.startswith("0") &&
-      S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
-
-  if (S.startswith("0o") &&
-      S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
-
-  static const char HexChars[] = "0123456789abcdefABCDEF";
-  if (S.startswith("0x") &&
-      S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
-    return true;
-
-  static const char DecChars[] = "0123456789";
-  if (S.find_first_not_of(DecChars) == StringRef::npos)
-    return true;
-
-  if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
-    return true;
-
-  Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
-  if (FloatMatcher.match(S))
-    return true;
-
-  return false;
-}
-
-inline bool isNumeric(StringRef S) {
-  if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
-    return true;
-
-  if (isNumber(S))
-    return true;
-
-  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
-    return true;
-
-  return false;
+/// Returns true if the whole of \p S has one of the following numeric forms:
+///
+///   * not-a-number:  .nan | .NaN | .NAN (no sign allowed)
+///   * infinity:      [-+]? (.inf | .Inf | .INF)
+///   * hexadecimal:   0x [0-9a-fA-F]+ (no sign allowed)
+///   * octal:         0o [0-7]+ (no sign allowed)
+///   * decimal/float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?)
+///                    ([eE] [-+]? [0-9]+)?
+///
+/// Empty strings and a sign on its own are rejected.
+inline bool isNumeric(StringRef S) {
+  if (S.empty())
+    return false;
+
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+    return true;
+
+  // Infinity and decimal numbers can be prefixed with sign.
+  StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
+
+  // A sign on its own is not a number.
+  if (Tail.empty())
+    return false;
+
+  // Check for infinity first, because checking for hex and oct numbers is more
+  // expensive.
+  if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
+    return true;
+
+  // Hexadecimal and octal numbers cannot be signed, so look at S rather than
+  // Tail. find_first_not_of() also rejects embedded NUL characters, which a
+  // std::strchr() lookup would accept because it matches the terminator.
+  static const char HexChars[] = "0123456789abcdefABCDEF";
+  static const char OctalChars[] = "01234567";
+  if (S.startswith("0x"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos;
+  if (S.startswith("0o"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos;
+
+  // Parse float: (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+  // The optional leading sign has already been stripped from Tail.
+  size_t Pos = 0;
+  const size_t End = Tail.size();
+  // Advances Pos over a run of decimal digits and returns the run length.
+  auto SkipDigits = [&]() {
+    const size_t Begin = Pos;
+    while (Pos < End && '0' <= Tail[Pos] && Tail[Pos] <= '9')
+      ++Pos;
+    return Pos - Begin;
+  };
+
+  // Mantissa: at least one digit is required before or after the optional dot.
+  size_t MantissaDigits = SkipDigits();
+  if (Pos < End && Tail[Pos] == '.') {
+    ++Pos;
+    MantissaDigits += SkipDigits();
+  }
+  if (MantissaDigits == 0)
+    return false;
+
+  if (Pos == End)
+    return true;
+
+  // Anything that follows the mantissa has to be a complete exponent: the
+  // exponent character, an optional sign and at least one digit.
+  if (Tail[Pos] != 'e' && Tail[Pos] != 'E')
+    return false;
+  ++Pos;
+  if (Pos < End && (Tail[Pos] == '+' || Tail[Pos] == '-'))
+    ++Pos;
+  return SkipDigits() != 0 && Pos == End;
 }
 
 inline bool isNull(StringRef S) {
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  FuzzMutate
+)
+
+add_llvm_fuzzer(llvm-yaml-numeric-parser-fuzzer
+  yaml-numeric-parser-fuzzer.cpp
+  DUMMY_MAIN DummyYAMLNumericParserFuzzer.cpp
+  )
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
@@ -0,0 +1,19 @@
+//===--- DummyYAMLNumericParserFuzzer.cpp ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of main so we can build and test without linking libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput);
+}
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
@@ -0,0 +1,48 @@
+//===-- yaml-numeric-parser-fuzzer.cpp - Fuzzer for YAML numeric parser ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Differential fuzzer for llvm::yaml::isNumeric(): every input is classified
+// both by the hand-written parser and by a set of reference regular
+// expressions, and any disagreement between the two is reported as a crash.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+
+// Reference patterns that isNumeric() is compared against.
+static llvm::Regex Infinity("^[-+]?(\\.inf|\\.Inf|\\.INF)$");
+static llvm::Regex Base8("^0o[0-7]+$");
+static llvm::Regex Base16("^0x[0-9a-fA-F]+$");
+static llvm::Regex
+    Float("^[-+]?(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
+
+/// Regular-expression based classifier used as the reference for
+/// llvm::yaml::isNumeric(); returns true if \p S matches any numeric form.
+static bool isNumericRegex(llvm::StringRef S) {
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+    return true;
+
+  return Infinity.match(S) || Base8.match(S) || Base16.match(S) ||
+         Float.match(S);
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Input(reinterpret_cast<const char *>(Data), Size);
+  // Do not use assert() here: it is compiled out of NDEBUG builds, which would
+  // silently turn the fuzzer into a no-op.
+  if (llvm::yaml::isNumeric(Input) != isNumericRegex(Input))
+    std::abort();
+  return 0;
+}
Index: llvm/unittests/Support/YAMLIOTest.cpp
===================================================================
--- llvm/unittests/Support/YAMLIOTest.cpp
+++ llvm/unittests/Support/YAMLIOTest.cpp
@@ -16,16 +16,17 @@
 #include "gmock/gmock.h"
#include "gtest/gtest.h" +using llvm::yaml::Hex16; +using llvm::yaml::Hex32; +using llvm::yaml::Hex64; +using llvm::yaml::Hex8; using llvm::yaml::Input; -using llvm::yaml::Output; using llvm::yaml::IO; -using llvm::yaml::MappingTraits; +using llvm::yaml::isNumeric; using llvm::yaml::MappingNormalization; +using llvm::yaml::MappingTraits; +using llvm::yaml::Output; using llvm::yaml::ScalarTraits; -using llvm::yaml::Hex8; -using llvm::yaml::Hex16; -using llvm::yaml::Hex32; -using llvm::yaml::Hex64; using ::testing::StartsWith; @@ -2569,3 +2570,64 @@ TestEscaped((char const *)foobar, "\"foo\\u200Bbar\""); } } + +TEST(YAMLIO, Numeric) { + EXPECT_TRUE(isNumeric(".inf")); + EXPECT_TRUE(isNumeric(".INF")); + EXPECT_TRUE(isNumeric(".Inf")); + EXPECT_TRUE(isNumeric("-.inf")); + EXPECT_TRUE(isNumeric("+.inf")); + + EXPECT_TRUE(isNumeric(".nan")); + EXPECT_TRUE(isNumeric(".NaN")); + EXPECT_TRUE(isNumeric(".NAN")); + + EXPECT_TRUE(isNumeric("0")); + EXPECT_TRUE(isNumeric("0.")); + EXPECT_TRUE(isNumeric("0.0")); + EXPECT_TRUE(isNumeric("-0.0")); + EXPECT_TRUE(isNumeric("+0.0")); + + EXPECT_TRUE(isNumeric("12345")); + EXPECT_TRUE(isNumeric("012345")); + EXPECT_TRUE(isNumeric("+12.0")); + EXPECT_TRUE(isNumeric(".5")); + EXPECT_TRUE(isNumeric("+.5")); + EXPECT_TRUE(isNumeric("-1.0")); + + EXPECT_TRUE(isNumeric("2.3e4")); + EXPECT_TRUE(isNumeric("-2E+05")); + EXPECT_TRUE(isNumeric("+12e03")); + EXPECT_TRUE(isNumeric("6.8523015e+5")); + + EXPECT_TRUE(isNumeric("1.e+1")); + EXPECT_TRUE(isNumeric(".0e+1")); + + EXPECT_TRUE(isNumeric("0x2aF3")); + EXPECT_TRUE(isNumeric("0o01234567")); + + EXPECT_FALSE(isNumeric("not a number")); + EXPECT_FALSE(isNumeric(".")); + EXPECT_FALSE(isNumeric(".e+1")); + EXPECT_FALSE(isNumeric(".1e")); + EXPECT_FALSE(isNumeric(".1e+")); + EXPECT_FALSE(isNumeric(".1e++1")); + + EXPECT_FALSE(isNumeric("+0x2AF3")); + EXPECT_FALSE(isNumeric("-0x2AF3")); + EXPECT_FALSE(isNumeric("0x2AF3Z")); + EXPECT_FALSE(isNumeric("0o012345678")); + 
EXPECT_FALSE(isNumeric("-0o012345678")); + EXPECT_FALSE(isNumeric("000003A8229434B839616A25C16B0291F77A438B")); + + // Deprecated formats: as for YAML 1.2 specification, the following are not + // valid numbers anymore: + // + // * Sexagecimal numbers + // * Decimal numbers with comma s the delimiter + // * "inf", "nan" without '.' prefix + EXPECT_FALSE(isNumeric("3:25:45")); + EXPECT_FALSE(isNumeric("+12,345")); + EXPECT_FALSE(isNumeric("-inf")); + EXPECT_FALSE(isNumeric("1,230.15")); +}