Index: llvm/include/llvm/Support/YAMLTraits.h
===================================================================
--- llvm/include/llvm/Support/YAMLTraits.h
+++ llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -34,6 +35,7 @@
 #include
 #include
 #include
+#include
 
 namespace llvm {
 namespace yaml {
@@ -449,46 +451,105 @@
   static bool const value = (sizeof(test>(nullptr))==1);
 };
 
-inline bool isNumber(StringRef S) {
-  static const char OctalChars[] = "01234567";
-  if (S.startswith("0") &&
-      S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
+/// Returns true if \p S is spelled like a number: ".nan"/".NaN"/".NAN", an
+/// optionally signed ".inf"/".Inf"/".INF", an unsigned "0o" octal or "0x"
+/// hexadecimal integer, or an optionally signed decimal integer or float with
+/// an optional exponent. Scans by hand instead of matching a Regex.
+inline bool isNumeric(StringRef S) {
+  const static auto skipDigits = [](StringRef Input) {
+    return Input.drop_front(std::min(Input.find_first_not_of("0123456789"),
+                                     Input.size()));
+  };
 
-  if (S.startswith("0o") &&
-      S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
+  // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
+  // safe.
+  if (S.empty() || S.equals("+") || S.equals("-"))
+    return false;
 
-  static const char HexChars[] = "0123456789abcdefABCDEF";
-  if (S.startswith("0x") &&
-      S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
     return true;
 
-  static const char DecChars[] = "0123456789";
-  if (S.find_first_not_of(DecChars) == StringRef::npos)
-    return true;
+  // Infinity and decimal numbers can be prefixed with sign.
+  StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
 
-  if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
+  // Check for infinity first, because checking for hex and oct numbers is more
+  // expensive.
+  if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
     return true;
 
-  Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
-  if (FloatMatcher.match(S))
-    return true;
+  // Section 10.3.2 Tag Resolution
+  // YAML 1.2 Specification prohibits Base 8 and Base 16 numbers prefixed with
+  // [-+], so S should be used instead of Tail.
+  if (S.startswith("0o"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of("01234567") == StringRef::npos;
+
+  if (S.startswith("0x"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of("0123456789abcdefABCDEF") ==
+               StringRef::npos;
+
+  // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+  S = Tail;
+
+  // Handle cases when the number starts with '.' and hence needs at least one
+  // digit after dot (as opposed by number which has digits before the dot), but
+  // doesn't have one.
+  if (S.startswith(".") &&
+      (S.equals(".") || (S.size() > 1 && std::strchr("0123456789",
+                                                      S[1]) == nullptr)))
+    return false;
+
+  if (S.startswith("E") || S.startswith("e"))
+    return false;
+
+  enum ParseState {
+    Default,
+    FoundDot,
+    FoundExponent,
+  };
+  ParseState State = Default;
 
-  return false;
-}
+  S = skipDigits(S);
 
-inline bool isNumeric(StringRef S) {
-  if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
+  // Accept decimal integer.
+  if (S.empty())
     return true;
 
-  if (isNumber(S))
-    return true;
+  if (S.front() == '.') {
+    State = FoundDot;
+    S = S.drop_front();
+  } else if (S.front() == 'e' || S.front() == 'E') {
+    State = FoundExponent;
+    S = S.drop_front();
+  } else {
+    return false;
+  }
 
-  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
-    return true;
+  if (State == FoundDot) {
+    S = skipDigits(S);
+    if (S.empty())
+      return true;
+
+    if (S.front() == 'e' || S.front() == 'E') {
+      State = FoundExponent;
+      S = S.drop_front();
+    } else {
+      return false;
+    }
+  }
+
+  assert(State == FoundExponent && "Should have found exponent at this point.");
+  if (S.empty())
+    return false;
+
+  if (S.front() == '+' || S.front() == '-') {
+    S = S.drop_front();
+    if (S.empty())
+      return false;
+  }
 
-  return false;
+  return skipDigits(S).empty();
 }
 
 inline bool isNull(StringRef S) {
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  FuzzMutate
+)
+
+add_llvm_fuzzer(llvm-yaml-numeric-parser-fuzzer
+  yaml-numeric-parser-fuzzer.cpp
+  DUMMY_MAIN DummyYAMLNumericParserFuzzer.cpp
+  )
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
@@ -0,0 +1,19 @@
+//===--- DummyYAMLNumericParserFuzzer.cpp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of main so we can build and test without linking libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput);
+}
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
@@ -0,0 +1,50 @@
+//===--- yaml-numeric-parser-fuzzer.cpp - Fuzzer for YAML numeric parser --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <string>
+
+llvm::Regex Infinity("^[-+]?(\\.inf|\\.Inf|\\.INF)$");
+llvm::Regex Base8("^0o[0-7]+$");
+llvm::Regex Base16("^0x[0-9a-fA-F]+$");
+llvm::Regex Float("^[-+]?(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
+
+// Regex-based classifier that llvm::yaml::isNumeric is compared against.
+inline bool isNumericRegex(llvm::StringRef S) {
+
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+    return true;
+
+  if (Infinity.match(S))
+    return true;
+
+  if (Base8.match(S))
+    return true;
+
+  if (Base16.match(S))
+    return true;
+
+  if (Float.match(S))
+    return true;
+
+  return false;
+}
+
+// Strips NUL bytes from the input, then traps if llvm::yaml::isNumeric and
+// isNumericRegex disagree on a non-empty result.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Input(reinterpret_cast<const char *>(Data), Size);
+  Input.erase(std::remove(Input.begin(), Input.end(), 0), Input.end());
+  if (!Input.empty() && llvm::yaml::isNumeric(Input) != isNumericRegex(Input))
+    __builtin_trap();
+  return 0;
+}
Index: llvm/unittests/Support/YAMLIOTest.cpp
===================================================================
---
llvm/unittests/Support/YAMLIOTest.cpp
+++ llvm/unittests/Support/YAMLIOTest.cpp
@@ -16,16 +16,17 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex64;
+using llvm::yaml::Hex8;
 using llvm::yaml::Input;
-using llvm::yaml::Output;
 using llvm::yaml::IO;
-using llvm::yaml::MappingTraits;
+using llvm::yaml::isNumeric;
 using llvm::yaml::MappingNormalization;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::Output;
 using llvm::yaml::ScalarTraits;
-using llvm::yaml::Hex8;
-using llvm::yaml::Hex16;
-using llvm::yaml::Hex32;
-using llvm::yaml::Hex64;
 
 using ::testing::StartsWith;
 
@@ -2569,3 +2570,76 @@
     TestEscaped((char const *)foobar, "\"foo\\u200Bbar\"");
   }
 }
+
+// Checks isNumeric() against accepted and rejected scalar spellings.
+TEST(YAMLIO, Numeric) {
+  EXPECT_TRUE(isNumeric(".inf"));
+  EXPECT_TRUE(isNumeric(".INF"));
+  EXPECT_TRUE(isNumeric(".Inf"));
+  EXPECT_TRUE(isNumeric("-.inf"));
+  EXPECT_TRUE(isNumeric("+.inf"));
+
+  EXPECT_TRUE(isNumeric(".nan"));
+  EXPECT_TRUE(isNumeric(".NaN"));
+  EXPECT_TRUE(isNumeric(".NAN"));
+
+  EXPECT_TRUE(isNumeric("0"));
+  EXPECT_TRUE(isNumeric("0."));
+  EXPECT_TRUE(isNumeric("0.0"));
+  EXPECT_TRUE(isNumeric("-0.0"));
+  EXPECT_TRUE(isNumeric("+0.0"));
+
+  EXPECT_TRUE(isNumeric("12345"));
+  EXPECT_TRUE(isNumeric("012345"));
+  EXPECT_TRUE(isNumeric("+12.0"));
+  EXPECT_TRUE(isNumeric(".5"));
+  EXPECT_TRUE(isNumeric("+.5"));
+  EXPECT_TRUE(isNumeric("-1.0"));
+
+  EXPECT_TRUE(isNumeric("2.3e4"));
+  EXPECT_TRUE(isNumeric("-2E+05"));
+  EXPECT_TRUE(isNumeric("+12e03"));
+  EXPECT_TRUE(isNumeric("6.8523015e+5"));
+
+  EXPECT_TRUE(isNumeric("1.e+1"));
+  EXPECT_TRUE(isNumeric(".0e+1"));
+
+  EXPECT_TRUE(isNumeric("0x2aF3"));
+  EXPECT_TRUE(isNumeric("0o01234567"));
+
+  EXPECT_FALSE(isNumeric("not a number"));
+  EXPECT_FALSE(isNumeric("."));
+  EXPECT_FALSE(isNumeric(".e+1"));
+  EXPECT_FALSE(isNumeric(".1e"));
+  EXPECT_FALSE(isNumeric(".1e+"));
+  EXPECT_FALSE(isNumeric(".1e++1"));
+
+  EXPECT_FALSE(isNumeric("ABCD"));
+  EXPECT_FALSE(isNumeric("+0x2AF3"));
+  EXPECT_FALSE(isNumeric("-0x2AF3"));
+  EXPECT_FALSE(isNumeric("0x2AF3Z"));
+  EXPECT_FALSE(isNumeric("0o012345678"));
+  EXPECT_FALSE(isNumeric("0xZ"));
+  EXPECT_FALSE(isNumeric("0x"));
+  EXPECT_FALSE(isNumeric("0o"));
+  EXPECT_FALSE(isNumeric("-0o012345678"));
+  EXPECT_FALSE(isNumeric("000003A8229434B839616A25C16B0291F77A438B"));
+
+  EXPECT_FALSE(isNumeric(""));
+  EXPECT_FALSE(isNumeric("+"));
+  EXPECT_FALSE(isNumeric("-"));
+  EXPECT_FALSE(isNumeric(".e+"));
+  EXPECT_FALSE(isNumeric(".e"));
+  EXPECT_FALSE(isNumeric("e1"));
+
+  // Deprecated formats: as of the YAML 1.2 specification, the following are not
+  // valid numbers anymore:
+  //
+  // * Sexagesimal numbers
+  // * Decimal numbers with comma as the delimiter
+  // * "inf", "nan" without '.' prefix
+  EXPECT_FALSE(isNumeric("3:25:45"));
+  EXPECT_FALSE(isNumeric("+12,345"));
+  EXPECT_FALSE(isNumeric("-inf"));
+  EXPECT_FALSE(isNumeric("1,230.15"));
+}