diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -3,6 +3,7 @@ endif() add_subdirectory(printf_core) +add_subdirectory(scanf_core) add_entrypoint_object( fopen diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -0,0 +1,25 @@ +add_header_library( + core_structs + HDRS + core_structs.h + DEPENDS + libc.src.__support.CPP.string_view + libc.src.__support.CPP.bitset + libc.src.__support.FPUtil.fp_bits +) + +add_object_library( + parser + SRCS + parser.cpp + HDRS + parser.h + DEPENDS + .core_structs + libc.src.__support.arg_list + libc.src.__support.ctype_utils + libc.src.__support.str_to_integer + libc.src.__support.CPP.bit + libc.src.__support.CPP.bitset + libc.src.__support.CPP.string_view +) diff --git a/libc/src/stdio/scanf_core/core_structs.h b/libc/src/stdio/scanf_core/core_structs.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/scanf_core/core_structs.h @@ -0,0 +1,91 @@ +//===-- Core Structures for scanf ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CORE_STRUCTS_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CORE_STRUCTS_H + +#include "src/__support/CPP/bitset.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/FPUtil/FPBits.h" + +#include +#include + +namespace __llvm_libc { +namespace scanf_core { + +// These length modifiers match the length modifiers in the format string, which +// is why they are formatted differently from the rest of the file. 
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, NONE };
+
+enum FormatFlags : uint8_t {
+  NONE = 0x00,
+  NO_WRITE = 0x01, // *
+  ALLOCATE = 0x02, // m
+};
+
+struct FormatSection {
+  bool has_conv = false; // True when this section is a conversion.
+
+  cpp::string_view raw_string; // The piece of the format string covered.
+
+  // Format Specifier Values
+  FormatFlags flags = FormatFlags::NONE;
+  LengthModifier length_modifier = LengthModifier::NONE;
+  int max_width = -1; // -1 means that no width was given.
+
+  // Destination for the converted value; stays nullptr when NO_WRITE is set.
+  void *output_ptr = nullptr;
+
+  char conv_name = '\0'; // Only meaningful when has_conv is true.
+
+  cpp::bitset<256> scan_set; // Only meaningful when conv_name is '['.
+
+  bool operator==(const FormatSection &other) const {
+    if (has_conv != other.has_conv)
+      return false;
+
+    if (raw_string != other.raw_string)
+      return false;
+
+    if (has_conv) {
+      if (!((static_cast<uint8_t>(flags) ==
+             static_cast<uint8_t>(other.flags)) &&
+            (max_width == other.max_width) &&
+            (length_modifier == other.length_modifier) &&
+            (conv_name == other.conv_name)))
+        return false;
+
+      // If the pointers are used, then they should be equal. If the NO_WRITE
+      // flag is set or the conversion is %, then the pointers are not used.
+      // If the pointers are used and they are not equal, return false.
+
+      if (!(((flags & FormatFlags::NO_WRITE) != 0) || (conv_name == '%') ||
+            (output_ptr == other.output_ptr)))
+        return false;
+
+      if (conv_name == '[')
+        return scan_set == other.scan_set;
+    }
+    return true;
+  }
+};
+
+enum ReturnValues : int {
+  // This is the value to be returned by conversions when no error has occurred.
+  WRITE_OK = 0,
+  // These are the scanf return values for when an error has occurred. They are
+  // all negative, and should be distinct.
+  FILE_READ_ERROR = -1,
+  FILE_STATUS_ERROR = -2,
+  MATCHING_FAILURE = -3,
+};
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CORE_STRUCTS_H
diff --git a/libc/src/stdio/scanf_core/parser.h b/libc/src/stdio/scanf_core/parser.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/parser.h
@@ -0,0 +1,99 @@
+//===-- Format string parser for scanf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/scanf_config.h"
+
+#include
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+class Parser {
+  const char *__restrict str; // The format string being parsed.
+
+  size_t cur_pos = 0; // Offset in str of the next character to parse.
+  internal::ArgList args_cur; // Positioned at the next unread argument.
+
+#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
+  // args_start stores the start of the va_args, which is used when a previous
+  // argument is needed. In that case, we have to read the arguments from the
+  // beginning since they don't support reading backwards.
+  internal::ArgList args_start;
+  size_t args_index = 1; // 1-based index of the next argument in args_cur.
+#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
+
+public:
+#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
+  Parser(const char *__restrict new_str, internal::ArgList &args)
+      : str(new_str), args_cur(args), args_start(args) {}
+#else
+  Parser(const char *__restrict new_str, internal::ArgList &args)
+      : str(new_str), args_cur(args) {}
+#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
+
+  // get_next_section will parse the format string until it has a fully
+  // specified format section. This can either be a raw format section with no
+  // conversion, or a format section with a conversion that has all of its
+  // variables stored in the format section.
+  FormatSection get_next_section();
+
+private:
+  // parse_length_modifier parses the length modifier inside a format string. It
+  // assumes that str[*local_pos] is inside a format specifier. It returns a
+  // LengthModifier with the length modifier it found. It will advance local_pos
+  // past the length modifier if one is found.
+  LengthModifier parse_length_modifier(size_t *local_pos);
+
+  // get_next_arg_value gets the next value from the arg list as type T.
+  template T inline get_next_arg_value() {
+    return args_cur.next_var();
+  }
+
+  //----------------------------------------------------
+  // INDEX MODE ONLY FUNCTIONS AFTER HERE:
+  //----------------------------------------------------
+
+#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
+
+  // parse_index parses the index of a value inside a format string. It
+  // assumes that str[*local_pos] points to the character after a '%', and
+  // returns 0 if there is no closing $, or if it finds no number. If it finds a
+  // number followed by a $, it will move local_pos past the $, else it will not
+  // move local_pos.
+  size_t parse_index(size_t *local_pos);
+
+  // get_arg_value gets the value from the arg list at index (starting at 1).
+  // This may require rereading the arguments. An index of 0 is interpreted as
+  // the next value.
+  template T inline get_arg_value(size_t index) {
+    if (!(index == 0 || index == args_index))
+      args_to_index(index);
+
+    ++args_index;
+    return get_next_arg_value();
+  }
+
+  // The ArgList can only return the next item in the list. This function is
+  // used in index mode when the item that needs to be read is not the next one.
+  // It moves args_cur to the index requested so that the appropriate value may
+  // be read. This may involve rereading the arguments from args_start, so
+  // reading every argument this way is in the worst case an O(n^2) operation.
+ void args_to_index(size_t index); + +#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp new file mode 100644 --- /dev/null +++ b/libc/src/stdio/scanf_core/parser.cpp @@ -0,0 +1,220 @@ +//===-- Format string parser implementation for scanf ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// #define LLVM_LIBC_SCANF_DISABLE_INDEX_MODE 1 // This will be a compile flag. + +#include "src/stdio/scanf_core/parser.h" + +#include "src/__support/arg_list.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/bitset.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/ctype_utils.h" +#include "src/__support/str_to_integer.h" + +namespace __llvm_libc { +namespace scanf_core { + +#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE +#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value(index) +#else +#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value() +#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + +FormatSection Parser::get_next_section() { + FormatSection section; + size_t starting_pos = cur_pos; + if (str[cur_pos] == '%') { + // format section + section.has_conv = true; + + ++cur_pos; + [[maybe_unused]] size_t conv_index = 0; + +#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + conv_index = parse_index(&cur_pos); +#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + + if (str[cur_pos] == '*') { + ++cur_pos; + section.flags = FormatFlags::NO_WRITE; + } + + // handle width + section.max_width = -1; + if (internal::isdigit(str[cur_pos])) { + 
char *int_end; + section.max_width = + internal::strtointeger(str + cur_pos, &int_end, 10); + cur_pos = int_end - str; + } + + // TODO(michaelrj): add posix allocate flag support. + // if (str[cur_pos] == 'm') { + // ++cur_pos; + // section.flags = FormatFlags::ALLOCATE; + // } + + LengthModifier lm = parse_length_modifier(&cur_pos); + section.length_modifier = lm; + + section.conv_name = str[cur_pos]; + + // If NO_WRITE is not set, then read the next arg as the output pointer. + if ((section.flags & FormatFlags::NO_WRITE) == 0) { + // Since all outputs are pointers, there's no need to distinguish when + // reading from va_args. They're all the same size and stored the same. + section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); + } + + ++cur_pos; + + // If the format is a bracketed one, then we need to parse out the insides + // of the brackets. + if (section.conv_name == '[') { + constexpr char CLOSING_BRACKET = ']'; + constexpr char INVERT_FLAG = '^'; + constexpr char RANGE_OPERATOR = '-'; + + cpp::bitset<256> scan_set; + bool invert = false; + + // The circumflex in the first position represents the inversion flag, but + // it's easier to apply that at the end so we just store it for now. + if (str[cur_pos] == INVERT_FLAG) { + invert = true; + ++cur_pos; + } + + // This is used to determine if a hyphen is being used as a literal or as + // a range operator. + size_t set_start_pos = cur_pos; + + // Normally the right bracket closes the set, but if it's the first + // character (possibly after the inversion flag) then it's instead + // included as a character in the set and the second right bracket closes + // the set. + if (str[cur_pos] == CLOSING_BRACKET) { + scan_set.set(CLOSING_BRACKET); + ++cur_pos; + } + + while (str[cur_pos] != '\0' && str[cur_pos] != CLOSING_BRACKET) { + // If a hyphen is being used as a range operator, since it's neither at + // the beginning nor end of the set. 
+ if (str[cur_pos] == RANGE_OPERATOR && cur_pos != set_start_pos && + str[cur_pos + 1] != CLOSING_BRACKET && str[cur_pos + 1] != '\0') { + // Technically there is no requirement to correct the ordering of the + // range, but since the range operator is entirely implementation + // defined it seems like a good convenience. + char a = str[cur_pos - 1]; + char b = str[cur_pos + 1]; + char start = (a < b ? a : b); + char end = (a < b ? b : a); + scan_set.set_range(start, end); + cur_pos += 2; + } else { + scan_set.set(str[cur_pos]); + ++cur_pos; + } + } + if (invert) + scan_set.flip(); + + if (str[cur_pos] == CLOSING_BRACKET) { + ++cur_pos; + section.scan_set = scan_set; + } else { + // if the end of the string was encountered, this is not a valid set. + section.has_conv = false; + } + } + } else { + // raw section + section.has_conv = false; + while (str[cur_pos] != '%' && str[cur_pos] != '\0') + ++cur_pos; + } + section.raw_string = {str + starting_pos, cur_pos - starting_pos}; + return section; +} + +LengthModifier Parser::parse_length_modifier(size_t *local_pos) { + switch (str[*local_pos]) { + case ('l'): + if (str[*local_pos + 1] == 'l') { + *local_pos += 2; + return LengthModifier::ll; + } else { + ++*local_pos; + return LengthModifier::l; + } + case ('h'): + if (str[*local_pos + 1] == 'h') { + *local_pos += 2; + return LengthModifier::hh; + } else { + ++*local_pos; + return LengthModifier::h; + } + case ('L'): + ++*local_pos; + return LengthModifier::L; + case ('j'): + ++*local_pos; + return LengthModifier::j; + case ('z'): + ++*local_pos; + return LengthModifier::z; + case ('t'): + ++*local_pos; + return LengthModifier::t; + default: + return LengthModifier::NONE; + } +} + +//---------------------------------------------------- +// INDEX MODE ONLY FUNCTIONS AFTER HERE: +//---------------------------------------------------- + +#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + +size_t Parser::parse_index(size_t *local_pos) { + if (internal::isdigit(str[*local_pos])) 
{ + char *int_end; + size_t index = + internal::strtointeger(str + *local_pos, &int_end, 10); + if (int_end[0] != '$') + return 0; + *local_pos = 1 + int_end - str; + return index; + } + return 0; +} + +void Parser::args_to_index(size_t index) { + if (args_index > index) { + args_index = 1; + args_cur = args_start; + } + + while (args_index < index) { + // Since all arguments must be pointers, we can just read all of them as + // void * and not worry about type issues. + args_cur.next_var(); + ++args_index; + } +} + +#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/scanf_config.h b/libc/src/stdio/scanf_core/scanf_config.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_config.h @@ -0,0 +1,24 @@ +//===-- Scanf Configuration Handler ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H + +// These macros can be set or unset to adjust scanf behavior at compile time. + +// This flag disables all functionality relating to floating point numbers. This +// can be useful for embedded systems or other situations where binary size is +// important. +// #define LLVM_LIBC_SCANF_DISABLE_FLOAT + +// This flag disables index mode, a posix extension often used for +// internationalization of format strings. Supporting it takes up additional +// memory and parsing time, so it can be disabled if it's not used. 
+// #define LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -188,4 +188,5 @@ ) add_subdirectory(printf_core) +add_subdirectory(scanf_core) add_subdirectory(testdata) diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -56,7 +56,7 @@ expected.raw_string = {str, 4}; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); // TODO: add checks that the format_arr after the last one has length 0 } @@ -70,20 +70,20 @@ expected0.raw_string = {str, 5}; - ASSERT_FORMAT_EQ(expected0, format_arr[0]); + ASSERT_PFORMAT_EQ(expected0, format_arr[0]); expected1.has_conv = true; expected1.raw_string = {str + 5, 2}; expected1.conv_name = '%'; - ASSERT_FORMAT_EQ(expected1, format_arr[1]); + ASSERT_PFORMAT_EQ(expected1, format_arr[1]); expected2.has_conv = false; expected2.raw_string = {str + 7, 5}; - ASSERT_FORMAT_EQ(expected2, format_arr[2]); + ASSERT_PFORMAT_EQ(expected2, format_arr[2]); } TEST(LlvmLibcPrintfParserTest, EvalOneArg) { @@ -99,7 +99,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) { @@ -121,7 +121,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithWidth) { @@ -138,7 +138,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, 
EvalOneArgWithPrecision) { @@ -155,7 +155,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithTrivialPrecision) { @@ -172,7 +172,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithShortLengthModifier) { @@ -189,7 +189,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) { @@ -206,7 +206,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) { @@ -229,7 +229,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); } TEST(LlvmLibcPrintfParserTest, EvalThreeArgs) { @@ -247,7 +247,7 @@ expected0.conv_val_raw = arg1; expected0.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected0, format_arr[0]); + ASSERT_PFORMAT_EQ(expected0, format_arr[0]); expected1.has_conv = true; @@ -255,7 +255,7 @@ expected1.conv_val_raw = __llvm_libc::cpp::bit_cast(arg2); expected1.conv_name = 'f'; - ASSERT_FORMAT_EQ(expected1, format_arr[1]); + ASSERT_PFORMAT_EQ(expected1, format_arr[1]); expected2.has_conv = true; @@ -263,7 +263,7 @@ expected2.conv_val_ptr = const_cast(arg3); expected2.conv_name = 's'; - ASSERT_FORMAT_EQ(expected2, format_arr[2]); + ASSERT_PFORMAT_EQ(expected2, format_arr[2]); } #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE @@ -281,7 +281,7 @@ expected.conv_val_raw = arg1; expected.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected, format_arr[0]); + ASSERT_PFORMAT_EQ(expected, format_arr[0]); 
} TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsSequential) { @@ -299,7 +299,7 @@ expected0.conv_val_raw = arg1; expected0.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected0, format_arr[0]); + ASSERT_PFORMAT_EQ(expected0, format_arr[0]); expected1.has_conv = true; @@ -307,7 +307,7 @@ expected1.conv_val_raw = __llvm_libc::cpp::bit_cast(arg2); expected1.conv_name = 'f'; - ASSERT_FORMAT_EQ(expected1, format_arr[1]); + ASSERT_PFORMAT_EQ(expected1, format_arr[1]); expected2.has_conv = true; @@ -315,7 +315,7 @@ expected2.conv_val_ptr = const_cast(arg3); expected2.conv_name = 's'; - ASSERT_FORMAT_EQ(expected2, format_arr[2]); + ASSERT_PFORMAT_EQ(expected2, format_arr[2]); } TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsReverse) { @@ -333,7 +333,7 @@ expected0.conv_val_raw = arg1; expected0.conv_name = 'd'; - ASSERT_FORMAT_EQ(expected0, format_arr[0]); + ASSERT_PFORMAT_EQ(expected0, format_arr[0]); expected1.has_conv = true; @@ -341,7 +341,7 @@ expected1.conv_val_raw = __llvm_libc::cpp::bit_cast(arg2); expected1.conv_name = 'f'; - ASSERT_FORMAT_EQ(expected1, format_arr[1]); + ASSERT_PFORMAT_EQ(expected1, format_arr[1]); expected2.has_conv = true; @@ -349,7 +349,7 @@ expected2.conv_val_ptr = const_cast(arg3); expected2.conv_name = 's'; - ASSERT_FORMAT_EQ(expected2, format_arr[2]); + ASSERT_PFORMAT_EQ(expected2, format_arr[2]); } TEST(LlvmLibcPrintfParserTest, IndexModeTenArgsRandom) { @@ -367,7 +367,7 @@ static_cast(4 + (i >= 9 ? 
1 : 0))}; expected.conv_val_raw = i + 1; expected.conv_name = 'd'; - EXPECT_FORMAT_EQ(expected, format_arr[i]); + EXPECT_PFORMAT_EQ(expected, format_arr[i]); } } @@ -388,7 +388,7 @@ expected0.raw_string = {str, 12}; - EXPECT_FORMAT_EQ(expected0, format_arr[0]); + EXPECT_PFORMAT_EQ(expected0, format_arr[0]); expected1.has_conv = true; @@ -397,26 +397,26 @@ expected1.conv_val_raw = arg3; expected1.conv_name = 'u'; - EXPECT_FORMAT_EQ(expected1, format_arr[1]); + EXPECT_PFORMAT_EQ(expected1, format_arr[1]); expected2.has_conv = false; expected2.raw_string = {str + 18, 1}; - EXPECT_FORMAT_EQ(expected2, format_arr[2]); + EXPECT_PFORMAT_EQ(expected2, format_arr[2]); expected3.has_conv = true; expected3.raw_string = {str + 19, 2}; expected3.conv_name = '%'; - EXPECT_FORMAT_EQ(expected3, format_arr[3]); + EXPECT_PFORMAT_EQ(expected3, format_arr[3]); expected4.has_conv = false; expected4.raw_string = {str + 21, 1}; - EXPECT_FORMAT_EQ(expected4, format_arr[4]); + EXPECT_PFORMAT_EQ(expected4, format_arr[4]); expected5.has_conv = true; @@ -426,13 +426,13 @@ expected5.conv_val_raw = __llvm_libc::cpp::bit_cast(arg2); expected5.conv_name = 'f'; - EXPECT_FORMAT_EQ(expected5, format_arr[5]); + EXPECT_PFORMAT_EQ(expected5, format_arr[5]); expected6.has_conv = false; expected6.raw_string = {str + 30, 1}; - EXPECT_FORMAT_EQ(expected6, format_arr[6]); + EXPECT_PFORMAT_EQ(expected6, format_arr[6]); expected7.has_conv = true; @@ -442,13 +442,13 @@ expected7.conv_val_raw = __llvm_libc::cpp::bit_cast(arg2); expected7.conv_name = 'f'; - EXPECT_FORMAT_EQ(expected7, format_arr[7]); + EXPECT_PFORMAT_EQ(expected7, format_arr[7]); expected8.has_conv = false; expected8.raw_string = {str + 40, 1}; - EXPECT_FORMAT_EQ(expected8, format_arr[8]); + EXPECT_PFORMAT_EQ(expected8, format_arr[8]); expected9.has_conv = true; @@ -458,7 +458,7 @@ expected9.conv_val_raw = arg1; expected9.conv_name = 'c'; - EXPECT_FORMAT_EQ(expected9, format_arr[9]); + EXPECT_PFORMAT_EQ(expected9, format_arr[9]); } #endif // 
LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt @@ -0,0 +1,14 @@ +add_libc_unittest( + parser_test + SUITE + libc_stdio_unittests + SRCS + parser_test.cpp + LINK_LIBRARIES + LibcScanfHelpers + DEPENDS + libc.src.stdio.scanf_core.parser + libc.src.stdio.scanf_core.core_structs + libc.src.__support.CPP.string_view + libc.src.__support.arg_list +) diff --git a/libc/test/src/stdio/scanf_core/parser_test.cpp b/libc/test/src/stdio/scanf_core/parser_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/stdio/scanf_core/parser_test.cpp @@ -0,0 +1,754 @@ +//===-- Unittests for the scanf Parser -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/bitset.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/parser.h" + +#include + +#include "utils/UnitTest/ScanfMatcher.h" +#include "utils/UnitTest/Test.h" + +using __llvm_libc::cpp::string_view; + +void init(const char *__restrict str, ...) { + va_list vlist; + va_start(vlist, str); + __llvm_libc::internal::ArgList v(vlist); + va_end(vlist); + + __llvm_libc::scanf_core::Parser parser(str, v); +} + +void evaluate(__llvm_libc::scanf_core::FormatSection *format_arr, + const char *__restrict str, ...) 
{ + va_list vlist; + va_start(vlist, str); + __llvm_libc::internal::ArgList v(vlist); + va_end(vlist); + + __llvm_libc::scanf_core::Parser parser(str, v); + + for (auto cur_section = parser.get_next_section(); + !cur_section.raw_string.empty(); + cur_section = parser.get_next_section()) { + *format_arr = cur_section; + ++format_arr; + } +} + +TEST(LlvmLibcScanfParserTest, Constructor) { init("test", 1, 2); } + +TEST(LlvmLibcScanfParserTest, EvalRaw) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "test"; + evaluate(format_arr, str); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = false; + + expected.raw_string = str; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); + // TODO: add checks that the format_arr after the last one has length 0 +} + +TEST(LlvmLibcScanfParserTest, EvalSimple) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "test %% test"; + evaluate(format_arr, str); + + __llvm_libc::scanf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = false; + + expected0.raw_string = {str, 5}; + + ASSERT_SFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + + expected1.raw_string = {str + 5, 2}; + expected1.conv_name = '%'; + + ASSERT_SFORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = false; + + expected2.raw_string = {str + 7, 5}; + + ASSERT_SFORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%d"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.output_ptr = &arg1; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArgWithFlag) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%*d"; + // 
Since NO_WRITE is set, the argument shouldn't be used, but I've included + // one anyways because in the case that it doesn't work it's better for it to + // have a real argument to check against. + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.flags = __llvm_libc::scanf_core::FormatFlags::NO_WRITE; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); + + // If NO_WRITE is set, then the equality check ignores the pointer since it's + // irrelevant, but in this case I want to make sure that it hasn't been set + // and check it separately. + ASSERT_EQ(expected.output_ptr, format_arr[0].output_ptr); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArgWithWidth) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%12d"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.max_width = 12; + expected.output_ptr = &arg1; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArgWithShortLengthModifier) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%hd"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.length_modifier = __llvm_libc::scanf_core::LengthModifier::h; + expected.output_ptr = &arg1; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArgWithLongLengthModifier) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%lld"; + long long arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + 
expected.has_conv = true; + + expected.raw_string = str; + expected.length_modifier = __llvm_libc::scanf_core::LengthModifier::ll; + expected.output_ptr = &arg1; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalOneArgWithAllOptions) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%*56jd"; + intmax_t arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.flags = __llvm_libc::scanf_core::FormatFlags::NO_WRITE; + expected.max_width = 56; + expected.length_modifier = __llvm_libc::scanf_core::LengthModifier::j; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalSimpleBracketArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[abc]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('a'); + scan_set.set('b'); + scan_set.set('c'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRange) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[A-D]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('A'); + scan_set.set('B'); + scan_set.set('C'); + scan_set.set('D'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, 
EvalBracketArgTwoRanges) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[A-De-g]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('A'); + scan_set.set('B'); + scan_set.set('C'); + scan_set.set('D'); + scan_set.set_range('e', 'g'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgJustHyphen) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[-]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('-'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgLeftHyphen) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[-A]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('-'); + scan_set.set('A'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRightHyphen) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[Z-]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = 
'['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('-'); + scan_set.set('Z'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgInvertSimple) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[^abc]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set('a'); + scan_set.set('b'); + scan_set.set('c'); + scan_set.flip(); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgInvertRange) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[^0-9]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set_range('0', '9'); + scan_set.flip(); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRightBracket) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[]]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set(']'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRightBracketRange) { + __llvm_libc::scanf_core::FormatSection 
format_arr[10]; + const char *str = "%[]-a]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set_range(']', 'a'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRightBracketInvert) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[^]]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set(']'); + scan_set.flip(); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalBracketArgRightBracketInvertRange) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[^]-^]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set_range(']', '^'); + scan_set.flip(); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +// This is not part of the standard, but the hyphen's effect is always +// implementation defined, and I have defined it such that it will capture the +// correct range regardless of the order of the characters. 
+TEST(LlvmLibcScanfParserTest, EvalBracketArgBackwardsRange) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%[9-0]"; + char arg1 = 'a'; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = str; + expected.conv_name = '['; + expected.output_ptr = &arg1; + + __llvm_libc::cpp::bitset<256> scan_set; + + scan_set.set_range('0', '9'); + + expected.scan_set = scan_set; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, EvalThreeArgs) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%d%f%s"; + int arg1 = 12345; + double arg2 = 123.45; + const char *arg3 = "12345"; + evaluate(format_arr, str, &arg1, &arg2, &arg3); + + __llvm_libc::scanf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = true; + + expected0.raw_string = {str, 2}; + expected0.output_ptr = &arg1; + expected0.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + + expected1.raw_string = {str + 2, 2}; + expected1.output_ptr = &arg2; + expected1.conv_name = 'f'; + + ASSERT_SFORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = true; + + expected2.raw_string = {str + 4, 2}; + expected2.output_ptr = &arg3; + expected2.conv_name = 's'; + + ASSERT_SFORMAT_EQ(expected2, format_arr[2]); +} + +#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE + +TEST(LlvmLibcScanfParserTest, IndexModeOneArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%1$d"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = {str, 4}; + expected.output_ptr = &arg1; + expected.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcScanfParserTest, IndexModeThreeArgsSequential) { + __llvm_libc::scanf_core::FormatSection 
format_arr[10]; + const char *str = "%1$d%2$f%3$s"; + int arg1 = 12345; + double arg2 = 123.45; + const char *arg3 = "12345"; + evaluate(format_arr, str, &arg1, &arg2, &arg3); + + __llvm_libc::scanf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = true; + + expected0.raw_string = {str, 4}; + expected0.output_ptr = &arg1; + expected0.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + + expected1.raw_string = {str + 4, 4}; + expected1.output_ptr = &arg2; + expected1.conv_name = 'f'; + + ASSERT_SFORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = true; + + expected2.raw_string = {str + 8, 4}; + expected2.output_ptr = &arg3; + expected2.conv_name = 's'; + + ASSERT_SFORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcScanfParserTest, IndexModeThreeArgsReverse) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%3$d%2$f%1$s"; + int arg1 = 12345; + double arg2 = 123.45; + const char *arg3 = "12345"; + evaluate(format_arr, str, &arg3, &arg2, &arg1); + + __llvm_libc::scanf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = true; + + expected0.raw_string = {str, 4}; + expected0.output_ptr = &arg1; + expected0.conv_name = 'd'; + + ASSERT_SFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + + expected1.raw_string = {str + 4, 4}; + expected1.output_ptr = &arg2; + expected1.conv_name = 'f'; + + ASSERT_SFORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = true; + + expected2.raw_string = {str + 8, 4}; + expected2.output_ptr = &arg3; + expected2.conv_name = 's'; + + ASSERT_SFORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcScanfParserTest, IndexModeTenArgsRandom) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%6$d%3$d%7$d%2$d%8$d%1$d%4$d%9$d%5$d%10$d"; + uintptr_t args[10] = {6, 4, 2, 7, 9, 1, 3, 5, 8, 10}; + evaluate(format_arr, str, args[0], args[1], args[2], args[3], 
args[4],
+           args[5], args[6], args[7], args[8], args[9]);
+
+  for (size_t i = 0; i < 10; ++i) {
+    __llvm_libc::scanf_core::FormatSection expected;
+    expected.has_conv = true;
+
+    expected.raw_string = {str + (4 * i),
+                           static_cast<size_t>(4 + (i >= 9 ? 1 : 0))};
+    expected.output_ptr = reinterpret_cast<void *>(i + 1);
+    expected.conv_name = 'd';
+    EXPECT_SFORMAT_EQ(expected, format_arr[i]);
+  }
+}
+
+TEST(LlvmLibcScanfParserTest, IndexModeComplexParsing) {
+  __llvm_libc::scanf_core::FormatSection format_arr[11];
+  const char *str = "normal text %3$llu %% %2$*f %4$d %1$1c%5$[123]";
+  char arg1 = '1';
+  double arg2 = 123.45;
+  unsigned long long arg3 = 12345;
+  int arg4 = 10;
+  char arg5 = 'A';
+  evaluate(format_arr, str, &arg1, &arg2, &arg3, &arg4, &arg5);
+  // Sections are checked in format-string order; "%N$" picks the N-th pointer.
+  __llvm_libc::scanf_core::FormatSection expected0, expected1, expected2,
+      expected3, expected4, expected5, expected6, expected7, expected8,
+      expected9, expected10;
+
+  expected0.has_conv = false;
+
+  // "normal text "
+  expected0.raw_string = {str, 12};
+
+  EXPECT_SFORMAT_EQ(expected0, format_arr[0]);
+
+  expected1.has_conv = true;
+
+  // "%3$llu"
+  expected1.raw_string = {str + 12, 6};
+  expected1.length_modifier = __llvm_libc::scanf_core::LengthModifier::ll;
+  expected1.output_ptr = &arg3;
+  expected1.conv_name = 'u';
+
+  EXPECT_SFORMAT_EQ(expected1, format_arr[1]);
+
+  expected2.has_conv = false;
+
+  // " "
+  expected2.raw_string = {str + 18, 1};
+
+  EXPECT_SFORMAT_EQ(expected2, format_arr[2]);
+
+  expected3.has_conv = true;
+
+  expected3.raw_string = {str + 19, 2}; // "%%"
+  expected3.conv_name = '%';
+
+  EXPECT_SFORMAT_EQ(expected3, format_arr[3]);
+
+  expected4.has_conv = false;
+
+  // " "
+  expected4.raw_string = {str + 21, 1};
+
+  EXPECT_SFORMAT_EQ(expected4, format_arr[4]);
+
+  expected5.has_conv = true;
+
+  // "%2$*f" ('*' sets NO_WRITE, so output_ptr is left as nullptr)
+  expected5.raw_string = {str + 22, 5};
+  expected5.flags = __llvm_libc::scanf_core::FormatFlags::NO_WRITE;
+  expected5.conv_name = 'f';
+
+  EXPECT_SFORMAT_EQ(expected5, format_arr[5]);
+
+  expected6.has_conv = false;
+
+  // " "
+  expected6.raw_string = {str + 27, 1};
+
+  EXPECT_SFORMAT_EQ(expected6, format_arr[6]);
+
+  expected7.has_conv = true;
+
+  // "%4$d"
+  expected7.raw_string = {str + 28, 4};
+  expected7.output_ptr = &arg4;
+  expected7.conv_name = 'd';
+
+  EXPECT_SFORMAT_EQ(expected7, format_arr[7]);
+
+  expected8.has_conv = false;
+
+  // " "
+  expected8.raw_string = {str + 32, 1};
+
+  EXPECT_SFORMAT_EQ(expected8, format_arr[8]);
+
+  expected9.has_conv = true;
+
+  // "%1$1c"
+  expected9.raw_string = {str + 33, 5};
+  expected9.max_width = 1;
+  expected9.output_ptr = &arg1;
+  expected9.conv_name = 'c';
+
+  EXPECT_SFORMAT_EQ(expected9, format_arr[9]);
+
+  expected10.has_conv = true;
+
+  // "%5$[123]"
+  expected10.raw_string = {str + 38, 8};
+  expected10.output_ptr = &arg5;
+  expected10.conv_name = '[';
+
+  __llvm_libc::cpp::bitset<256> scan_set;
+
+  scan_set.set_range('1', '3');
+
+  expected10.scan_set = scan_set;
+
+  EXPECT_SFORMAT_EQ(expected10, format_arr[10]);
+}
+
+#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
diff --git a/libc/utils/UnitTest/CMakeLists.txt b/libc/utils/UnitTest/CMakeLists.txt
--- a/libc/utils/UnitTest/CMakeLists.txt
+++ b/libc/utils/UnitTest/CMakeLists.txt
@@ -69,3 +69,18 @@
   libc.src.stdio.printf_core.core_structs
   libc.utils.UnitTest.string_utils
 )
+
+add_library(
+  LibcScanfHelpers
+  ScanfMatcher.h
+  ScanfMatcher.cpp
+)
+target_include_directories(LibcScanfHelpers PUBLIC ${LIBC_SOURCE_DIR})
+target_link_libraries(LibcScanfHelpers LibcUnitTest)
+add_dependencies(
+  LibcScanfHelpers
+  LibcUnitTest
+  libc.src.__support.FPUtil.fp_bits
+  libc.src.stdio.scanf_core.core_structs
+  libc.utils.UnitTest.string_utils
+)
diff --git a/libc/utils/UnitTest/PrintfMatcher.h b/libc/utils/UnitTest/PrintfMatcher.h
--- a/libc/utils/UnitTest/PrintfMatcher.h
+++ b/libc/utils/UnitTest/PrintfMatcher.h
@@ -35,11 +35,11 @@
 } // namespace printf_core
 } // namespace __llvm_libc
 
-#define EXPECT_FORMAT_EQ(expected, actual)                                     \
+#define EXPECT_PFORMAT_EQ(expected, actual)                                    \
EXPECT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \ expected)) -#define ASSERT_FORMAT_EQ(expected, actual) \ +#define ASSERT_PFORMAT_EQ(expected, actual) \ ASSERT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \ expected)) diff --git a/libc/utils/UnitTest/PrintfMatcher.h b/libc/utils/UnitTest/ScanfMatcher.h copy from libc/utils/UnitTest/PrintfMatcher.h copy to libc/utils/UnitTest/ScanfMatcher.h --- a/libc/utils/UnitTest/PrintfMatcher.h +++ b/libc/utils/UnitTest/ScanfMatcher.h @@ -1,4 +1,4 @@ -//===-- PrintfMatcher.h -----------------------------------------*- C++ -*-===// +//===-- ScanfMatcher.h ------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,16 +6,16 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_UTILS_UNITTEST_PRINTF_MATCHER_H -#define LLVM_LIBC_UTILS_UNITTEST_PRINTF_MATCHER_H +#ifndef LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H +#define LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H -#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/scanf_core/core_structs.h" #include "utils/UnitTest/Test.h" #include namespace __llvm_libc { -namespace printf_core { +namespace scanf_core { namespace testing { class FormatSectionMatcher @@ -32,15 +32,15 @@ }; } // namespace testing -} // namespace printf_core +} // namespace scanf_core } // namespace __llvm_libc -#define EXPECT_FORMAT_EQ(expected, actual) \ - EXPECT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \ +#define EXPECT_SFORMAT_EQ(expected, actual) \ + EXPECT_THAT(actual, __llvm_libc::scanf_core::testing::FormatSectionMatcher( \ expected)) -#define ASSERT_FORMAT_EQ(expected, actual) \ - ASSERT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \ +#define ASSERT_SFORMAT_EQ(expected, actual) \ + 
ASSERT_THAT(actual, __llvm_libc::scanf_core::testing::FormatSectionMatcher( \ expected)) -#endif // LLVM_LIBC_UTILS_UNITTEST_PRINTF_MATCHER_H +#endif // LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H diff --git a/libc/utils/UnitTest/ScanfMatcher.cpp b/libc/utils/UnitTest/ScanfMatcher.cpp new file mode 100644 --- /dev/null +++ b/libc/utils/UnitTest/ScanfMatcher.cpp @@ -0,0 +1,99 @@ +//===-- ScanfMatcher.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ScanfMatcher.h" + +#include "src/__support/FPUtil/FPBits.h" +#include "src/stdio/scanf_core/core_structs.h" + +#include "utils/UnitTest/StringUtils.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { +namespace testing { + +bool FormatSectionMatcher::match(FormatSection actualValue) { + actual = actualValue; + return expected == actual; +} + +namespace { + +#define IF_FLAG_SHOW_FLAG(flag_name) \ + do { \ + if ((form.flags & FormatFlags::flag_name) == FormatFlags::flag_name) \ + stream << "\n\t\t" << #flag_name; \ + } while (false) +#define CASE_LM(lm) \ + case (LengthModifier::lm): \ + stream << #lm; \ + break + +void display(testutils::StreamWrapper &stream, FormatSection form) { + stream << "Raw String (len " << form.raw_string.size() << "): \""; + for (size_t i = 0; i < form.raw_string.size(); ++i) { + stream << form.raw_string[i]; + } + stream << "\""; + if (form.has_conv) { + stream << "\n\tHas Conv\n\tFlags:"; + IF_FLAG_SHOW_FLAG(NO_WRITE); + IF_FLAG_SHOW_FLAG(ALLOCATE); + stream << "\n"; + stream << "\tmax width: " << form.max_width << "\n"; + stream << "\tlength modifier: "; + switch (form.length_modifier) { + CASE_LM(NONE); + CASE_LM(l); + CASE_LM(ll); + CASE_LM(h); + CASE_LM(hh); + 
CASE_LM(j); + CASE_LM(z); + CASE_LM(t); + CASE_LM(L); + } + stream << "\n"; + // If the pointer is used (NO_WRITE is not set and the conversion isn't %). + if (((form.flags & FormatFlags::NO_WRITE) == 0) && + (form.conv_name != '%')) { + stream << "\tpointer value: " + << int_to_hex( + reinterpret_cast(form.output_ptr)) + << "\n"; + } + + stream << "\tconversion name: " << form.conv_name << "\n"; + + if (form.conv_name == '[') { + stream << "\t\t"; + for (size_t i = 0; i < 256 /* char max */; ++i) { + if (form.scan_set.test(i)) { + stream << static_cast(i); + } + } + stream << "\n\t]\n"; + } + } +} +} // anonymous namespace + +void FormatSectionMatcher::explainError(testutils::StreamWrapper &stream) { + stream << "expected format section: "; + display(stream, expected); + stream << '\n'; + stream << "actual format section : "; + display(stream, actual); + stream << '\n'; +} + +} // namespace testing +} // namespace scanf_core +} // namespace __llvm_libc