diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -30,6 +30,20 @@
   return()
 endif()
 
+add_object_library(
+  scanf_main
+  SRCS
+    scanf_main.cpp
+  HDRS
+    scanf_main.h
+  DEPENDS
+    .parser
+    .reader
+    .converter
+    .core_structs
+    libc.src.__support.arg_list
+)
+
 add_object_library(
   string_reader
   SRCS
@@ -58,3 +72,20 @@
     .string_reader
     .file_reader
 )
+
+add_object_library(
+  converter
+  SRCS
+    converter.cpp
+    string_converter.cpp
+  HDRS
+    converter.h
+    string_converter.h
+  DEPENDS
+    .reader
+    .core_structs
+    libc.src.__support.ctype_utils
+    libc.src.__support.CPP.bitset
+    libc.src.__support.CPP.string_view
+    libc.src.__support.CPP.limits
+)
diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter.h
@@ -0,0 +1,33 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// convert will call the conversion function that matches the FormatSection's
+// conv_name. That function reads characters from the reader and, when
+// applicable, stores the converted result through the section's output_ptr.
+int convert(Reader *reader, const FormatSection &to_conv);
+
+// raw_match takes a raw string and matches it to the characters obtained from
+// the reader.
+int raw_match(Reader *reader, cpp::string_view raw_string);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -0,0 +1,106 @@
+//===-- Format specifier converter implementation for scanf ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/converter.h"
+
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Dispatches on to_conv.conv_name. %s first consumes any leading whitespace
+// via raw_match(reader, " "); %c and %[ do not. Unhandled conversion names
+// fall back to matching the section's raw text literally.
+int convert(Reader *reader, const FormatSection &to_conv) {
+  int ret_val = 0;
+  switch (to_conv.conv_name) {
+  case '%':
+    return raw_match(reader, "%");
+  case 's':
+    ret_val = raw_match(reader, " ");
+    if (ret_val != READ_OK)
+      return ret_val;
+    return convert_string(reader, to_conv);
+  case 'c':
+  case '[':
+    return convert_string(reader, to_conv);
+    // The cases below are commented out, so for now these conversion names
+    // are handled by the default branch.
+    // case 'd':
+    // case 'i':
+    // case 'u':
+    // case 'o':
+    // case 'x':
+    // case 'X':
+    //   ret_val = raw_match(reader, " ");
+    //   if (ret_val != READ_OK)
+    //     return ret_val;
+    //   return convert_int(reader, to_conv);
+    // #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT
+    //   case 'f':
+    //   case 'F':
+    //   case 'e':
+    //   case 'E':
+    //   case 'a':
+    //   case 'A':
+    //   case 'g':
+    //   case 'G':
+    //     ret_val = raw_match(reader, " ");
+    //     if (ret_val != READ_OK)
+    //       return ret_val;
+    //     return convert_float(reader, to_conv);
+    // #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
+    // #ifndef LLVM_LIBC_SCANF_DISABLE_WRITE_INT
+    //   case 'n':
+    //     return convert_write_int(reader, to_conv);
+    // #endif // LLVM_LIBC_SCANF_DISABLE_WRITE_INT
+    //   case 'p':
+    //     ret_val = raw_match(reader, " ");
+    //     if (ret_val != READ_OK)
+    //       return ret_val;
+    //     return convert_pointer(reader, to_conv);
+  default:
+    return raw_match(reader, to_conv.raw_string);
+  }
+  return -1;
+}
+
+// Matches raw_string against the reader one character at a time. An empty
+// raw_string matches trivially. One character of lookahead is always read and
+// then handed back with ungetc, so the reader is left on the first character
+// that was not consumed, even when MATCHING_FAILURE is returned.
+int raw_match(Reader *reader, cpp::string_view raw_string) {
+  char cur_char = reader->getc();
+  int ret_val = READ_OK;
+  for (size_t i = 0; i < raw_string.size(); ++i) {
+    // Any space character matches any number of space characters.
+    if (internal::isspace(raw_string[i])) {
+      while (internal::isspace(cur_char)) {
+        cur_char = reader->getc();
+      }
+    } else {
+      if (raw_string[i] == cur_char) {
+        cur_char = reader->getc();
+      } else {
+        ret_val = MATCHING_FAILURE;
+        break;
+      }
+    }
+  }
+  reader->ungetc(cur_char);
+  return ret_val;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/core_structs.h b/libc/src/stdio/scanf_core/core_structs.h
--- a/libc/src/stdio/scanf_core/core_structs.h
+++ b/libc/src/stdio/scanf_core/core_structs.h
@@ -78,7 +78,7 @@
 
 enum ErrorCodes : int {
   // This is the value to be returned by conversions when no error has occurred.
-  WRITE_OK = 0,
+  READ_OK = 0,
   // These are the scanf return values for when an error has occurred. They are
   // all negative, and should be distinct.
FILE_READ_ERROR = -1, diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h --- a/libc/src/stdio/scanf_core/reader.h +++ b/libc/src/stdio/scanf_core/reader.h @@ -26,6 +26,8 @@ const ReaderType reader_type; + size_t cur_chars_read = 0; + public: Reader(StringReader *init_string_reader) : string_reader(init_string_reader), reader_type(ReaderType::String) {} @@ -40,6 +42,8 @@ // This moves the input back by one character, placing c into the buffer if // this is a file reader, else c is ignored. void ungetc(char c); + + size_t chars_read() { return cur_chars_read; } }; } // namespace scanf_core diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp --- a/libc/src/stdio/scanf_core/reader.cpp +++ b/libc/src/stdio/scanf_core/reader.cpp @@ -13,6 +13,7 @@ namespace scanf_core { char Reader::getc() { + ++cur_chars_read; if (reader_type == ReaderType::String) { return string_reader->get_char(); } else { @@ -21,6 +22,7 @@ } void Reader::ungetc(char c) { + --cur_chars_read; if (reader_type == ReaderType::String) { // The string reader ignores the char c passed to unget since it doesn't // need to place anything back into a buffer, and modifying the source diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_main.h @@ -0,0 +1,26 @@ +//===-- Starting point for scanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+int scanf_main(Reader *reader, const char *__restrict str,
+               internal::ArgList &args);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -0,0 +1,59 @@
+//===-- Starting point for scanf --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/scanf_main.h"
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/parser.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Runs the whole format string against the reader. The Parser splits str into
+// sections; sections with a conversion go through convert, everything else
+// through raw_match. Processing stops at the first section that does not
+// return READ_OK. Returns the number of assignments made, or -1 if nothing was
+// assigned and a section failed.
+int scanf_main(Reader *reader, const char *__restrict str,
+               internal::ArgList &args) {
+  Parser parser(str, args);
+  int ret_val = READ_OK;
+  int conversions = 0;
+  for (FormatSection cur_section = parser.get_next_section();
+       !cur_section.raw_string.empty() && ret_val == READ_OK;
+       cur_section = parser.get_next_section()) {
+    if (cur_section.has_conv) {
+      ret_val = convert(reader, cur_section);
+      // Only count conversions that assign to an argument. %% and %n never
+      // count towards the return value, and neither does a conversion whose
+      // NO_WRITE flag is set.
+      const bool assigns = cur_section.conv_name != '%' &&
+                           cur_section.conv_name != 'n' &&
+                           (cur_section.flags & NO_WRITE) == 0;
+      if (ret_val == READ_OK && assigns)
+        ++conversions;
+    } else {
+      ret_val = raw_match(reader, cur_section.raw_string);
+    }
+  }
+
+  if (conversions == 0 && ret_val != READ_OK) {
+    // This is intended to be converted to EOF in the client call to avoid
+    // including stdio.h in this internal file.
+    return -1;
+  }
+  return conversions;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/string_converter.h
@@ -0,0 +1,28 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Handles the %s, %c and %[ conversions: reads characters from the reader and,
+// unless the NO_WRITE flag is set, stores them through to_conv.output_ptr.
+// Returns MATCHING_FAILURE if no characters were read, else READ_OK.
+int convert_string(Reader *reader, const FormatSection &to_conv);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/scanf_core/string_converter.cpp
@@ -0,0 +1,76 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include "src/__support/CPP/limits.h"
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+int convert_string(Reader *reader, const FormatSection &to_conv) {
+  // %s "Matches a sequence of non-white-space characters"
+
+  // %c "Matches a sequence of characters of exactly the number specified by the
+  // field width (1 if no field width is present in the directive)"
+
+  // %[ "Matches a nonempty sequence of characters from a set of expected
+  // characters (the scanset)."
+  size_t max_width = 0;
+  if (to_conv.max_width > 0) {
+    max_width = to_conv.max_width;
+  } else {
+    if (to_conv.conv_name == 'c') {
+      max_width = 1;
+    } else {
+      max_width = cpp::numeric_limits<size_t>::max();
+    }
+  }
+
+  char *output = reinterpret_cast<char *>(to_conv.output_ptr);
+
+  char cur_char = reader->getc();
+  size_t i = 0;
+  for (; i < max_width && cur_char != '\0'; ++i) {
+    // Stop on a space for %s, or on a character outside the scanset for %[.
+    // The unsigned char cast keeps the scanset index from going negative.
+    if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
+        (to_conv.conv_name == '[' &&
+         !to_conv.scan_set.test(static_cast<unsigned char>(cur_char))))
+      break;
+    // If the NO_WRITE flag is not set, write to the output.
+    if ((to_conv.flags & NO_WRITE) == 0)
+      output[i] = cur_char;
+    cur_char = reader->getc();
+  }
+
+  // We always read one more character than will be used, so we have to put the
+  // last one back.
+  reader->ungetc(cur_char);
+
+  // If this is %s or %[]
+  if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
+    // Always null terminate the string. This may cause a write to the
+    // (max_width + 1) byte, which is correct. The max width describes the max
+    // number of characters read from the input string, and doesn't necessarily
+    // correspond to the output.
+    output[i] = '\0';
+  }
+
+  if (i == 0)
+    return MATCHING_FAILURE;
+  return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt
--- a/libc/test/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt
@@ -30,3 +30,16 @@
     libc.src.stdio.scanf_core.string_reader
     libc.src.__support.CPP.string_view
 )
+
+add_libc_unittest(
+  converter_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    converter_test.cpp
+  DEPENDS
+    libc.src.stdio.scanf_core.reader
+    libc.src.stdio.scanf_core.string_reader
+    libc.src.stdio.scanf_core.converter
+    libc.src.__support.CPP.string_view
+)
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/stdio/scanf_core/converter_test.cpp
@@ -0,0 +1,295 @@
+//===-- Unittests for the basic scanf converters --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/string_reader.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
+  const char *str = "abcdef";
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  // Reading "abc" should succeed.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "abc"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // Reading nothing should succeed and not advance.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, ""),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // Reading a space where there is none should succeed and not advance.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " "),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // Reading "d" should succeed and advance by 1.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "d"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(4));
+
+  // Reading "z" should fail and not advance.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "z"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(4));
+
+  // Reading "efgh" should fail but advance to the end.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "efgh"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(6));
+}
+
+TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
+  const char *str = " a \t\n b   cd";
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  // Reading "a" should fail and not advance.
+  // Since there's nothing in the format string (the second argument to
+  // raw_match) to match the space in the buffer it isn't consumed.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "a"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(0));
+
+  // Reading " \t\n " should succeed and advance past the space.
+  // Any number of space characters in the format string match 0 or more space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " \t\n "),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(1));
+
+  // Reading "ab" should fail and only advance past the a.
+  // The a characters match, but the format string doesn't have anything to
+  // consume the spaces in the buffer, so it fails.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "ab"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(2));
+
+  // Reading " b" should succeed and advance past the b.
+  // Any number of space characters in the format string matches 0 or more space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " b"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(7));
+
+  // Reading "\t" should succeed and advance past the spaces to the c.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "\t"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(10));
+
+  // Reading "c d" should succeed and advance past the d.
+  // Here the space character in the format string is matching 0 space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "c d"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(12));
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
+  const char *str = "abcDEF123 654LKJihg";
+  char result[20];
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  __llvm_libc::scanf_core::FormatSection conv;
+  conv.has_conv = true;
+  conv.conv_name = 's';
+  conv.output_ptr = result;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+  ASSERT_STREQ(result, "abcDEF123");
+
+  // %s skips all spaces before beginning to read.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_STREQ(result, "654LKJihg");
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
+  const char *str = "abcDEF123 654LKJihg";
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  __llvm_libc::scanf_core::FormatSection conv;
+  conv.has_conv = true;
+  conv.conv_name = 's';
+  conv.flags = __llvm_libc::scanf_core::NO_WRITE;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+
+  // %s skips all spaces before beginning to read.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
+  const char *str = "abcDEF123 654LKJihg";
+  char result[6];
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  __llvm_libc::scanf_core::FormatSection conv;
+  conv.has_conv = true;
+  conv.conv_name = 's';
+  conv.max_width = 5; // this means the result takes up 6 characters (with \0).
+  conv.output_ptr = result;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(5));
+  ASSERT_STREQ(result, "abcDE");
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+  ASSERT_STREQ(result, "F123");
+
+  // %s skips all spaces before beginning to read.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(15));
+  ASSERT_STREQ(result, "654LK");
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_STREQ(result, "Jihg");
+}
+
+TEST(LlvmLibcScanfConverterTest, CharsConv) {
+  const char *str = "abcDEF123 654LKJihg MNOpqr&*(";
+  char result[20];
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  __llvm_libc::scanf_core::FormatSection conv;
+  conv.has_conv = true;
+  conv.conv_name = 'c';
+  conv.output_ptr = result;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(1));
+  ASSERT_EQ(result[0], 'a');
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(2));
+  ASSERT_EQ(result[0], 'b');
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+  ASSERT_EQ(result[0], 'c');
+
+  // Switch from character by character to 8 at a time.
+  conv.max_width = 8;
+  __llvm_libc::cpp::string_view result_view(result, 8);
+
+  // %c doesn't stop on spaces.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+  ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("DEF123 6", 8));
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("54LKJihg", 8));
+
+  // %c also doesn't skip spaces at the start.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(27));
+  ASSERT_EQ(result_view, __llvm_libc::cpp::string_view(" MNOpqr&", 8));
+
+  // %c will stop on a null byte though.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(29));
+  ASSERT_EQ(__llvm_libc::cpp::string_view(result, 2),
+            __llvm_libc::cpp::string_view("*(", 2));
+}
+
+TEST(LlvmLibcScanfConverterTest, ScansetConv) {
+  const char *str = "abcDEF[123] 654LKJihg";
+  char result[20];
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  __llvm_libc::scanf_core::FormatSection conv;
+  conv.has_conv = true;
+  conv.conv_name = '[';
+  conv.output_ptr = result;
+
+  __llvm_libc::cpp::bitset<256> bitset1;
+  bitset1.set_range('a', 'c');
+  bitset1.set_range('D', 'F');
+  bitset1.set_range('1', '6');
+  bitset1.set('[');
+  bitset1.set(']');
+
+  conv.scan_set = bitset1;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+  ASSERT_EQ(__llvm_libc::cpp::string_view(result, 11),
+            __llvm_libc::cpp::string_view("abcDEF[123]", 11));
+
+  // The scanset conversion doesn't consume leading spaces. If it did it would
+  // return "654" here.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+
+  // This set is everything except for a-g.
+  __llvm_libc::cpp::bitset<256> bitset2;
+  bitset2.set_range('a', 'g');
+  bitset2.flip();
+  conv.scan_set = bitset2;
+
+  conv.max_width = 5;
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(16));
+  ASSERT_EQ(__llvm_libc::cpp::string_view(result, 5),
+            __llvm_libc::cpp::string_view(" 654L", 5));
+
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(20));
+  ASSERT_EQ(__llvm_libc::cpp::string_view(result, 4),
+            __llvm_libc::cpp::string_view("KJih", 4));
+
+  // This set is g and '\0'.
+  __llvm_libc::cpp::bitset<256> bitset3;
+  bitset3.set('g');
+  bitset3.set('\0');
+  conv.scan_set = bitset3;
+
+  // Even though '\0' is in the scanset, it should still stop on it.
+  ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(21));
+  ASSERT_EQ(__llvm_libc::cpp::string_view(result, 1),
+            __llvm_libc::cpp::string_view("g", 1));
+}