diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -17,7 +17,7 @@ libc.src.__support.ctype_utils libc.src.__support.str_to_integer libc.src.__support.CPP.bit - + libc.src.string.memory_utils.memset_implementation ) add_header_library( diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -11,6 +11,7 @@ #include "src/__support/arg_list.h" #include "src/stdio/printf_core/core_structs.h" +#include "src/string/memory_utils/memset_implementations.h" #include @@ -21,16 +22,50 @@ const char *__restrict str; size_t cur_pos = 0; + internal::ArgList args_cur; +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + // args_start stores the start of the va_args, which is allows getting the + // value of arguments that have already been passed. args_index is tracked so + // that we know which argument args_cur is on. internal::ArgList args_start; - internal::ArgList args_cur; size_t args_index = 1; + enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 }; + + // TypeDesc stores the information about a type that is relevant to printf in + // a relatively compact manner. + struct TypeDesc { + uint8_t size; + PrimaryType primary_type; + constexpr bool operator==(const TypeDesc &other) const { + return (size == other.size) && (primary_type == other.primary_type); + } + }; + // TODO: Make this size configurable via a compile option. + static constexpr size_t DESC_ARR_LEN = 32; + // desc_arr stores the sizes of the variables in the ArgList. This is used in + // index mode to reduce repeated string parsing. The sizes are stored as + // TypeDesc objects, which store the size as well as minimal type information. + // This is necessary because some systems separate the floating point and + // integer values in va_args. + TypeDesc desc_arr[DESC_ARR_LEN]; + // TODO: Look into object stores for optimization. +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + public: +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE Parser(const char *__restrict new_str, internal::ArgList &args) - : str(new_str), args_start(args), args_cur(args) {} + : str(new_str), args_cur(args), args_start(args) { + inline_memset(reinterpret_cast(desc_arr), 0, + DESC_ARR_LEN * sizeof(TypeDesc)); + } +#else + Parser(const char *__restrict new_str, internal::ArgList &args) + : str(new_str), args_cur(args) {} +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE // get_next_section will parse the format string until it has a fully // specified format section. This can either be a raw format section with no @@ -53,9 +88,67 @@ // get_next_arg_value gets the next value from the arg list as type T. template T inline get_next_arg_value() { - ++args_index; return args_cur.next_var(); } + + //---------------------------------------------------- + // INDEX MODE ONLY FUNCTIONS AFTER HERE: + //---------------------------------------------------- + +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + + // parse_index parses the index of a value inside a format string. It + // assumes that str[*local_pos] points to character after a '%' or '*', and + // returns 0 if there is no closing $, or if it finds no number. If it finds a + // number, it will move local_pos past the end of the $, else it will not move + // local_pos. + size_t parse_index(size_t *local_pos); + + template + static constexpr TypeDesc TYPE_DESC{sizeof(T), PrimaryType::Integer}; + template <> + static constexpr TypeDesc TYPE_DESC{sizeof(double), + PrimaryType::Float}; + template <> + static constexpr TypeDesc TYPE_DESC{sizeof(long double), + PrimaryType::Float}; + template <> + static constexpr TypeDesc TYPE_DESC{sizeof(void *), + PrimaryType::Pointer}; + template <> + static constexpr TypeDesc TYPE_DESC{0, PrimaryType::Integer}; + + void inline set_desc_arr(size_t index, TypeDesc value) { + if (index != 0 && index <= DESC_ARR_LEN) + desc_arr[index - 1] = value; + } + + // get_arg_value gets the value from the arg list at index (starting at 1). + // This may require parsing the format string. An index of 0 is interpreted as + // the next value. + template T inline get_arg_value(size_t index) { + if (!(index == 0 || index == args_index)) + args_to_index(index); + + set_desc_arr(index, TYPE_DESC); + + ++args_index; + return get_next_arg_value(); + } + + // args_to_index assumes that this format string uses index mode. It moves + // args_index to index. If index is less than args_index, it will first move + // args_cur back to args_start and reset args_index to 1. Once index is + // greater than args_index, it will iterate through + void args_to_index(size_t index); + + // get_type_desc assumes that this format string uses index mode. It iterates + // through the format string until it finds a format specifier that defines + // the type of index, and returns a TypeDesc describing that type. It does not + // modify cur_pos. + TypeDesc get_type_desc(size_t index); + +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE }; } // namespace printf_core diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp --- a/libc/src/stdio/printf_core/parser.cpp +++ b/libc/src/stdio/printf_core/parser.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. + #include "parser.h" #include "src/__support/arg_list.h" @@ -17,8 +19,6 @@ namespace __llvm_libc { namespace printf_core { -#define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. - #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value(index) #else @@ -36,6 +36,10 @@ ++cur_pos; [[maybe_unused]] size_t conv_index = 0; +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + conv_index = parse_index(&cur_pos); +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + section.flags = parse_flags(&cur_pos); // handle width @@ -192,7 +196,7 @@ return LengthModifier::l; } case ('h'): - if (str[cur_pos + 1] == 'h') { + if (str[*local_pos + 1] == 'h') { *local_pos += 2; return LengthModifier::hh; } else { @@ -216,5 +220,190 @@ } } +//---------------------------------------------------- +// INDEX MODE ONLY FUNCTIONS AFTER HERE: +//---------------------------------------------------- + +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + +size_t Parser::parse_index(size_t *local_pos) { + if (internal::isdigit(str[*local_pos])) { + char *int_end; + size_t index = + internal::strtointeger(str + *local_pos, &int_end, 10); + if (int_end[0] != '$') + return 0; + *local_pos = 1 + int_end - str; + return index; + } + return 0; +} + +Parser::TypeDesc Parser::get_type_desc(size_t index) { + // index mode is assumed, and the indicies start at 1, so an index + // of 0 is invalid. + size_t local_pos = 0; + + while (str[local_pos]) { + if (str[local_pos] == '%') { + ++local_pos; + + size_t conv_index = parse_index(&local_pos); + + // the flags aren't relevant for this situation, but I need to skip past + // them so they're parsed but the result is discarded. + parse_flags(&local_pos); + + // handle width + if (str[local_pos] == '*') { + ++local_pos; + + size_t width_index = parse_index(&local_pos); + set_desc_arr(width_index, TYPE_DESC); + if (width_index == index) + return TYPE_DESC; + + } else if (internal::isdigit(str[local_pos])) { + while (internal::isdigit(str[local_pos])) + ++local_pos; + } + + // handle precision + if (str[local_pos] == '.') { + ++local_pos; + if (str[local_pos] == '*') { + ++local_pos; + + size_t precision_index = parse_index(&local_pos); + set_desc_arr(precision_index, TYPE_DESC); + if (precision_index == index) + return TYPE_DESC; + + } else if (internal::isdigit(str[local_pos])) { + while (internal::isdigit(str[local_pos])) + ++local_pos; + } + } + + LengthModifier lm = parse_length_modifier(&local_pos); + + // if we don't have an index for this conversion, then its position is + // unknown and all this information is irrelevant. The rest of this logic + // has been for skipping past this conversion properly to avoid + // weirdness with %%. + if (conv_index == 0) { + ++local_pos; + continue; + } + + TypeDesc conv_size = TYPE_DESC; + switch (str[local_pos]) { + case ('%'): + conv_size = TYPE_DESC; + break; + case ('c'): + conv_size = TYPE_DESC; + break; + case ('d'): + case ('i'): + case ('o'): + case ('x'): + case ('X'): + case ('u'): + switch (lm) { + case (LengthModifier::hh): + case (LengthModifier::h): + case (LengthModifier::none): + conv_size = TYPE_DESC; + break; + case (LengthModifier::l): + conv_size = TYPE_DESC; + break; + case (LengthModifier::ll): + case (LengthModifier::L): // This isn't in the standard, but is in other + // libc implementations. + conv_size = TYPE_DESC; + break; + case (LengthModifier::j): + conv_size = TYPE_DESC; + break; + case (LengthModifier::z): + conv_size = TYPE_DESC; + break; + case (LengthModifier::t): + conv_size = TYPE_DESC; + break; + } + break; + case ('f'): + case ('F'): + case ('e'): + case ('E'): + case ('a'): + case ('A'): + case ('g'): + case ('G'): + if (lm != LengthModifier::L) + conv_size = TYPE_DESC; + else + conv_size = TYPE_DESC; + break; + case ('n'): + case ('p'): + case ('s'): + conv_size = TYPE_DESC; + break; + default: + conv_size = TYPE_DESC; + break; + } + + set_desc_arr(conv_index, conv_size); + if (conv_index == index) + return conv_size; + } + ++local_pos; + } + + // If there is no size for the requested index, then just guess that it's an + // int. + return TYPE_DESC; +} + +void Parser::args_to_index(size_t index) { + if (args_index > index) { + args_index = 1; + args_cur = args_start; + } + + while (args_index < index) { + Parser::TypeDesc cur_type_desc = TYPE_DESC; + if (args_index <= DESC_ARR_LEN) + cur_type_desc = desc_arr[args_index - 1]; + + if (cur_type_desc == TYPE_DESC) + cur_type_desc = get_type_desc(args_index); + + if (cur_type_desc == TYPE_DESC) + args_cur.next_var(); + else if (cur_type_desc == TYPE_DESC) + args_cur.next_var(); + // Floating point numbers are stored separately from the other arguments. + else if (cur_type_desc == TYPE_DESC) + args_cur.next_var(); + else if (cur_type_desc == TYPE_DESC) + args_cur.next_var(); + // pointers may be stored separately from normal values. + else if (cur_type_desc == TYPE_DESC) + args_cur.next_var(); + else + args_cur.next_var(); + + ++args_index; + } +} + +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -260,3 +260,199 @@ ASSERT_FORMAT_EQ(expected2, format_arr[2]); } + +#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE + +TEST(LlvmLibcPrintfParserTest, IndexModeOneArg) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%1$d"; + int arg1 = 12345; + evaluate(format_arr, str, arg1); + + __llvm_libc::printf_core::FormatSection expected; + expected.has_conv = true; + expected.raw_len = 4; + expected.raw_string = str; + expected.conv_val_raw = arg1; + expected.conv_name = 'd'; + + ASSERT_FORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsSequential) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%1$d%2$f%3$s"; + int arg1 = 12345; + double arg2 = 123.45; + const char *arg3 = "12345"; + evaluate(format_arr, str, arg1, arg2, arg3); + + __llvm_libc::printf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = true; + expected0.raw_len = 4; + expected0.raw_string = str; + expected0.conv_val_raw = arg1; + expected0.conv_name = 'd'; + + ASSERT_FORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + expected1.raw_len = 4; + expected1.raw_string = str + 4; + expected1.conv_val_raw = __llvm_libc::bit_cast(arg2); + expected1.conv_name = 'f'; + + ASSERT_FORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = true; + expected2.raw_len = 4; + expected2.raw_string = str + 8; + expected2.conv_val_ptr = const_cast(arg3); + expected2.conv_name = 's'; + + ASSERT_FORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsReverse) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%3$d%2$f%1$s"; + int arg1 = 12345; + double arg2 = 123.45; + const char *arg3 = "12345"; + evaluate(format_arr, str, arg3, arg2, arg1); + + __llvm_libc::printf_core::FormatSection expected0, expected1, expected2; + expected0.has_conv = true; + expected0.raw_len = 4; + expected0.raw_string = str; + expected0.conv_val_raw = arg1; + expected0.conv_name = 'd'; + + ASSERT_FORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + expected1.raw_len = 4; + expected1.raw_string = str + 4; + expected1.conv_val_raw = __llvm_libc::bit_cast(arg2); + expected1.conv_name = 'f'; + + ASSERT_FORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = true; + expected2.raw_len = 4; + expected2.raw_string = str + 8; + expected2.conv_val_ptr = const_cast(arg3); + expected2.conv_name = 's'; + + ASSERT_FORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcPrintfParserTest, IndexModeTenArgsRandom) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%6$d%3$d%7$d%2$d%8$d%1$d%4$d%9$d%5$d%10$d"; + int args[10] = {6, 4, 2, 7, 9, 1, 3, 5, 8, 10}; + evaluate(format_arr, str, args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7], args[8], args[9]); + + for (size_t i = 0; i < 10; ++i) { + __llvm_libc::printf_core::FormatSection expected; + expected.has_conv = true; + expected.raw_len = 4 + (i >= 9 ? 1 : 0); + expected.raw_string = str + (4 * i); + expected.conv_val_raw = i + 1; + expected.conv_name = 'd'; + EXPECT_FORMAT_EQ(expected, format_arr[i]); + } +} + +TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "normal text %3$llu %% %2$ *4$f %2$ .*4$f %1$1.1c"; + char arg1 = '1'; + double arg2 = 123.45; + unsigned long long arg3 = 12345; + int arg4 = 10; + evaluate(format_arr, str, arg1, arg2, arg3, arg4); + + __llvm_libc::printf_core::FormatSection expected0, expected1, expected2, + expected3, expected4, expected5, expected6, expected7, expected8, + expected9; + + expected0.has_conv = false; + expected0.raw_len = 12; + expected0.raw_string = str; + + EXPECT_FORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + expected1.raw_len = 6; + expected1.raw_string = str + 12; + expected1.length_modifier = __llvm_libc::printf_core::LengthModifier::ll; + expected1.conv_val_raw = arg3; + expected1.conv_name = 'u'; + + EXPECT_FORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = false; + expected2.raw_len = 1; + expected2.raw_string = str + 18; + + EXPECT_FORMAT_EQ(expected2, format_arr[2]); + + expected3.has_conv = true; + expected3.raw_len = 2; + expected3.raw_string = str + 19; + expected3.conv_name = '%'; + + EXPECT_FORMAT_EQ(expected3, format_arr[3]); + + expected4.has_conv = false; + expected4.raw_len = 1; + expected4.raw_string = str + 21; + + EXPECT_FORMAT_EQ(expected4, format_arr[4]); + + expected5.has_conv = true; + expected5.raw_len = 8; + expected5.raw_string = str + 22; + expected5.flags = __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX; + expected5.min_width = arg4; + expected5.conv_val_raw = __llvm_libc::bit_cast(arg2); + expected5.conv_name = 'f'; + + EXPECT_FORMAT_EQ(expected5, format_arr[5]); + + expected6.has_conv = false; + expected6.raw_len = 1; + expected6.raw_string = str + 30; + + EXPECT_FORMAT_EQ(expected6, format_arr[6]); + + expected7.has_conv = true; + expected7.raw_len = 9; + expected7.raw_string = str + 31; + expected7.flags = __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX; + expected7.precision = arg4; + expected7.conv_val_raw = __llvm_libc::bit_cast(arg2); + expected7.conv_name = 'f'; + + EXPECT_FORMAT_EQ(expected7, format_arr[7]); + + expected8.has_conv = false; + expected8.raw_len = 1; + expected8.raw_string = str + 40; + + EXPECT_FORMAT_EQ(expected8, format_arr[8]); + + expected9.has_conv = true; + expected9.raw_len = 7; + expected9.raw_string = str + 41; + expected9.min_width = 1; + expected9.precision = 1; + expected9.conv_val_raw = arg1; + expected9.conv_name = 'c'; + + EXPECT_FORMAT_EQ(expected9, format_arr[9]); +} + +#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE