diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -77,7 +77,7 @@ } }; -enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 }; +enum PrimaryType : uint8_t { Unknown = 0, Float = 1, Pointer = 2, Integer = 3 }; // TypeDesc stores the information about a type that is relevant to printf in // a relatively compact manner. @@ -91,7 +91,7 @@ template LIBC_INLINE constexpr TypeDesc type_desc_from_type() { if constexpr (cpp::is_same_v) { - return TypeDesc{0, PrimaryType::Integer}; + return TypeDesc{0, PrimaryType::Unknown}; } else { constexpr bool isPointer = cpp::is_pointer_v; constexpr bool isFloat = cpp::is_floating_point_v; diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H +#include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/arg_list.h" #include "src/__support/common.h" @@ -47,7 +48,7 @@ // TypeDesc objects, which store the size as well as minimal type information. // This is necessary because some systems separate the floating point and // integer values in va_args. - TypeDesc desc_arr[DESC_ARR_LEN] = {{0, Integer}}; + TypeDesc desc_arr[DESC_ARR_LEN] = {type_desc_from_type()}; // TODO: Look into object stores for optimization. @@ -106,10 +107,18 @@ // get_arg_value gets the value from the arg list at index (starting at 1). // This may require parsing the format string. An index of 0 is interpreted as - // the next value. - template LIBC_INLINE T get_arg_value(size_t index) { - if (!(index == 0 || index == args_index)) - args_to_index(index); + // the next value. If the format string is not valid, it may have gaps in its + // indexes. Requesting the value for any index after a gap will fail, since + // the arg list must be read in order and with the correct types. + template LIBC_INLINE cpp::optional get_arg_value(size_t index) { + if (!(index == 0 || index == args_index)) { + bool success = args_to_index(index); + if (!success) { + // If we can't get to this index, then the value of the arg can't be + // found. + return cpp::optional(); + } + } set_type_desc(index, type_desc_from_type()); @@ -122,7 +131,7 @@ // It moves cur_args to the index requested so the the appropriate value may // be read. This may involve parsing the format string, and is in the worst // case an O(n^2) operation. - void args_to_index(size_t index); + bool args_to_index(size_t index); // get_type_desc assumes that this format string uses index mode. It iterates // through the format string until it finds a format specifier that defines diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp --- a/libc/src/stdio/printf_core/parser.cpp +++ b/libc/src/stdio/printf_core/parser.cpp @@ -13,7 +13,9 @@ #include "src/__support/arg_list.h" #include "src/__support/CPP/bit.h" +#include "src/__support/CPP/optional.h" #include "src/__support/CPP/string_view.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/ctype_utils.h" #include "src/__support/str_to_integer.h" @@ -22,10 +24,29 @@ namespace __llvm_libc { namespace printf_core { +template struct int_type_of { + using type = T; +}; +template <> struct int_type_of { + using type = fputil::FPBits::UIntType; +}; +template <> struct int_type_of { + using type = fputil::FPBits::UIntType; +}; +template using int_type_of_v = typename int_type_of::type; + #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE -#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value(index) +#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) \ + { \ + auto temp = get_arg_value(index); \ + if (!temp.has_value()) { \ + section.has_conv = false; \ + } \ + dst = cpp::bit_cast>(temp.value()); \ + } #else -#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value() +#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _) \ + dst = cpp::bit_cast>(get_next_arg_value()) #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE FormatSection Parser::get_next_section() { @@ -49,7 +70,7 @@ if (str[cur_pos] == '*') { ++cur_pos; - section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); + WRITE_ARG_VAL_SIMPLEST(section.min_width, int, parse_index(&cur_pos)); } else if (internal::isdigit(str[cur_pos])) { auto result = internal::strtointeger(str + cur_pos, 10); section.min_width = result.value; @@ -70,7 +91,7 @@ if (str[cur_pos] == '*') { ++cur_pos; - section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); + WRITE_ARG_VAL_SIMPLEST(section.precision, int, parse_index(&cur_pos)); } else if (internal::isdigit(str[cur_pos])) { auto result = internal::strtointeger(str + cur_pos, 10); @@ -87,7 +108,7 @@ case ('%'): break; case ('c'): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); break; case ('d'): case ('i'): @@ -99,24 +120,28 @@ case (LengthModifier::hh): case (LengthModifier::h): case (LengthModifier::none): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); break; case (LengthModifier::l): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index); break; case (LengthModifier::ll): case (LengthModifier::L): // This isn't in the standard, but is in other // libc implementations. - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); + + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index); break; case (LengthModifier::j): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); + + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index); break; case (LengthModifier::z): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); + + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, size_t, conv_index); break; case (LengthModifier::t): - section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); + + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index); break; } break; @@ -129,13 +154,11 @@ case ('A'): case ('g'): case ('G'): - if (lm != LengthModifier::L) - section.conv_val_raw = - cpp::bit_cast(GET_ARG_VAL_SIMPLEST(double, conv_index)); - else - section.conv_val_raw = - cpp::bit_cast::UIntType>( - GET_ARG_VAL_SIMPLEST(long double, conv_index)); + if (lm != LengthModifier::L) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index); + } else { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index); + } break; #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT @@ -143,7 +166,7 @@ #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT case ('p'): case ('s'): - section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); + WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index); break; default: // if the conversion is undefined, change this to a raw section. @@ -300,7 +323,8 @@ // has been for skipping past this conversion properly to avoid // weirdness with %%. if (conv_index == 0) { - ++local_pos; + if (str[local_pos] != '\0') + ++local_pos; continue; } @@ -380,12 +404,12 @@ ++local_pos; } - // If there is no size for the requested index, then just guess that it's an - // int. - return type_desc_from_type(); + // If there is no size for the requested index, then it's unknown. Return + // void. + return type_desc_from_type(); } -void Parser::args_to_index(size_t index) { +bool Parser::args_to_index(size_t index) { if (args_index > index) { args_index = 1; args_cur = args_start; @@ -399,6 +423,13 @@ if (cur_type_desc == type_desc_from_type()) cur_type_desc = get_type_desc(args_index); + // A type of void represents the type being unknown. If the type for the + // requested index isn't in the desc_arr and isn't found by parsing the + // string, then then advancing to the requested index is impossible. In that + // case the function returns false. + if (cur_type_desc == type_desc_from_type()) + return false; + if (cur_type_desc == type_desc_from_type()) args_cur.next_var(); else if (cur_type_desc == type_desc_from_type()) @@ -418,6 +449,7 @@ ++args_index; } + return true; } #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -474,4 +474,53 @@ EXPECT_PFORMAT_EQ(expected9, format_arr[9]); } +TEST(LlvmLibcPrintfParserTest, IndexModeGapCheck) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%1$d%2$d%4$d"; + int arg1 = 1; + int arg2 = 2; + int arg3 = 3; + int arg4 = 4; + + evaluate(format_arr, str, arg1, arg2, arg3, arg4); + + __llvm_libc::printf_core::FormatSection expected0, expected1, expected2; + + expected0.has_conv = true; + expected0.raw_string = {str, 4}; + expected0.conv_val_raw = arg1; + expected0.conv_name = 'd'; + + EXPECT_PFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = true; + expected1.raw_string = {str + 4, 4}; + expected1.conv_val_raw = arg2; + expected1.conv_name = 'd'; + + EXPECT_PFORMAT_EQ(expected1, format_arr[1]); + + expected2.has_conv = false; + expected2.raw_string = {str + 8, 4}; + + EXPECT_PFORMAT_EQ(expected2, format_arr[2]); +} + +TEST(LlvmLibcPrintfParserTest, IndexModeTrailingPercentCrash) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%2$d%"; + evaluate(format_arr, str, 1, 2); + + __llvm_libc::printf_core::FormatSection expected0, expected1; + expected0.has_conv = false; + + expected0.raw_string = {str, 4}; + EXPECT_PFORMAT_EQ(expected0, format_arr[0]); + + expected1.has_conv = false; + + expected1.raw_string = {str + 4, 1}; + EXPECT_PFORMAT_EQ(expected1, format_arr[1]); +} + #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE