diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -384,6 +384,7 @@ __format/range_formatter.h __format/unicode.h __format/width_estimation_table.h + __format/write_escaped.h __functional/binary_function.h __functional/binary_negate.h __functional/bind.h diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h --- a/libcxx/include/__chrono/formatter.h +++ b/libcxx/include/__chrono/formatter.h @@ -38,8 +38,8 @@ #include <__format/format_functions.h> #include <__format/format_parse_context.h> #include <__format/formatter.h> -#include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> +#include <__format/write_escaped.h> #include <__memory/addressof.h> #include #include diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -19,6 +19,7 @@ #include <__format/formatter_integral.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> +#include <__format/write_escaped.h> #include <__type_traits/conditional.h> #include <__type_traits/is_signed.h> diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -28,6 +28,7 @@ #include <__format/formatter_integral.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> +#include <__iterator/concepts.h> #include <__memory/allocator.h> #include <__system_error/errc.h> #include <__type_traits/conditional.h> @@ -608,6 +609,37 @@ return __formatter::__write(__buffer, __last, _VSTD::move(__out_it), __specs); } +/// Writes additional zero's for the precision before the exponent. +/// This is used when the precision requested in the format string is larger +/// than the maximum precision of the floating-point type. These precision +/// digits are always 0. +/// +/// \param __exponent The location of the exponent character. +/// \param __num_trailing_zeros The number of 0's to write before the exponent +/// character. +template +_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros( + const _CharT* __first, + const _CharT* __last, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, + size_t __size, + const _CharT* __exponent, + size_t __num_trailing_zeros) -> decltype(__out_it) { + _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range"); + _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0, + "The overload not writing trailing zeros should have been used"); + + __padding_size_result __padding = + __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it)); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); + __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it)); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} + + template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) { diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -203,6 +203,72 @@ + 1; // Reserve space for the sign. } +template +_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, + const char* __last, string&& __grouping, _CharT __sep, + __format_spec::__parsed_specifications<_CharT> __specs) { + int __size = (__first - __begin) + // [sign][prefix] + (__last - __first) + // data + (__grouping.size() - 1); // number of separator characters + + __padding_size_result __padding = {0, 0}; + if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) { + // Write [sign][prefix]. + __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); + + if (__specs.__width_ > __size) { + // Write zero padding. + __padding.__before_ = __specs.__width_ - __size; + __out_it = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0')); + } + } else { + if (__specs.__width_ > __size) { + // Determine padding and write padding. + __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_); + + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + } + // Write [sign][prefix]. + __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); + } + + auto __r = __grouping.rbegin(); + auto __e = __grouping.rend() - 1; + _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while " + "there will be no separators written."); + // The output is divided in small groups of numbers to write: + // - A group before the first separator. + // - A separator and a group, repeated for the number of separators. + // - A group after the last separator. + // This loop achieves that process by testing the termination condition + // midway in the loop. + // + // TODO FMT This loop evaluates the loop invariant `__parser.__type != + // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test + // happens in the __write call.) Benchmark whether making two loops and + // hoisting the invariant is worth the effort. + while (true) { + if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) { + __last = __first + *__r; + __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper); + __first = __last; + } else { + __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it)); + __first += *__r; + } + + if (__r == __e) + break; + + ++__r; + *__out_it++ = __sep; + } + + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} + + + template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer( _Tp __value, diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -12,29 +12,22 @@ #include <__algorithm/ranges_copy.h> #include <__algorithm/ranges_fill_n.h> -#include <__algorithm/ranges_for_each.h> #include <__algorithm/ranges_transform.h> #include <__bit/countl.h> -#include <__charconv/to_chars_integral.h> -#include <__charconv/to_chars_result.h> -#include <__chrono/statically_widen.h> #include <__concepts/same_as.h> #include <__config> #include <__format/buffer.h> #include <__format/concepts.h> -#include <__format/escaped_output_table.h> #include <__format/formatter.h> #include <__format/parser_std_format_spec.h> #include <__format/unicode.h> #include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> // iter_value_t -#include <__system_error/errc.h> -#include <__type_traits/make_unsigned.h> +#include <__memory/addressof.h> #include <__utility/move.h> #include <__utility/unreachable.h> #include -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -207,70 +200,6 @@ } # endif // _LIBCPP_HAS_NO_UNICODE -template -_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, - const char* __last, string&& __grouping, _CharT __sep, - __format_spec::__parsed_specifications<_CharT> __specs) { - int __size = (__first - __begin) + // [sign][prefix] - (__last - __first) + // data - (__grouping.size() - 1); // number of separator characters - - __padding_size_result __padding = {0, 0}; - if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) { - // Write [sign][prefix]. - __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); - - if (__specs.__width_ > __size) { - // Write zero padding. - __padding.__before_ = __specs.__width_ - __size; - __out_it = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0')); - } - } else { - if (__specs.__width_ > __size) { - // Determine padding and write padding. - __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_); - - __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); - } - // Write [sign][prefix]. - __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); - } - - auto __r = __grouping.rbegin(); - auto __e = __grouping.rend() - 1; - _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while " - "there will be no separators written."); - // The output is divided in small groups of numbers to write: - // - A group before the first separator. - // - A separator and a group, repeated for the number of separators. - // - A group after the last separator. - // This loop achieves that process by testing the termination condition - // midway in the loop. - // - // TODO FMT This loop evaluates the loop invariant `__parser.__type != - // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test - // happens in the __write call.) Benchmark whether making two loops and - // hoisting the invariant is worth the effort. - while (true) { - if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) { - __last = __first + *__r; - __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper); - __first = __last; - } else { - __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it)); - __first += *__r; - } - - if (__r == __e) - break; - - ++__r; - *__out_it++ = __sep; - } - - return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); -} - /// Writes the input to the output with the required padding. /// /// Since the output column width is specified the function can be used for @@ -348,36 +277,6 @@ return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } -/// Writes additional zero's for the precision before the exponent. -/// This is used when the precision requested in the format string is larger -/// than the maximum precision of the floating-point type. These precision -/// digits are always 0. -/// -/// \param __exponent The location of the exponent character. -/// \param __num_trailing_zeros The number of 0's to write before the exponent -/// character. -template -_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros( - const _CharT* __first, - const _CharT* __last, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_ParserCharT> __specs, - size_t __size, - const _CharT* __exponent, - size_t __num_trailing_zeros) -> decltype(__out_it) { - _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range"); - _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0, - "The overload not writing trailing zeros should have been used"); - - __padding_size_result __padding = - __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_); - __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); - __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it)); - __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); - __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it)); - return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); -} - /// Writes a string using format's width estimation algorithm. /// /// \pre !__specs.__has_precision() @@ -412,183 +311,6 @@ return __result.__width_; } -/// Writes a string using format's width estimation algorithm. -/// -/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the -/// input is ASCII. -template -_LIBCPP_HIDE_FROM_ABI auto __write_string( - basic_string_view<_CharT> __str, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { - if (!__specs.__has_precision()) - return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs); - - int __size = __formatter::__truncate(__str, __specs.__precision_); - - return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); -} - -# if _LIBCPP_STD_VER >= 23 - -struct __nul_terminator {}; - -template -_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { - return *__cstr == _CharT('\0'); -} - -template -_LIBCPP_HIDE_FROM_ABI void -__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { - back_insert_iterator __out_it{__str}; - std::ranges::copy(__prefix, __nul_terminator{}, __out_it); - - char __buffer[8]; - to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); - std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); - - __str += _CharT('}'); -} - -// [format.string.escaped]/2.2.1.2 -// ... -// then the sequence \u{hex-digit-sequence} is appended to E, where -// hex-digit-sequence is the shortest hexadecimal representation of C using -// lower-case hexadecimal digits. -template -_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { - __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); -} - -// [format.string.escaped]/2.2.3 -// Otherwise (X is a sequence of ill-formed code units), each code unit U is -// appended to E in order as the sequence \x{hex-digit-sequence}, where -// hex-digit-sequence is the shortest hexadecimal representation of U using -// lower-case hexadecimal digits. -template -_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { - __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); -} - -template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { -# ifdef _LIBCPP_HAS_NO_UNICODE - // For ASCII assume everything above 127 is printable. - if (__value > 127) - return false; -# endif - - if (!__escaped_output_table::__needs_escape(__value)) - return false; - - __formatter::__write_well_formed_escaped_code_unit(__str, __value); - return true; -} - -template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { - return static_cast>(__value); -} - -enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote }; - -// [format.string.escaped]/2 -template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool -__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { - // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] - switch (__value) { - case _CharT('\t'): - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); - return true; - case _CharT('\n'): - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); - return true; - case _CharT('\r'): - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); - return true; - case _CharT('\''): - if (__mark == __escape_quotation_mark::__apostrophe) - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); - else - __str += __value; - return true; - case _CharT('"'): - if (__mark == __escape_quotation_mark::__double_quote) - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); - else - __str += __value; - return true; - case _CharT('\\'): - __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); - return true; - - // 2.2.1.2 - Space - case _CharT(' '): - __str += __value; - return true; - } - - // 2.2.2 - // Otherwise, if X is a shift sequence, the effect on E and further - // decoding of S is unspecified. - // For now shift sequences are ignored and treated as Unicode. Other parts - // of the format library do the same. It's unknown how ostream treats them. - // TODO FMT determine what to do with shift sequences. - - // 2.2.1.2.1 and 2.2.1.2.2 - Escape - return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); -} - -template -_LIBCPP_HIDE_FROM_ABI void -__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { - __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; - - while (!__view.__at_end()) { - auto __first = __view.__position(); - typename __unicode::__consume_result __result = __view.__consume(); - if (__result.__status == __unicode::__consume_result::__ok) { - if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) - // 2.2.1.3 - Add the character - ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); - } else { - // 2.2.3 sequence of ill-formed code units - ranges::for_each(__first, __view.__position(), [&](_CharT __value) { - __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); - }); - } - } -} - -template -_LIBCPP_HIDE_FROM_ABI auto -__format_escaped_char(_CharT __value, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { - basic_string<_CharT> __str; - __str += _CharT('\''); - __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); - __str += _CharT('\''); - return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size()); -} - -template -_LIBCPP_HIDE_FROM_ABI auto -__format_escaped_string(basic_string_view<_CharT> __values, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { - basic_string<_CharT> __str; - __str += _CharT('"'); - __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); - __str += _CharT('"'); - return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs); -} - -# endif // _LIBCPP_STD_VER >= 23 - } // namespace __formatter #endif //_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -17,6 +17,7 @@ #include <__format/formatter.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> +#include <__format/write_escaped.h> #include #include diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__format/write_escaped.h @@ -0,0 +1,222 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H +#define _LIBCPP___FORMAT_WRITE_ESCAPED_H + +#include <__algorithm/ranges_copy.h> +#include <__algorithm/ranges_for_each.h> +#include <__charconv/to_chars_integral.h> +#include <__charconv/to_chars_result.h> +#include <__chrono/statically_widen.h> +#include <__format/escaped_output_table.h> +#include <__format/formatter_output.h> +#include <__format/parser_std_format_spec.h> +#include <__format/unicode.h> +#include <__iterator/back_insert_iterator.h> +#include <__memory/addressof.h> +#include <__system_error/errc.h> +#include <__type_traits/make_unsigned.h> +#include <__utility/move.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + + +namespace __formatter { + +#if _LIBCPP_STD_VER >= 20 + +/// Writes a string using format's width estimation algorithm. +/// +/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the +/// input is ASCII. +template +_LIBCPP_HIDE_FROM_ABI auto __write_string( + basic_string_view<_CharT> __str, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { + if (!__specs.__has_precision()) + return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs); + + int __size = __formatter::__truncate(__str, __specs.__precision_); + + return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); +} + +# endif // _LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 + +struct __nul_terminator {}; + +template +_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { + return *__cstr == _CharT('\0'); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { + back_insert_iterator __out_it{__str}; + std::ranges::copy(__prefix, __nul_terminator{}, __out_it); + + char __buffer[8]; + to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); + _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); + + __str += _CharT('}'); +} + +// [format.string.escaped]/2.2.1.2 +// ... +// then the sequence \u{hex-digit-sequence} is appended to E, where +// hex-digit-sequence is the shortest hexadecimal representation of C using +// lower-case hexadecimal digits. +template +_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { + __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); +} + +// [format.string.escaped]/2.2.3 +// Otherwise (X is a sequence of ill-formed code units), each code unit U is +// appended to E in order as the sequence \x{hex-digit-sequence}, where +// hex-digit-sequence is the shortest hexadecimal representation of U using +// lower-case hexadecimal digits. +template +_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { + __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); +} + +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { +# ifdef _LIBCPP_HAS_NO_UNICODE + // For ASCII assume everything above 127 is printable. + if (__value > 127) + return false; +# endif + + if (!__escaped_output_table::__needs_escape(__value)) + return false; + + __formatter::__write_well_formed_escaped_code_unit(__str, __value); + return true; +} + +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { + return static_cast>(__value); +} + +enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote }; + +// [format.string.escaped]/2 +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool +__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { + // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] + switch (__value) { + case _CharT('\t'): + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); + return true; + case _CharT('\n'): + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); + return true; + case _CharT('\r'): + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); + return true; + case _CharT('\''): + if (__mark == __escape_quotation_mark::__apostrophe) + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); + else + __str += __value; + return true; + case _CharT('"'): + if (__mark == __escape_quotation_mark::__double_quote) + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); + else + __str += __value; + return true; + case _CharT('\\'): + __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); + return true; + + // 2.2.1.2 - Space + case _CharT(' '): + __str += __value; + return true; + } + + // 2.2.2 + // Otherwise, if X is a shift sequence, the effect on E and further + // decoding of S is unspecified. + // For now shift sequences are ignored and treated as Unicode. Other parts + // of the format library do the same. It's unknown how ostream treats them. + // TODO FMT determine what to do with shift sequences. + + // 2.2.1.2.1 and 2.2.1.2.2 - Escape + return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { + __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; + + while (!__view.__at_end()) { + auto __first = __view.__position(); + typename __unicode::__consume_result __result = __view.__consume(); + if (__result.__status == __unicode::__consume_result::__ok) { + if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) + // 2.2.1.3 - Add the character + ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); + } else { + // 2.2.3 sequence of ill-formed code units + ranges::for_each(__first, __view.__position(), [&](_CharT __value) { + __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); + }); + } + } +} + +template +_LIBCPP_HIDE_FROM_ABI auto +__format_escaped_char(_CharT __value, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { + basic_string<_CharT> __str; + __str += _CharT('\''); + __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); + __str += _CharT('\''); + return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size()); +} + +template +_LIBCPP_HIDE_FROM_ABI auto +__format_escaped_string(basic_string_view<_CharT> __values, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { + basic_string<_CharT> __str; + __str += _CharT('"'); + __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); + __str += _CharT('"'); + return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs); +} + +# endif // _LIBCPP_STD_VER >= 23 + +} // namespace __formatter + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1015,6 +1015,7 @@ module range_formatter { private header "__format/range_formatter.h" } module unicode { private header "__format/unicode.h" } module width_estimation_table { private header "__format/width_estimation_table.h" } + module write_escaped { private header "__format/write_escaped.h" } } } module forward_list { diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -215,6 +215,7 @@ libcxx/include/__format/formatter_string.h libcxx/include/__format/parser_std_format_spec.h libcxx/include/__format/unicode.h +libcxx/include/__format/write_escaped.h libcxx/include/forward_list libcxx/include/fstream libcxx/include/__functional/binary_function.h