diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -43,6 +43,7 @@ - P2675R1 - ``format``'s width estimation is too approximate and not forward compatible - P2505R5 - Monadic operations for ``std::expected`` - P2711R1 - Making Multi-Param Constructors Of views explicit (``join_with_view`` is not done yet) +- P2572R1 - ``std::format`` fill character allowances Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -112,7 +112,7 @@ "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" "`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|" -"`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","","","|format|" +"`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","|Complete|","17.0","|format|" "`P2693R1 `__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|" "`P2679R2 `__","LWG", "Fixing ``std::start_lifetime_as`` for arrays","February 2023","","","" "`P2674R1 `__","LWG", "A trait for implicit lifetime types","February 2023","","","" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -12,7 +12,7 @@ "`P2539R4 `__","Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?","C++23","Mark de Wever" "`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","" "`P2675R1 `__","``format``'s width 
estimation is too approximate and not forward compatible","C++23","Mark de Wever","|Complete|", Clang 17 -"`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|In progress|" +"`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|Complete|", Clang 17 "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In progress|" `P1361 `_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|, `P2372 `__,"Fixing locale handling in chrono formatters","C++20",Mark de Wever,|In Progress|, diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -528,7 +528,7 @@ if (__size < __specs.__width_) { if (__zero_padding) { __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); } __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_); @@ -712,7 +712,7 @@ // After the sign is written, zero padding is the same a right alignment // with '0'. __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); } if (__num_trailing_zeros) diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -251,7 +251,7 @@ // - Write data right aligned with '0' as fill character. 
__out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); int32_t __size = __first - __begin; __specs.__width_ -= _VSTD::min(__size, __specs.__width_); diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -14,6 +14,7 @@ #include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_for_each.h> #include <__algorithm/ranges_transform.h> +#include <__bit/countl.h> #include <__charconv/to_chars_integral.h> #include <__charconv/to_chars_result.h> #include <__chrono/statically_widen.h> @@ -166,6 +167,46 @@ } } +# ifndef _LIBCPP_HAS_NO_UNICODE +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, char>) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { + std::size_t __bytes = std::countl_one(static_cast(__value.__data[0])); + if (__bytes == 0) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it)); + return __out_it; +} + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { + if (!__unicode::__is_high_surrogate(__value.__data[0])) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, 
std::move(__out_it)); + return __out_it; +} + +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); +} +# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# else // _LIBCPP_HAS_NO_UNICODE +template <__fmt_char_type _CharT, output_iterator _OutIt> +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); +} +# endif // _LIBCPP_HAS_NO_UNICODE + template _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, const char* __last, string&& __grouping, _CharT __sep, diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -16,6 +16,7 @@ /// This header has some support for the chrono-format-spec since it doesn't /// affect the std-format-spec. +#include <__algorithm/copy_n.h> #include <__algorithm/find_if.h> #include <__algorithm/min.h> #include <__assert> @@ -31,6 +32,7 @@ #include <__format/width_estimation_table.h> #include <__iterator/concepts.h> #include <__iterator/readable_traits.h> // iter_value_t +#include <__memory/addressof.h> #include <__type_traits/common_type.h> #include <__type_traits/is_trivially_copyable.h> #include <__variant/monostate.h> @@ -220,6 +222,25 @@ bool __month_name_ : 1; }; +// The fill UCS scalar value. +// +// This is always an array, with 1, 2, or 4 elements. +// The size of the data structure is always 32-bits. 
+template <class _CharT> +struct __code_point; + +template <> +struct __code_point<char> { + char __data[4] = {' '}; +}; + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <> +struct __code_point<wchar_t> { + wchar_t __data[4 / sizeof(wchar_t)] = {L' '}; +}; +# endif + /// Contains the parsed formatting specifications. /// /// This contains information for both the std-format-spec and the @@ -255,7 +276,7 @@ /// replaced with the value of that arg-id. int32_t __precision_; - _CharT __fill_; + __code_point<_CharT> __fill_; _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } @@ -385,11 +406,7 @@ /// The requested precision, either the value or the arg-id. int32_t __precision_{-1}; - // LWG 3576 will probably change this to always accept a Unicode code point - // To avoid changing the size with that change align the field so when it - // becomes 32-bit its alignment will remain the same. That also means the - // size will remain the same. (D2572 addresses the solution for LWG 3576.) - _CharT __fill_{_CharT(' ')}; + __code_point<_CharT> __fill_{}; private: _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { @@ -409,19 +426,90 @@ return false; } + _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) { + // The forbidden fill characters are all code points formed from a single code unit, thus the + // check can be omitted when more code units are used. 
+ if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':'))) + std::__throw_format_error("The format-spec range-fill field contains an invalid character"); + else if (__fill == _CharT('{') || __fill == _CharT('}')) + std::__throw_format_error("The format-spec fill field contains an invalid character"); + } + +# ifndef _LIBCPP_HAS_NO_UNICODE // range-fill and tuple-fill are identical template + requires same_as<_CharT, char> +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) +# endif _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { - _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause " - "undefined behavior by evaluating data not in the input"); + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + __unicode::__code_point_view<_CharT> __view{__begin, __end}; + __unicode::__consume_result __consumed = __view.__consume(); + if (__consumed.__status != __unicode::__consume_result::__ok) + std::__throw_format_error("The format-spec contains malformed Unicode characters"); + + if (__view.__position() < __end && __parse_alignment(*__view.__position())) { + ptrdiff_t __code_units = __view.__position() - __begin; + if (__code_units == 1) + // The forbidden fill characters all are code points encoded + // in one code unit, thus the check can be omitted when more + // code units are used. 
+ __validate_fill_character(*__begin, __use_range_fill); + + std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0])); + __begin += __code_units + 1; + return true; + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + template + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) { + if (!__unicode::__is_scalar_value(*__begin)) + std::__throw_format_error("The fill character contains an invalid value"); + + __validate_fill_character(*__begin, __use_range_fill); + + __fill_.__data[0] = *__begin; + __begin += 2; + return true; + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + +# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS + +# else // _LIBCPP_HAS_NO_UNICODE + // range-fill and tuple-fill are identical + template + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); if (__begin + 1 != __end) { if (__parse_alignment(*(__begin + 1))) { - if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':'))) - std::__throw_format_error("The format-spec range-fill field contains an invalid character"); - else if (*__begin == _CharT('{') || *__begin == _CharT('}')) - std::__throw_format_error("The format-spec fill field contains an invalid character"); + __validate_fill_character(*__begin, __use_range_fill); - __fill_ = *__begin; + 
__fill_.__data[0] = *__begin; __begin += 2; return true; } @@ -434,6 +522,8 @@ return true; } +# endif // _LIBCPP_HAS_NO_UNICODE + template _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) { switch (*__begin) { diff --git a/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp @@ -0,0 +1,138 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT Evaluate gcc-12 status + +// This version runs the test when the platform has Unicode support. +// UNSUPPORTED: libcpp-has-no-unicode + +// XFAIL: availability-fp_to_chars-missing + +// + +// The paper +// P2572R1 std::format fill character allowances +// adds support for Unicode Scalar Values as fill character. + +#include + +#include "assert_macros.h" +#include "concat_macros.h" +#include "format.functions.common.h" +#include "make_string.h" +#include "string_literal.h" +#include "test_format_string.h" +#include "test_macros.h" + +#define SV(S) MAKE_STRING_VIEW(CharT, S) + +auto check = []( + std::basic_string_view expected, test_format_string fmt, Args&&... 
args) { + std::basic_string out = std::format(fmt, std::forward(args)...); + TEST_REQUIRE(out == expected, + TEST_WRITE_CONCATENATED( + "\nFormat string ", fmt.get(), "\nExpected output ", expected, "\nActual output ", out, '\n')); +}; + +auto check_exception = + []( + [[maybe_unused]] std::string_view what, + [[maybe_unused]] std::basic_string_view fmt, + [[maybe_unused]] Args&&... args) { + TEST_VALIDATE_EXCEPTION( + std::format_error, + [&]([[maybe_unused]] const std::format_error& e) { + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED( + "\nFormat string ", fmt, "\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::vformat(fmt, std::make_format_args>(args...))); + }; + +template +void test() { + // 1, 2, 3, 4 code unit UTF-8 transitions + check(SV("\u000042\u0000"), SV("{:\u0000^4}"), 42); + check(SV("\u007f42\u007f"), SV("{:\u007f^4}"), 42); + check(SV("\u008042\u0080"), SV("{:\u0080^4}"), 42); + check(SV("\u07ff42\u07ff"), SV("{:\u07ff^4}"), 42); + check(SV("\u080042\u0800"), SV("{:\u0800^4}"), 42); + check(SV("\uffff42\uffff"), SV("{:\uffff^4}"), 42); + check(SV("\U0010000042\U00100000"), SV("{:\U00100000^4}"), 42); + check(SV("\U0010ffff42\U0010ffff"), SV("{:\U0010ffff^4}"), 42); + + // Examples of P2572R1 + check(SV("🤡🤡x🤡🤡🤡"), SV("{:🤡^6}"), SV("x")); + check(SV("🤡🤡🤡"), SV("{:*^6}"), SV("🤡🤡🤡")); + check(SV("12345678"), SV("{:*>6}"), SV("12345678")); + + // Invalid Unicode Scalar Values + if constexpr (std::same_as) { + check_exception("The format-spec contains malformed Unicode characters", SV("{:\xed\xa0\x80^}"), 42); // U+D800 + check_exception("The format-spec contains malformed Unicode characters", SV("{:\xed\xa0\xbf^}"), 42); // U+DBFF + check_exception("The format-spec contains malformed Unicode characters", SV("{:\xed\xbf\x80^}"), 42); // U+DC00 + check_exception("The format-spec contains malformed Unicode characters", SV("{:\xed\xbf\xbf^}"), 42); // U+DFFF + + check_exception( + 
"The format-spec contains malformed Unicode characters", SV("{:\xf4\x90\x80\x80^}"), 42); // U+110000 + check_exception( + "The format-spec contains malformed Unicode characters", SV("{:\xf4\x90\xbf\xbf^}"), 42); // U+11FFFF + + check_exception("The format-spec contains malformed Unicode characters", + SV("{:\x80^}"), + 42); // Trailing code unit with no leading one. + check_exception( + "The format-spec contains malformed Unicode characters", SV("{:\xc0^}"), 42); // Missing trailing code unit. + check_exception( + "The format-spec contains malformed Unicode characters", SV("{:\xe0\x80^}"), 42); // Missing trailing code unit. + check_exception("The format-spec contains malformed Unicode characters", + SV("{:\xf0\x80^}"), + 42); // Missing two trailing code units. + check_exception("The format-spec contains malformed Unicode characters", + SV("{:\xf0\x80\x80^}"), + 42); // Missing trailing code unit. + +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + } else { +# ifdef TEST_SHORT_WCHAR + check_exception("The format-spec contains malformed Unicode characters", std::wstring_view{L"{:\xd800^}"}, 42); + check_exception("The format-spec contains malformed Unicode characters", std::wstring_view{L"{:\xdbff^}"}, 42); + check_exception("The format-spec contains malformed Unicode characters", std::wstring_view{L"{:\xdc00^}"}, 42); + check_exception("The format-spec contains malformed Unicode characters", std::wstring_view{L"{:\xddff^}"}, 42); + + check_exception("The format-spec contains malformed Unicode characters", + std::wstring_view{L"{:\xdc00\xd800^}"}, + 42); // Reverted surrogates. 
+ +# else // TEST_SHORT_WCHAR + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xd800^}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xdbff^}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xdc00^}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xddff^}"}, 42); + + check_exception( + "The format-spec should consume the input or end with a '}'", std::wstring_view{L"{:\xdc00\xd800^}"}, 42); + + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\x00110000^}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\x0011ffff^}"}, 42); +# endif // TEST_SHORT_WCHAR +#endif // TEST_HAS_NO_WIDE_CHARACTERS + } +} + +int main(int, char**) { + test(); + +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -229,6 +229,7 @@ ! grep -rn '[^ -~]' libcxx/include libcxx/src libcxx/test libcxx/benchmarks \ --exclude '*.dat' \ --exclude 'escaped_output.*.pass.cpp' \ + --exclude 'fill.unicode.pass.cpp' \ --exclude 'format_tests.h' \ --exclude 'format.functions.tests.h' \ --exclude 'formatter.*.pass.cpp' \