diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -41,6 +41,7 @@ - P1328R1 - ``constexpr type_info::operator==()`` - P1413R3 - Formatting ``thread::id`` (the ``stacktrace`` is not done yet) - P2675R1 - ``format``'s width estimation is too approximate and not forward compatible +- P2572R1 - ``std::format`` fill character allowances Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -112,7 +112,7 @@ "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" "`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|" -"`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","","","|format|" +"`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","|Complete|","17.0","|format|" "`P2693R1 `__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|" "`P2679R2 `__","LWG", "Fixing ``std::start_lifetime_as`` for arrays","February 2023","","","" "`P2674R1 `__","LWG", "A trait for implicit lifetime types","February 2023","","","" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -12,7 +12,7 @@ "`P2539R4 `__","Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?","C++23","Mark de Wever" "`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","" "`P2675R1 `__","``format``'s width estimation is too 
approximate and not forward compatible","C++23","Mark de Wever","|Complete|", Clang 17 -"`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|In progress|" +"`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|Complete|", Clang 17 "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In progress|" `P1361 `_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|, `P2372 `__,"Fixing locale handling in chrono formatters","C++20",Mark de Wever,|In Progress|, diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -528,7 +528,7 @@ if (__size < __specs.__width_) { if (__zero_padding) { __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); } __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_); @@ -713,7 +713,7 @@ // After the sign is written, zero padding is the same a right alignment // with '0'. __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); } if (__num_trailing_zeros) diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -251,7 +251,7 @@ // - Write data right aligned with '0' as fill character. 
__out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); __specs.__alignment_ = __format_spec::__alignment::__right; - __specs.__fill_ = _CharT('0'); + __specs.__fill_.__data[0] = _CharT('0'); int32_t __size = __first - __begin; __specs.__width_ -= _VSTD::min(__size, __specs.__width_); diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -14,6 +14,7 @@ #include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_for_each.h> #include <__algorithm/ranges_transform.h> +#include <__bit/countl.h> #include <__charconv/to_chars_integral.h> #include <__charconv/to_chars_result.h> #include <__chrono/statically_widen.h> @@ -166,6 +167,46 @@ } } +# ifndef _LIBCPP_HAS_NO_UNICODE +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, char>) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__fill<_CharT> __value) { + std::size_t __bytes = std::countl_one(static_cast(__value.__data[0])); + if (__bytes == 0) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it)); + return __out_it; +} + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__fill<_CharT> __value) { + if (!__unicode::__is_high_surrogate(__value.__data[0])) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it)); + 
return __out_it; +} + +template <__fmt_char_type _CharT, output_iterator _OutIt> + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__fill<_CharT> __value) { + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); +} +# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# else // _LIBCPP_HAS_NO_UNICODE +template <__fmt_char_type _CharT, output_iterator _OutIt> +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__fill<_CharT> __value) { + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); +} +# endif // _LIBCPP_HAS_NO_UNICODE + template _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, const char* __last, string&& __grouping, _CharT __sep, diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -16,9 +16,11 @@ /// This header has some support for the chrono-format-spec since it doesn't /// affect the std-format-spec. +#include <__algorithm/copy_n.h> #include <__algorithm/find_if.h> #include <__algorithm/min.h> #include <__assert> +#include <__bit/countl.h> #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> @@ -31,6 +33,7 @@ #include <__format/width_estimation_table.h> #include <__iterator/concepts.h> #include <__iterator/readable_traits.h> // iter_value_t +#include <__memory/addressof.h> #include <__type_traits/common_type.h> #include <__type_traits/is_trivially_copyable.h> #include <__variant/monostate.h> @@ -220,6 +223,25 @@ bool __month_name_ : 1; }; +// The fill UCS scalar value. +// +// This is always an array, with 1, 2, or 4 elements. +// The size of the data structure is always 32-bits. 
+template +struct __fill; + +template <> +struct __fill { + char __data[4] = {' '}; +}; + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <> +struct __fill { + wchar_t __data[4 / sizeof(wchar_t)] = {L' '}; +}; +# endif + /// Contains the parsed formatting specifications. /// /// This contains information for both the std-format-spec and the @@ -255,7 +277,7 @@ /// replaced with the value of that arg-id. int32_t __precision_; - _CharT __fill_; + __fill<_CharT> __fill_; _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } @@ -386,11 +408,7 @@ /// The requested precision, either the value or the arg-id. int32_t __precision_{-1}; - // LWG 3576 will probably change this to always accept a Unicode code point - // To avoid changing the size with that change align the field so when it - // becomes 32-bit its alignment will remain the same. That also means the - // size will remain the same. (D2572 addresses the solution for LWG 3576.) - _CharT __fill_{_CharT(' ')}; + __fill<_CharT> __fill_{}; private: _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { @@ -410,19 +428,143 @@ return false; } + _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) { + // The forbidden fill characters all are 1-byte code points, thus the + // check can be omitted when more bytes are used. 
+ if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':'))) + std::__throw_format_error("The format-spec range-fill field contains an invalid character"); + else if (__fill == _CharT('{') || __fill == _CharT('}')) + std::__throw_format_error("The format-spec fill field contains an invalid character"); + } + +# ifndef _LIBCPP_HAS_NO_UNICODE // range-fill and tuple-fill are identical template + requires(same_as<_CharT, char>) _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { - _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause " - "undefined behavior by evaluating data not in the input"); + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + // The number of bytes that are used for the UCS scalar value can be + // determined by the number of leading bits with value 1 in the first byte. + std::size_t __bytes = std::countl_one(static_cast(*__begin)); + switch (__bytes) { + case 0: + __bytes = 1; + break; + + case 2: + case 3: + case 4: + break; + + default: + std::__throw_format_error("Malformed Unicode fill character"); + } + + if (__begin + __bytes < __end) { + if (__parse_alignment(*(__begin + __bytes))) { + // Validates whether the input is indeed a valid UCS Scalar value. + __unicode::__code_point_view __view{__begin, __begin + __bytes}; + __unicode::__consume_result __consumed = __view.__consume(); + if (__consumed.__status != __unicode::__consume_result::__ok) + std::__throw_format_error("The fill character contains an invalid value"); + _LIBCPP_ASSERT(__view.__at_end(), "a valid fill character should have consumed the entire input"); + + if (__bytes == 1) + // The forbidden fill characters all are 1-byte code points, thus the + // check can be omitted when more bytes are used. 
+ __validate_fill_character(*__begin, __use_range_fill); + + std::copy_n(__begin, __bytes, std::addressof(__fill_.__data[0])); + __begin += __bytes + 1; + return true; + } + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + template + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + + std::size_t __bytes = 1 + __unicode::__is_high_surrogate(*__begin); + if (__begin + __bytes < __end) { + if (__parse_alignment(*(__begin + __bytes))) { + // Validates whether the input is indeed a valid UCS Scalar value. + __unicode::__code_point_view __view{__begin, __begin + __bytes}; + __unicode::__consume_result __consumed = __view.__consume(); + if (__consumed.__status != __unicode::__consume_result::__ok) + std::__throw_format_error("The fill character contains an invalid value"); + _LIBCPP_ASSERT(__view.__at_end(), "a valid fill character should have consumed the entire input"); + + if (__bytes == 1) + // The forbidden fill characters all are 1-byte code points, thus the + // check can be omitted when more bytes are used. + __validate_fill_character(*__begin, __use_range_fill); + + std::copy_n(__begin, __bytes, std::addressof(__fill_.__data[0])); // ranges and inout result to be used? 
+ __begin += __bytes + 1; + return true; + } + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + + template + requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + if (__begin + 1 != __end) { + if (__parse_alignment(*(__begin + 1))) { + if (!__unicode::__is_scalar_value(*__begin)) + std::__throw_format_error("The fill character contains an invalid value"); + + __validate_fill_character(*__begin, __use_range_fill); + + __fill_.__data[0] = *__begin; + __begin += 2; + return true; + } + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + +# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS + +# else // _LIBCPP_HAS_NO_UNICODE + // range-fill and tuple-fill are identical + template + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { + _LIBCPP_ASSERT(__begin != __end, + "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); if (__begin + 1 != __end) { if (__parse_alignment(*(__begin + 1))) { - if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':'))) - std::__throw_format_error("The format-spec range-fill field contains an invalid character"); - else if (*__begin == _CharT('{') || *__begin == _CharT('}')) - std::__throw_format_error("The format-spec fill field contains an invalid character"); + __validate_fill_character(*__begin, __use_range_fill); - __fill_ = *__begin; + __fill_.__data[0] = *__begin; __begin += 2; return true; } @@ -435,6 +577,8 @@ return true; } +# endif // _LIBCPP_HAS_NO_UNICODE + template _LIBCPP_HIDE_FROM_ABI constexpr bool 
__parse_sign(_Iterator& __begin) { switch (*__begin) { @@ -482,9 +626,9 @@ if (*__begin == _CharT('{')) { __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); - __width_as_arg_ = true; - __width_ = __r.__value; - __begin = __r.__last; + __width_as_arg_ = true; + __width_ = __r.__value; + __begin = __r.__last; return true; } @@ -492,9 +636,10 @@ return false; __format::__parse_number_result __r = __format::__parse_number(__begin, __end); - __width_ = __r.__value; - _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, " - "due to validations in this function"); + __width_ = __r.__value; + _LIBCPP_ASSERT(__width_ != 0, + "A zero value isn't allowed and should be impossible, " + "due to validations in this function"); __begin = __r.__last; return true; } @@ -510,9 +655,9 @@ if (*__begin == _CharT('{')) { __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); - __precision_as_arg_ = true; - __precision_ = __arg_id.__value; - __begin = __arg_id.__last; + __precision_as_arg_ = true; + __precision_ = __arg_id.__value; + __begin = __arg_id.__last; return true; } @@ -520,9 +665,9 @@ std::__throw_format_error("The format-spec precision field doesn't contain a value or arg-id"); __format::__parse_number_result __r = __format::__parse_number(__begin, __end); - __precision_ = __r.__value; - __precision_as_arg_ = false; - __begin = __r.__last; + __precision_ = __r.__value; + __precision_as_arg_ = false; + __begin = __r.__last; return true; } @@ -606,16 +751,14 @@ ++__begin; } - _LIBCPP_HIDE_FROM_ABI - int32_t __get_width(auto& __ctx) const { + _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const { if (!__width_as_arg_) return __width_; return __format_spec::__substitute_arg_id(__ctx.arg(__width_)); } - _LIBCPP_HIDE_FROM_ABI - int32_t __get_precision(auto& __ctx) const { + _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const { if 
(!__precision_as_arg_) return __precision_; diff --git a/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp @@ -0,0 +1,108 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT Evaluate gcc-12 status + +// This version runs the test when the platform has Unicode support. +// UNSUPPORTED: libcpp-has-no-unicode + +// XFAIL: availability-fp_to_chars-missing + +// + +// The paper +// P2572R1 std::format fill character allowances +// adds support for Unicode Scalar Values as fill character. + +#include + +#include "assert_macros.h" +#include "concat_macros.h" +#include "format.functions.common.h" +#include "make_string.h" +#include "string_literal.h" +#include "test_format_string.h" +#include "test_macros.h" + +#define SV(S) MAKE_STRING_VIEW(CharT, S) + +auto check = []( + std::basic_string_view expected, test_format_string fmt, Args&&... args) { + std::basic_string out = std::format(fmt, std::forward(args)...); + TEST_REQUIRE(out == expected, + TEST_WRITE_CONCATENATED( + "\nFormat string ", fmt.get(), "\nExpected output ", expected, "\nActual output ", out, '\n')); +}; + +auto check_exception = + []( + [[maybe_unused]] std::string_view what, + [[maybe_unused]] std::basic_string_view fmt, + [[maybe_unused]] Args&&... 
args) { + TEST_VALIDATE_EXCEPTION( + std::format_error, + [&]([[maybe_unused]] const std::format_error& e) { + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED( + "\nFormat string ", fmt, "\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::vformat(fmt, std::make_format_args>(args...))); + }; + +template +void test() { + // 1, 2, 3, 4 code unit UTF-8 transitions + check(SV("\u000042\u0000"), SV("{:\u0000^4}"), 42); + check(SV("\u007f42\u007f"), SV("{:\u007f^4}"), 42); + check(SV("\u008042\u0080"), SV("{:\u0080^4}"), 42); + check(SV("\u07ff42\u07ff"), SV("{:\u07ff^4}"), 42); + check(SV("\u080042\u0800"), SV("{:\u0800^4}"), 42); + check(SV("\uffff42\uffff"), SV("{:\uffff^4}"), 42); + check(SV("\U0010000042\U00100000"), SV("{:\U00100000^4}"), 42); + check(SV("\U0010ffff42\U0010ffff"), SV("{:\U0010ffff^4}"), 42); + + // Examples of P2572R1 + check(SV("🤡🤡x🤡🤡🤡"), SV("{:🤡^6}"), SV("x")); + check(SV("🤡🤡🤡"), SV("{:*^6}"), SV("🤡🤡🤡")); + check(SV("12345678"), SV("{:*>6}"), SV("12345678")); + + // Invalid Unicode Scalar Values + if constexpr (std::same_as) { + check_exception("The fill character contains an invalid value", SV("{:\xed\xa0\x80^4}"), 42); // U+D800 + check_exception("The fill character contains an invalid value", SV("{:\xed\xa0\xbf^4}"), 42); // U+D83F + check_exception("The fill character contains an invalid value", SV("{:\xed\xbf\x80^4}"), 42); // U+DFC0 + check_exception("The fill character contains an invalid value", SV("{:\xed\xbf\xbf^4}"), 42); // U+DFFF + + check_exception("The fill character contains an invalid value", SV("{:\xf4\x90\x80\x80^4}"), 42); // U+110000 + check_exception("The fill character contains an invalid value", SV("{:\xf4\x90\xbf\xbf^4}"), 42); // U+110FFF +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + } else { + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xd800^4}"}, 42); + check_exception("The fill character contains an invalid 
value", std::wstring_view{L"{:\xdbff^4}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xdc00^4}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\xddff^4}"}, 42); + +# ifndef TEST_SHORT_WCHAR + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\x00110000^4}"}, 42); + check_exception("The fill character contains an invalid value", std::wstring_view{L"{:\x0011ffff^4}"}, 42); +# endif +#endif // TEST_HAS_NO_WIDE_CHARACTERS + } +} + +int main(int, char**) { + test(); + +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -229,6 +229,7 @@ ! grep -rn '[^ -~]' libcxx/include libcxx/src libcxx/test libcxx/benchmarks \ --exclude '*.dat' \ --exclude 'escaped_output.*.pass.cpp' \ + --exclude 'fill.unicode.pass.cpp' \ --exclude 'format_tests.h' \ --exclude 'format.functions.tests.h' \ --exclude 'formatter.*.pass.cpp' \