diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -244,6 +244,7 @@ __format/formatter_floating_point.h __format/formatter_integer.h __format/formatter_integral.h + __format/formatter_output.h __format/formatter_pointer.h __format/formatter_string.h __format/parser_std_format_spec.h diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__format/formatter_output.h @@ -0,0 +1,188 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_FORMATTER_OUTPUT_H +#define _LIBCPP___FORMAT_FORMATTER_OUTPUT_H + +#include <__algorithm/copy.h> +#include <__algorithm/fill_n.h> +#include <__config> +#include <__format/parser_std_format_spec.h> +#include <__utility/move.h> +#include <__utility/unreachable.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +namespace __formatter { + +// TODO FMT remove _v2 suffix. +struct _LIBCPP_TYPE_VIS __padding_size_result_v2 { + size_t __before_; + size_t __after_; +}; + +// TODO FMT remove _v2 suffix. 
+_LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result_v2 __padding_size_v2(size_t __size, size_t __width, + __format_spec::__alignment __align) { + _LIBCPP_ASSERT(__width > __size, "don't call this function when no padding is required"); + _LIBCPP_ASSERT(__align != __format_spec::__alignment::__default, + "the caller should adjust the default to the value required by the type"); + _LIBCPP_ASSERT(__align != __format_spec::__alignment::__zero_padding, + "the caller should have handled the zero-padding"); + + size_t __fill = __width - __size; + switch (__align) { + case __format_spec::__alignment::__default: + case __format_spec::__alignment::__zero_padding: + __libcpp_unreachable(); + + case __format_spec::__alignment::__left: + return {0, __fill}; + + case __format_spec::__alignment::__center: { + // The extra padding is divided per [format.string.std]/3 + // __before = floor(__fill, 2); + // __after = ceil(__fill, 2); + size_t __before = __fill / 2; + size_t __after = __fill - __before; + return {__before, __after}; + } + case __format_spec::__alignment::__right: + return {__fill, 0}; + } + __libcpp_unreachable(); +} + +/// Writes the input to the output with the required padding. +/// +/// Since the output column width is specified the function can be used for +/// ASCII and Unicode output. +/// +/// \pre [\a __first, \a __last) is a valid range. +/// \pre \a __size <= \a __width. Using this function when this pre-condition +/// doesn't hold incurs an unwanted overhead. +/// +/// \param __first Pointer to the first element to write. +/// \param __last Pointer beyond the last element to write. +/// \param __out_it The output iterator to write to. +/// \param __specs The parsed formatting specifications. +/// \param __size The (estimated) output column width. When the elements +/// to be written are ASCII the following condition holds +/// \a __size == \a __last - \a __first. +/// +/// \returns An iterator pointing beyond the last element written. 
+/// +/// \note The type of the elements in range [\a __first, \a __last) can differ +/// from the type of \a __specs. Integer output uses \c std::to_chars for its +/// conversion, which means the [\a __first, \a __last) always contains elements +/// of the type \c char. +template +_LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, ptrdiff_t __size) + -> decltype(__out_it) { + _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); + + if (__size >= __specs.__width_) + return _VSTD::copy(__first, __last, _VSTD::move(__out_it)); + + __padding_size_result_v2 __padding = + __formatter::__padding_size_v2(__size, __specs.__width_, __specs.__std_.__alignment_); + __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = _VSTD::copy(__first, __last, _VSTD::move(__out_it)); + return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} + +# ifndef _LIBCPP_HAS_NO_UNICODE +template +_LIBCPP_HIDE_FROM_ABI auto __write_unicode_no_precision(basic_string_view<_CharT> __str, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) + -> decltype(__out_it) { + _LIBCPP_ASSERT(!__specs.__has_precision(), "use __write_unicode"); + // No padding -> copy the string + if (!__specs.__has_width()) + return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); + + // Non Unicode part larger than width -> copy the string + auto __last = __format_spec::__detail::__estimate_column_width_fast(__str.begin(), __str.end()); + ptrdiff_t __size = __last - __str.begin(); + if (__size >= __specs.__width_) + return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); + + // Is there a non Unicode part? 
+ if (__last != __str.end()) { + // Non Unicode and Unicode part larger than width -> copy the string + __format_spec::__detail::__column_width_result __column_width = + __format_spec::__detail::__estimate_column_width(__last, __str.end(), __specs.__width_); + __size += __column_width.__width; // Note this new size is used when __size < __specs.__width_ + if (__size >= __specs.__width_) + return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); + } + + return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); +} +# endif + +template +_LIBCPP_HIDE_FROM_ABI auto __write_unicode(basic_string_view<_CharT> __str, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) + -> decltype(__out_it) { +# ifndef _LIBCPP_HAS_NO_UNICODE + if (!__specs.__has_precision()) + return __formatter::__write_unicode_no_precision(__str, _VSTD::move(__out_it), __specs); + + // Non unicode part larger than precision -> truncate the output and use the normal write operation. 
+ auto __last = __format_spec::__detail::__estimate_column_width_fast(__str.begin(), __str.end()); + ptrdiff_t __size = __last - __str.begin(); + if (__size >= __specs.__precision_) + return __formatter::__write(__str.begin(), __str.begin() + __specs.__precision_, _VSTD::move(__out_it), __specs, + __specs.__precision_); + + // No non Unicode part, implies __size < __specs.__precision_ -> use normal write operation + if (__last == __str.end()) + return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __str.size()); + + __format_spec::__detail::__column_width_result __column_width = + __format_spec::__detail::__estimate_column_width(__last, __str.end(), __specs.__precision_ - __size); + __size += __column_width.__width; + // Truncate the output + if (__column_width.__ptr != __str.end()) + __str.remove_suffix(__str.end() - __column_width.__ptr); + + return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); + +# else + if (__specs.__has_precision()) { + ptrdiff_t __size = __str.size(); + if (__size > __specs.__precision_) + return __formatter::__write(__str.begin(), __str.begin() + __specs.__precision_, _VSTD::move(__out_it), __specs, + __specs.__precision_); + } + return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __str.size()); + +# endif +} + +} // namespace __formatter + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_FORMATTER_OUTPUT_H diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -10,13 +10,15 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H #define _LIBCPP___FORMAT_FORMATTER_STRING_H -#include <__assert> +#include <__availability> #include <__config> -#include <__format/format_error.h> #include <__format/format_fwd.h> -#include <__format/format_string.h> +#include 
<__format/format_parse_context.h> #include <__format/formatter.h> +#include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> +#include <__utility/move.h> +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -27,43 +29,30 @@ #if _LIBCPP_STD_VER > 17 -namespace __format_spec { - template <__formatter::__char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __formatter_string : public __parser_string<_CharT> { +struct _LIBCPP_TEMPLATE_VIS __formatter_string { public: - _LIBCPP_HIDE_FROM_ABI auto format(basic_string_view<_CharT> __str, - auto& __ctx) -> decltype(__ctx.out()) { - - _LIBCPP_ASSERT(this->__alignment != _Flags::_Alignment::__default, - "The parser should not use these defaults"); - - if (this->__width_needs_substitution()) - this->__substitute_width_arg_id(__ctx.arg(this->__width)); - - if (this->__precision_needs_substitution()) - this->__substitute_precision_arg_id(__ctx.arg(this->__precision)); - - return __formatter::__write_unicode( - __ctx.out(), __str, this->__width, - this->__has_precision_field() ? this->__precision : -1, this->__fill, - this->__alignment); + _LIBCPP_HIDE_FROM_ABI constexpr auto parse(basic_format_parse_context<_CharT>& __parse_ctx) + -> decltype(__parse_ctx.begin()) { + auto __result = __parser_.__parse(__parse_ctx, __format_spec::__fields_string); + __format_spec::__process_display_type_string(__parser_.__type_); + return __result; } -}; -} //namespace __format_spec + _LIBCPP_HIDE_FROM_ABI auto format(basic_string_view<_CharT> __str, auto& __ctx) const -> decltype(__ctx.out()) { + return __formatter::__write_unicode(__str, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx)); + } -// [format.formatter.spec]/2.2 For each charT, the string type specializations + __format_spec::__parser<_CharT> __parser_; +}; // Formatter const char*. 
template <__formatter::__char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT - formatter - : public __format_spec::__formatter_string<_CharT> { - using _Base = __format_spec::__formatter_string<_CharT>; +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter + : public __formatter_string<_CharT> { + using _Base = __formatter_string<_CharT>; - _LIBCPP_HIDE_FROM_ABI auto format(const _CharT* __str, auto& __ctx) - -> decltype(__ctx.out()) { + _LIBCPP_HIDE_FROM_ABI auto format(const _CharT* __str, auto& __ctx) const -> decltype(__ctx.out()) { _LIBCPP_ASSERT(__str, "The basic_format_arg constructor should have " "prevented an invalid pointer."); @@ -78,8 +67,9 @@ // now these optimizations aren't implemented. Instead the base class // handles these options. // TODO FMT Implement these improvements. - if (this->__has_width_field() || this->__has_precision_field()) - return _Base::format(__str, __ctx); + __format_spec::__parsed_specifications<_CharT> __specs = _Base::__parser_.__get_parsed_std_specifications(__ctx); + if (__specs.__has_width() || __specs.__has_precision()) + return __formatter::__write_unicode(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); // No formatting required, copy the string to the output. auto __out_it = __ctx.out(); @@ -91,12 +81,11 @@ // Formatter char*. template <__formatter::__char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT - formatter<_CharT*, _CharT> : public formatter { +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT*, _CharT> + : public formatter { using _Base = formatter; - _LIBCPP_HIDE_FROM_ABI auto format(_CharT* __str, auto& __ctx) - -> decltype(__ctx.out()) { + _LIBCPP_HIDE_FROM_ABI auto format(_CharT* __str, auto& __ctx) const -> decltype(__ctx.out()) { return _Base::format(__str, __ctx); } }; @@ -104,39 +93,34 @@ // Formatter char[]. 
template <__formatter::__char_type _CharT, size_t _Size> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT[_Size], _CharT> - : public __format_spec::__formatter_string<_CharT> { - static_assert(!is_const_v<_CharT>); - using _Base = __format_spec::__formatter_string<_CharT>; + : public __formatter_string<_CharT> { + using _Base = __formatter_string<_CharT>; - _LIBCPP_HIDE_FROM_ABI auto format(_CharT __str[_Size], auto& __ctx) -> decltype(__ctx.out()) { + _LIBCPP_HIDE_FROM_ABI auto format(_CharT __str[_Size], auto& __ctx) const -> decltype(__ctx.out()) { return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx); } }; // Formatter const char[]. template <__formatter::__char_type _CharT, size_t _Size> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT - formatter - : public __format_spec::__formatter_string<_CharT> { - using _Base = __format_spec::__formatter_string<_CharT>; +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter + : public __formatter_string<_CharT> { + using _Base = __formatter_string<_CharT>; - _LIBCPP_HIDE_FROM_ABI auto format(const _CharT __str[_Size], auto& __ctx) - -> decltype(__ctx.out()) { + _LIBCPP_HIDE_FROM_ABI auto format(const _CharT __str[_Size], auto& __ctx) const -> decltype(__ctx.out()) { return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx); } }; // Formatter std::string. 
template <__formatter::__char_type _CharT, class _Traits, class _Allocator> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT - formatter, _CharT> - : public __format_spec::__formatter_string<_CharT> { - using _Base = __format_spec::__formatter_string<_CharT>; +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> + : public __formatter_string<_CharT> { + using _Base = __formatter_string<_CharT>; - _LIBCPP_HIDE_FROM_ABI auto - format(const basic_string<_CharT, _Traits, _Allocator>& __str, auto& __ctx) + _LIBCPP_HIDE_FROM_ABI auto format(const basic_string<_CharT, _Traits, _Allocator>& __str, auto& __ctx) const -> decltype(__ctx.out()) { - // drop _Traits and _Allocator + // Drop _Traits and _Allocator to have one std::basic_string formatter. return _Base::format(basic_string_view<_CharT>(__str.data(), __str.size()), __ctx); } }; @@ -144,13 +128,12 @@ // Formatter std::string_view. template <__formatter::__char_type _CharT, class _Traits> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> - : public __format_spec::__formatter_string<_CharT> { - using _Base = __format_spec::__formatter_string<_CharT>; + : public __formatter_string<_CharT> { + using _Base = __formatter_string<_CharT>; - _LIBCPP_HIDE_FROM_ABI auto - format(basic_string_view<_CharT, _Traits> __str, auto& __ctx) + _LIBCPP_HIDE_FROM_ABI auto format(basic_string_view<_CharT, _Traits> __str, auto& __ctx) const -> decltype(__ctx.out()) { - // drop _Traits + // Drop _Traits to have one std::basic_string_view formatter. 
return _Base::format(basic_string_view<_CharT>(__str.data(), __str.size()), __ctx); } }; diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -10,12 +10,20 @@ #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H +/// \file Contains the std-format-spec parser. +/// +/// Most of the code can be reused in the chrono-format-spec. +/// This header has some support for the chrono-format-spec since it doesn't +/// affect the std-format-spec. + #include <__algorithm/find_if.h> #include <__algorithm/min.h> #include <__assert> #include <__config> +#include <__debug> #include <__format/format_arg.h> #include <__format/format_error.h> +#include <__format/format_parse_context.h> #include <__format/format_string.h> #include <__variant/monostate.h> #include @@ -1377,6 +1385,461 @@ } #endif // _LIBCPP_HAS_NO_UNICODE +/// These fields are a filter for which elements to parse. +/// +/// They default to false so when a new field is added it needs to be opted in +/// explicitly. +struct __fields { + uint8_t __sign_ : 1 {false}; + uint8_t __alternate_form_ : 1 {false}; + uint8_t __zero_padding_ : 1 {false}; + uint8_t __precision_ : 1 {false}; + uint8_t __locale_specific_form_ : 1 {false}; + uint8_t __type_ : 1 {false}; +}; + +// By not placing this constant in the formatter class it's not duplicated for +// char and wchar_t. +inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true}; + +enum class _LIBCPP_ENUM_VIS __alignment : uint8_t { + /// No alignment is set in the format string. + __default, + __left, + __center, + __right, + __zero_padding +}; + +enum class _LIBCPP_ENUM_VIS __sign : uint8_t { + /// No sign is set in the format string. + /// + /// The sign isn't allowed for certain format-types. 
By using this value + /// it's possible to detect whether or not the user explicitly set the sign + /// flag. For formatting purposes it behaves the same as \ref __minus. + __default, + __minus, + __plus, + __space +}; + +enum class _LIBCPP_ENUM_VIS __type : uint8_t { + __default, + __string, + __binary_lower_case, + __binary_upper_case, + __octal, + __decimal, + __hexadecimal_lower_case, + __hexadecimal_upper_case, + __pointer, + __char, + __hexfloat_lower_case, + __hexfloat_upper_case, + __scientific_lower_case, + __scientific_upper_case, + __fixed_lower_case, + __fixed_upper_case, + __general_lower_case, + __general_upper_case +}; + +struct __std { + __alignment __alignment_ : 3; + __sign __sign_ : 2; + bool __alternate_form_ : 1; + bool __locale_specific_form_ : 1; + __type __type_; +}; + +struct __chrono { + __alignment __alignment_ : 3; + bool __weekday_name_ : 1; + bool __month_name_ : 1; +}; + +/// Contains the parsed formatting specifications. +/// +/// This contains information for both the std-format-spec and the +/// chrono-format-spec. This results in some unused members for both +/// specifications. However these unused members don't increase the size +/// of the structure. +/// +/// This struct doesn't cross ABI bounds so it doesn't need to be ABI stable. +template +struct _LIBCPP_TEMPLATE_VIS __parsed_specifications { + union { + // The field __alignment_ is the first element in __std_ and __chrono_. + // This allows the code to always inspect this value regardless of which member + // of the union is the active member [class.union.general]/2. + // + // This is needed since the generic output routines handle the alignment of + // the output and therefore need this field. + __alignment __alignment_ : 3; + __std __std_; + __chrono __chrono_; + }; + + /// The requested width. + /// + /// When the format-spec used an arg-id for this field it has already been + /// replaced with the value of that arg-id. + int32_t __width_; + + /// The requested precision.
+ /// + /// When the format-spec used an arg-id for this field it has already been + /// replaced with the value of that arg-id. + int32_t __precision_; + + _CharT __fill_; + + _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; } +}; + +static_assert(sizeof(__parsed_specifications) == 16); +static_assert(is_trivially_copyable_v<__parsed_specifications>); +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +static_assert(sizeof(__parsed_specifications) == 16); +static_assert(is_trivially_copyable_v<__parsed_specifications>); +# endif + +/// The parser for the std-format-spec. +/// +/// Note this class is a member of std::formatter specializations. It's +/// expected developers will create their own formatter specializations that +/// inherit from the std::formatter specializations. This means this class +/// must be ABI stable. To aid the stability the unused bits in the class are +/// set to zero. That way they can be repurposed when a future revision of the +/// Standard adds new fields to std-format-spec.
+template +class _LIBCPP_TEMPLATE_VIS __parser { +public: + _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields) + -> decltype(__parse_ctx.begin()) { + + const _CharT* __begin = __parse_ctx.begin(); + const _CharT* __end = __parse_ctx.end(); + if (__begin == __end) + return __begin; + + if (__parse_fill_align(__begin, __end) && __begin == __end) + return __begin; + + if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end) + return __begin; + + if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end) + return __begin; + + if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end) + return __begin; + + if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end) + return __begin; + + if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end) + return __begin; + + if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end) + return __begin; + + if (__fields.__type_) { + __parse_type(__begin); + + // When __type_ is false the calling parser is expected to do additional + // parsing. In that case that parser should do the end of format string + // validation. + if (__begin != __end && *__begin != _CharT('}')) + __throw_format_error("The format-spec should consume the input or end with a '}'"); + } + + return __begin; + } + + /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
+ _LIBCPP_HIDE_FROM_ABI + __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const { + return __parsed_specifications<_CharT>{.__std_{.__alignment_ = __alignment_, + .__sign_ = __sign_, + .__alternate_form_ = __alternate_form_, + .__locale_specific_form_ = __locale_specific_form_, + .__type_ = __type_}, + .__width_{__get_width(__ctx)}, + .__precision_{__get_precision(__ctx)}, + .__fill_{__fill_}}; + } + + __alignment __alignment_ : 3 {__alignment::__default}; + __sign __sign_ : 2 {__sign::__default}; + bool __alternate_form_ : 1 {false}; + bool __locale_specific_form_ : 1 {false}; + bool __reserved_0_ : 1 {false}; + __type __type_{__type::__default}; + + // These two flags are used for formatting chrono. Since the struct has + // padding space left it's added to this structure. + bool __weekday_name_ : 1 {false}; + bool __month_name_ : 1 {false}; + + uint8_t __reserved_1_ : 6 {0}; + uint8_t __reserved_2_ : 6 {0}; + // These two flags are only used internally and not part of the + // __parsed_specifications. Therefore put them at the end. + bool __width_as_arg_ : 1 {false}; + bool __precision_as_arg_ : 1 {false}; + + /// The requested width, either the value or the arg-id. + int32_t __width_{0}; + + /// The requested precision, either the value or the arg-id. + int32_t __precision_{-1}; + + // LWG 3576 will probably change this to always accept a Unicode code point + // To avoid changing the size with that change align the field so when it + // becomes 32-bit its alignment will remain the same. That also means the + // size will remain the same. (D2572 addresses the solution for LWG 3576.) 
+ _CharT __fill_{_CharT(' ')}; + +private: + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { + switch (__c) { + case _CharT('<'): + __alignment_ = __alignment::__left; + return true; + + case _CharT('^'): + __alignment_ = __alignment::__center; + return true; + + case _CharT('>'): + __alignment_ = __alignment::__right; + return true; + } + return false; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) { + _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); + if (__begin + 1 != __end) { + if (__parse_alignment(*(__begin + 1))) { + if (*__begin == _CharT('{') || *__begin == _CharT('}')) + __throw_format_error("The format-spec fill field contains an invalid character"); + + __fill_ = *__begin; + __begin += 2; + return true; + } + } + + if (!__parse_alignment(*__begin)) + return false; + + ++__begin; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) { + switch (*__begin) { + case _CharT('-'): + __sign_ = __sign::__minus; + break; + case _CharT('+'): + __sign_ = __sign::__plus; + break; + case _CharT(' '): + __sign_ = __sign::__space; + break; + default: + return false; + } + ++__begin; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) { + if (*__begin != _CharT('#')) + return false; + + __alternate_form_ = true; + ++__begin; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) { + if (*__begin != _CharT('0')) + return false; + + if (__alignment_ == __alignment::__default) + __alignment_ = __alignment::__zero_padding; + ++__begin; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) { + if (*__begin == _CharT('0')) + __throw_format_error("A format-spec 
width field shouldn't have a leading zero"); + + if (*__begin == _CharT('{')) { + __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); + __width_as_arg_ = true; + __width_ = __r.__value; + __begin = __r.__ptr; + return true; + } + + if (*__begin < _CharT('0') || *__begin > _CharT('9')) + return false; + + __format::__parse_number_result __r = __format::__parse_number(__begin, __end); + __width_ = __r.__value; + _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, " + "due to validations in this function"); + __begin = __r.__ptr; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end, + auto& __parse_ctx) { + if (*__begin != _CharT('.')) + return false; + + ++__begin; + if (__begin == __end) + __throw_format_error("End of input while parsing format-spec precision"); + + if (*__begin == _CharT('{')) { + __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); + __precision_as_arg_ = true; + __precision_ = __arg_id.__value; + __begin = __arg_id.__ptr; + return true; + } + + if (*__begin < _CharT('0') || *__begin > _CharT('9')) + __throw_format_error("The format-spec precision field doesn't contain a value or arg-id"); + + __format::__parse_number_result __r = __format::__parse_number(__begin, __end); + __precision_ = __r.__value; + __precision_as_arg_ = false; + __begin = __r.__ptr; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) { + if (*__begin != _CharT('L')) + return false; + + __locale_specific_form_ = true; + ++__begin; + return true; + } + + _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) { + // Determines the type. It does not validate whether the selected type is + // valid. Most formatters have optional fields that are only allowed for + // certain types. 
These parsers need to do validation after the type has + // been parsed. So it's easier to implement the validation for all types in + // the specific parse function. + switch (*__begin) { + case 'A': + __type_ = __type::__hexfloat_upper_case; + break; + case 'B': + __type_ = __type::__binary_upper_case; + break; + case 'E': + __type_ = __type::__scientific_upper_case; + break; + case 'F': + __type_ = __type::__fixed_upper_case; + break; + case 'G': + __type_ = __type::__general_upper_case; + break; + case 'X': + __type_ = __type::__hexadecimal_upper_case; + break; + case 'a': + __type_ = __type::__hexfloat_lower_case; + break; + case 'b': + __type_ = __type::__binary_lower_case; + break; + case 'c': + __type_ = __type::__char; + break; + case 'd': + __type_ = __type::__decimal; + break; + case 'e': + __type_ = __type::__scientific_lower_case; + break; + case 'f': + __type_ = __type::__fixed_lower_case; + break; + case 'g': + __type_ = __type::__general_lower_case; + break; + case 'o': + __type_ = __type::__octal; + break; + case 'p': + __type_ = __type::__pointer; + break; + case 's': + __type_ = __type::__string; + break; + case 'x': + __type_ = __type::__hexadecimal_lower_case; + break; + default: + return; + } + ++__begin; + } + + _LIBCPP_HIDE_FROM_ABI + int32_t __get_width(auto& __ctx) const { + if (!__width_as_arg_) + return __width_; + + int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_)); + if (__result == 0) + __throw_format_error("A format-spec width field replacement should have a positive value"); + return __result; + } + + _LIBCPP_HIDE_FROM_ABI + int32_t __get_precision(auto& __ctx) const { + if (!__precision_as_arg_) + return __precision_; + + return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); + } +}; + +// Validates whether the reserved bitfields don't change the size.
+static_assert(sizeof(__parser) == 16); +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +static_assert(sizeof(__parser) == 16); +# endif + +_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { + switch (__type) { + case __format_spec::__type::__default: + case __format_spec::__type::__string: + break; + + default: + __throw_format_error("The format-spec type has a type not supported for " + "a string argument"); + } +} + } // namespace __format_spec #endif //_LIBCPP_STD_VER > 17 diff --git a/libcxx/include/format b/libcxx/include/format --- a/libcxx/include/format +++ b/libcxx/include/format @@ -310,12 +310,22 @@ // exception of type format_error is thrown. // // Validate whether the arguments are integrals. - if (__formatter.__width_needs_substitution()) - __format::__compile_time_validate_integral(__ctx.arg(__formatter.__width)); - - if constexpr (_HasPrecision) - if (__formatter.__precision_needs_substitution()) - __format::__compile_time_validate_integral(__ctx.arg(__formatter.__precision)); + if constexpr (requires(formatter<_Tp, _CharT> __f) { __f.__width_needs_substitution(); }) { + // TODO FMT Remove this when parser v1 has been phased out. 
+ if (__formatter.__width_needs_substitution()) + __format::__compile_time_validate_integral(__ctx.arg(__formatter.__width)); + + if constexpr (_HasPrecision) + if (__formatter.__precision_needs_substitution()) + __format::__compile_time_validate_integral(__ctx.arg(__formatter.__precision)); + } else { + if (__formatter.__parser_.__width_as_arg_) + __format::__compile_time_validate_integral(__ctx.arg(__formatter.__parser_.__width_)); + + if constexpr (_HasPrecision) + if (__formatter.__parser_.__precision_as_arg_) + __format::__compile_time_validate_integral(__ctx.arg(__formatter.__parser_.__precision_)); + } } template diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -593,6 +593,7 @@ module formatter_floating_point { private header "__format/formatter_floating_point.h" } module formatter_integer { private header "__format/formatter_integer.h" } module formatter_integral { private header "__format/formatter_integral.h" } + module formatter_output { private header "__format/formatter_output.h" } module formatter_pointer { private header "__format/formatter_pointer.h" } module formatter_string { private header "__format/formatter_string.h" } module parser_std_format_spec { private header "__format/parser_std_format_spec.h" } diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -276,6 +276,7 @@ #include <__format/formatter_floating_point.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_floating_point.h'}} #include <__format/formatter_integer.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_integer.h'}} #include <__format/formatter_integral.h> // expected-error@*:* {{use of private header from outside 
its module: '__format/formatter_integral.h'}} +#include <__format/formatter_output.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_output.h'}} #include <__format/formatter_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_pointer.h'}} #include <__format/formatter_string.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_string.h'}} #include <__format/parser_std_format_spec.h> // expected-error@*:* {{use of private header from outside its module: '__format/parser_std_format_spec.h'}} diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp deleted file mode 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp +++ /dev/null @@ -1,367 +0,0 @@ -//===----------------------------------------------------------------------===// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-incomplete-format - -// - -// Tests the parsing of the format string as specified in [format.string.std]. -// It validates whether the std-format-spec is valid for a string type. 
- -#include -#include -#ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include -#endif - -#include "test_macros.h" -#include "make_string.h" -#include "test_exception.h" - -#define CSTR(S) MAKE_CSTRING(CharT, S) - -using namespace std::__format_spec; - -template -using Parser = __parser_string; - -template -struct Expected { - CharT fill = CharT(' '); - _Flags::_Alignment alignment = _Flags::_Alignment::__left; - uint32_t width = 0; - bool width_as_arg = false; - uint32_t precision = std::__format::__number_max; - bool precision_as_arg = true; - _Flags::_Type type = _Flags::_Type::__default; -}; - -template -constexpr void test(Expected expected, size_t size, - std::basic_string_view fmt) { - // Initialize parser with sufficient arguments to avoid the parsing to fail - // due to insufficient arguments. - std::basic_format_parse_context parse_ctx(fmt, - std::__format::__number_max); - auto begin = parse_ctx.begin(); - auto end = parse_ctx.end(); - Parser parser; - auto it = parser.parse(parse_ctx); - - assert(begin == parse_ctx.begin()); - assert(end == parse_ctx.end()); - - assert(begin + size == it); - assert(parser.__fill == expected.fill); - assert(parser.__alignment == expected.alignment); - assert(parser.__sign == _Flags::_Sign::__default); - assert(parser.__alternate_form == false); - assert(parser.__zero_padding == false); - assert(parser.__width == expected.width); - assert(parser.__width_as_arg == expected.width_as_arg); - assert(parser.__precision == expected.precision); - assert(parser.__precision_as_arg == expected.precision_as_arg); - assert(parser.__locale_specific_form == false); - assert(parser.__type == expected.type); -} - -template -constexpr void test(Expected expected, size_t size, const CharT* f) { - // The format-spec is valid if completely consumed or terminates at a '}'. - // The valid inputs all end with a '}'. The test is executed twice: - // - first with the terminating '}', - // - second consuming the entire input. 
- std::basic_string_view fmt{f}; - assert(fmt.back() == CharT('}') && "Pre-condition failure"); - - test(expected, size, fmt); - fmt.remove_suffix(1); - test(expected, size, fmt); -} - -template -constexpr void test() { - Parser parser; - - assert(parser.__fill == CharT(' ')); - assert(parser.__alignment == _Flags::_Alignment::__left); - assert(parser.__sign == _Flags::_Sign::__default); - assert(parser.__alternate_form == false); - assert(parser.__zero_padding == false); - assert(parser.__width == 0); - assert(parser.__width_as_arg == false); - assert(parser.__precision == std::__format::__number_max); - assert(parser.__precision_as_arg == true); - assert(parser.__locale_specific_form == false); - assert(parser.__type == _Flags::_Type::__default); - - test({}, 0, CSTR("}")); - - // *** Align-fill *** - test({.alignment = _Flags::_Alignment::__left}, 1, CSTR("<}")); - test({.alignment = _Flags::_Alignment::__center}, 1, "^}"); - test({.alignment = _Flags::_Alignment::__right}, 1, ">}"); - - test({.fill = CharT('L'), .alignment = _Flags::_Alignment::__left}, 2, - CSTR("L<}")); - test({.fill = CharT('#'), .alignment = _Flags::_Alignment::__center}, 2, - CSTR("#^}")); - test({.fill = CharT('0'), .alignment = _Flags::_Alignment::__right}, 2, - CSTR("0>}")); - - test_exception>( - "The format-spec fill field contains an invalid character", CSTR("{<")); - test_exception>( - "The format-spec fill field contains an invalid character", CSTR("}<")); - - // *** Sign *** - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR("+")); - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR("-")); - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR(" ")); - - // *** Alternate form *** - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR("#")); - - // *** Zero padding *** - test_exception>( - "A format-spec width field shouldn't have a leading 
zero", CSTR("0")); - - // *** Width *** - test({.width = 0, .width_as_arg = false}, 0, CSTR("}")); - test({.width = 1, .width_as_arg = false}, 1, CSTR("1}")); - test({.width = 10, .width_as_arg = false}, 2, CSTR("10}")); - test({.width = 1000, .width_as_arg = false}, 4, CSTR("1000}")); - test({.width = 1000000, .width_as_arg = false}, 7, CSTR("1000000}")); - - test({.width = 0, .width_as_arg = true}, 2, CSTR("{}}")); - test({.width = 0, .width_as_arg = true}, 3, CSTR("{0}}")); - test({.width = 1, .width_as_arg = true}, 3, CSTR("{1}}")); - - test_exception>( - "A format-spec width field shouldn't have a leading zero", CSTR("00")); - - static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - test({.width = 2'147'483'647, .width_as_arg = false}, 10, - CSTR("2147483647}")); - test_exception>( - "The numeric value of the format-spec is too large", CSTR("2147483648")); - test_exception>( - "The numeric value of the format-spec is too large", CSTR("5000000000")); - test_exception>( - "The numeric value of the format-spec is too large", CSTR("10000000000")); - - test_exception>("End of input while parsing format-spec arg-id", - CSTR("{")); - test_exception>("Invalid arg-id", CSTR("{0")); - test_exception>( - "The arg-id of the format-spec starts with an invalid character", - CSTR("{a")); - test_exception>("Invalid arg-id", CSTR("{1")); - test_exception>("Invalid arg-id", CSTR("{9")); - test_exception>("Invalid arg-id", CSTR("{9:")); - test_exception>("Invalid arg-id", CSTR("{9a")); - - static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - // Note the static_assert tests whether the arg-id is valid. - // Therefore the following should be true arg-id < __format::__number_max. 
- test({.width = 2'147'483'646, .width_as_arg = true}, 12, - CSTR("{2147483646}}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR("{2147483648}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR("{5000000000}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR("{10000000000}")); - - // *** Precision *** - test({.precision = 0, .precision_as_arg = false}, 2, CSTR(".0}")); - test({.precision = 1, .precision_as_arg = false}, 2, CSTR(".1}")); - test({.precision = 10, .precision_as_arg = false}, 3, CSTR(".10}")); - test({.precision = 1000, .precision_as_arg = false}, 5, CSTR(".1000}")); - test({.precision = 1000000, .precision_as_arg = false}, 8, CSTR(".1000000}")); - - test({.precision = 0, .precision_as_arg = true}, 3, CSTR(".{}}")); - test({.precision = 0, .precision_as_arg = true}, 4, CSTR(".{0}}")); - test({.precision = 1, .precision_as_arg = true}, 4, CSTR(".{1}}")); - - test_exception>( - "The format-spec precision field doesn't contain a value or arg-id", - CSTR(".a")); - test_exception>( - "The format-spec precision field doesn't contain a value or arg-id", - CSTR(".:")); - - static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - test({.precision = 2'147'483'647, .precision_as_arg = false}, 11, - CSTR(".2147483647}")); - test_exception>( - "The numeric value of the format-spec is too large", CSTR(".2147483648")); - test_exception>( - "The numeric value of the format-spec is too large", CSTR(".5000000000")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR(".10000000000")); - - test_exception>("End of input while parsing format-spec arg-id", - CSTR(".{")); - test_exception>("Invalid arg-id", CSTR(".{0")); - test_exception>( - "The arg-id of the format-spec starts with an invalid character", - CSTR(".{a")); - test_exception>("Invalid arg-id", CSTR(".{1")); - test_exception>("Invalid 
arg-id", CSTR(".{9")); - test_exception>("Invalid arg-id", CSTR(".{9:")); - test_exception>("Invalid arg-id", CSTR(".{9a")); - - static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - // Note the static_assert tests whether the arg-id is valid. - // Therefore the following should be true arg-id < __format::__number_max. - test({.precision = 2'147'483'646, .precision_as_arg = true}, 13, - CSTR(".{2147483646}}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR(".{2147483648}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR(".{5000000000}")); - test_exception>( - "The numeric value of the format-spec is too large", - CSTR(".{10000000000}")); - - // *** Width & Precision *** - test({.width = 1, - .width_as_arg = false, - .precision = 0, - .precision_as_arg = false}, - 3, CSTR("1.0}")); - test({.width = 0, - .width_as_arg = true, - .precision = 1, - .precision_as_arg = true}, - 5, CSTR("{}.{}}")); - test({.width = 10, - .width_as_arg = true, - .precision = 9, - .precision_as_arg = true}, - 8, CSTR("{10}.{9}}")); - - // *** Locale-specific form *** - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR("L")); - - // *** Type *** - - { - const char* unsuported_type = - "The format-spec type has a type not supported for a string argument"; - const char* not_a_type = - "The format-spec should consume the input or end with a '}'"; - - test_exception>(unsuported_type, CSTR("A}")); - test_exception>(unsuported_type, CSTR("B}")); - test_exception>(not_a_type, CSTR("C}")); - test_exception>(not_a_type, CSTR("D}")); - test_exception>(unsuported_type, CSTR("E}")); - test_exception>(unsuported_type, CSTR("F}")); - test_exception>(unsuported_type, CSTR("G}")); - test_exception>(not_a_type, CSTR("H}")); - test_exception>(not_a_type, CSTR("I}")); - test_exception>(not_a_type, CSTR("J}")); - test_exception>(not_a_type, CSTR("K}")); - 
test_exception>(not_a_type, CSTR("L}")); - test_exception>(not_a_type, CSTR("M}")); - test_exception>(not_a_type, CSTR("N}")); - test_exception>(not_a_type, CSTR("O}")); - test_exception>(not_a_type, CSTR("P}")); - test_exception>(not_a_type, CSTR("Q}")); - test_exception>(not_a_type, CSTR("R}")); - test_exception>(not_a_type, CSTR("S}")); - test_exception>(not_a_type, CSTR("T}")); - test_exception>(not_a_type, CSTR("U}")); - test_exception>(not_a_type, CSTR("V}")); - test_exception>(not_a_type, CSTR("W}")); - test_exception>(unsuported_type, CSTR("X}")); - test_exception>(not_a_type, CSTR("Y}")); - test_exception>(not_a_type, CSTR("Z}")); - - test_exception>(unsuported_type, CSTR("a}")); - test_exception>(unsuported_type, CSTR("b}")); - test_exception>(unsuported_type, CSTR("c}")); - test_exception>(unsuported_type, CSTR("d}")); - test_exception>(unsuported_type, CSTR("e}")); - test_exception>(unsuported_type, CSTR("f}")); - test_exception>(unsuported_type, CSTR("g}")); - test_exception>(not_a_type, CSTR("h}")); - test_exception>(not_a_type, CSTR("i}")); - test_exception>(not_a_type, CSTR("j}")); - test_exception>(not_a_type, CSTR("k}")); - test_exception>(not_a_type, CSTR("l}")); - test_exception>(not_a_type, CSTR("m}")); - test_exception>(not_a_type, CSTR("n}")); - test_exception>(unsuported_type, CSTR("o}")); - test_exception>(unsuported_type, CSTR("p}")); - test_exception>(not_a_type, CSTR("q}")); - test_exception>(not_a_type, CSTR("r}")); - test({.type = _Flags::_Type::__string}, 1, CSTR("s}")); - test_exception>(not_a_type, CSTR("t}")); - test_exception>(not_a_type, CSTR("u}")); - test_exception>(not_a_type, CSTR("v}")); - test_exception>(not_a_type, CSTR("w}")); - test_exception>(unsuported_type, CSTR("x}")); - test_exception>(not_a_type, CSTR("y}")); - test_exception>(not_a_type, CSTR("z}")); - } - // **** General *** - test_exception>( - "The format-spec should consume the input or end with a '}'", CSTR("ss")); -} - -constexpr bool test() { - test(); 
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS - test(); -#endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T - test(); -#endif -#ifndef TEST_HAS_NO_UNICODE_CHARS - test(); - test(); -#endif - - return true; -} - -int main(int, char**) { -#ifndef _WIN32 - // Make sure the parsers match the expectations. The layout of the - // subobjects is chosen to minimize the size required. - LIBCPP_STATIC_ASSERT(sizeof(Parser) == 3 * sizeof(uint32_t)); -#ifndef TEST_HAS_NO_WIDE_CHARACTERS - LIBCPP_STATIC_ASSERT( - sizeof(Parser) == - (sizeof(wchar_t) <= 2 ? 3 * sizeof(uint32_t) : 4 * sizeof(uint32_t))); -#endif -#endif // _WIN32 - - test(); - static_assert(test()); - - return 0; -}