Diff 503481

libcxx/include/__format/formatter_output.h

	// -- C++ --			// -- C++ --
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef _LIBCPP___FORMAT_FORMATTER_OUTPUT_H			#ifndef _LIBCPP___FORMAT_FORMATTER_OUTPUT_H
	#define _LIBCPP___FORMAT_FORMATTER_OUTPUT_H			#define _LIBCPP___FORMAT_FORMATTER_OUTPUT_H

	#include <__algorithm/ranges_copy.h>			#include <__algorithm/ranges_copy.h>
	#include <__algorithm/ranges_fill_n.h>			#include <__algorithm/ranges_fill_n.h>
				#include <__algorithm/ranges_for_each.h>
	#include <__algorithm/ranges_transform.h>			#include <__algorithm/ranges_transform.h>
	#include <__chrono/statically_widen.h>			#include <__chrono/statically_widen.h>
	#include <__concepts/same_as.h>			#include <__concepts/same_as.h>
	#include <__config>			#include <__config>
	#include <__format/buffer.h>			#include <__format/buffer.h>
	#include <__format/concepts.h>			#include <__format/concepts.h>
	#include <__format/escaped_output_table.h>			#include <__format/escaped_output_table.h>
	#include <__format/formatter.h>			#include <__format/formatter.h>
	▲ Show 20 Lines • Show All 475 Lines • ▼ Show 20 Lines
	}			}

	template <class _CharT>			template <class _CharT>
	_LIBCPP_HIDE_FROM_ABI void			_LIBCPP_HIDE_FROM_ABI void
	__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {			__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
	__unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};			__unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};

	while (!__view.__at_end()) {			while (!__view.__at_end()) {
	auto __first = __view.__position();			auto __first = __view.__position();
	typename __unicode::__consume_p2286_result __result = __view.__consume_p2286();			typename __unicode::__consume_result __result = __view.__consume();
	if (__result.__ill_formed_size == 0) {			if (__result.__status == __unicode::__consume_result::__ok) {
	if (!__formatter::__is_escaped_sequence_written(__str, __result.__value, __mark))			if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
	// 2.2.1.3 - Add the character			// 2.2.1.3 - Add the character
	ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));			ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));

	} else {			} else {
	// 2.2.3 sequence of ill-formed code units			// 2.2.3 sequence of ill-formed code units
	// The number of code-units in __result.__value depends on the character type being used.			ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
	if constexpr (sizeof(_CharT) == 1) {			__formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
	_LIBCPP_ASSERT(__result.__ill_formed_size == 1 \|\| __result.__ill_formed_size == 4,			});
	"illegal number of invalid code units.");
	if (__result.__ill_formed_size == 1) // ill-formed, one code unit
	__formatter::__write_escape_ill_formed_code_unit(__str, __result.__value & 0xff);
	else { // out of valid range, four code units
	// The code point was properly encoded, decode the value.
	__formatter::__write_escape_ill_formed_code_unit(__str, __result.__value >> 18 \| 0xf0);
	__formatter::__write_escape_ill_formed_code_unit(__str, (__result.__value >> 12 & 0x3f) \| 0x80);
	__formatter::__write_escape_ill_formed_code_unit(__str, (__result.__value >> 6 & 0x3f) \| 0x80);
	__formatter::__write_escape_ill_formed_code_unit(__str, (__result.__value & 0x3f) \| 0x80);
	}
	} else if constexpr (sizeof(_CharT) == 2) {
	_LIBCPP_ASSERT(__result.__ill_formed_size == 1, "for UTF-16 at most one invalid code unit");
	__formatter::__write_escape_ill_formed_code_unit(__str, __result.__value & 0xffff);
	} else {
	static_assert(sizeof(_CharT) == 4, "unsupported character width");
	_LIBCPP_ASSERT(__result.__ill_formed_size == 1, "for UTF-32 one code unit is one code point");
	__formatter::__write_escape_ill_formed_code_unit(__str, __result.__value);
	}
	}			}
	}			}
	}			}

	template <class _CharT>			template <class _CharT>
	_LIBCPP_HIDE_FROM_ABI auto			_LIBCPP_HIDE_FROM_ABI auto
	__format_escaped_char(_CharT __value,			__format_escaped_char(_CharT __value,
	output_iterator<const _CharT&> auto __out_it,			output_iterator<const _CharT&> auto __out_it,
	Show All 29 Lines

libcxx/include/__format/unicode.h

Show All 25 Lines

#endif #endif

_LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD

#if _LIBCPP_STD_VER >= 20 #if _LIBCPP_STD_VER >= 20

namespace __unicode { namespace __unicode {

# if _LIBCPP_STD_VER >= 23 // Helper struct for the result of a consume operation.

/// The result of consuming a code point using P2286' semantics // The status value for a correct code point is 0. This allows a valid value to

/// // be used without masking.

/// TODO FMT Combine __consume and __consume_p2286 in one function. // When the decoding fails it knows the number of code units affected. For the

struct __consume_p2286_result { // current use-cases that value is not needed, therefore it is not stored.

// A size of 0 means well formed. This to differenciate between // The escape routine needs the number of code units for both a valid and

// a valid code point and a code unit that's invalid like 0b11111xxx. // invalid character and keeps track of it itself. Doing it in this result

int __ill_formed_size; // unconditionally would give some overhead when the value is unneeded.

struct __consume_result {

// If well formed the consumed code point. // When __status == __ok it contains the decoded code point.

// Otherwise the ill-formed code units as unsigned 8-bit values. They are // Else it contains the replacement character U+FFFD

// stored in reverse order, to make it easier to extract the values. char32_t __code_point : 31;

char32_t __value;

enum : char32_t {

// Consumed a well-formed code point.

__ok = 0,

// Encountered invalid UTF-8

__error = 1

} __status : 1 {__ok};

}; };

static_assert(sizeof(__consume_result) == sizeof(char32_t));

# endif // _LIBCPP_STD_VER >= 23

# ifndef _LIBCPP_HAS_NO_UNICODE # ifndef _LIBCPP_HAS_NO_UNICODE

/// Implements the grapheme cluster boundary rules /// Implements the grapheme cluster boundary rules

/// ///

/// These rules are used to implement format's width estimation as stated in /// These rules are used to implement format's width estimation as stated in

/// [format.string.std]/11 /// [format.string.std]/11

/// ///

/// The Standard refers to UAX \#29 for Unicode 12.0.0 /// The Standard refers to UAX \#29 for Unicode 12.0.0

/// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules /// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules

/// ///

/// The data tables used are /// The data tables used are

/// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt /// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt

/// https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt /// https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt

/// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt (for testing only) /// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt (for testing only)

inline constexpr char32_t __replacement_character = U'\ufffd'; inline constexpr char32_t __replacement_character = U'\ufffd';

// The error of a consume operation.

// This sets the code point to the replacement character. This code point does

// not participate in the grapheme clustering, so grapheme clustering code can

// ignore the error status and always use the code point.

inline constexpr __consume_result __consume_result_error{__replacement_character, __consume_result::__error};

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __is_high_surrogate(char32_t __value) {

return __value >= 0xd800 && __value <= 0xdbff;

}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __is_low_surrogate(char32_t __value) {

return __value >= 0xdc00 && __value <= 0xdfff;

}

// https://www.unicode.org/glossary/#surrogate_code_point

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_surrogate(char32_t __value) {

return __value >= 0xd800 && __value <= 0xdfff;

}

// https://www.unicode.org/glossary/#code_point

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_code_point(char32_t __value) {

return __value <= 0x10ffff;

}

// https://www.unicode.org/glossary/#unicode_scalar_value

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_scalar_value(char32_t __value) {

return __unicode::__is_code_point(__value) && !__unicode::__is_surrogate(__value);

}

template <contiguous_iterator _Iterator> template <contiguous_iterator _Iterator>

requires same_as<iter_value_t<_Iterator>, char> requires same_as<iter_value_t<_Iterator>, char>

_LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(_Iterator __char, int __count) { _LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(_Iterator __char, int __count) {

do { do {

if ((*__char & 0b1000'0000) != 0b1000'0000) if ((*__char & 0b1000'0000) != 0b1000'0000)

return false; return false;

--__count; --__count;

++__char; ++__char;

Show All 15 Lines

public: public:

_LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last) _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)

: __first_(__first), __last_(__last) {} : __first_(__first), __last_(__last) {}

_LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }

_LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; } _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }

_LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { // https://www.unicode.org/versions/latest/ch03.pdf#G7404

// Based on Table 3-7, Well-Formed UTF-8 Byte Sequences

// Code Points First Byte Second Byte Third Byte Fourth Byte Remarks

// U+0000..U+007F 00..7F U+0000..U+007F 1 code unit range

// C0..C1 80..BF invalid overlong encoding

// U+0080..U+07FF C2..DF 80..BF U+0080..U+07FF 2 code unit range

// E0 80..9F 80..BF invalid overlong encoding

// U+0800..U+0FFF E0 A0..BF 80..BF U+0800..U+FFFF 3 code unit range

// U+1000..U+CFFF E1..EC 80..BF 80..BF

// U+D000..U+D7FF ED 80..9F 80..BF

// U+D800..U+DFFF ED A0..BF 80..BF invalid encoding of surrogate code point

// U+E000..U+FFFF EE..EF 80..BF 80..BF

tahonermannUnsubmitted

Done

// Code Points First Byte Second Byte Third Byte Fourth Byte Remarks

- // U+0000..U+007F 00..7F

- // U+0080..U+07FF *C2*..DF 80..BF U+0000..U0+007F 1 code unit range*

- // U+0800..U+0FFF E0 *A0*..BF 80..BF U+0000..U+07FFF 1 and 2 code unit range

+ // U+0000..U+007F 00..7F U+0000..U+007F 1 code unit range*

+ // U+0080..U+07FF *C2*..DF 80..BF U+0080..U+07FF 2 code unit range

+ // U+0800..U+0FFF E0 *A0*..BF 80..BF U+0800..U+FFFF 3 code unit range

// U+1000..U+CFFF E1..EC 80..BF 80..BF

- // U+D000..U+D7FF ED 80..*9F* 80..BF U+D800..D+DFFFF surrogate range

+ // U+D000..U+D7FF ED 80..*9F* 80..BF U+D800..D+DFFF surrogate range

// U+E000..U+FFFF EE..EF 80..BF 80..BF

- // U+10000..U+3FFFF F0 *90*..BF 80..BF 80..BF U+0000..U+FFFF 1, 2, and 3 code unit range

+ // U+10000..U+3FFFF F0 *90*..BF 80..BF 80..BF U+10000..U+10FFFF 4 code unit range

// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF

- // U+100000..U+10FFFF F4 80..*8F* 80..BF 80..BF U+10FFFF..U+1FFFFF invalid range

+ // U+100000..U+10FFFF F4 80..*8F* 80..BF 80..BF U+110000.. invalid code point range

// *Marked* entries are not the full range 80..BF.

I corrected several of the U+XXXX identifiers in the suggested edit. I also aligned the remarks with the rows that I think they better correspond to.

tahonermann: I corrected several of the `U+XXXX` identifiers in the suggested edit. I also aligned the…

tahonermannUnsubmitted

Done

// Code Points First Byte Second Byte Third Byte Fourth Byte Remarks

// U+0000..U+007F 00..7F U+0000..U+007F 1 code unit range

- // U+0080..U+07FF *C2*..DF 80..BF U+0080..U+07FF 2 code unit range *

- // U+0800..U+0FFF E0 *A0*..BF 80..BF U+0800..U+FFFF 3 code unit range

+ // C0..C1 80..BF invalid overlong encoding

+ // U+0080..U+07FF C2..DF 80..BF U+0080..U+07FF 2 code unit range

+ // E0 80..9F 80..BF invalid overlong encoding

+ // U+0800..U+0FFF E0 A0..BF 80..BF U+0800..U+FFFF 3 code unit range

// U+1000..U+CFFF E1..EC 80..BF 80..BF

- // U+D000..U+D7FF ED 80..*9F* 80..BF U+D800..U+DFFF surrogate range

+ // U+D000..U+D7FF ED 80..9F 80..BF

+ // U+D800..U+DFFF ED A0..BF 80..BF invalid encoding of surrogate code point

// U+E000..U+FFFF EE..EF 80..BF 80..BF

- // U+10000..U+3FFFF F0 *90*..BF 80..BF 80..BF U+10000..U+10FFFF 4 code unit range

+ // F0 80..8F 80..BF 80..BF invalid overlong encoding

+ // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF U+10000..U+10FFFF 4 code unit range

// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF

- // U+100000..U+10FFFF F4 80..*8F* 80..BF 80..BF U+110000.. invalid code point range

+ // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF

+ // F4 90..BF 80..BF 80..BF U+110000.. invalid code point range

// *Marked* entries are not the full range 80..BF.

Here is another presentation option that avoids the need for those footnotes. If you like this better, great. If not, no problem. The current presentation has the benefit of matching the bold highlighting in the table from the Unicode Standard, but I think the suggested presentation better explains the reason those invalid ranges exist.

tahonermann: Here is another presentation option that avoids the need for those footnotes. If you like this…

MordanteAuthorUnsubmitted

Done

Actually I like this a lot, thanks! This matches the code more closely; it does
not validate all ranges, but it rejects the "invalid overlong encoding" entries.

I made a few more changes in the surrounding comments, since they looked odd with the new table.

Mordante: Actually I like this a lot, thanks! This matches the code closer; it does not validate all…

// F0 80..8F 80..BF 80..BF invalid overlong encoding

// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF U+10000..U+10FFFF 4 code unit range

// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF

tahonermannUnsubmitted

Done

I don't understand this footnote. The full range of code points that are encodable in a single code unit is U+0000..U+007F.

tahonermann: I don't understand this footnote. The full range of code points that are encodeable in a single…

MordanteAuthorUnsubmitted

Done

It seems the * is placed on the wrong line; it should have been at *C2*..DF 80..BF.
Based on the encoding scheme that requires the first code unit to start with 110xxxxx, this allows values starting from 0xC0. This value is not marked in the Unicode Standard, but I think it's good to point out, especially since this decoder doesn't use a nested if statement. Instead it decodes the value and tests whether it's in the valid range. This reduces the number of comparisons. IMO this makes the code easier to read.

Mordante: It seems the * is placed on the wrong line, it should have been at `*C2*..DF 80..BF`. Based…

// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF

// F4 90..BF 80..BF 80..BF U+110000.. invalid code point range

// Unlike other parsers, these invalid entries are tested after decoding.

// - The parser always needs to consume these code units

// - The code is optimized for well-formed UTF-8

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");

// Based on the number of leading 1 bits the number of code units in the // Based on the number of leading 1 bits the number of code units in the

// code point can be determined. See // code point can be determined. See

// https://en.wikipedia.org/wiki/UTF-8#Encoding // https://en.wikipedia.org/wiki/UTF-8#Encoding

switch (_VSTD::countl_one(static_cast<unsigned char>(*__first_))) { switch (std::countl_one(static_cast<unsigned char>(*__first_))) {

case 0: case 0:

return *__first_++; return {static_cast<unsigned char>(*__first_++)};

case 2: case 2: {

if (__last_ - __first_ < 2 || !__unicode::__is_continuation(__first_ + 1, 1)) [[unlikely]] if (__last_ - __first_ < 2 || !__unicode::__is_continuation(__first_ + 1, 1)) [[unlikely]]

break; break;

else {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f; char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

return __value;

// These values should be encoded in 1 UTF-8 code unit.

if (__value < 0x0080) [[unlikely]]

return __consume_result_error;

return {__value};

} }

case 3: case 3: {

if (__last_ - __first_ < 3 || !__unicode::__is_continuation(__first_ + 1, 2)) [[unlikely]] if (__last_ - __first_ < 3 || !__unicode::__is_continuation(__first_ + 1, 2)) [[unlikely]]

break; break;

else {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f; char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

return __value;

}

case 4: // These values should be encoded in 1 or 2 UTF-8 code units.

if (__last_ - __first_ < 4 || !__unicode::__is_continuation(__first_ + 1, 3)) [[unlikely]] if (__value < 0x0800) [[unlikely]]

break; return __consume_result_error;

else {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

return __value;

}

// An invalid number of leading ones can be garbage or a code unit in the

// middle of a code point. By consuming one code unit the parser may get

// "in sync" after a few code units.

++__first_;

return __replacement_character;

}

# if _LIBCPP_STD_VER >= 23 // A surrogate value is always encoded in 3 UTF-8 code units.

_LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept { if (__unicode::__is_surrogate(__value)) [[unlikely]]

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); return __consume_result_error;

// Based on the number of leading 1 bits the number of code units in the return {__value};

// code point can be determined. See

// https://en.wikipedia.org/wiki/UTF-8#Encoding

switch (std::countl_one(static_cast<unsigned char>(*__first_))) {

case 0:

return {0, static_cast<unsigned char>(*__first_++)};

case 2:

if (__last_ - __first_ < 2) [[unlikely]]

break;

if (__unicode::__is_continuation(__first_ + 1, 1)) {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

return {0, __value};

} }

break;

case 3: case 4: {

if (__last_ - __first_ < 3) [[unlikely]] if (__last_ - __first_ < 4 || !__unicode::__is_continuation(__first_ + 1, 3)) [[unlikely]]

break;

if (__unicode::__is_continuation(__first_ + 1, 2)) {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f;

return {0, __value};

}

break;

case 4:

if (__last_ - __first_ < 4) [[unlikely]]

break; break;

if (__unicode::__is_continuation(__first_ + 1, 3)) {

char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07; char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

__value <<= 6; __value <<= 6;

__value |= static_cast<unsigned char>(*__first_++) & 0x3f; __value |= static_cast<unsigned char>(*__first_++) & 0x3f;

if (__value > 0x10FFFF) // Outside the valid Unicode range? // These values should be encoded in 1, 2, or 3 UTF-8 code units.

return {4, __value}; if (__value < 0x10000) [[unlikely]]

return __consume_result_error;

// A value too large is always encoded in 4 UTF-8 code units.

if (!__unicode::__is_code_point(__value)) [[unlikely]]

return __consume_result_error;

return {0, __value}; return {__value};

} }

break;

} }

// An invalid number of leading ones can be garbage or a code unit in the // An invalid number of leading ones can be garbage or a code unit in the

// middle of a code point. By consuming one code unit the parser may get // middle of a code point. By consuming one code unit the parser may get

// "in sync" after a few code units. // "in sync" after a few code units.

return {1, static_cast<unsigned char>(*__first_++)}; ++__first_;

return __consume_result_error;

} }

# endif // _LIBCPP_STD_VER >= 23

private: private:

_Iterator __first_; _Iterator __first_;

_Iterator __last_; _Iterator __last_;

}; };

# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS

_LIBCPP_HIDE_FROM_ABI constexpr bool __is_surrogate_pair_high(wchar_t __value) { _LIBCPP_HIDE_FROM_ABI constexpr bool __is_surrogate_pair_high(wchar_t __value) {

Show All 15 Lines public:

static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "sizeof(wchar_t) has a not implemented value"); static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "sizeof(wchar_t) has a not implemented value");

_LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last) _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)

: __first_(__first), __last_(__last) {} : __first_(__first), __last_(__last) {}

_LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; } _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }

_LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }

_LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");

char32_t __value = static_cast<char32_t>(*__first_++);

tahonermannUnsubmitted

Done

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");

- char32_t __value = *__first_++;

+ char32_t __value = static_cast<char32_t>(*__first_++);

if constexpr (sizeof(wchar_t) == 2) {

It is implementation-defined whether wchar_t is a signed type, so I think a cast to an unsigned type is needed here and in the other cases below where __first_ is dereferenced.

This would be a good use case for a std::as_unsigned() or std::to_unsigned() function.

tahonermann: It is implementation-defined whether `wchar_t` is a signed type so I think a cast to an…

MordanteAuthorUnsubmitted

Done

I think it's not needed here, but on line 277 it indeed might be a good idea. For consistency I did it in all places. I think a static_cast is fine; I did the same for char.

Mordante: I think it's not needed here, but on line 277 it indeed might be a good idea. For consistency I…

if constexpr (sizeof(wchar_t) == 2) { if constexpr (sizeof(wchar_t) == 2) {

char32_t __result = *__first_++; if (__unicode::__is_low_surrogate(__value)) [[unlikely]]

// Is the code unit part of a surrogate pair? See return __consume_result_error;

// https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF

if (__result >= 0xd800 && __result <= 0xDfff) {

// Malformed Unicode.

if (__first_ == __last_) [[unlikely]]

return __replacement_character;

__result -= 0xd800;

__result <<= 10;

__result += *__first_++ - 0xdc00;

__result += 0x10000;

}

return __result;

} else if constexpr (sizeof(wchar_t) == 4) {

char32_t __result = *__first_++;

if (__result > 0x10FFFF) [[unlikely]]

return __replacement_character;

return __result;

} else {

__libcpp_unreachable();

}

# if _LIBCPP_STD_VER >= 23 if (__unicode::__is_high_surrogate(__value)) {

_LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept { if (__first_ == __last_ || !__unicode::__is_low_surrogate(static_cast<char32_t>(*__first_))) [[unlikely]]

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); return __consume_result_error;

__value -= 0xd800;

__value <<= 10;

__value += static_cast<char32_t>(*__first_++) - 0xdc00;

__value += 0x10000;

char32_t __result = *__first_++; if (!__unicode::__is_code_point(__value)) [[unlikely]]

if constexpr (sizeof(wchar_t) == 2) { return __consume_result_error;

// https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF }

if (__is_surrogate_pair_high(__result)) {

// Malformed Unicode.

if (__first_ == __last_ || !__is_surrogate_pair_low(*(__first_ + 1))) [[unlikely]]

return {1, __result};

__result -= 0xd800;

__result <<= 10;

__result += *__first_++ - 0xdc00;

__result += 0x10000;

} else if (__is_surrogate_pair_low(__result))

// A code point shouldn't start with the low surrogate pair

return {1, __result};

} else { } else {

if (__result > 0x10FFFF) [[unlikely]] if (!__unicode::__is_scalar_value(__value)) [[unlikely]]

return {1, __result}; return __consume_result_error;

} }

return {0, __result}; return {__value};

} }

# endif // _LIBCPP_STD_VER >= 23

private: private:

_Iterator __first_; _Iterator __first_;

_Iterator __last_; _Iterator __last_;

}; };

# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS

_LIBCPP_HIDE_FROM_ABI constexpr bool __at_extended_grapheme_cluster_break( _LIBCPP_HIDE_FROM_ABI constexpr bool __at_extended_grapheme_cluster_break(

▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines

/// Therefore only this code point is extracted. /// Therefore only this code point is extracted.

template <class _CharT> template <class _CharT>

class __extended_grapheme_cluster_view { class __extended_grapheme_cluster_view {

using _Iterator = typename basic_string_view<_CharT>::const_iterator; using _Iterator = typename basic_string_view<_CharT>::const_iterator;

public: public:

_LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(_Iterator __first, _Iterator __last) _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(_Iterator __first, _Iterator __last)

: __code_point_view_(__first, __last), : __code_point_view_(__first, __last),

__next_code_point_(__code_point_view_.__consume()), __next_code_point_(__code_point_view_.__consume().__code_point),

__next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {} __next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {}

struct __cluster { struct __cluster {

/// The first code point of the extended grapheme cluster. /// The first code point of the extended grapheme cluster.

/// ///

/// The first code point is used to estimate the width of the extended /// The first code point is used to estimate the width of the extended

/// grapheme cluster. /// grapheme cluster.

char32_t __code_point_; char32_t __code_point_;

/// Points one beyond the last code unit in the extended grapheme cluster. /// Points one beyond the last code unit in the extended grapheme cluster.

/// ///

/// It's expected the caller has the start position and thus can determine /// It's expected the caller has the start position and thus can determine

/// the code unit range of the extended grapheme cluster. /// the code unit range of the extended grapheme cluster.

_Iterator __last_; _Iterator __last_;

}; };

_LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() { _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() {

_LIBCPP_ASSERT( _LIBCPP_ASSERT(

__next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot, __next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot,

"can't move beyond the end of input"); "can't move beyond the end of input");

char32_t __code_point = __next_code_point_; char32_t __code_point = __next_code_point_;

if (!__code_point_view_.__at_end()) if (!__code_point_view_.__at_end())

return {__code_point, __get_break()}; return {__code_point, __get_break()};

__next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot;

return {__code_point, __code_point_view_.__position()}; return {__code_point, __code_point_view_.__position()};

} }

private: private:

__code_point_view<_CharT> __code_point_view_; __code_point_view<_CharT> __code_point_view_;

char32_t __next_code_point_; char32_t __next_code_point_;

__extended_grapheme_custer_property_boundary::__property __next_prop_; __extended_grapheme_custer_property_boundary::__property __next_prop_;

_LIBCPP_HIDE_FROM_ABI constexpr _Iterator __get_break() { _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __get_break() {

bool __ri_break_allowed = true; bool __ri_break_allowed = true;

bool __has_extened_pictographic = false; bool __has_extened_pictographic = false;

while (true) { while (true) {

_Iterator __result = __code_point_view_.__position(); _Iterator __result = __code_point_view_.__position();

__extended_grapheme_custer_property_boundary::__property __prev = __next_prop_; __extended_grapheme_custer_property_boundary::__property __prev = __next_prop_;

if (__code_point_view_.__at_end()) { if (__code_point_view_.__at_end()) {

__next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot;

return __result; return __result;

} }

__next_code_point_ = __code_point_view_.__consume(); __next_code_point_ = __code_point_view_.__consume().__code_point;

__next_prop_ = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_); __next_prop_ = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_);

__has_extened_pictographic |= __has_extened_pictographic |=

__prev == __extended_grapheme_custer_property_boundary::__property::__Extended_Pictographic; __prev == __extended_grapheme_custer_property_boundary::__property::__Extended_Pictographic;

if (__at_extended_grapheme_cluster_break(__ri_break_allowed, __has_extened_pictographic, __prev, __next_prop_)) if (__at_extended_grapheme_cluster_break(__ri_break_allowed, __has_extened_pictographic, __prev, __next_prop_))

return __result; return __result;

} }

Show All 13 Lines

public: public:

/// Constructs a view over the range [\a __first, \a __last); both iterators
/// are stored as-is in \c __first_ and \c __last_.
_LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last) _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)

: __first_(__first), __last_(__last) {} : __first_(__first), __last_(__last) {}

/// \returns Whether the current position equals the end of the range, i.e.
/// there is nothing left to consume.
_LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }

/// \returns The current position in the range: the iterator to the next
/// element that \c __consume() would read.
_LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; } _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }

_LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");

return *__first_++; return {static_cast<char32_t>(*__first_++)};

}

# if _LIBCPP_STD_VER >= 23

_LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept {

_LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");

return {0, std::make_unsigned_t<_CharT>(*__first_++)};

} }

# endif // _LIBCPP_STD_VER >= 23

private: private:

_Iterator __first_; _Iterator __first_;

_Iterator __last_; _Iterator __last_;

}; };

# endif // _LIBCPP_HAS_NO_UNICODE # endif // _LIBCPP_HAS_NO_UNICODE

} // namespace __unicode } // namespace __unicode

#endif //_LIBCPP_STD_VER >= 20 #endif //_LIBCPP_STD_VER >= 20

_LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___FORMAT_UNICODE_H #endif // _LIBCPP___FORMAT_UNICODE_H

libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp

Show First 20 Lines • Show All 196 Lines • ▼ Show 20 Lines	if constexpr (sizeof(CharT) > 1) {
// Paragraph_Separator		// Paragraph_Separator
test_format(V{L"'\\u{2029}'"}, L"{:?}", L'\x2029'); // PARAGRAPH SEPARATOR		test_format(V{L"'\\u{2029}'"}, L"{:?}", L'\x2029'); // PARAGRAPH SEPARATOR

// Format		// Format
test_format(V{L"'\\u{ad}'"}, L"{:?}", L'\xad'); // SOFT HYPHEN		test_format(V{L"'\\u{ad}'"}, L"{:?}", L'\xad'); // SOFT HYPHEN
test_format(V{L"'\\u{600}'"}, L"{:?}", L'\x600'); // ARABIC NUMBER SIGN		test_format(V{L"'\\u{600}'"}, L"{:?}", L'\x600'); // ARABIC NUMBER SIGN
test_format(V{L"'\\u{feff}'"}, L"{:?}", L'\xfeff'); // ZERO WIDTH NO-BREAK SPACE		test_format(V{L"'\\u{feff}'"}, L"{:?}", L'\xfeff'); // ZERO WIDTH NO-BREAK SPACE

if constexpr (sizeof(CharT) == 2) {
// Incomplete surrogate pair in UTF-16		// Incomplete surrogate pair in UTF-16
test_format(V{L"'\\x{d800}'"}, L"{:?}", L'\xd800'); // <surrogate-D800>		test_format(V{L"'\\x{d800}'"}, L"{:?}", L'\xd800'); // <surrogate-D800>
test_format(V{L"'\\x{dfff}'"}, L"{:?}", L'\xdfff'); // <surrogate-DFFF>		test_format(V{L"'\\x{dfff}'"}, L"{:?}", L'\xdfff'); // <surrogate-DFFF>
} else {
test_format(V{L"'\\u{d800}'"}, L"{:?}", L'\xd800'); // <surrogate-D800>
test_format(V{L"'\\u{dfff}'"}, L"{:?}", L'\xdfff'); // <surrogate-DFFF>
}

// Private_Use		// Private_Use
test_format(V{L"'\\u{e000}'"}, L"{:?}", L'\xe000'); // <private-use-E000>		test_format(V{L"'\\u{e000}'"}, L"{:?}", L'\xe000'); // <private-use-E000>
test_format(V{L"'\\u{f8ff}'"}, L"{:?}", L'\xf8ff'); // <private-use-F8FF>		test_format(V{L"'\\u{f8ff}'"}, L"{:?}", L'\xf8ff'); // <private-use-F8FF>

// Unassigned		// Unassigned
test_format(V{L"'\\u{378}'"}, L"{:?}", L'\x378'); // <reserved-0378>		test_format(V{L"'\\u{378}'"}, L"{:?}", L'\x378'); // <reserved-0378>
test_format(V{L"'\\u{1774}'"}, L"{:?}", L'\x1774'); // <reserved-1774>		test_format(V{L"'\\u{1774}'"}, L"{:?}", L'\x1774'); // <reserved-1774>
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	void test_string() {
test_format(SV(R"(["Спасибо, Виктор ♥!"])"), SV("[{:?}]"), SV("Спасибо, Виктор ♥!"));		test_format(SV(R"(["Спасибо, Виктор ♥!"])"), SV("[{:?}]"), SV("Спасибо, Виктор ♥!"));

test_format(SV(R"(["\u{0} \n \t \u{2} \u{1b}"])"), SV("[{:?}]"), SV("\0 \n \t \x02 \x1b"));		test_format(SV(R"(["\u{0} \n \t \u{2} \u{1b}"])"), SV("[{:?}]"), SV("\0 \n \t \x02 \x1b"));

if constexpr (sizeof(CharT) == 1) {		if constexpr (sizeof(CharT) == 1) {
// Ill-formed UTF-8		// Ill-formed UTF-8
test_format(SV(R"(["\x{c3}"])"), SV("[{:?}]"), "\xc3");		test_format(SV(R"(["\x{c3}"])"), SV("[{:?}]"), "\xc3");
test_format(SV(R"(["\x{c3}("])"), SV("[{:?}]"), "\xc3\x28");		test_format(SV(R"(["\x{c3}("])"), SV("[{:?}]"), "\xc3\x28");

		/* U+0000..U+007F 1 code unit range, encoded in 2 code units. */
		test_format(SV(R"(["\x{c0}\x{80}"])"), SV("[{:?}]"), "\xc0\x80"); // U+0000
		test_format(SV(R"(["\x{c1}\x{bf}"])"), SV("[{:?}]"), "\xc1\xbf"); // U+007F
		test_format(SV(R"(["\u{80}"])"), SV("[{:?}]"), "\xc2\x80"); // U+0080 first valid (General_Category=Control)

		/* U+0000..U+07FF 1 and 2 code unit range, encoded in 3 code units. */
		test_format(SV(R"(["\x{e0}\x{80}\x{80}"])"), SV("[{:?}]"), "\xe0\x80\x80"); // U+0000
		test_format(SV(R"(["\x{e0}\x{81}\x{bf}"])"), SV("[{:?}]"), "\xe0\x81\xbf"); // U+007F
		test_format(SV(R"(["\x{e0}\x{82}\x{80}"])"), SV("[{:?}]"), "\xe0\x82\x80"); // U+0080
		test_format(SV(R"(["\x{e0}\x{9f}\x{bf}"])"), SV("[{:?}]"), "\xe0\x9f\xbf"); // U+07FF
		test_format(SV("[\"\u0800\"]"), SV("[{:?}]"), "\xe0\xa0\x80"); // U+0800 first valid

		#if 0
		// This code point is in the Hangul Jamo Extended-B block and at the time of writing
		// it's unassigned. When it becomes assigned, this branch might become true.
		test_format(SV("[\"\ud7ff\"]"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid
		#else
		/* U+D800..U+DFFF surrogate range */
		test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid
		#endif
		test_format(SV(R"(["\x{ed}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xed\xa0\x80"); // U+D800
		test_format(SV(R"(["\x{ed}\x{af}\x{bf}"])"), SV("[{:?}]"), "\xed\xaf\xbf"); // U+DBFF
		test_format(SV(R"(["\x{ed}\x{bf}\x{80}"])"), SV("[{:?}]"), "\xed\xbf\x80"); // U+DC00
		test_format(SV(R"(["\x{ed}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xed\xbf\xbf"); // U+DFFF
		test_format(SV(R"(["\u{e000}"])"), SV("[{:?}]"), "\xee\x80\x80"); // U+E000 first valid
		// (in the Private Use Area block)

		/* U+0000..U+FFFF 1, 2, and 3 code unit range, encoded in 4 code units. */
		test_format(SV(R"(["\x{f0}\x{80}\x{80}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\x80\x80"); // U+0000
		test_format(SV(R"(["\x{f0}\x{80}\x{81}\x{bf}"])"), SV("[{:?}]"), "\xf0\x80\x81\xbf"); // U+007F
		test_format(SV(R"(["\x{f0}\x{80}\x{82}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\x82\x80"); // U+0080
		test_format(SV(R"(["\x{f0}\x{80}\x{9f}\x{bf}"])"), SV("[{:?}]"), "\xf0\x80\x9f\xbf"); // U+07FF
		test_format(SV(R"(["\x{f0}\x{80}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\xa0\x80"); // U+0800
		test_format(SV(R"(["\x{f0}\x{8f}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xf0\x8f\xbf\xbf"); // U+FFFF
		test_format(SV("[\"\U00010000\"]"), SV("[{:?}]"), "\xf0\x90\x80\x80"); // U+10000 first valid

		/* U+110000..U+1FFFFF invalid range */
		test_format(SV(R"(["\u{10ffff}"])"), SV("[{:?}]"), "\xf4\x8f\xbf\xbf"); // U+10FFFF last valid
		// (in Supplementary Private Use Area-B)
		test_format(SV(R"(["\x{f4}\x{90}\x{80}\x{80}"])"), SV("[{:?}]"), "\xf4\x90\x80\x80"); // U+110000
		test_format(SV(R"(["\x{f4}\x{bf}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xf4\xbf\xbf\xbf"); // U+11FFFF
} else {		} else {
// Valid UTF-16 and UTF-32		// Valid UTF-16 and UTF-32
test_format(SV("[\"\u00c3\"]"), SV("[{:?}]"), L"\xc3"); // LATIN CAPITAL LETTER A WITH TILDE		test_format(SV("[\"\u00c3\"]"), SV("[{:?}]"), L"\xc3"); // LATIN CAPITAL LETTER A WITH TILDE
test_format(SV("[\"\u00c3(\"]"), SV("[{:?}]"), L"\xc3\x28");		test_format(SV("[\"\u00c3(\"]"), SV("[{:?}]"), L"\xc3\x28");
}		}

test_format(SV(R"(["🤷🏻\u{200d}♂\u{fe0f}"])"), SV("[{:?}]"), SV("🤷🏻‍♂️"));		test_format(SV(R"(["🤷🏻\u{200d}♂\u{fe0f}"])"), SV("[{:?}]"), SV("🤷🏻‍♂️"));

Show All 27 Lines	if constexpr (sizeof(CharT) > 1) {
test_format(V{LR"("\u{2028}")"}, L"{:?}", L"\x2028"); // LINE SEPARATOR		test_format(V{LR"("\u{2028}")"}, L"{:?}", L"\x2028"); // LINE SEPARATOR

// Paragraph_Separator		// Paragraph_Separator
test_format(V{LR"("\u{2029}")"}, L"{:?}", L"\x2029"); // PARAGRAPH SEPARATOR		test_format(V{LR"("\u{2029}")"}, L"{:?}", L"\x2029"); // PARAGRAPH SEPARATOR

// Format		// Format
test_format(V{LR"("\u{ad}\u{600}\u{feff}")"}, L"{:?}", L"\xad\x600\xfeff");		test_format(V{LR"("\u{ad}\u{600}\u{feff}")"}, L"{:?}", L"\xad\x600\xfeff");

if constexpr (sizeof(CharT) == 2)
// Incomplete surrogate pair in UTF-16		// Incomplete surrogate pair in UTF-16
test_format(V{LR"("\x{d800}")"}, L"{:?}", L"\xd800");		test_format(V{LR"("\x{d800}")"}, L"{:?}", L"\xd800");
else
test_format(V{LR"("\u{d800}")"}, L"{:?}", L"\xd800");

// Private_Use		// Private_Use
test_format(V{LR"("\u{e000}\u{f8ff}")"}, L"{:?}", L"\xe000\xf8ff");		test_format(V{LR"("\u{e000}\u{f8ff}")"}, L"{:?}", L"\xe000\xf8ff");

// Unassigned		// Unassigned
test_format(V{LR"("\u{378}\u{1774}\u{ffff}")"}, L"{:?}", L"\x378\x1774\xffff");		test_format(V{LR"("\u{378}\u{1774}\u{ffff}")"}, L"{:?}", L"\x378\x1774\xffff");

// Grapheme Extended		// Grapheme Extended
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[libc++][format] Improves Unicode decoders.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 503481

libcxx/include/__format/formatter_output.h

libcxx/include/__format/unicode.h

libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc++][format] Improves Unicode decoders.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 503481

libcxx/include/__format/formatter_output.h

libcxx/include/__format/unicode.h

libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp

[libc++][format] Improves Unicode decoders.
ClosedPublic