diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h --- a/libcxx/include/__format/unicode.h +++ b/libcxx/include/__format/unicode.h @@ -105,7 +105,7 @@ requires same_as, char> _LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(_Iterator __char, int __count) { do { - if ((*__char & 0b1000'0000) != 0b1000'0000) + if ((*__char & 0b1100'0000) != 0b1000'0000) return false; --__count; ++__char; diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -504,6 +504,13 @@ "\xf7\xbf\xbf" "a"); + // http://unicode.org/review/pr-121.html + test_format(R"("a\x{f1}\x{80}\x{80}\x{e1}\x{80}\x{c2}b")"sv, + "{:?}", + "a" + "\xf1\x80\x80\xe1\x80\xc2" + "b"); + // Code unit out of range test_format(R"("\u{10ffff}")"sv, "{:?}", "\xf4\x8f\xbf\xbf"); // last valid code point test_format(R"("\x{f4}\x{90}\x{80}\x{80}")"sv, "{:?}", "\xf4\x90\x80\x80"); // first invalid code point diff --git a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp @@ -272,6 +272,31 @@ check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ")); check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ")); + // Invalid continuations + check(SV("\xc2\x00"), SV("{}"), SV("\xc2\x00")); // 0b0000'0000 + check(SV("\xc2\x40"), SV("{}"), SV("\xc2\x40")); // 0b0100'0000 + check(SV("\xc2\xc0"), SV("{}"), SV("\xc2\xc0")); // 0b1100'0000 + + check(SV("\xe0\x00\x80"), SV("{}"), SV("\xe0\x00\x80")); + check(SV("\xe0\x40\x80"), SV("{}"), SV("\xe0\x40\x80")); + check(SV("\xe0\xc0\x80"), SV("{}"), SV("\xe0\xc0\x80")); + + check(SV("\xe0\x80\x00"), SV("{}"), SV("\xe0\x80\x00")); + check(SV("\xe0\x80\x40"), SV("{}"), SV("\xe0\x80\x40")); + check(SV("\xe0\x80\xc0"), SV("{}"), SV("\xe0\x80\xc0")); + + check(SV("\xf0\x80\x80\x00"), SV("{}"), SV("\xf0\x80\x80\x00")); + check(SV("\xf0\x80\x80\x40"), SV("{}"), SV("\xf0\x80\x80\x40")); + check(SV("\xf0\x80\x80\xc0"), SV("{}"), SV("\xf0\x80\x80\xc0")); + + check(SV("\xf0\x80\x00\x80"), SV("{}"), SV("\xf0\x80\x00\x80")); + check(SV("\xf0\x80\x40\x80"), SV("{}"), SV("\xf0\x80\x40\x80")); + check(SV("\xf0\x80\xc0\x80"), SV("{}"), SV("\xf0\x80\xc0\x80")); + + check(SV("\xf0\x00\x80\x80"), SV("{}"), SV("\xf0\x00\x80\x80")); + check(SV("\xf0\x40\x80\x80"), SV("{}"), SV("\xf0\x40\x80\x80")); + check(SV("\xf0\xc0\x80\x80"), SV("{}"), SV("\xf0\xc0\x80\x80")); + // Premature end. check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5")); check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));