[clang][Lex] Fix signedness of u8 character literals in preprocessor expressions

In #if/#elif expressions, a u8'...' character literal is now evaluated as
unsigned in C, and in C++ when char8_t is enabled (-fchar8_t) or plain char is
unsigned. Otherwise (C++ without char8_t) it follows the signedness of plain
char. Wide, UTF-16 and UTF-32 literals are unchanged. Fixes issue 54886.

Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -315,6 +315,8 @@
   template parameter, to conform to the Itanium C++ ABI and be compatible with
   GCC. This breaks binary compatibility with code compiled with earlier versions
   of clang; use the ``-fclang-abi-compat=14`` option to get the old mangling.
+- Preprocessor character literals with a ``u8`` prefix are now correctly treated as
+  unsigned character literals. This fixes `Issue 54886 <https://github.com/llvm/llvm-project/issues/54886>`_.
 
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
Index: clang/lib/Lex/PPExpressions.cpp
===================================================================
--- clang/lib/Lex/PPExpressions.cpp
+++ clang/lib/Lex/PPExpressions.cpp
@@ -408,9 +408,18 @@
     // Set the value.
     Val = Literal.getValue();
     // Set the signedness. UTF-16 and UTF-32 are always unsigned
+    // UTF-8 is unsigned in C; in C++, with -fchar8_t or an unsigned plain char.
     if (Literal.isWide())
       Val.setIsUnsigned(!TargetInfo::isTypeSigned(TI.getWCharType()));
-    else if (!Literal.isUTF16() && !Literal.isUTF32())
+    else if (Literal.isUTF16() || Literal.isUTF32())
+      Val.setIsUnsigned(true);
+    else if (Literal.isUTF8()) {
+      if (PP.getLangOpts().CPlusPlus)
+        Val.setIsUnsigned(
+            PP.getLangOpts().Char8 || !PP.getLangOpts().CharIsSigned);
+      else
+        Val.setIsUnsigned(true);
+    } else
       Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);
 
     if (Result.Val.getBitWidth() > Val.getBitWidth()) {
Index: clang/test/Lexer/utf8-char-literal.cpp
===================================================================
--- clang/test/Lexer/utf8-char-literal.cpp
+++ clang/test/Lexer/utf8-char-literal.cpp
@@ -1,7 +1,10 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -x c -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++1z -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -fsyntax-only -fchar8_t -DCHAR8_T -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -fsyntax-only -fno-char8_t -DNO_CHAR8_T -verify %s
 
 int array0[u'ñ' == u'\xf1'? 1 : -1];
 int array1['\xF1' != u'\xf1'? 1 : -1];
@@ -13,7 +16,7 @@
 char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
 char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
 char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
-#elif __STDC_VERSION__ > 202000L
+#elif __STDC_VERSION__ >= 202000L
 char a = u8'ñ'; // expected-error {{character too large for enclosing character literal type}}
 char b = u8'\x80'; // ok
 char c = u8'\u0080'; // expected-error {{universal character name refers to a control character}}
@@ -26,3 +29,40 @@
            unsigned char : 1),
   "Surprise!");
 #endif
+
+
+/// In C++17, the behavior depends on -fchar8_t.
+#if __cplusplus == 201703L
+#  if defined(__cpp_char8_t)
+#    if u8'\xff' == '\xff' // expected-warning {{right side of operator converted from negative value to unsigned}}
+#      error Something's not right.
+#    endif
+#  else
+#    if u8'\xff' != '\xff'
+#      error Something's not right.
+#    endif
+#  endif
+#endif
+
+
+/// In C++20 and up, u8 char literals are unsigned by default,
+/// unless -fno-char8_t is specified.
+#if __cplusplus > 201703L
+#  if defined(__cpp_char8_t)
+#    if u8'\xff' != 0xff
+#      error u8 char literal is not unsigned
+#    endif
+#  else
+#    if u8'\xff' == 0xff
+#      error u8 char literal is unsigned
+#    endif
+#  endif
+#endif
+
+
+/// In C2x, u8 char literals are always unsigned.
+#if __STDC_VERSION__ >= 202000L
+#  if u8'\xff' != 0xff
+#    error u8 char literal is not unsigned
+#  endif
+#endif