diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -557,6 +557,8 @@ - Support label at end of compound statement (`P2324 `_). - Implemented `P1169R4: static operator() `_. +- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 `_). + This change was applied to C++20 as a Defect Report. CUDA/HIP Language Changes in Clang ---------------------------------- @@ -654,8 +656,8 @@ the behavior of ``QualType::getNonReferenceType`` for ``CXType``. - Introduced the new function ``clang_CXXMethod_isDeleted``, which queries whether the method is declared ``= delete``. -- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, - ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and +- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, + ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and ``clang_Cursor_getTemplateArgumentUnsignedValue`` now work on struct, class, and partial template specialization cursors in addition to function cursors. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6868,8 +6868,8 @@ def note_array_init_plain_string_into_char8_t : Note< "add 'u8' prefix to form a 'char8_t' string literal">; def err_array_init_utf8_string_into_char : Error< - "%select{|ISO C++20 does not permit }0initialization of char array with " - "UTF-8 string literal%select{ is not permitted by '-fchar8_t'|}0">; + "initialization of %select{|signed }0char array with " + "UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">; def warn_cxx20_compat_utf8_string : Warning< "type of UTF-8 string literal will change from array of const char to " "array of const char8_t in C++20">, InGroup<CXX20Compat>, DefaultIgnore; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -705,7 +705,7 @@ Builder.defineMacro("__cpp_named_character_escapes", "202207L"); if (LangOpts.Char8) - Builder.defineMacro("__cpp_char8_t", "201811L"); + Builder.defineMacro("__cpp_char8_t", "202207L"); Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); // TS features. diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -81,10 +81,20 @@ const QualType ElemTy = Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); + auto IsCharOrUnsignedChar = [](const QualType &T) { + const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr()); + return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar; + }; + switch (SL->getKind()) { case StringLiteral::UTF8: // char8_t array can be initialized with a UTF-8 string. 
- if (ElemTy->isChar8Type()) + // - C++20 [dcl.init.string] (DR) + // Additionally, an array of char or unsigned char may be initialized + // by a UTF-8 string literal. + if (ElemTy->isChar8Type() || + (Context.getLangOpts().Char8 && + IsCharOrUnsignedChar(ElemTy.getCanonicalType()))) return SIF_None; [[fallthrough]]; case StringLiteral::Ordinary: @@ -9114,9 +9124,10 @@ << FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8"); break; case FK_UTF8StringIntoPlainChar: - S.Diag(Kind.getLocation(), - diag::err_array_init_utf8_string_into_char) - << S.getLangOpts().CPlusPlus20; + // DestType is an array type; select on its element type's signedness. + S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char) + << S.Context.getBaseElementType(DestType)->isSignedIntegerType() + << S.getLangOpts().CPlusPlus20; break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -66,9 +66,9 @@ #error "wrong value for __cpp_aggregate_paren_init" #endif -#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 201811, 201811) : \ +#if defined(CHAR8_T) ? check(char8_t, 202207, 202207, 202207, 202207, 202207, 202207) : \ defined(NO_CHAR8_T) ? 
check(char8_t, 0, 0, 0, 0, 0, 0) : \ - check(char8_t, 0, 0, 0, 0, 201811, 201811) + check(char8_t, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_char8_t" #endif diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp --- a/clang/test/SemaCXX/char8_t.cpp +++ b/clang/test/SemaCXX/char8_t.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fchar8_t -std=c++17 -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify=expected %s +// RUN: %clang_cc1 -std=c++2a -verify=expected -fno-signed-char %s + char8_t a = u8'a'; char8_t b[] = u8"foo"; @@ -7,15 +9,35 @@ char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}} char e = u8'a'; -char f[] = u8"foo"; -#if __cplusplus <= 201703L -// expected-error@-2 {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}} -#else -// expected-error@-4 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -#endif char g = 'a'; char h[] = "foo"; +unsigned char i[] = u8"foo"; +unsigned char j[] = { u8"foo" }; +char k[] = u8"foo"; +char l[] = { u8"foo" }; +signed char m[] = u8"foo"; // expected-error {{initialization of signed char array with UTF-8 string literal is not permitted}} +signed char n[] = { u8"foo" }; // expected-error {{cannot initialize an array element of type 'signed char' with an lvalue of type 'const char8_t[4]'}} + +const unsigned char* uptr = u8"foo"; // expected-error {{cannot initialize}} +const signed char* sptr = u8"foo"; // expected-error {{cannot initialize}} +const char* ptr = u8"foo"; // expected-error {{cannot initialize}} + +template <typename T> +void check_values() { + constexpr T c[] = {0, static_cast<T>(0xFF), 0x42}; + constexpr T a[] = u8"\x00\xFF\x42"; + + static_assert(a[0] == c[0]); + static_assert(a[1] == c[1]); + static_assert(a[2] == c[2]); +} + +void call_check_values() { + check_values<char>(); + check_values<unsigned char>(); +} + void disambig() { 
char8_t (a) = u8'x'; } @@ -48,3 +70,21 @@ static_assert(sizeof(char8_t) == 1); static_assert(char8_t(-1) > 0); static_assert(u8"\u0080"[0] > 0); + +namespace ambiguous { + +struct A { + char8_t s[10]; +}; +struct B { + char s[10]; +}; + +void f(A); // expected-note {{candidate}} +void f(B); // expected-note {{candidate}} + +int test() { + f({u8"foo"}); // expected-error {{call to 'f' is ambiguous}} +} + +} diff --git a/clang/test/SemaCXX/cxx2a-compat.cpp b/clang/test/SemaCXX/cxx2a-compat.cpp --- a/clang/test/SemaCXX/cxx2a-compat.cpp +++ b/clang/test/SemaCXX/cxx2a-compat.cpp @@ -33,9 +33,8 @@ // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} #else -// expected-error@-8 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -// expected-error@-8 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} -// expected-error@-8 {{no viable conversion from 'const char8_t[9]' to 'string'}} +// expected-error@-7 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} +// expected-error@-7 {{no viable conversion from 'const char8_t[9]' to 'string'}} #endif template diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1502,7 +1502,7 @@ char8_t Compatibility and Portability Fix P2513R3 - No + Clang 16 Relax requirements on wchar_t to match existing practices