diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -335,6 +335,8 @@ ^^^^^^^^^^^^^^^^^^^^^ - Support label at end of compound statement (`P2324 <https://wg21.link/p2324r2>`_). +- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 <https://wg21.link/P2513R3>`_). + This change was applied to C++20 as a Defect Report. CUDA/HIP Language Changes in Clang ---------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6849,8 +6849,8 @@ def note_array_init_plain_string_into_char8_t : Note< "add 'u8' prefix to form a 'char8_t' string literal">; def err_array_init_utf8_string_into_char : Error< - "%select{|ISO C++20 does not permit }0initialization of char array with " - "UTF-8 string literal%select{ is not permitted by '-fchar8_t'|}0">; + "initialization of %select{|signed }0char array with " + "UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">; def warn_cxx20_compat_utf8_string : Warning< "type of UTF-8 string literal will change from array of const char to " "array of const char8_t in C++20">, InGroup<CXX20Compat>, DefaultIgnore; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -697,7 +697,8 @@ Builder.defineMacro("__cpp_multidimensional_subscript", "202110L"); } if (LangOpts.Char8) - Builder.defineMacro("__cpp_char8_t", "201811L"); + Builder.defineMacro("__cpp_char8_t", + LangOpts.CPlusPlus20 ? "202207L" : "201811L"); Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); // TS features. 
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -81,10 +81,20 @@ const QualType ElemTy = Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); + auto IsCharOrUnsignedChar = [](const QualType &T) { + const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr()); + return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar; + }; + switch (SL->getKind()) { case StringLiteral::UTF8: // char8_t array can be initialized with a UTF-8 string. - if (ElemTy->isChar8Type()) + // - C++20 [dcl.init.string] (DR) + // Additionally, an array of char or unsigned char may be initialized + // by a UTF-8 string literal + if (ElemTy->isChar8Type() || + (Context.getLangOpts().CPlusPlus20 && + IsCharOrUnsignedChar(ElemTy.getCanonicalType()))) return SIF_None; [[fallthrough]]; case StringLiteral::Ordinary: @@ -9114,9 +9124,8 @@ << FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8"); break; case FK_UTF8StringIntoPlainChar: - S.Diag(Kind.getLocation(), - diag::err_array_init_utf8_string_into_char) - << S.getLangOpts().CPlusPlus20; + S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char) + << DestType->isSignedIntegerType() << S.getLangOpts().CPlusPlus20; break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -46,9 +46,9 @@ #error "wrong value for __cpp_aggregate_paren_init" #endif -#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 201811, 201811) : \ +#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 202207, 202207) : \ defined(NO_CHAR8_T) ? 
check(char8_t, 0, 0, 0, 0, 0, 0) : \ - check(char8_t, 0, 0, 0, 0, 201811, 201811) + check(char8_t, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_char8_t" #endif diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp --- a/clang/test/SemaCXX/char8_t.cpp +++ b/clang/test/SemaCXX/char8_t.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fchar8_t -std=c++17 -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx20 %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx20 -fno-signed-char %s + char8_t a = u8'a'; char8_t b[] = u8"foo"; @@ -10,12 +12,39 @@ char f[] = u8"foo"; #if __cplusplus <= 201703L // expected-error@-2 {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}} -#else -// expected-error@-4 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} #endif char g = 'a'; char h[] = "foo"; +#if __cplusplus >= 201902L +unsigned char i[] = u8"foo"; +unsigned char j[] = { u8"foo" }; +char k[] = u8"foo"; +char l[] = { u8"foo" }; +signed char m[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted by C++20}} +signed char n[] = { u8"foo" }; // expected-error {{cannot initialize an array element of type 'signed char' with an lvalue of type 'const char8_t[4]'}} + +const unsigned char* uptr = u8"foo"; // expected-error {{cannot initialize}} +const signed char* sptr = u8"foo"; // expected-error {{cannot initialize}} +const char* ptr = u8"foo"; // expected-error {{cannot initialize}} + +template <typename T> +void check_values() { + constexpr T c[] = {0, static_cast<T>(0xFF), 0x42}; + constexpr T a[] = u8"\x00\xFF\x42"; + + static_assert(a[0] == c[0]); + static_assert(a[1] == c[1]); + static_assert(a[2] == c[2]); +} + +void call_check_values() { + check_values<char>(); + check_values<unsigned char>(); +} + +#endif + void disambig() { char8_t (a) = u8'x'; } @@ -48,3 +77,21 @@ static_assert(sizeof(char8_t) == 1); 
static_assert(char8_t(-1) > 0); static_assert(u8"\u0080"[0] > 0); + +namespace ambiguous { + +struct A { + char8_t s[10]; +}; +struct B { + char s[10]; +}; + +void f(A); // cxx20-note {{candidate}} +void f(B); // cxx20-note {{candidate}} + +int test() { + f({u8"foo"}); // cxx20-error {{call to 'f' is ambiguous}} +} + +} diff --git a/clang/test/SemaCXX/cxx2a-compat.cpp b/clang/test/SemaCXX/cxx2a-compat.cpp --- a/clang/test/SemaCXX/cxx2a-compat.cpp +++ b/clang/test/SemaCXX/cxx2a-compat.cpp @@ -33,9 +33,8 @@ // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} #else -// expected-error@-8 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -// expected-error@-8 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} -// expected-error@-8 {{no viable conversion from 'const char8_t[9]' to 'string'}} +// expected-error@-7 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} +// expected-error@-7 {{no viable conversion from 'const char8_t[9]' to 'string'}} #endif template diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1506,7 +1506,7 @@ char8_t Compatibility and Portability Fix P2513R3 - No + Clang 16 Relax requirements on wchar_t to match existing practices