Add the 'l16' and 'l32' length modifiers (a Clang extension) to format-string
checking. They are accepted with the c, C, s and S conversions and describe
char16_t and char32_t arguments.

Note: indentation in this patch was reconstructed from a whitespace-collapsed
copy; apply it with whitespace-insensitive matching
(`patch -l` or `git apply --ignore-whitespace`).

Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -1688,6 +1688,16 @@
   /// defined in <stddef.h> as defined by the target.
   QualType getWideCharType() const { return WideCharTy; }
 
+  /// Return the type of char16 characters. In C++, this returns the
+  /// unique char16_t type. In C11, this returns a type compatible with the type
+  /// defined in <uchar.h> as defined by the target.
+  QualType getChar16Type() const { return Char16Ty; }
+
+  /// Return the type of char32 characters. In C++, this returns the
+  /// unique char32_t type. In C11, this returns a type compatible with the type
+  /// defined in <uchar.h> as defined by the target.
+  QualType getChar32Type() const { return Char32Ty; }
+
   /// Return the type of "signed wchar_t".
   ///
   /// Used when in C++, as a GCC extension.
Index: clang/include/clang/AST/FormatString.h
===================================================================
--- clang/include/clang/AST/FormatString.h
+++ clang/include/clang/AST/FormatString.h
@@ -65,22 +65,24 @@
 public:
   enum Kind {
     None,
     AsChar,       // 'hh'
     AsShort,      // 'h'
     AsShortLong,  // 'hl' (OpenCL float/int vector element)
     AsLong,       // 'l'
     AsLongLong,   // 'll'
     AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
     AsIntMax,     // 'j'
     AsSizeT,      // 'z'
     AsPtrDiff,    // 't'
     AsInt32,      // 'I32' (MSVCRT, like __int32)
     AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
     AsInt64,      // 'I64' (MSVCRT, like __int64)
     AsLongDouble, // 'L'
     AsAllocate,   // for '%as', GNU extension to C90 scanf
     AsMAllocate,  // for '%ms', GNU extension to scanf
-    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
+    AsUTF16,      // for '%l16(c|s)', Clang extension
+    AsUTF32,      // for '%l32(c|s)', Clang extension
+    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
     AsWideChar = AsLong // for '%ls', only makes sense for printf
   };
 
@@ -248,8 +250,19 @@
 
 class ArgType {
 public:
-  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
-              AnyCharTy, CStrTy, WCStrTy, WIntTy };
+  enum Kind {
+    UnknownTy,
+    InvalidTy,
+    SpecificTy,
+    ObjCPointerTy,
+    CPointerTy,
+    AnyCharTy,
+    CStrTy,
+    WCStrTy,
+    WIntTy,
+    Char16Ty,
+    Char32Ty
+  };
 
   /// How well a given conversion specifier matches its argument.
   enum MatchKind {
Index: clang/lib/AST/FormatString.cpp
===================================================================
--- clang/lib/AST/FormatString.cpp
+++ clang/lib/AST/FormatString.cpp
@@ -232,6 +232,20 @@
       break;
     case 'l':
       ++I;
+      // Clang extension: 'l16' / 'l32' select char16_t / char32_t. Handle them
+      // first and stop, so the 'll' check below cannot also consume input.
+      if (I != E && I + 1 != E && I + 2 != E) {
+        if (I[0] == '1' && I[1] == '6') {
+          I += 2;
+          lmKind = LengthModifier::AsUTF16;
+          break;
+        }
+        if (I[0] == '3' && I[1] == '2') {
+          I += 2;
+          lmKind = LengthModifier::AsUTF32;
+          break;
+        }
+      }
       if (I != E && *I == 'l') {
         ++I;
         lmKind = LengthModifier::AsLongLong;
@@ -359,6 +373,9 @@
           case BuiltinType::SChar:
           case BuiltinType::UChar:
           case BuiltinType::Char_U:
+          case BuiltinType::Char8:
+          case BuiltinType::Char16:
+          case BuiltinType::Char32:
           case BuiltinType::Bool:
             return Match;
         }
@@ -520,6 +537,12 @@
     case WCStrTy:
       Res = C.getPointerType(C.getWideCharType());
       break;
+    case Char16Ty:
+      Res = C.getPointerType(C.getChar16Type());
+      break;
+    case Char32Ty:
+      Res = C.getPointerType(C.getChar32Type());
+      break;
     case ObjCPointerTy:
       Res = C.ObjCBuiltinIdTy;
       break;
@@ -607,6 +630,10 @@
     return "m";
   case AsWide:
     return "w";
+  case AsUTF16:
+    return "l16";
+  case AsUTF32:
+    return "l32";
   case None:
     return "";
   }
@@ -860,6 +887,17 @@
         default:
           return false;
       }
+    case LengthModifier::AsUTF16:
+    case LengthModifier::AsUTF32:
+      switch (CS.getKind()) {
+        case ConversionSpecifier::cArg:
+        case ConversionSpecifier::CArg:
+        case ConversionSpecifier::sArg:
+        case ConversionSpecifier::SArg:
+          return true;
+        default:
+          return false;
+      }
     case LengthModifier::AsWide:
       switch (CS.getKind()) {
         case ConversionSpecifier::cArg:
@@ -886,6 +924,8 @@
     case LengthModifier::AsSizeT:
     case LengthModifier::AsPtrDiff:
     case LengthModifier::AsLongDouble:
+    case LengthModifier::AsUTF16:
+    case LengthModifier::AsUTF32:
       return true;
     case LengthModifier::AsAllocate:
     case LengthModifier::AsMAllocate:
@@ -997,6 +1037,12 @@
     } else if (Identifier->getName() == "ptrdiff_t") {
       LM.setKind(LengthModifier::AsPtrDiff);
       return true;
+    } else if (Identifier->getName() == "char16_t") {
+      LM.setKind(LengthModifier::AsUTF16);
+      return true;
+    } else if (Identifier->getName() == "char32_t") {
+      LM.setKind(LengthModifier::AsUTF32);
+      return true;
     }
 
     QualType T = Typedef->getUnderlyingType();
Index: clang/lib/AST/PrintfFormatString.cpp
===================================================================
--- clang/lib/AST/PrintfFormatString.cpp
+++ clang/lib/AST/PrintfFormatString.cpp
@@ -535,6 +535,8 @@
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
       case LengthModifier::AsWide:
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
         return ArgType::Invalid();
     }
 
@@ -570,6 +572,8 @@
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
       case LengthModifier::AsWide:
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
         return ArgType::Invalid();
     }
 
@@ -618,6 +622,8 @@
       case LengthModifier::AsInt3264:
       case LengthModifier::AsInt64:
       case LengthModifier::AsWide:
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
         return ArgType::Invalid();
       case LengthModifier::AsShortLong:
         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
Index: clang/lib/AST/ScanfFormatString.cpp
===================================================================
--- clang/lib/AST/ScanfFormatString.cpp
+++ clang/lib/AST/ScanfFormatString.cpp
@@ -261,6 +261,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
       }
@@ -302,6 +304,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
       }
@@ -337,6 +341,10 @@
         case LengthModifier::AsLong:
         case LengthModifier::AsWide:
           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
+        case LengthModifier::AsUTF16:
+          return ArgType::PtrTo(ArgType(Ctx.getChar16Type(), "char16_t"));
+        case LengthModifier::AsUTF32:
+          return ArgType::PtrTo(ArgType(Ctx.getChar32Type(), "char32_t"));
         case LengthModifier::AsAllocate:
         case LengthModifier::AsMAllocate:
           return ArgType::PtrTo(ArgType::CStrTy);
@@ -354,6 +362,10 @@
         case LengthModifier::None:
         case LengthModifier::AsWide:
           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
+        case LengthModifier::AsUTF16:
+          return ArgType::PtrTo(ArgType(Ctx.getChar16Type(), "char16_t"));
+        case LengthModifier::AsUTF32:
+          return ArgType::PtrTo(ArgType(Ctx.getChar32Type(), "char32_t"));
         case LengthModifier::AsAllocate:
         case LengthModifier::AsMAllocate:
           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
@@ -398,6 +410,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
       }
@@ -494,6 +508,13 @@
       LM.setKind(LengthModifier::AsLongDouble);
       break;
 
+    case BuiltinType::Char16:
+      LM.setKind(LengthModifier::AsUTF16);
+      break;
+    case BuiltinType::Char32:
+      LM.setKind(LengthModifier::AsUTF32);
+      break;
+
     // Don't know.
     default:
       return false;
Index: clang/test/Sema/format-strings-int-typedefs.c
===================================================================
--- clang/test/Sema/format-strings-int-typedefs.c
+++ clang/test/Sema/format-strings-int-typedefs.c
@@ -10,18 +10,35 @@
   printf("%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}}
   printf("%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}}
   printf("%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  printf("%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'int')}}
+  printf("%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  printf("%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}}
+  printf("%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   printf("%S", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   printf("%C", 42.0); // expected-warning {{format specifies type 'wchar_t' (aka 'int')}}
 
+  wprintf(L"%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'short')}}
+  wprintf(L"%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'short *')}}
+  wprintf(L"%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}}
+  wprintf(L"%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
+
   scanf("%jd", 0); // expected-warning {{format specifies type 'intmax_t *' (aka 'long long *')}}
   scanf("%ju", 0); // expected-warning {{format specifies type 'uintmax_t *' (aka 'unsigned long long *')}}
   scanf("%zu", 0); // expected-warning {{format specifies type 'size_t *' (aka 'unsigned long *')}}
   scanf("%td", 0); // expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}}
   scanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   scanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  scanf("%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  scanf("%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  scanf("%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
+  scanf("%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   scanf("%S", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   scanf("%C", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  wscanf(L"%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  wscanf(L"%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  wscanf(L"%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
+  wscanf(L"%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
 
 
   // typedef size_t et al. to something crazy.
   typedef void *size_t;
Index: clang/test/SemaCXX/format-strings.cpp
===================================================================
--- clang/test/SemaCXX/format-strings.cpp
+++ clang/test/SemaCXX/format-strings.cpp
@@ -24,6 +24,8 @@
 
 void g() {
   printf("%ls", "foo"); // expected-warning{{format specifies type 'wchar_t *' but the argument has type 'const char *'}}
+  printf("%l16s", "foo"); // expected-warning{{format specifies type 'char16_t *' but the argument has type 'const char *'}}
+  printf("%l32s", "foo"); // expected-warning{{format specifies type 'char32_t *' but the argument has type 'const char *'}}
 }
 
 // Test that we properly handle format_idx on C++ members.