Index: clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp =================================================================== --- clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp +++ clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp @@ -16,7 +16,7 @@ namespace { AST_MATCHER(Type, isStrictlyInteger) { - return Node.isIntegerType() && !Node.isAnyCharacterType() && + return Node.isIntegerType() && !Node.isAnyCharacterType(Finder->getASTContext().getLangOpts()) && !Node.isBooleanType(); } } // namespace Index: clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp =================================================================== --- clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp +++ clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp @@ -145,7 +145,7 @@ "catch by reference instead"; // We do not diagnose when catching pointer to strings since we also allow // throwing string literals. - if (!PT->getPointeeType()->isAnyCharacterType()) + if (!PT->getPointeeType()->isAnyCharacterType(getLangOpts())) diag(varDecl->getBeginLoc(), diagMsgCatchReference); } else if (!caughtType->isReferenceType()) { const char *diagMsgCatchReference = "catch handler catches by value; " Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -1685,6 +1685,16 @@ /// unique wchar_t type. In C99, this returns a type compatible with the type /// defined in as defined by the target. QualType getWideCharType() const { return WideCharTy; } + + /// Return the type of char16 characters. In C++, this returns the + /// unique char16_t type. In C11, this returns a type compatible with the type + /// defined in as defined by the target. + QualType getChar16Type() const { return Char16Ty; } + + /// Return the type of char32 characters. In C++, this returns the + /// unique char32_t type. 
In C11, this returns a type compatible with the type + /// defined in as defined by the target. + QualType getChar32Type() const { return Char32Ty; } /// Return the type of "signed wchar_t". /// Index: clang/include/clang/AST/Expr.h =================================================================== --- clang/include/clang/AST/Expr.h +++ clang/include/clang/AST/Expr.h @@ -1775,6 +1775,9 @@ public: enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 }; + + char *getStrDataAsChar(); + const char *getStrDataAsChar() const; private: unsigned numTrailingObjects(OverloadToken) const { return 1; } @@ -1786,9 +1789,6 @@ return getByteLength(); } - char *getStrDataAsChar() { return getTrailingObjects(); } - const char *getStrDataAsChar() const { return getTrailingObjects(); } - const uint16_t *getStrDataAsUInt16() const { return reinterpret_cast(getTrailingObjects()); } @@ -1840,6 +1840,30 @@ "This function is used in places that assume strings use char"); return StringRef(getStrDataAsChar(), getByteLength()); } + + std::string getStringAsChar() const { + assert(getCharByteWidth() == 1 && + "This function is used in places that assume strings use char"); + return std::string(getTrailingObjects(), getTrailingObjects() + getByteLength()); + } + + std::u16string getStringAsChar16() const { + assert(getCharByteWidth() == 2 && + "This function is used in places that assume strings use char16_t"); + return std::u16string(reinterpret_cast(getTrailingObjects()), reinterpret_cast(getTrailingObjects() + getByteLength())); + } + + std::u32string getStringAsChar32() const { + assert(getCharByteWidth() == 4 && + "This function is used in places that assume strings use char32_t"); + return std::u32string(reinterpret_cast(getTrailingObjects()), reinterpret_cast(getTrailingObjects() + getByteLength())); + } + + std::wstring getStringAsWChar() const { + assert((getCharByteWidth() == 2 || getCharByteWidth() == 4) && + "This function is used in places that assume strings use wchar_t"); + return 
std::wstring(reinterpret_cast(getTrailingObjects()), reinterpret_cast(getTrailingObjects() + getByteLength())); + } /// Allow access to clients that need the byte representation, such as /// ASTWriterStmt::VisitStringLiteral(). Index: clang/include/clang/AST/FormatString.h =================================================================== --- clang/include/clang/AST/FormatString.h +++ clang/include/clang/AST/FormatString.h @@ -249,7 +249,7 @@ class ArgType { public: enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, - AnyCharTy, CStrTy, WCStrTy, WIntTy }; + AnyCharTy, CStrTy, WCStrTy, WIntTy, Char16Ty, Char32Ty }; /// How well a given conversion specifier matches its argument. enum MatchKind { Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -1973,11 +1973,11 @@ bool isScopedEnumeralType() const; bool isBooleanType() const; bool isCharType() const; - bool isWideCharType() const; bool isChar8Type() const; - bool isChar16Type() const; - bool isChar32Type() const; - bool isAnyCharacterType() const; + bool isWideCharType() const; + bool isChar16Type(const LangOptions &LangOpts) const; + bool isChar32Type(const LangOptions &LangOpts) const; + bool isAnyCharacterType(const LangOptions &LangOpts) const; bool isIntegralType(const ASTContext &Ctx) const; /// Determine whether this type is an integral or enumeration type. Index: clang/include/clang/ASTMatchers/ASTMatchers.h =================================================================== --- clang/include/clang/ASTMatchers/ASTMatchers.h +++ clang/include/clang/ASTMatchers/ASTMatchers.h @@ -6075,7 +6075,7 @@ /// functionDecl(hasAnyParameter(hasType(isAnyCharacter()))) /// matches "a(char)", "b(wchar_t)", but not "c(double)". 
AST_MATCHER(QualType, isAnyCharacter) { - return Node->isAnyCharacterType(); + return Node->isAnyCharacterType(Finder->getASTContext().getLangOpts()); } /// Matches QualType nodes that are of any pointer type; this includes Index: clang/include/clang/Basic/Builtins.def =================================================================== --- clang/include/clang/Basic/Builtins.def +++ clang/include/clang/Basic/Builtins.def @@ -581,7 +581,18 @@ BUILTIN(__builtin_wmemcmp, "iwC*wC*z", "nF") BUILTIN(__builtin_wmemcpy, "w*w*wC*z", "nF") BUILTIN(__builtin_wmemmove, "w*w*wC*z", "nF") -BUILTIN(__builtin_realloc, "v*v*z", "nF") +BUILTIN(__builtin_wprintf, "iwC*R.", "Fp:0:") +BUILTIN(__builtin_wscanf, "iwC*R.", "Fs:0:") +BUILTIN(__builtin_fwprintf, "iP*RCw*R", "fp:1:") +BUILTIN(__builtin_fwscanf, "iP*RCw*R", "fs:1:") +BUILTIN(__builtin_swprintf, "iw*RzCw*R.", "fp:2:") +BUILTIN(__builtin_swscanf, "iCw*RCw*R.", "fs:1:") +BUILTIN(__builtin_vfwprintf,"iP*RCw*Ra", "fp:1:") +BUILTIN(__builtin_vfwscanf, "iP*RCw*Ra", "fs:1:") +BUILTIN(__builtin_vswprintf,"iw*RzCw*Ra", "fp:2:") +BUILTIN(__builtin_vswscanf, "iCw*RCw*Ra", "fs:1:") +BUILTIN(__builtin_vwprintf, "iCw*Ra", "fp:0:") +BUILTIN(__builtin_vwscanf, "iCw*Ra", "fs:0:") BUILTIN(__builtin_return_address, "v*IUi", "n") BUILTIN(__builtin_extract_return_addr, "v*v*", "n") BUILTIN(__builtin_frame_address, "v*IUi", "n") @@ -1051,6 +1062,18 @@ LIBBUILTIN(wmemcmp, "iwC*wC*z", "f", "wchar.h", ALL_LANGUAGES) LIBBUILTIN(wmemcpy, "w*w*wC*z", "f", "wchar.h", ALL_LANGUAGES) LIBBUILTIN(wmemmove,"w*w*wC*z", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wprintf, "iwC*R.", "fp:0:", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wscanf, "iwC*R.", "fs:0:", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(fwprintf, "iP*RCw*R", "fp:1:", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(fwscanf, "iP*RCw*R", "fs:1:", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(swprintf, "iw*RzCw*R.", "fp:2:", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(swscanf, "iCw*RCw*R.", "fs:1:", "wchar.h", ALL_LANGUAGES) 
+LIBBUILTIN(vfwprintf,"iP*RCw*Ra", "fp:1:", "wchar.h", ALL_LANGUAGES)
+LIBBUILTIN(vfwscanf, "iP*RCw*Ra", "fs:1:", "wchar.h", ALL_LANGUAGES)
+LIBBUILTIN(vswprintf,"iw*RzCw*Ra", "fp:2:", "wchar.h", ALL_LANGUAGES)
+LIBBUILTIN(vswscanf, "iCw*RCw*Ra", "fs:1:", "wchar.h", ALL_LANGUAGES)
+LIBBUILTIN(vwprintf, "iCw*Ra", "fp:0:", "wchar.h", ALL_LANGUAGES)
+LIBBUILTIN(vwscanf, "iCw*Ra", "fs:0:", "wchar.h", ALL_LANGUAGES)
 
 // C99
 // In some systems setjmp is a macro that expands to _setjmp. We undefine
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7006,7 +7006,8 @@
 def warn_format_nonliteral : Warning<
   "format string is not a string literal">,
   InGroup, DefaultIgnore;
-
+def err_format_invalid_type : Error<
+  "format strings type is invalid">;
 def err_unexpected_interface : Error<
   "unexpected interface name %0: expected expression">;
 def err_ref_non_value : Error<"%0 does not refer to a value">;
@@ -9205,7 +9206,6 @@
   InGroup;
 def note_array_declared_here : Note<
   "array %0 declared here">;
-
 def warn_printf_insufficient_data_args : Warning<
   "more '%%' conversions than data arguments">, InGroup;
 def warn_printf_data_arg_not_used : Warning<
@@ -9256,8 +9256,6 @@
   "callee declares array parameter as static here">;
 def warn_empty_format_string : Warning<
   "format string is empty">, InGroup;
-def warn_format_string_is_wide_literal : Warning<
-  "format string should not be a wide string">, InGroup;
 def warn_printf_format_string_contains_null_char : Warning<
   "format string contains '\\0' within the string body">, InGroup;
 def warn_printf_format_string_not_null_terminated : Warning<
Index: clang/lib/AST/Expr.cpp
===================================================================
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ConvertUTF.h"
 #include
 #include
 using namespace clang;
@@ -1066,6 +1067,55 @@
   return CharByteWidth;
 }
 
+/// Return the literal's contents as UTF-8 text.
+///
+/// Ordinary and UTF-8 literals are returned in place: the pointer refers to
+/// the literal's own getByteLength() bytes and must not be freed. UTF-16,
+/// UTF-32 and wide literals are transcoded into a new NUL-terminated buffer
+/// that the caller owns and must release with delete[]; if the code units
+/// cannot be converted, that buffer holds the empty string.
+///
+/// NOTE(review): the in-place data is presumably not NUL-terminated, so
+/// confirm before treating the narrow result as a C string.
+char *StringLiteral::getStrDataAsChar() {
+  std::string Output;
+
+  switch (getKind()) {
+  case StringKind::Ascii:
+  case StringKind::UTF8:
+    return getTrailingObjects<char>();
+  case StringKind::UTF16: {
+    ArrayRef<char> AR(getTrailingObjects<char>(), getByteLength());
+    if (!llvm::convertUTF16ToUTF8String(AR, Output))
+      Output.clear(); // Never hand back a partial conversion.
+    break;
+  }
+  case StringKind::UTF32: {
+    ArrayRef<char> AR(getTrailingObjects<char>(), getByteLength());
+    if (!llvm::convertUTF32ToUTF8String(AR, Output))
+      Output.clear();
+    break;
+  }
+  case StringKind::Wide:
+    if (!llvm::convertWideToUTF8(getStringAsWChar(), Output))
+      Output.clear();
+    break;
+  }
+
+  // Copy size() + 1 bytes so that the terminating NUL comes along too.
+  char *CString = new char[Output.size() + 1];
+  memcpy(CString, Output.c_str(), Output.size() + 1);
+  return CString;
+}
+
+/// Const overload of getStrDataAsChar(); see the non-const overload for who
+/// owns the returned storage.
+const char *StringLiteral::getStrDataAsChar() const {
+  // Dispatch to the non-const overload explicitly: an unqualified call from
+  // a const member function would select this overload again and never return.
+  return const_cast<StringLiteral *>(this)->getStrDataAsChar();
+}
+
 StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
                              StringKind Kind, bool Pascal, QualType Ty,
                              const SourceLocation *Loc,
@@ -1260,10 +1310,8 @@
                                  const LangOptions &Features,
                                  const TargetInfo &Target, unsigned *StartToken,
                                  unsigned *StartTokenByteOffset) const {
-  assert((getKind() == StringLiteral::Ascii ||
-          getKind() == StringLiteral::UTF8) &&
+  assert((getKind() == StringLiteral::Ascii || getKind() == StringLiteral::UTF8) &&
          "Only narrow string literals are currently supported");
-
   // Loop over all of the tokens in this string until we find the one that
   // contains the byte we're looking for.
unsigned TokNo = 0; Index: clang/lib/AST/ExprCXX.cpp =================================================================== --- clang/lib/AST/ExprCXX.cpp +++ clang/lib/AST/ExprCXX.cpp @@ -927,7 +927,7 @@ cast(getCalleeDecl())->getParamDecl(0)->getType(); if (ParamTy->isPointerType()) return LOK_Raw; - if (ParamTy->isAnyCharacterType()) + if (ParamTy->isAnyCharacterType(getCalleeDecl()->getLangOpts())) return LOK_Character; if (ParamTy->isIntegerType()) return LOK_Integer; Index: clang/lib/AST/OSLog.cpp =================================================================== --- clang/lib/AST/OSLog.cpp +++ clang/lib/AST/OSLog.cpp @@ -201,8 +201,10 @@ } const StringLiteral *Lit = cast(StringArg->IgnoreParenCasts()); - assert(Lit && (Lit->isAscii() || Lit->isUTF8())); - StringRef Data = Lit->getString(); + assert(Lit); + std::string String(Lit->getStrDataAsChar()); + StringRef Data(String); + OSLogFormatStringHandler H(VarArgs); ParsePrintfString(H, Data.begin(), Data.end(), Ctx.getLangOpts(), Ctx.getTargetInfo(), /*isFreeBSDKPrintf*/ false); Index: clang/lib/AST/TemplateBase.cpp =================================================================== --- clang/lib/AST/TemplateBase.cpp +++ clang/lib/AST/TemplateBase.cpp @@ -88,15 +88,15 @@ Out << "(unsigned char)"; } CharacterLiteral::print(Val.getZExtValue(), CharacterLiteral::Ascii, Out); - } else if (T->isAnyCharacterType() && !Policy.MSVCFormatting) { + } else if (T->isAnyCharacterType(T->getAs()->getDecl()->getLangOpts()) && !Policy.MSVCFormatting) { CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; else if (T->isChar8Type()) Kind = CharacterLiteral::UTF8; - else if (T->isChar16Type()) + else if (T->isChar16Type(T->getAs()->getDecl()->getLangOpts())) Kind = CharacterLiteral::UTF16; - else if (T->isChar32Type()) + else if (T->isChar32Type(T->getAs()->getDecl()->getLangOpts())) Kind = CharacterLiteral::UTF32; else Kind = CharacterLiteral::Ascii; Index: clang/lib/AST/Type.cpp 
=================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -1968,36 +1968,61 @@ return false; } -bool Type::isChar16Type() const { +bool Type::isChar16Type(const LangOptions &LangOpts) const { if (const auto *BT = dyn_cast(CanonicalType)) - return BT->getKind() == BuiltinType::Char16; - return false; -} - -bool Type::isChar32Type() const { - if (const auto *BT = dyn_cast(CanonicalType)) - return BT->getKind() == BuiltinType::Char32; - return false; -} - -/// Determine whether this type is any of the built-in character -/// types. -bool Type::isAnyCharacterType() const { - const auto *BT = dyn_cast(CanonicalType); - if (!BT) return false; - switch (BT->getKind()) { - default: return false; - case BuiltinType::Char_U: - case BuiltinType::UChar: - case BuiltinType::WChar_U: - case BuiltinType::Char8: - case BuiltinType::Char16: - case BuiltinType::Char32: - case BuiltinType::Char_S: - case BuiltinType::SChar: - case BuiltinType::WChar_S: - return true; + if (BT->getKind() == BuiltinType::Char16) + return true; + if (!LangOpts.CPlusPlus) { + QualType Desugar = this->getLocallyUnqualifiedSingleStepDesugaredType(); + + + while (!Desugar->isCanonicalUnqualified()) { + if (Desugar.getAsString() == "char16_t") { + return true; + } + Desugar = Desugar->getLocallyUnqualifiedSingleStepDesugaredType(); + } } + return false; +} + +bool Type::isChar32Type(const LangOptions &LangOpts) const { + if (const auto *BT = dyn_cast(CanonicalType)) + if (BT->getKind() == BuiltinType::Char32) + return true; + if (!LangOpts.CPlusPlus) { + QualType Desugar = this->getLocallyUnqualifiedSingleStepDesugaredType(); + + while (!Desugar->isCanonicalUnqualified()) { + if (Desugar.getAsString() == "char32_t") { + return true; + } + Desugar = Desugar->getLocallyUnqualifiedSingleStepDesugaredType(); + } + } + return false; +} + +/// Determine whether this type is any of the character types. 
+bool Type::isAnyCharacterType(const LangOptions &LangOpts) const {
+  if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType)) {
+    switch (BT->getKind()) {
+    case BuiltinType::Char8:
+    case BuiltinType::Char16:
+    case BuiltinType::Char32:
+    case BuiltinType::Char_S:
+    case BuiltinType::Char_U:
+    case BuiltinType::SChar:
+    case BuiltinType::UChar:
+    case BuiltinType::WChar_S:
+    case BuiltinType::WChar_U:
+      return true;
+    default:
+      break; // Not a builtin character type; the C typedefs are tried below.
+    }
+  }
+  return !LangOpts.CPlusPlus &&
+         (isChar16Type(LangOpts) || isChar32Type(LangOpts));
 }
 
 /// isSignedIntegerType - Return true if this is an integer type that is
Index: clang/lib/Lex/LiteralSupport.cpp
===================================================================
--- clang/lib/Lex/LiteralSupport.cpp
+++ clang/lib/Lex/LiteralSupport.cpp
@@ -1883,9 +1883,18 @@
   // Handle UTF-8 strings just like narrow strings.
   if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
     SpellingPtr += 2;
-
-  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
-         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
+
+  // Handle UTF-16 strings
+  if (SpellingPtr[0] == 'u' && SpellingPtr[1] != '8')
+    SpellingPtr += 1;
+
+  // Handle UTF-32 strings
+  if (SpellingPtr[0] == 'U')
+    SpellingPtr += 1;
+
+  // Handle wide strings
+  if (SpellingPtr[0] == 'L')
+    SpellingPtr += 1;
 
   // For raw string literals, this is easy.
   if (SpellingPtr[0] == 'R') {
Index: clang/lib/Sema/SemaCast.cpp
===================================================================
--- clang/lib/Sema/SemaCast.cpp
+++ clang/lib/Sema/SemaCast.cpp
@@ -2012,8 +2012,8 @@
     return;
   }
   // or one of the types is a char or void type
-  if (DestTy->isAnyCharacterType() || DestTy->isVoidType() ||
-      SrcTy->isAnyCharacterType() || SrcTy->isVoidType()) {
+  if (DestTy->isAnyCharacterType(getLangOpts()) || DestTy->isVoidType() ||
+      SrcTy->isAnyCharacterType(getLangOpts()) || SrcTy->isVoidType()) {
     return;
   }
   // or one of the types is a tag type.
Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -620,17 +620,14 @@ auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); if (auto *Format = dyn_cast(FormatExpr)) { - - if (!Format->isAscii() && !Format->isUTF8()) - return; - - StringRef FormatStrRef = Format->getString(); + std::string String(Format->getStrDataAsChar()); + StringRef FormatStrRef(String); + EstimateSizeFormatHandler H(FormatStrRef); const char *FormatBytes = FormatStrRef.data(); - const ConstantArrayType *T = - Context.getAsConstantArrayType(Format->getType()); + const ConstantArrayType *T = Context.getAsConstantArrayType(Format->getType()); assert(T && "String literal not of constant array type!"); - size_t TypeSize = T->getSize().getZExtValue(); + size_t TypeSize = String.size(); // In case there's a null byte somewhere. size_t StrLen = @@ -7474,6 +7471,26 @@ StringRef getString() const { return FExpr->getString().drop_front(Offset); } + + const char *getStrDataAsChar() const { + return FExpr->getStrDataAsChar(); + } + + std::string getStringAsChar() const { + return FExpr->getStringAsChar(); + } + + std::u16string getStringAsChar16() const { + return FExpr->getStringAsChar16(); + } + + std::u32string getStringAsChar32() const { + return FExpr->getStringAsChar32(); + } + + std::wstring getStringAsWChar() const { + return FExpr->getStringAsWChar(); + } unsigned getByteLength() const { return FExpr->getByteLength() - getCharByteWidth() * Offset; @@ -7836,8 +7853,8 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) { return llvm::StringSwitch(Format->getType()->getName()) - .Case("scanf", FST_Scanf) - .Cases("printf", "printf0", FST_Printf) + .Cases("scanf", "wscanf", FST_Scanf) + .Cases("printf", "printf0", "wprintf", FST_Printf) .Cases("NSString", "CFString", FST_NSString) .Case("strftime", FST_Strftime) .Case("strfmon", 
FST_Strfmon) @@ -9508,23 +9525,24 @@ llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, bool IgnoreStringsWithoutSpecifiers) { - // CHECK: is the format string a wide literal? - if (!FExpr->isAscii() && !FExpr->isUTF8()) { + // CHECK: is the format string's type valid? + if (!FExpr->isAscii() && !FExpr->isUTF8() && !FExpr->isUTF16() && !FExpr->isUTF32() && !FExpr->isWide()) { CheckFormatHandler::EmitFormatDiagnostic( S, inFunctionCall, Args[format_idx], - S.PDiag(diag::warn_format_string_is_wide_literal), FExpr->getBeginLoc(), + S.PDiag(diag::err_format_invalid_type), FExpr->getBeginLoc(), /*IsStringLocation*/ true, OrigFormatExpr->getSourceRange()); return; } - - // Str - The format string. NOTE: this is NOT null-terminated! - StringRef StrRef = FExpr->getString(); + std::string String(FExpr->getStrDataAsChar()); + + StringRef StrRef(String); const char *Str = StrRef.data(); + // Account for cases where the string literal is truncated in a declaration. - const ConstantArrayType *T = - S.Context.getAsConstantArrayType(FExpr->getType()); + const ConstantArrayType *T = S.Context.getAsConstantArrayType(FExpr->getType()); assert(T && "String literal not of constant array type!"); size_t TypeSize = T->getSize().getZExtValue(); + size_t StrLen = std::min(std::max(TypeSize, size_t(1)) - 1, StrRef.size()); const unsigned numDataArgs = Args.size() - firstDataArg; @@ -9532,16 +9550,16 @@ !analyze_format_string::parseFormatStringHasFormattingSpecifiers( Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo())) return; - + // Emit a warning if the string literal is truncated and does not contain an // embedded null character. 
- if (TypeSize <= StrRef.size() && + if (TypeSize < StrRef.size() && StrRef.substr(0, TypeSize).find('\0') == StringRef::npos) { CheckFormatHandler::EmitFormatDiagnostic( - S, inFunctionCall, Args[format_idx], - S.PDiag(diag::warn_printf_format_string_not_null_terminated), - FExpr->getBeginLoc(), - /*IsStringLocation=*/true, OrigFormatExpr->getSourceRange()); + S, inFunctionCall, Args[format_idx], + S.PDiag(diag::warn_printf_format_string_not_null_terminated), + FExpr->getBeginLoc(), + /*IsStringLocation=*/true, OrigFormatExpr->getSourceRange()); return; } @@ -9581,13 +9599,15 @@ } bool Sema::FormatStringHasSArg(const StringLiteral *FExpr) { - // Str - The format string. NOTE: this is NOT null-terminated! - StringRef StrRef = FExpr->getString(); - const char *Str = StrRef.data(); // Account for cases where the string literal is truncated in a declaration. - const ConstantArrayType *T = Context.getAsConstantArrayType(FExpr->getType()); + const ConstantArrayType *T = Context.getAsConstantArrayType(FExpr->getType()); // FExpr->getType() is a QualType, and it's no longer relevent because we changed the format literal for type checking? 
assert(T && "String literal not of constant array type!"); size_t TypeSize = T->getSize().getZExtValue(); + + std::string String(FExpr->getStrDataAsChar()); + StringRef StrRef(String); + const char *Str = StrRef.data(); + size_t StrLen = std::min(std::max(TypeSize, size_t(1)) - 1, StrRef.size()); return analyze_format_string::ParseFormatStringHasSArg(Str, Str + StrLen, getLangOpts(), Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -3366,7 +3366,7 @@ if (NotNSStringTy && !isCFStringType(Ty, S.Context) && (!Ty->isPointerType() || - !Ty->castAs()->getPointeeType()->isCharType())) { + !Ty->castAs()->getPointeeType()->isAnyCharacterType(S.getLangOpts()))) { S.Diag(AL.getLoc(), diag::err_format_attribute_not) << "a string type" << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, 0); @@ -3376,7 +3376,7 @@ if (!isNSStringType(Ty, S.Context, /*AllowNSAttributedString=*/true) && !isCFStringType(Ty, S.Context) && (!Ty->isPointerType() || - !Ty->castAs()->getPointeeType()->isCharType())) { + !Ty->castAs()->getPointeeType()->isAnyCharacterType(S.getLangOpts()))) { S.Diag(AL.getLoc(), diag::err_format_attribute_result_not) << (NotNSStringTy ? "string type" : "NSString") << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, 0); @@ -3407,6 +3407,7 @@ // Otherwise, check for supported formats. .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat) .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat) + .Cases("wscanf", "wprintf", SupportedFormat) .Case("kprintf", SupportedFormat) // OpenBSD. .Case("freebsd_kprintf", SupportedFormat) // FreeBSD. 
.Case("os_trace", SupportedFormat) @@ -3477,7 +3478,7 @@ return ::new (Context) FormatAttr(Context, CI, Format, FormatIdx, FirstArg); } -/// Handle __attribute__((format(type,idx,firstarg))) attributes based on +/// Handle __attribute__((format(type,format_idx,firstarg))) attributes based on /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.isArgIdent(0)) { @@ -3499,7 +3500,7 @@ II = &S.Context.Idents.get(Format); } - // Check for supported formats. + // Check the type parameter FormatAttrKind Kind = getFormatAttrKind(Format); if (Kind == IgnoredFormat) @@ -3511,7 +3512,7 @@ return; } - // checks for the 2nd argument + // Check the format_idx parameter Expr *IdxExpr = AL.getArgAsExpr(1); uint32_t Idx; if (!checkUInt32Argument(S, AL, IdxExpr, Idx, 2)) @@ -3556,14 +3557,14 @@ return; } } else if (!Ty->isPointerType() || - !Ty->castAs()->getPointeeType()->isCharType()) { + !Ty->castAs()->getPointeeType()->isAnyCharacterType(S.getLangOpts())) { S.Diag(AL.getLoc(), diag::err_format_attribute_not) << "a string type" << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, ArgIdx); return; } - // check the 3rd argument + // check the firstarg parameter Expr *FirstArgExpr = AL.getArgAsExpr(2); uint32_t FirstArg; if (!checkUInt32Argument(S, AL, FirstArgExpr, FirstArg, 3)) Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -10636,14 +10636,14 @@ return; // Return if not a CharacterType. 
- if (!StringType->getPointeeType()->isAnyCharacterType()) + if (!StringType->getPointeeType()->isAnyCharacterType(Self.getLangOpts())) return; ASTContext &Ctx = Self.getASTContext(); SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc()); const QualType CharType = CharExpr->getType(); - if (!CharType->isAnyCharacterType() && + if (!CharType->isAnyCharacterType(Self.getLangOpts()) && CharType->isIntegerType() && llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) { Self.Diag(OpLoc, diag::warn_string_plus_char) Index: clang/lib/Sema/SemaExprObjC.cpp =================================================================== --- clang/lib/Sema/SemaExprObjC.cpp +++ clang/lib/Sema/SemaExprObjC.cpp @@ -330,11 +330,11 @@ break; case CharacterLiteral::UTF16: - NumberType = Context.Char16Ty; + NumberType = Context.getChar16Type(); break; case CharacterLiteral::UTF32: - NumberType = Context.Char32Ty; + NumberType = Context.getChar32Type(); break; } } @@ -620,11 +620,11 @@ break; case CharacterLiteral::UTF16: - ValueType = Context.Char16Ty; + ValueType = Context.getChar16Type(); break; case CharacterLiteral::UTF32: - ValueType = Context.Char32Ty; + ValueType = Context.getChar32Type(); break; } } Index: clang/lib/Sema/SemaFixItUtils.cpp =================================================================== --- clang/lib/Sema/SemaFixItUtils.cpp +++ clang/lib/Sema/SemaFixItUtils.cpp @@ -189,9 +189,9 @@ return "'\\0'"; if (T.isWideCharType()) return "L'\\0'"; - if (T.isChar16Type()) + if (T.isChar16Type(S.getLangOpts())) return "u'\\0'"; - if (T.isChar32Type()) + if (T.isChar32Type(S.getLangOpts())) return "U'\\0'"; return "0"; } Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -2142,7 +2142,7 @@ // underlying type, an rvalue a prvalue of type char16_t, char32_t, // or wchar_t can be converted to an rvalue a prvalue of its underlying // 
type. - if (FromType->isAnyCharacterType() && !FromType->isCharType() && + if (FromType->isAnyCharacterType(getLangOpts()) && !FromType->isCharType() && ToType->isIntegerType()) { // Determine whether the type we're converting from is signed or // unsigned. Index: clang/lib/Sema/SemaTemplate.cpp =================================================================== --- clang/lib/Sema/SemaTemplate.cpp +++ clang/lib/Sema/SemaTemplate.cpp @@ -7606,15 +7606,15 @@ T = ET->getDecl()->getIntegerType(); Expr *E; - if (T->isAnyCharacterType()) { + if (T->isAnyCharacterType(getLangOpts())) { CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; else if (T->isChar8Type() && getLangOpts().Char8) Kind = CharacterLiteral::UTF8; - else if (T->isChar16Type()) + else if (T->isChar16Type(getLangOpts())) Kind = CharacterLiteral::UTF16; - else if (T->isChar32Type()) + else if (T->isChar32Type(getLangOpts())) Kind = CharacterLiteral::UTF32; else Kind = CharacterLiteral::Ascii; Index: clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp +++ clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp @@ -780,14 +780,18 @@ bool BoundsProvided = ArgIndex == DEPR_ONLY; if (!BoundsProvided) { - // Currently we only handle (not wide) string literals. It is possible to do - // better, either by looking at references to const variables, or by doing + // Currently we only handle string literals. It is possible to do better + // either by looking at references to const variables, or by doing // real flow analysis. 
auto FormatString = dyn_cast<StringLiteral>(CE->getArg(ArgIndex)->IgnoreParenImpCasts()); + // FormatString is null for non-literal arguments; do not dereference it. + std::string String(FormatString ? FormatString->getStrDataAsChar() : ""); + StringRef StrRef = StringRef(String); + if (FormatString && - FormatString->getString().find("%s") == StringRef::npos && - FormatString->getString().find("%[") == StringRef::npos) + StrRef.find("%s") == StringRef::npos && + StrRef.find("%[") == StringRef::npos) BoundsProvided = true; } Index: clang/test/Sema/format-strings-c90.c =================================================================== --- clang/test/Sema/format-strings-c90.c +++ clang/test/Sema/format-strings-c90.c @@ -1,12 +1,16 @@ /* RUN: %clang_cc1 -fsyntax-only -verify -triple i386-apple-darwin9 -Wformat-non-iso -std=c89 %s */ -int scanf(const char * restrict, ...); int printf(const char *restrict, ...); +int scanf(const char *restrict, ...); +int wprintf(const char *restrict, ...); +int wscanf(const char *restrict, ...); void foo(char **sp, float *fp, int *ip) { scanf("%as", sp); /* expected-warning{{'a' length modifier is not supported by ISO C}} */ scanf("%a[abc]", sp); /* expected-warning{{'a' length modifier is not supported by ISO C}} */ + wscanf(L"%as", sp); /* expected-warning{{'a' length modifier is not supported by ISO C}} */ + wscanf(L"%a[abc]", sp); /* expected-warning{{'a' length modifier is not supported by ISO C}} */ /* TODO: Warn that the 'a' conversion specifier is a C99 feature. */ scanf("%a", fp); @@ -16,8 +20,18 @@ printf("%aS", 1.0); printf("%a[", 1.0); printf("%afoo", 1.0); + + wscanf(L"%a", fp); + wscanf(L"%afoobar", fp); + wprintf(L"%a", 1.0); + wprintf(L"%as", 1.0); + wprintf(L"%aS", 1.0); + wprintf(L"%a[", 1.0); + wprintf(L"%afoo", 1.0); scanf("%da", ip); + + wscanf("%da", ip); /* Test argument type check for the 'a' length modifier. 
*/ scanf("%as", fp); /* expected-warning{{format specifies type 'char **' but the argument has type 'float *'}} @@ -27,4 +41,12 @@ expected-warning{{'S' conversion specifier is not supported by ISO C}} */ scanf("%a[abc]", fp); /* expected-warning{{format specifies type 'char **' but the argument has type 'float *'}} expected-warning{{'a' length modifier is not supported by ISO C}} */ + + wscanf(L"%as", fp); /* expected-warning{{format specifies type 'char **' but the argument has type 'float *'}} + expected-warning{{'a' length modifier is not supported by ISO C}} */ + wscanf(L"%aS", fp); /* expected-warning{{format specifies type 'wchar_t **' (aka 'int **') but the argument has type 'float *'}} + expected-warning{{'a' length modifier is not supported by ISO C}} + expected-warning{{'S' conversion specifier is not supported by ISO C}} */ + wscanf(L"%a[abc]", fp); /* expected-warning{{format specifies type 'char **' but the argument has type 'float *'}} + expected-warning{{'a' length modifier is not supported by ISO C}} */ } Index: clang/test/Sema/format-strings-darwin.c =================================================================== --- clang/test/Sema/format-strings-darwin.c +++ clang/test/Sema/format-strings-darwin.c @@ -8,7 +8,9 @@ // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-unknown-freebsd -Wformat-non-iso %s int printf(const char *restrict, ...); -int scanf(const char * restrict, ...) 
; +int scanf(const char *restrict, ...); +int wprintf(const char *restrict, ...); +int wscanf(const char *restrict, ...); void test() { int justRight = 1; @@ -20,6 +22,13 @@ printf("%U", tooLong); printf("%O", justRight); printf("%O", tooLong); + + wprintf(L"%D", justRight); + wprintf(L"%D", tooLong); + wprintf(L"%U", justRight); + wprintf(L"%U", tooLong); + wprintf(L"%O", justRight); + wprintf(L"%O", tooLong); #ifdef ALLOWED // expected-warning@-8 {{'D' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'd'?}} @@ -28,6 +37,12 @@ // expected-warning@-8 {{'U' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'u'?}} expected-warning@-8 {{format specifies type 'unsigned int' but the argument has type 'long'}} // expected-warning@-8 {{'O' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'o'?}} // expected-warning@-8 {{'O' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'o'?}} expected-warning@-8 {{format specifies type 'unsigned int' but the argument has type 'long'}} + // expected-warning@-8 {{'D' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'd'?}} + // expected-warning@-8 {{'D' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'd'?}} expected-warning@-8 {{format specifies type 'int' but the argument has type 'long'}} + // expected-warning@-8 {{'U' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'u'?}} + // expected-warning@-8 {{'U' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'u'?}} expected-warning@-8 {{format specifies type 'unsigned int' but the argument has type 'long'}} + // expected-warning@-8 {{'O' conversion specifier is not supported by ISO C}} expected-note@-8 {{did you mean to use 'o'?}} + // expected-warning@-8 {{'O' conversion specifier is 
not supported by ISO C}} expected-note@-8 {{did you mean to use 'o'?}} expected-warning@-8 {{format specifies type 'unsigned int' but the argument has type 'long'}} #else // expected-warning@-15 {{invalid conversion specifier 'D'}} // expected-warning@-15 {{invalid conversion specifier 'D'}} @@ -35,6 +50,12 @@ // expected-warning@-15 {{invalid conversion specifier 'U'}} // expected-warning@-15 {{invalid conversion specifier 'O'}} // expected-warning@-15 {{invalid conversion specifier 'O'}} + // expected-warning@-15 {{invalid conversion specifier 'D'}} + // expected-warning@-15 {{invalid conversion specifier 'D'}} + // expected-warning@-15 {{invalid conversion specifier 'U'}} + // expected-warning@-15 {{invalid conversion specifier 'U'}} + // expected-warning@-15 {{invalid conversion specifier 'O'}} + // expected-warning@-15 {{invalid conversion specifier 'O'}} #endif } @@ -51,6 +72,18 @@ printf("% '0.5lD", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} printf("%#0.5lO", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} printf("%'0.5lU", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} + + wprintf("%hD", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wprintf("%lD", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wprintf("%hU", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} + wprintf("%lU", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} + wprintf("%hO", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} + wprintf("%lO", y); 
// expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} + + wprintf("%+'0.5lD", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wprintf("% '0.5lD", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wprintf("%#0.5lO", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} + wprintf("%'0.5lU", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} } void testScanf(short *x, long *y) { @@ -60,5 +93,12 @@ scanf("%lU", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} scanf("%hO", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} scanf("%lO", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} + + wscanf("%hD", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wscanf("%lD", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'd'?}} + wscanf("%hU", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} + wscanf("%lU", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'u'?}} + wscanf("%hO", x); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} + wscanf("%lO", y); // expected-warning{{conversion specifier is not supported by ISO C}} expected-note {{did you mean to use 'o'?}} } #endif Index: clang/test/Sema/format-strings-int-typedefs.c 
=================================================================== --- clang/test/Sema/format-strings-int-typedefs.c +++ clang/test/Sema/format-strings-int-typedefs.c @@ -2,6 +2,8 @@ int printf(char const *, ...); int scanf(char const *, ...); +int wprintf(char const *, ...); +int wscanf(char const *, ...); void test(void) { printf("%jd", 42.0); // expected-warning {{format specifies type 'intmax_t' (aka 'long long')}} @@ -12,6 +14,15 @@ printf("%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} printf("%S", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} printf("%C", 42.0); // expected-warning {{format specifies type 'wchar_t' (aka 'int')}} + + wprintf(L"%jd", 42.0); // expected-warning {{format specifies type 'intmax_t' (aka 'long long')}} + wprintf(L"%ju", 42.0); // expected-warning {{format specifies type 'uintmax_t' (aka 'unsigned long long')}} + wprintf(L"%zu", 42.0); // expected-warning {{format specifies type 'size_t' (aka 'unsigned long')}} + wprintf(L"%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}} + wprintf(L"%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}} + wprintf(L"%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wprintf(L"%S", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wprintf(L"%C", 42.0); // expected-warning {{format specifies type 'wchar_t' (aka 'int')}} scanf("%jd", 0); // expected-warning {{format specifies type 'intmax_t *' (aka 'long long *')}} scanf("%ju", 0); // expected-warning {{format specifies type 'uintmax_t *' (aka 'unsigned long long *')}} @@ -21,7 +32,15 @@ scanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} scanf("%S", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} scanf("%C", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} - + + 
wscanf("%jd", 0); // expected-warning {{format specifies type 'intmax_t *' (aka 'long long *')}} + wscanf("%ju", 0); // expected-warning {{format specifies type 'uintmax_t *' (aka 'unsigned long long *')}} + wscanf("%zu", 0); // expected-warning {{format specifies type 'size_t *' (aka 'unsigned long *')}} + wscanf("%td", 0); // expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}} + wscanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wscanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wscanf("%S", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wscanf("%C", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} // typedef size_t et al. to something crazy. typedef void *size_t; Index: clang/test/Sema/format-strings-ms.c =================================================================== --- clang/test/Sema/format-strings-ms.c +++ clang/test/Sema/format-strings-ms.c @@ -2,7 +2,9 @@ // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s int printf(const char *format, ...) __attribute__((format(printf, 1, 2))); -int scanf(const char * restrict, ...) ; +int scanf(const char * restrict, ...); +int wprintf(const char *format, ...) 
__attribute__((format(printf, 1, 2))); +int wscanf(const char * restrict, ...); typedef unsigned short wchar_t; #ifdef NON_ISO_WARNING @@ -14,24 +16,38 @@ printf("%I64d", i64); // expected-warning{{'I64' length modifier is not supported by ISO C}} printf("%wc", c); // expected-warning{{'w' length modifier is not supported by ISO C}} printf("%Z", p); // expected-warning{{'Z' conversion specifier is not supported by ISO C}} + + wprintf(L"%Id", i32); // expected-warning{{'I' length modifier is not supported by ISO C}} + wprintf(L"%I32d", i32); // expected-warning{{'I32' length modifier is not supported by ISO C}} + wprintf(L"%I64d", i64); // expected-warning{{'I64' length modifier is not supported by ISO C}} + wprintf(L"%wc", c); // expected-warning{{'w' length modifier is not supported by ISO C}} + wprintf(L"%Z", p); // expected-warning{{'Z' conversion specifier is not supported by ISO C}} } #else void signed_test() { short val = 30; - printf("val = %I64d\n", val); // expected-warning{{format specifies type '__int64' (aka 'long long') but the argument has type 'short'}} long long bigval = 30; + printf("val = %I64d\n", val); // expected-warning{{format specifies type '__int64' (aka 'long long') but the argument has type 'short'}} printf("val = %I32d\n", bigval); // expected-warning{{format specifies type '__int32' (aka 'int') but the argument has type 'long long'}} printf("val = %Id\n", bigval); // expected-warning{{format specifies type '__int32' (aka 'int') but the argument has type 'long long'}} + + wprintf(L"val = %I64d\n", val); // expected-warning{{format specifies type '__int64' (aka 'long long') but the argument has type 'short'}} + wprintf(L"val = %I32d\n", bigval); // expected-warning{{format specifies type '__int32' (aka 'int') but the argument has type 'long long'}} + wprintf(L"val = %Id\n", bigval); // expected-warning{{format specifies type '__int32' (aka 'int') but the argument has type 'long long'}} } void unsigned_test() { unsigned short val = 30; - 
printf("val = %I64u\n", val); // expected-warning{{format specifies type 'unsigned __int64' (aka 'unsigned long long') but the argument has type 'unsigned short'}} unsigned long long bigval = 30; + printf("val = %I64u\n", val); // expected-warning{{format specifies type 'unsigned __int64' (aka 'unsigned long long') but the argument has type 'unsigned short'}} printf("val = %I32u\n", bigval); // expected-warning{{format specifies type 'unsigned __int32' (aka 'unsigned int') but the argument has type 'unsigned long long'}} printf("val = %Iu\n", bigval); // expected-warning{{format specifies type 'unsigned __int32' (aka 'unsigned int') but the argument has type 'unsigned long long'}} + + wprintf(L"val = %I64u\n", val); // expected-warning{{format specifies type 'unsigned __int64' (aka 'unsigned long long') but the argument has type 'unsigned short'}} + wprintf(L"val = %I32u\n", bigval); // expected-warning{{format specifies type 'unsigned __int32' (aka 'unsigned int') but the argument has type 'unsigned long long'}} + wprintf(L"val = %Iu\n", bigval); // expected-warning{{format specifies type 'unsigned __int32' (aka 'unsigned int') but the argument has type 'unsigned long long'}} } void w_test(wchar_t c, wchar_t *s) { @@ -47,6 +63,20 @@ scanf("%ws", s); scanf("%wS", s); scanf("%S", s); + + wprintf(L"%wc", c); + wprintf(L"%wC", c); + wprintf(L"%C", c); + wprintf(L"%ws", s); + wprintf(L"%wS", s); + wprintf(L"%S", s); + + wscanf(L"%wc", &c); + wscanf(L"%wC", &c); + wscanf(L"%C", &c); + wscanf(L"%ws", s); + wscanf(L"%wS", s); + wscanf(L"%S", s); double bad; printf("%wc", bad); // expected-warning{{format specifies type 'wint_t' (aka 'unsigned short') but the argument has type 'double'}} @@ -61,6 +91,19 @@ scanf("%ws", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} scanf("%wS", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 
'double *'}} scanf("%S", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + + wprintf(L"%wc", bad); // expected-warning{{format specifies type 'wint_t' (aka 'unsigned short') but the argument has type 'double'}} + wprintf(L"%wC", bad); // expected-warning{{format specifies type 'wchar_t' (aka 'unsigned short') but the argument has type 'double'}} + wprintf(L"%C", bad); // expected-warning{{format specifies type 'wchar_t' (aka 'unsigned short') but the argument has type 'double'}} + wprintf(L"%ws", bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double'}} + wprintf(L"%wS", bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double'}} + wprintf(L"%S", bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double'}} + wscanf(L"%wc", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + wscanf(L"%wC", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + wscanf(L"%C", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + wscanf(L"%ws", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + wscanf(L"%wS", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} + wscanf(L"%S", &bad); // expected-warning{{format specifies type 'wchar_t *' (aka 'unsigned short *') but the argument has type 'double *'}} } @@ -74,6 +117,15 @@ scanf("%hC", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} scanf("%hs", &bad); // 
expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} scanf("%hS", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} + + wprintf(L"%hc", bad); // expected-warning{{format specifies type 'int' but the argument has type 'double'}} + wprintf(L"%hC", bad); // expected-warning{{format specifies type 'int' but the argument has type 'double'}} + wprintf(L"%hs", bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double'}} + wprintf(L"%hS", bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double'}} + wscanf(L"%hc", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} + wscanf(L"%hC", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} + wscanf(L"%hs", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} + wscanf(L"%hS", &bad); // expected-warning{{format specifies type 'char *' but the argument has type 'double *'}} } void z_test(void *p) { @@ -83,6 +135,13 @@ printf("%wZ", p); printf("%hhZ", p); // expected-warning{{length modifier 'hh' results in undefined behavior or no effect with 'Z' conversion specifier}} scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}} + + wprintf("%Z", p); + wprintf("%hZ", p); + wprintf("%lZ", p); + wprintf("%wZ", p); + wprintf("%hhZ", p); // expected-warning{{length modifier 'hh' results in undefined behavior or no effect with 'Z' conversion specifier}} + wscanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}} } #endif Index: clang/test/Sema/format-strings-non-iso.c =================================================================== --- clang/test/Sema/format-strings-non-iso.c +++ clang/test/Sema/format-strings-non-iso.c @@ -1,7 +1,10 @@ // RUN: %clang_cc1 -triple i686-linux-gnu -fsyntax-only -verify -std=c99 -Wformat-non-iso 
%s int printf(const char *restrict, ...); -int scanf(const char * restrict, ...); +int scanf(const char *restrict, ...); + +int wprintf(const char *restrict, ...); +int wscanf(const char *restrict, ...); void f(void) { char *cp; @@ -9,13 +12,20 @@ // The 'q' length modifier. printf("%qd", (long long)42); // expected-warning{{'q' length modifier is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} scanf("%qd", (long long *)0); // expected-warning{{'q' length modifier is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + + // The 'q' length modifier. + wprintf(L"%qd", (long long)42); // expected-warning{{'q' length modifier is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + wscanf(L"%qd", (long long *)0); // expected-warning{{'q' length modifier is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} // The 'm' length modifier. scanf("%ms", &cp); // expected-warning{{'m' length modifier is not supported by ISO C}} + wscanf(L"%ms", &cp); // expected-warning{{'m' length modifier is not supported by ISO C}} // The 'S' and 'C' conversion specifiers. printf("%S", L"foo"); // expected-warning{{'S' conversion specifier is not supported by ISO C}} printf("%C", L'x'); // expected-warning{{'C' conversion specifier is not supported by ISO C}} + wprintf(L"%S", L"foo"); // expected-warning{{'S' conversion specifier is not supported by ISO C}} + wprintf(L"%C", L'x'); // expected-warning{{'C' conversion specifier is not supported by ISO C}} // Combining 'L' with an integer conversion specifier. 
printf("%Li", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'i' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} @@ -23,7 +33,14 @@ printf("%Lu", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'u' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} printf("%Lx", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'x' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} printf("%LX", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'X' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + + wprintf(L"%Li", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'i' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + wprintf(L"%Lo", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'o' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + wprintf(L"%Lu", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'u' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + wprintf(L"%Lx", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'x' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} + wprintf(L"%LX", (long long)42); // expected-warning{{using length modifier 'L' with conversion specifier 'X' is not supported by ISO C}} expected-note{{did you mean to use 'll'?}} // Positional arguments. 
printf("%1$d", 42); // expected-warning{{positional arguments are not supported by ISO C}} + wprintf(L"%1$d", 42); // expected-warning{{positional arguments are not supported by ISO C}} } Index: clang/test/Sema/format-strings-pedantic.c =================================================================== --- clang/test/Sema/format-strings-pedantic.c +++ clang/test/Sema/format-strings-pedantic.c @@ -5,9 +5,15 @@ __attribute__((format(printf, 1, 2))) int printf(const char *restrict, ...); +__attribute__((format(wprintf, 1, 2))) +int wprintf(const wchar_t *restrict, ...); + int main() { printf("%p", (int *)0); // expected-warning {{format specifies type 'void *' but the argument has type 'int *'}} printf("%p", (void *)0); + + wprintf(L"%p", (int *)0); // expected-warning {{format specifies type 'void *' but the argument has type 'int *'}} + wprintf(L"%p", (void *)0); #ifdef __OBJC__ printf("%p", ^{}); // expected-warning {{format specifies type 'void *' but the argument has type 'void (^)(void)'}} Index: clang/test/Sema/format-strings-scanf.c =================================================================== --- clang/test/Sema/format-strings-scanf.c +++ clang/test/Sema/format-strings-scanf.c @@ -28,6 +28,7 @@ int fscanf(FILE * restrict, const char * restrict, ...) ; int scanf(const char * restrict, ...) ; +int wscanf(const char * restrict, ...) ; int sscanf(const char * restrict, const char * restrict, ...) ; int my_scanf(const char * restrict, ...) 
__attribute__((__format__(__scanf__, 1, 2))); @@ -40,6 +41,11 @@ scanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}} + + wscanf(s, i); // expected-warning{{format string is not a string literal}} + wscanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}} + wscanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}} + wscanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}} unsigned short s_x; scanf ("%" "hu" "\n", &s_x); // no-warning @@ -51,12 +57,29 @@ scanf("%*d", i); // // expected-warning{{data argument not used by format string}} scanf("%*d", i); // // expected-warning{{data argument not used by format string}} scanf("%*d%1$d", i); // no-warning + + unsigned short ws_x; + wscanf(L"%" "hu" "\n", &ws_x); // no-warning + wscanf(L"%y", i); // expected-warning{{invalid conversion specifier 'y'}} + wscanf(L"%%"); // no-warning + wscanf(L"%%%1$d", i); // no-warning + wscanf(L"%1$d%%", i); // no-warning + wscanf(L"%d", i, i); // expected-warning{{data argument not used by format string}} + wscanf(L"%*d", i); // expected-warning{{data argument not used by format string}} + wscanf(L"%*d", i); // expected-warning{{data argument not used by format string}} + wscanf(L"%*d%1$d", i); // no-warning scanf("%s", (char*)0); // no-warning scanf("%s", (volatile char*)0); // no-warning scanf("%s", (signed char*)0); // no-warning scanf("%s", (unsigned char*)0); // no-warning scanf("%hhu", (signed char*)0); // no-warning + + wscanf(L"%s", (wchar_t*)0); // no-warning + wscanf(L"%s", (volatile wchar_t*)0); // no-warning + wscanf(L"%s", (signed wchar_t*)0); // no-warning + wscanf(L"%s", (unsigned wchar_t*)0); // no-warning + wscanf(L"%hhu", (signed 
wchar_t*)0); // no-warning } void bad_length_modifiers(char *s, void *p, wchar_t *ws, long double *ld) { @@ -64,6 +87,11 @@ scanf("%1$zp", &p); // expected-warning{{length modifier 'z' results in undefined behavior or no effect with 'p' conversion specifier}} scanf("%ls", ws); // no-warning scanf("%#.2Lf", ld); // expected-warning{{invalid conversion specifier '#'}} + + wscanf(L"%hhs", "foo"); // expected-warning{{length modifier 'hh' results in undefined behavior or no effect with 's' conversion specifier}} + wscanf(L"%1$zp", &p); // expected-warning{{length modifier 'z' results in undefined behavior or no effect with 'p' conversion specifier}} + wscanf(L"%ls", ws); // no-warning + wscanf(L"%#.2Lf", ld); // expected-warning{{invalid conversion specifier '#'}} } // Test that the scanf call site is where the warning is attached. If the Index: clang/test/Sema/string-plus-char.c =================================================================== --- clang/test/Sema/string-plus-char.c +++ clang/test/Sema/string-plus-char.c @@ -2,10 +2,16 @@ struct AB{const char *a; const char*b;}; +struct CD{const wchar_t *a; const wchar_t *b;}; + const char *foo(const struct AB *ab) { return ab->a + 'b'; // expected-warning {{adding 'char' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} } +const wchar_t *foo(const struct CD *cd) { + return cd->a + 'b'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} +} + void f(const char *s) { char *str = 0; char *str2 = str + 'c'; // expected-warning {{adding 'char' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} @@ -28,3 +34,26 @@ str = str + c; str = c + str; } + +void g(const wchar_t *s) { + wchar_t *str = 0; + wchar_t *str2 = str + 'c'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to 
the string}} expected-note {{use array indexing to silence this warning}} + + const wchar_t *constStr = s + 'c'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} + + str = 'c' + str;// expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} + + wchar_t strArr[] = L"foo"; + str = strArr + L'c'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} + wchar_t *strArr2[] = {L"ac",L"dc"}; + str = strArr2[0] + L'c'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} + + + struct CD cd; + constStr = foo(&cd) + L'c'; // expected-warning {{adding 'wchar_t' to a string pointer does not append to the string}} expected-note {{use array indexing to silence this warning}} + + // no-warning + wchar_t c = 'c'; + str = str + c; + str = c + str; +} Index: clang/test/SemaCXX/format-strings-0x.cpp =================================================================== --- clang/test/SemaCXX/format-strings-0x.cpp +++ clang/test/SemaCXX/format-strings-0x.cpp @@ -3,33 +3,53 @@ extern "C" { extern int scanf(const char *restrict, ...); extern int printf(const char *restrict, ...); +extern int wscanf(const wchar_t *restrict, ...); } void f(char **sp, float *fp) { scanf("%as", sp); // expected-warning{{format specifies type 'float *' but the argument has type 'char **'}} + wscanf("%as", sp); // expected-warning{{format specifies type 'float *' but the argument has type 'wchar_t **'}} printf("%p", sp); // expected-warning{{format specifies type 'void *' but the argument has type 'char **'}} + wprintf("%p", sp); // expected-warning{{format specifies type 'void *' but the argument has type 'wchar_t **'}} 
scanf("%p", sp); // expected-warning{{format specifies type 'void **' but the argument has type 'char **'}} + wscanf("%p", sp); // expected-warning{{format specifies type 'void **' but the argument has type 'wchar_t **'}} printf("%a", 1.0); scanf("%afoobar", fp); + wprintf("%a", 1.0); + wscanf("%afoobar", fp); printf(nullptr); printf(*sp); // expected-warning {{not a string literal}} // expected-note@-1{{treat the string as an argument to avoid this}} + wprintf(*sp); // expected-warning {{not a string literal}} + // expected-note@-1{{treat the string as an argument to avoid this}} // PR13099 printf( R"foobar(%)foobar" R"bazquux(d)bazquux" // expected-warning {{more '%' conversions than data arguments}} R"xyzzy()xyzzy"); + wprintf( + R"foobar(%)foobar" + R"bazquux(d)bazquux" // expected-warning {{more '%' conversions than data arguments}} + R"xyzzy()xyzzy"); printf(u8"this is %d test", 0); // ok + wprintf(u8"this is %d test", 0); // ok printf(u8R"foo( \u1234\U0010fffe %d)foo" // expected-warning {{more '%' conversions than data arguments}} ); + wprintf(u8R"foo( + \u1234\U0010fffe + %d)foo" // expected-warning {{more '%' conversions than data arguments}} + ); printf("init list: %d", { 0 }); // expected-error {{cannot pass initializer list to variadic function; expected type from format string was 'int'}} + wprintf("init list: %d", { 0 }); // expected-error {{cannot pass initializer list to variadic function; expected type from format string was 'int'}} printf("void: %d", f(sp, fp)); // expected-error {{cannot pass expression of type 'void' to variadic function; expected type from format string was 'int'}} + wprintf("void: %d", f(sp, fp)); // expected-error {{cannot pass expression of type 'void' to variadic function; expected type from format string was 'int'}} printf(0, { 0 }); // expected-error {{cannot pass initializer list to variadic function}} + wprintf(0, { 0 }); // expected-error {{cannot pass initializer list to variadic function}} } Index: 
clang/test/SemaCXX/format-strings.cpp =================================================================== --- clang/test/SemaCXX/format-strings.cpp +++ clang/test/SemaCXX/format-strings.cpp @@ -7,7 +7,10 @@ extern "C" { extern int scanf(const char *restrict, ...); extern int printf(const char *restrict, ...); +extern int wscanf(const char *restrict, ...); +extern int wprintf(const char *restrict, ...); extern int vprintf(const char *restrict, va_list); +extern int vwprintf(const char *restrict, va_list); } void f(char **sp, float *fp) { @@ -17,13 +20,24 @@ #else // expected-warning@-4 {{format specifies type 'float *' but the argument has type 'char **'}} #endif + + scanf("%as", sp); +#if __cplusplus <= 199711L + // expected-warning@-2 {{'a' length modifier is not supported by ISO C}} +#else + // expected-warning@-4 {{format specifies type 'float *' but the argument has type 'wchar_t **'}} +#endif printf("%a", 1.0); scanf("%afoobar", fp); + + wprintf("%a", 1.0); + wscanf("%afoobar", fp); } void g() { printf("%ls", "foo"); // expected-warning{{format specifies type 'wchar_t *' but the argument has type 'const char *'}} + wprintf("%ls", "foo"); // expected-warning{{format specifies type 'wchar_t *' but the argument has type 'const char *'}} } // Test that we properly handle format_idx on C++ members. @@ -76,7 +90,7 @@ va_start(ap,fmt); const char * const format = fmt; vprintf(format, ap); // no-warning - + const char *format2 = fmt; vprintf(format2, ap); // expected-warning{{format string is not a string literal}}