diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -175,6 +175,10 @@ C Language Changes in Clang --------------------------- +- Adjusted ``-Wformat`` warnings according to `WG14 N2562 `_. + Clang will now consider default argument promotions in printf, and remove unnecessary warnings. + Especially ``int`` argument with specifier ``%hhd`` and ``%hd``. + C2x Feature Support ------------------- diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -261,8 +261,14 @@ /// instance, "%d" and float. NoMatch = 0, /// The conversion specifier and the argument type are compatible. For - /// instance, "%d" and _Bool. + /// instance, "%d" and int. Match = 1, + /// The conversion specifier and the argument type are compatible because of + /// default argument promotions. For instance, "%hhd" and int. + MatchPromotion, + /// The conversion specifier and the argument type are compatible but still + /// seems likely to be an error. For instanace, "%hhd" and short. + NoMatchPromotionTypeConfusion, /// The conversion specifier and the argument type are disallowed by the C /// standard, but are in practice harmless. For instance, "%p" and int*. NoMatchPedantic, diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -348,7 +348,7 @@ return Match; case AnyCharTy: { - if (const EnumType *ETy = argTy->getAs()) { + if (const auto *ETy = argTy->getAs()) { // If the enum is incomplete we know nothing about the underlying type. // Assume that it's 'int'. if (!ETy->getDecl()->isComplete()) @@ -356,17 +356,34 @@ argTy = ETy->getDecl()->getIntegerType(); } - if (const BuiltinType *BT = argTy->getAs()) + if (const auto *BT = argTy->getAs()) { + // The types are perfectly matched? switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Char_U: + case BuiltinType::Bool: + return Match; + } + // "Partially matched" because of promotions? + if (!Ptr) { + switch (BT->getKind()) { default: break; - case BuiltinType::Char_S: - case BuiltinType::SChar: - case BuiltinType::UChar: - case BuiltinType::Char_U: - case BuiltinType::Bool: - return Match; + case BuiltinType::Int: + case BuiltinType::UInt: + return MatchPromotion; + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + return NoMatchPromotionTypeConfusion; + } } + } return NoMatch; } @@ -383,8 +400,9 @@ if (T == argTy) return Match; - // Check for "compatible types". - if (const BuiltinType *BT = argTy->getAs()) + if (const auto *BT = argTy->getAs()) { + // Check if the only difference between them is signed vs unsigned + // if true, we consider they are compatible. switch (BT->getKind()) { default: break; @@ -395,25 +413,66 @@ case BuiltinType::Bool: if (T == C.UnsignedShortTy || T == C.ShortTy) return NoMatchTypeConfusion; - return T == C.UnsignedCharTy || T == C.SignedCharTy ? Match - : NoMatch; + if (T == C.UnsignedCharTy || T == C.SignedCharTy) + return Match; + break; case BuiltinType::Short: - return T == C.UnsignedShortTy ? Match : NoMatch; + if (T == C.UnsignedShortTy) + return Match; + break; case BuiltinType::UShort: - return T == C.ShortTy ? Match : NoMatch; + if (T == C.ShortTy) + return Match; + break; case BuiltinType::Int: - return T == C.UnsignedIntTy ? Match : NoMatch; + if (T == C.UnsignedIntTy) + return Match; + break; case BuiltinType::UInt: - return T == C.IntTy ? Match : NoMatch; + if (T == C.IntTy) + return Match; + break; case BuiltinType::Long: - return T == C.UnsignedLongTy ? Match : NoMatch; + if (T == C.UnsignedLongTy) + return Match; + break; case BuiltinType::ULong: - return T == C.LongTy ? Match : NoMatch; + if (T == C.LongTy) + return Match; + break; case BuiltinType::LongLong: - return T == C.UnsignedLongLongTy ? Match : NoMatch; + if (T == C.UnsignedLongLongTy) + return Match; + break; case BuiltinType::ULongLong: - return T == C.LongLongTy ? Match : NoMatch; - } + if (T == C.LongLongTy) + return Match; + break; + } + // "Partially matched" because of promotions? + if (!Ptr) { + switch (BT->getKind()) { + default: + break; + case BuiltinType::Int: + case BuiltinType::UInt: + if (T == C.SignedCharTy || T == C.UnsignedCharTy || + T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy || + T == C.WideCharTy) + return MatchPromotion; + break; + case BuiltinType::Short: + case BuiltinType::UShort: + if (T == C.SignedCharTy || T == C.UnsignedCharTy) + return NoMatchPromotionTypeConfusion; + break; + case BuiltinType::WChar_U: + case BuiltinType::WChar_S: + if (T != C.WCharTy && T != C.WideCharTy) + return NoMatchPromotionTypeConfusion; + } + } + } return NoMatch; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -10081,10 +10081,14 @@ return true; } - analyze_printf::ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy); - if (Match == analyze_printf::ArgType::Match) + ArgType::MatchKind ImplicitMatch = ArgType::NoMatch; + ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy); + if (Match == ArgType::Match) return true; + // NoMatchPromotionTypeConfusion should be only returned in ImplictCastExpr + assert(Match != ArgType::NoMatchPromotionTypeConfusion); + // Look through argument promotions for our error message's reported type. // This includes the integral and floating promotions, but excludes array // and function pointer decay (seeing that an argument intended to be a @@ -10101,13 +10105,9 @@ if (ICE->getType() == S.Context.IntTy || ICE->getType() == S.Context.UnsignedIntTy) { // All further checking is done on the subexpression - const analyze_printf::ArgType::MatchKind ImplicitMatch = - AT.matchesType(S.Context, ExprTy); - if (ImplicitMatch == analyze_printf::ArgType::Match) + ImplicitMatch = AT.matchesType(S.Context, ExprTy); + if (ImplicitMatch == ArgType::Match) return true; - if (ImplicitMatch == ArgType::NoMatchPedantic || - ImplicitMatch == ArgType::NoMatchTypeConfusion) - Match = ImplicitMatch; } } } else if (const CharacterLiteral *CL = dyn_cast(E)) { @@ -10118,10 +10118,29 @@ // modifier is provided. if (ExprTy == S.Context.IntTy && FS.getLengthModifier().getKind() != LengthModifier::AsChar) - if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) + if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) { ExprTy = S.Context.CharTy; + // To improve check results, we consider a character literal in C + // to be a 'char' rather than an 'int'. 'printf("%hd", 'a');' is + // more likely a type confusion situation, so we will suggest to + // use '%hhd' instead by discarding the MatchPromotion. + if (Match == ArgType::MatchPromotion) + Match = ArgType::NoMatch; + } } - + if (Match == ArgType::MatchPromotion) { + // WG14 N2562 only clarified promotions in *printf + // For NSLog in ObjC, just preserve -Wformat behavior + if (!S.getLangOpts().ObjC && + ImplicitMatch != ArgType::NoMatchPromotionTypeConfusion && + ImplicitMatch != ArgType::NoMatchTypeConfusion) + return true; + Match = ArgType::NoMatch; + } + if (ImplicitMatch == ArgType::NoMatchPedantic || + ImplicitMatch == ArgType::NoMatchTypeConfusion) + Match = ImplicitMatch; + assert(Match != ArgType::MatchPromotion); // Look through enums to their underlying type. bool IsEnum = false; if (auto EnumTy = ExprTy->getAs()) { @@ -10194,7 +10213,10 @@ if (IntendedTy == ExprTy && !ShouldNotPrintDirectly) { unsigned Diag; switch (Match) { - case ArgType::Match: llvm_unreachable("expected non-matching"); + case ArgType::Match: + case ArgType::MatchPromotion: + case ArgType::NoMatchPromotionTypeConfusion: + llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; break; @@ -10291,7 +10313,10 @@ case Sema::VAK_ValidInCXX11: { unsigned Diag; switch (Match) { - case ArgType::Match: llvm_unreachable("expected non-matching"); + case ArgType::Match: + case ArgType::MatchPromotion: + case ArgType::NoMatchPromotionTypeConfusion: + llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; break; diff --git a/clang/test/Sema/format-strings-freebsd.c b/clang/test/Sema/format-strings-freebsd.c --- a/clang/test/Sema/format-strings-freebsd.c +++ b/clang/test/Sema/format-strings-freebsd.c @@ -35,9 +35,9 @@ freebsd_kernel_printf("%lr", l); // no-warning // h modifier expects a short - freebsd_kernel_printf("%hr", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}} + freebsd_kernel_printf("%hr", i); // no-warning freebsd_kernel_printf("%hr", h); // no-warning - freebsd_kernel_printf("%hy", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}} + freebsd_kernel_printf("%hy", i); // no-warning freebsd_kernel_printf("%hy", h); // no-warning // %y expects an int diff --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c --- a/clang/test/Sema/format-strings-scanf.c +++ b/clang/test/Sema/format-strings-scanf.c @@ -246,3 +246,55 @@ scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}} scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}} } + +void test_promotion(void) { + // No promotions for *scanf pointers clarified in N2562 + // https://github.com/llvm/llvm-project/issues/57102 + // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf + int i; + signed char sc; + unsigned char uc; + short ss; + unsigned short us; + + // pointers could not be "promoted" + scanf("%hhd", &i); // expected-warning{{format specifies type 'char *' but the argument has type 'int *'}} + scanf("%hd", &i); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}} + scanf("%d", &i); // no-warning + // char & uchar + scanf("%hhd", &sc); // no-warning + scanf("%hhd", &uc); // no-warning + scanf("%hd", &sc); // expected-warning{{format specifies type 'short *' but the argument has type 'signed char *'}} + scanf("%hd", &uc); // expected-warning{{format specifies type 'short *' but the argument has type 'unsigned char *'}} + scanf("%d", &sc); // expected-warning{{format specifies type 'int *' but the argument has type 'signed char *'}} + scanf("%d", &uc); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned char *'}} + // short & ushort + scanf("%hhd", &ss); // expected-warning{{format specifies type 'char *' but the argument has type 'short *'}} + scanf("%hhd", &us); // expected-warning{{format specifies type 'char *' but the argument has type 'unsigned short *'}} + scanf("%hd", &ss); // no-warning + scanf("%hd", &us); // no-warning + scanf("%d", &ss); // expected-warning{{format specifies type 'int *' but the argument has type 'short *'}} + scanf("%d", &us); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned short *'}} + + // long types + scanf("%ld", &i); // expected-warning{{format specifies type 'long *' but the argument has type 'int *'}} + scanf("%lld", &i); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}} + scanf("%ld", &sc); // expected-warning{{format specifies type 'long *' but the argument has type 'signed char *'}} + scanf("%lld", &sc); // expected-warning{{format specifies type 'long long *' but the argument has type 'signed char *'}} + scanf("%ld", &uc); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned char *'}} + scanf("%lld", &uc); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned char *'}} + scanf("%llx", &i); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'int *'}} + + // ill-formed floats + scanf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}} + &sc); + + // pointers in scanf + scanf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} + + // FIXME: does this match what the C committee allows or should it be pedantically warned on? + char c; + void *vp; + scanf("%hhd", &c); // Pedantic warning? + scanf("%hhd", vp); // expected-warning{{format specifies type 'char *' but the argument has type 'void *'}} +} diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -830,3 +830,57 @@ printf_arg2("foo", "%s string %i\n", "aaa", 123); printf_arg2("%s string\n", "foo", "bar"); // expected-warning{{data argument not used by format string}} } + +void test_promotion(void) { + // Default argument promotions for *printf in N2562 + // https://github.com/llvm/llvm-project/issues/57102 + // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf + int i; + signed char sc; + unsigned char uc; + char c; + short ss; + unsigned short us; + + printf("%hhd %hd %d %hhd %hd %d", i, i, i, sc, sc, sc); // no-warning + printf("%hhd %hd %d %hhd %hd %d", uc, uc, uc, c, c, c); // no-warning + + // %ld %lld %llx + printf("%ld", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} + printf("%lld", i); // expected-warning{{format specifies type 'long long' but the argument has type 'int'}} + printf("%ld", sc); // expected-warning{{format specifies type 'long' but the argument has type 'signed char'}} + printf("%lld", sc); // expected-warning{{format specifies type 'long long' but the argument has type 'signed char'}} + printf("%ld", uc); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned char'}} + printf("%lld", uc); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned char'}} + printf("%llx", i); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'int'}} + + // ill formed spec for floats + printf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}} + sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}} + + // for %hhd and `short` they are compatible by promotions but more likely misuse + printf("%hd", ss); // no-warning + printf("%hhd", ss); // expected-warning{{format specifies type 'char' but the argument has type 'short'}} + printf("%hu", us); // no-warning + printf("%hhu", ss); // expected-warning{{format specifies type 'unsigned char' but the argument has type 'short'}} + + // floats & integers are not compatible + printf("%f", i); // expected-warning{{format specifies type 'double' but the argument has type 'int'}} + printf("%f", sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}} + printf("%f", uc); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned char'}} + printf("%f", c); // expected-warning{{format specifies type 'double' but the argument has type 'char'}} + printf("%f", ss); // expected-warning{{format specifies type 'double' but the argument has type 'short'}} + printf("%f", us); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned short'}} + + // character literals + // In C language engineering practice, printing a character literal with %hhd or %d is common, but %hd may be misuse. + printf("%hhu", 'a'); // no-warning + printf("%hhd", 'a'); // no-warning + printf("%hd", 'a'); // expected-warning{{format specifies type 'short' but the argument has type 'char'}} + printf("%hu", 'a'); // expected-warning{{format specifies type 'unsigned short' but the argument has type 'char'}} + printf("%d", 'a'); // no-warning + printf("%u", 'a'); // no-warning + + // pointers + printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} +} diff --git a/clang/www/c_status.html b/clang/www/c_status.html --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -819,13 +819,7 @@ Unclear type relationship between a format specifier and its argument N2562 - -
Partial - Clang supports diagnostics checking format specifier validity, but - does not yet account for all of the changes in this paper, especially - regarding length modifiers like h and hh. -
- + Clang 16