diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -138,6 +138,10 @@ C Language Changes in Clang --------------------------- +- Implemented `WG14 N2562 `_. + Clang will now consider default argument promotions in printf, and remove unnecessary warnings. + Especially ``int`` argument with specifier ``%hhd`` and ``%hd``. + C2x Feature Support ------------------- diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -261,8 +261,14 @@ /// instance, "%d" and float. NoMatch = 0, /// The conversion specifier and the argument type are compatible. For - /// instance, "%d" and _Bool. + /// instance, "%d" and int. Match = 1, + /// The conversion specifier and the argument type are compatible because of + /// default argument promotions. For instance, "%hhd" and int. + MatchPromotion, + /// The conversion specifier and the argument type are compatible but still + /// seems likely to be an error. For instanace, "%hhd" and short. + NoMatchPromotionTypeConfusion, /// The conversion specifier and the argument type are disallowed by the C /// standard, but are in practice harmless. For instance, "%p" and int*. NoMatchPedantic, diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -348,7 +348,7 @@ return Match; case AnyCharTy: { - if (const EnumType *ETy = argTy->getAs()) { + if (const auto *ETy = argTy->getAs()) { // If the enum is incomplete we know nothing about the underlying type. // Assume that it's 'int'. if (!ETy->getDecl()->isComplete()) @@ -356,17 +356,33 @@ argTy = ETy->getDecl()->getIntegerType(); } - if (const BuiltinType *BT = argTy->getAs()) + if (const auto *BT = argTy->getAs()) { + // the types are perfectly matched? switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Char_U: + return Match; + } + // "partially matched" because of promotions? + if (!Ptr) { + switch (BT->getKind()) { default: break; - case BuiltinType::Char_S: - case BuiltinType::SChar: - case BuiltinType::UChar: - case BuiltinType::Char_U: - case BuiltinType::Bool: - return Match; + case BuiltinType::Int: + case BuiltinType::UInt: + return MatchPromotion; + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + return NoMatchPromotionTypeConfusion; + } } + } return NoMatch; } @@ -383,8 +399,32 @@ if (T == argTy) return Match; - // Check for "compatible types". - if (const BuiltinType *BT = argTy->getAs()) + if (const auto *BT = argTy->getAs()) { + // "partially matched" because of promotions? + if (!Ptr) { + switch (BT->getKind()) { + default: + break; + case BuiltinType::Int: + case BuiltinType::UInt: + if (T == C.SignedCharTy || T == C.UnsignedCharTy || + T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy || + T == C.WideCharTy) + return MatchPromotion; + break; + case BuiltinType::Short: + case BuiltinType::UShort: + if (T == C.SignedCharTy || T == C.UnsignedCharTy) + return NoMatchPromotionTypeConfusion; + break; + case BuiltinType::WChar_U: + case BuiltinType::WChar_S: + if (T != C.WCharTy && T != C.WideCharTy) + return NoMatchPromotionTypeConfusion; + } + } + // check if the only difference between them is signed vs unsigned + // if true, we consider they are compatible. switch (BT->getKind()) { default: break; @@ -414,6 +454,7 @@ case BuiltinType::ULongLong: return T == C.LongLongTy ? Match : NoMatch; } + } return NoMatch; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -10081,10 +10081,14 @@ return true; } - analyze_printf::ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy); - if (Match == analyze_printf::ArgType::Match) + ArgType::MatchKind ImplicitMatch = ArgType::NoMatch; + ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy); + if (Match == ArgType::Match) return true; + // NoMatchPromotionTypeConfusion should be only returned in ImplictCastExpr + assert(Match != ArgType::NoMatchPromotionTypeConfusion); + // Look through argument promotions for our error message's reported type. // This includes the integral and floating promotions, but excludes array // and function pointer decay (seeing that an argument intended to be a @@ -10101,13 +10105,9 @@ if (ICE->getType() == S.Context.IntTy || ICE->getType() == S.Context.UnsignedIntTy) { // All further checking is done on the subexpression - const analyze_printf::ArgType::MatchKind ImplicitMatch = - AT.matchesType(S.Context, ExprTy); - if (ImplicitMatch == analyze_printf::ArgType::Match) + ImplicitMatch = AT.matchesType(S.Context, ExprTy); + if (ImplicitMatch == ArgType::Match) return true; - if (ImplicitMatch == ArgType::NoMatchPedantic || - ImplicitMatch == ArgType::NoMatchTypeConfusion) - Match = ImplicitMatch; } } } else if (const CharacterLiteral *CL = dyn_cast(E)) { @@ -10118,10 +10118,22 @@ // modifier is provided. if (ExprTy == S.Context.IntTy && FS.getLengthModifier().getKind() != LengthModifier::AsChar) - if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) + if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) { ExprTy = S.Context.CharTy; + if (Match == ArgType::MatchPromotion) + Match = ArgType::NoMatch; // Still consider 'a' in C + } } - + if (Match == ArgType::MatchPromotion) { + if (ImplicitMatch != ArgType::NoMatchPromotionTypeConfusion && + ImplicitMatch != ArgType::NoMatchTypeConfusion) + return true; + Match = ArgType::NoMatch; + } + if (ImplicitMatch == ArgType::NoMatchPedantic || + ImplicitMatch == ArgType::NoMatchTypeConfusion) + Match = ImplicitMatch; + assert(Match != ArgType::MatchPromotion); // Look through enums to their underlying type. bool IsEnum = false; if (auto EnumTy = ExprTy->getAs()) { @@ -10194,7 +10206,10 @@ if (IntendedTy == ExprTy && !ShouldNotPrintDirectly) { unsigned Diag; switch (Match) { - case ArgType::Match: llvm_unreachable("expected non-matching"); + case ArgType::Match: + case ArgType::MatchPromotion: + case ArgType::NoMatchPromotionTypeConfusion: + llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; break; @@ -10291,7 +10306,10 @@ case Sema::VAK_ValidInCXX11: { unsigned Diag; switch (Match) { - case ArgType::Match: llvm_unreachable("expected non-matching"); + case ArgType::Match: + case ArgType::MatchPromotion: + case ArgType::NoMatchPromotionTypeConfusion: + llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; break; diff --git a/clang/test/FixIt/format.m b/clang/test/FixIt/format.m --- a/clang/test/FixIt/format.m +++ b/clang/test/FixIt/format.m @@ -170,11 +170,9 @@ NSLog(@"%@", 'abcd'); // expected-warning{{format specifies type 'id' but the argument has type 'int'}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - NSLog(@"%hhd", 'a'); // expected-warning{{format specifies type 'char' but the argument has type 'int'}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:15}:"%d" + NSLog(@"%hhd", 'a'); - NSLog(@"%hhu", 'a'); // expected-warning{{format specifies type 'unsigned char' but the argument has type 'int'}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:15}:"%d" + NSLog(@"%hhu", 'a'); } void multichar_constants_false_negative(void) { @@ -192,20 +190,8 @@ const unsigned short data = 'a'; NSLog(@"%C", data); // no-warning - NSLog(@"%C", 0x260300); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} - // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unsigned short)" typedef unsigned short unichar; - - NSLog(@"%C", 0x260300); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} - // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unichar)" - - NSLog(@"%C", data ? 0x2F0000 : 0x260300); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} - // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unichar)(" - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:42-[[@LINE-3]]:42}:")" NSLog(@"%C", 0.0); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'double'}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%f" diff --git a/clang/test/FixIt/format.mm b/clang/test/FixIt/format.mm --- a/clang/test/FixIt/format.mm +++ b/clang/test/FixIt/format.mm @@ -11,19 +11,15 @@ NSLog(@"%C", wchar_data); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'wchar_t'}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:16-[[@LINE-1]]:16}:"(unsigned short)" - NSLog(@"%C", 0x260300); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} - // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unsigned short)" - + NSLog(@"%C", 0x260300); // no-warning + typedef unsigned short unichar; NSLog(@"%C", wchar_data); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'wchar_t'}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:16-[[@LINE-1]]:16}:"(unichar)" - - NSLog(@"%C", 0x260300); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} - // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d" - // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unichar)" - + + NSLog(@"%C", 0x260300); // no-warning + NSLog(@"%C", 0.0); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'double'}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%f" // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:16}:"(unichar)" diff --git a/clang/test/Sema/format-strings-freebsd.c b/clang/test/Sema/format-strings-freebsd.c --- a/clang/test/Sema/format-strings-freebsd.c +++ b/clang/test/Sema/format-strings-freebsd.c @@ -35,9 +35,9 @@ freebsd_kernel_printf("%lr", l); // no-warning // h modifier expects a short - freebsd_kernel_printf("%hr", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}} + freebsd_kernel_printf("%hr", i); // no-warning freebsd_kernel_printf("%hr", h); // no-warning - freebsd_kernel_printf("%hy", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}} + freebsd_kernel_printf("%hy", i); // no-warning freebsd_kernel_printf("%hy", h); // no-warning // %y expects an int diff --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c --- a/clang/test/Sema/format-strings-scanf.c +++ b/clang/test/Sema/format-strings-scanf.c @@ -246,3 +246,55 @@ scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}} scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}} } + +void test_promotion(void) { + // No promotions for *scanf pointers clarified in N2562 + // https://github.com/llvm/llvm-project/issues/57102 + // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf + int i; + signed char sc; + unsigned char uc; + short ss; + unsigned short us; + + // pointers could not be "promoted" + scanf("%hhd", &i); // expected-warning{{format specifies type 'char *' but the argument has type 'int *'}} + scanf("%hd", &i); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}} + scanf("%d", &i); // no-warning + // char & uchar + scanf("%hhd", &sc); // no-warning + scanf("%hhd", &uc); // no-warning + scanf("%hd", &sc); // expected-warning{{format specifies type 'short *' but the argument has type 'signed char *'}} + scanf("%hd", &uc); // expected-warning{{format specifies type 'short *' but the argument has type 'unsigned char *'}} + scanf("%d", &sc); // expected-warning{{format specifies type 'int *' but the argument has type 'signed char *'}} + scanf("%d", &uc); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned char *'}} + // short & ushort + scanf("%hhd", &ss); // expected-warning{{format specifies type 'char *' but the argument has type 'short *'}} + scanf("%hhd", &us); // expected-warning{{format specifies type 'char *' but the argument has type 'unsigned short *'}} + scanf("%hd", &ss); // no-warning + scanf("%hd", &us); // no-warning + scanf("%d", &ss); // expected-warning{{format specifies type 'int *' but the argument has type 'short *'}} + scanf("%d", &us); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned short *'}} + + // long types + scanf("%ld", &i); // expected-warning{{format specifies type 'long *' but the argument has type 'int *'}} + scanf("%lld", &i); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}} + scanf("%ld", &sc); // expected-warning{{format specifies type 'long *' but the argument has type 'signed char *'}} + scanf("%lld", &sc); // expected-warning{{format specifies type 'long long *' but the argument has type 'signed char *'}} + scanf("%ld", &uc); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned char *'}} + scanf("%lld", &uc); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned char *'}} + scanf("%llx", &i); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'int *'}} + + // ill-formed floats + scanf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}} + &sc); + + // pointers in scanf + scanf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} + + // FIXME: does this match what the C committee allows or should it be pedantically warned on? + char c; + void *vp; + scanf("%hhd", &c); // Pedantic warning? + scanf("%hhd", vp); // expected-warning{{format specifies type 'char *' but the argument has type 'void *'}} +} diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -830,3 +830,58 @@ printf_arg2("foo", "%s string %i\n", "aaa", 123); printf_arg2("%s string\n", "foo", "bar"); // expected-warning{{data argument not used by format string}} } + +void test_promotion(void) { + // Default argument promotions for *printf in N2562 + // https://github.com/llvm/llvm-project/issues/57102 + // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf + int i; + signed char sc; + unsigned char uc; + char c; + short ss; + unsigned short us; + + printf("%hhd %hd %d %hhd %hd %d", i, i, i, sc, sc, sc); // no-warning + printf("%hhd %hd %d %hhd %hd %d", uc, uc, uc, c, c, c); // no-warning + + // %ld %lld %llx + printf("%ld", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} + printf("%lld", i); // expected-warning{{format specifies type 'long long' but the argument has type 'int'}} + printf("%ld", sc); // expected-warning{{format specifies type 'long' but the argument has type 'signed char'}} + printf("%lld", sc); // expected-warning{{format specifies type 'long long' but the argument has type 'signed char'}} + printf("%ld", uc); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned char'}} + printf("%lld", uc); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned char'}} + printf("%llx", i); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'int'}} + + // ill formed spec for floats + printf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}} + sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}} + + // for %hhd and `short` they are compatible by promotions but more likely misuse + printf("%hd", ss); // no-warning + printf("%hhd", ss); // expected-warning{{format specifies type 'char' but the argument has type 'short'}} + printf("%hu", us); // no-warning + printf("%hhu", ss); // expected-warning{{format specifies type 'unsigned char' but the argument has type 'short'}} + + // floats & integers are not compatible + printf("%f", i); // expected-warning{{format specifies type 'double' but the argument has type 'int'}} + printf("%f", sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}} + printf("%f", uc); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned char'}} + printf("%f", c); // expected-warning{{format specifies type 'double' but the argument has type 'char'}} + printf("%f", ss); // expected-warning{{format specifies type 'double' but the argument has type 'short'}} + printf("%f", us); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned short'}} + + // character literals + // In C language engineering practice, printing a character literal with %hhd or %d is common, but %hd may be misuse. + printf("%hhu", 'a'); // no-warning + printf("%hhd", 'a'); // no-warning + printf("%hd", 'a'); // expected-warning{{format specifies type 'short' but the argument has type 'char'}} + printf("%hu", 'a'); // expected-warning{{format specifies type 'unsigned short' but the argument has type 'char'}} + printf("%d", 'a'); // no-warning + printf("%u", 'a'); // no-warning + + + // pointers + printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} +} diff --git a/clang/test/SemaObjC/format-strings-objc.m b/clang/test/SemaObjC/format-strings-objc.m --- a/clang/test/SemaObjC/format-strings-objc.m +++ b/clang/test/SemaObjC/format-strings-objc.m @@ -191,7 +191,7 @@ NSLog(@"%C", data); // no-warning const wchar_t wchar_data = L'a'; - NSLog(@"%C", wchar_data); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'wchar_t'}} + NSLog(@"%C", wchar_data); // no-warning } // Test that %@ works with toll-free bridging (). @@ -273,7 +273,7 @@ void testUnicode(void) { NSLog(@"%C", 0x2022); // no-warning - NSLog(@"%C", 0x202200); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}} + NSLog(@"%C", 0x202200); // no-warning } // Test Objective-C modifier flags. diff --git a/clang/www/c_status.html b/clang/www/c_status.html --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -819,13 +819,7 @@ Unclear type relationship between a format specifier and its argument N2562 - -
Partial - Clang supports diagnostics checking format specifier validity, but - does not yet account for all of the changes in this paper, especially - regarding length modifiers like h and hh. -
- + Clang 16