diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -741,6 +741,12 @@ "'%0' size argument is too large; destination buffer has size %1," " but size argument is %2">, InGroup; +def warn_fortify_source_format_overflow : Warning< + "'%0' will always overflow; destination buffer has size %1," + " but format string expands to at least %2">, + InGroup; + + /// main() // static main() is not an error in C, just in C++. def warn_static_main : Warning<"'main' should not be declared static">, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -390,13 +390,194 @@ return false; } +namespace { + +class EstimateSizeFormatHandler + : public analyze_format_string::FormatStringHandler { + size_t Size; + +public: + EstimateSizeFormatHandler(StringRef Format) + : Size(std::min(Format.find(0), Format.size()) + + 1 /* null byte always written by sprintf */) {} + + bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *, unsigned SpecifierLen) override { + + const size_t FieldWidth = computeFieldWidth(FS); + const size_t Precision = computePrecision(FS); + + // The actual format. + switch (FS.getConversionSpecifier().getKind()) { + // Just a char. + case analyze_format_string::ConversionSpecifier::cArg: + case analyze_format_string::ConversionSpecifier::CArg: + Size += std::max(FieldWidth, (size_t)1); + break; + // Just an integer. + case analyze_format_string::ConversionSpecifier::dArg: + case analyze_format_string::ConversionSpecifier::DArg: + case analyze_format_string::ConversionSpecifier::iArg: + case analyze_format_string::ConversionSpecifier::oArg: + case analyze_format_string::ConversionSpecifier::OArg: + case analyze_format_string::ConversionSpecifier::uArg: + case analyze_format_string::ConversionSpecifier::UArg: + case analyze_format_string::ConversionSpecifier::xArg: + case analyze_format_string::ConversionSpecifier::XArg: + Size += std::max(FieldWidth, Precision); + break; + + // %g style conversion switches between %f or %e style dynamically. + // %f always takes less space, so default to it. + case analyze_format_string::ConversionSpecifier::gArg: + case analyze_format_string::ConversionSpecifier::GArg: + + // Floating point number in the form '[+]ddd.ddd'. + case analyze_format_string::ConversionSpecifier::fArg: + case analyze_format_string::ConversionSpecifier::FArg: + Size += std::max(FieldWidth, 1 /* integer part */ + + (Precision ? 1 + Precision + : 0) /* period + decimal */); + break; + + // Floating point number in the form '[-]d.ddde[+-]dd'. + case analyze_format_string::ConversionSpecifier::eArg: + case analyze_format_string::ConversionSpecifier::EArg: + Size += + std::max(FieldWidth, + 1 /* integer part */ + + (Precision ? 1 + Precision : 0) /* period + decimal */ + + 1 /* e or E letter */ + 2 /* exponent */); + break; + + // Floating point number in the form '[-]0xh.hhhhp±dd'. + case analyze_format_string::ConversionSpecifier::aArg: + case analyze_format_string::ConversionSpecifier::AArg: + Size += + std::max(FieldWidth, + 2 /* 0x */ + 1 /* integer part */ + + (Precision ? 1 + Precision : 0) /* period + decimal */ + + 1 /* p or P letter */ + 1 /* + or - */ + 1 /* value */); + break; + + // Just a string. + case analyze_format_string::ConversionSpecifier::sArg: + case analyze_format_string::ConversionSpecifier::SArg: + Size += FieldWidth; + break; + + // Just a pointer in the form '0xddd'. + case analyze_format_string::ConversionSpecifier::pArg: + Size += std::max(FieldWidth, 2 /* leading 0x */ + Precision); + break; + + // A plain percent. + case analyze_format_string::ConversionSpecifier::PercentArg: + Size += 1; + break; + + default: + break; + } + + Size += FS.hasPlusPrefix() || FS.hasSpacePrefix(); + + if (FS.hasAlternativeForm()) { + switch (FS.getConversionSpecifier().getKind()) { + default: + break; + // Force a leading '0'. + case analyze_format_string::ConversionSpecifier::oArg: + Size += 1; + break; + // Force a leading '0x'. + case analyze_format_string::ConversionSpecifier::xArg: + case analyze_format_string::ConversionSpecifier::XArg: + Size += 2; + break; + // Force a period '.' before decimal, even if precision is 0. + case analyze_format_string::ConversionSpecifier::aArg: + case analyze_format_string::ConversionSpecifier::AArg: + case analyze_format_string::ConversionSpecifier::eArg: + case analyze_format_string::ConversionSpecifier::EArg: + case analyze_format_string::ConversionSpecifier::fArg: + case analyze_format_string::ConversionSpecifier::FArg: + case analyze_format_string::ConversionSpecifier::gArg: + case analyze_format_string::ConversionSpecifier::GArg: + Size += (Precision ? 0 : 1); + break; + } + } + assert(SpecifierLen <= Size && "no underflow"); + Size -= SpecifierLen; + return true; + } + + size_t getSizeLowerBound() const { return Size; } + +private: + static size_t computeFieldWidth(const analyze_printf::PrintfSpecifier &FS) { + const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth(); + size_t FieldWidth = 0; + if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant) + FieldWidth = FW.getConstantAmount(); + return FieldWidth; + } + + static size_t computePrecision(const analyze_printf::PrintfSpecifier &FS) { + const analyze_format_string::OptionalAmount &FW = FS.getPrecision(); + size_t Precision = 0; + + // See man 3 printf for default precision value based on the specifier. + switch (FW.getHowSpecified()) { + case analyze_format_string::OptionalAmount::NotSpecified: + switch (FS.getConversionSpecifier().getKind()) { + default: + break; + case analyze_format_string::ConversionSpecifier::dArg: // %d + case analyze_format_string::ConversionSpecifier::DArg: // %D + case analyze_format_string::ConversionSpecifier::iArg: // %i + Precision = 1; + break; + case analyze_format_string::ConversionSpecifier::oArg: // %d + case analyze_format_string::ConversionSpecifier::OArg: // %D + case analyze_format_string::ConversionSpecifier::uArg: // %d + case analyze_format_string::ConversionSpecifier::UArg: // %D + case analyze_format_string::ConversionSpecifier::xArg: // %d + case analyze_format_string::ConversionSpecifier::XArg: // %D + Precision = 1; + break; + case analyze_format_string::ConversionSpecifier::fArg: // %f + case analyze_format_string::ConversionSpecifier::FArg: // %F + case analyze_format_string::ConversionSpecifier::eArg: // %e + case analyze_format_string::ConversionSpecifier::EArg: // %E + case analyze_format_string::ConversionSpecifier::gArg: // %g + case analyze_format_string::ConversionSpecifier::GArg: // %G + Precision = 6; + break; + case analyze_format_string::ConversionSpecifier::pArg: // %d + Precision = 1; + break; + } + break; + case analyze_format_string::OptionalAmount::Constant: + Precision = FW.getConstantAmount(); + break; + default: + break; + } + return Precision; + } +}; + +} // namespace + /// Check a call to BuiltinID for buffer overflows. If BuiltinID is a /// __builtin_*_chk function, then use the object size argument specified in the /// source. Otherwise, infer the object size using __builtin_object_size. void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { // FIXME: There are some more useful checks we could be doing here: - // - Analyze the format string of sprintf to see how much of buffer is used. // - Evaluate strlen of strcpy arguments, use as object size. if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -407,12 +588,55 @@ if (!BuiltinID) return; + const TargetInfo &TI = getASTContext().getTargetInfo(); + unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType()); + unsigned DiagID = 0; bool IsChkVariant = false; + Optional UsedSize; unsigned SizeIndex, ObjectIndex; switch (BuiltinID) { default: return; + case Builtin::BIsprintf: + case Builtin::BI__builtin___sprintf_chk: { + size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; + auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); + + if (auto *Format = dyn_cast(FormatExpr)) { + + if (!Format->isAscii() && !Format->isUTF8()) + return; + + StringRef FormatStrRef = Format->getString(); + EstimateSizeFormatHandler H(FormatStrRef); + const char *FormatBytes = FormatStrRef.data(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(Format->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + + // In case there's a null byte somewhere. + size_t StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); + if (!analyze_format_string::ParsePrintfString( + H, FormatBytes, FormatBytes + StrLen, getLangOpts(), + Context.getTargetInfo(), false)) { + DiagID = diag::warn_fortify_source_format_overflow; + UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound()) + .extOrTrunc(SizeTypeWidth); + if (BuiltinID == Builtin::BI__builtin___sprintf_chk) { + IsChkVariant = true; + ObjectIndex = 2; + } else { + IsChkVariant = false; + ObjectIndex = 0; + } + break; + } + } + return; + } case Builtin::BI__builtin___memcpy_chk: case Builtin::BI__builtin___memmove_chk: case Builtin::BI__builtin___memset_chk: @@ -505,19 +729,19 @@ if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType)) return; // Get the object size in the target's size_t width. - const TargetInfo &TI = getASTContext().getTargetInfo(); - unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType()); ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth); } // Evaluate the number of bytes of the object that this call will use. - Expr::EvalResult Result; - Expr *UsedSizeArg = TheCall->getArg(SizeIndex); - if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext())) - return; - llvm::APSInt UsedSize = Result.Val.getInt(); + if (!UsedSize) { + Expr::EvalResult Result; + Expr *UsedSizeArg = TheCall->getArg(SizeIndex); + if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext())) + return; + UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth); + } - if (UsedSize.ule(ObjectSize)) + if (UsedSize.getValue().ule(ObjectSize)) return; StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); @@ -533,7 +757,7 @@ DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, PDiag(DiagID) << FunctionName << ObjectSize.toString(/*Radix=*/10) - << UsedSize.toString(/*Radix=*/10)); + << UsedSize.getValue().toString(/*Radix=*/10)); } static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -11,6 +11,8 @@ extern "C" { #endif +extern int sprintf(char *str, const char *format, ...); + #if defined(USE_PASS_OBJECT_SIZE) void *memcpy(void *dst, const void *src, size_t c); static void *memcpy(void *dst __attribute__((pass_object_size(1))), const void *src, size_t c) __attribute__((overloadable)) __asm__("merp"); @@ -96,6 +98,91 @@ __builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}} } +void call_sprintf_chk(char *buf) { + __builtin___sprintf_chk(buf, 1, 6, "hell\n"); + __builtin___sprintf_chk(buf, 1, 5, "hell\n"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}} + __builtin___sprintf_chk(buf, 1, 6, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} + __builtin___sprintf_chk(buf, 1, 2, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} + // expected-warning@-1 {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 5}} + __builtin___sprintf_chk(buf, 1, 6, "hello"); + __builtin___sprintf_chk(buf, 1, 5, "hello"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}} + __builtin___sprintf_chk(buf, 1, 2, "%c", '9'); + __builtin___sprintf_chk(buf, 1, 1, "%c", '9'); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%d", 9); + __builtin___sprintf_chk(buf, 1, 1, "%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%i", 9); + __builtin___sprintf_chk(buf, 1, 1, "%i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%o", 9); + __builtin___sprintf_chk(buf, 1, 1, "%o", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%u", 9); + __builtin___sprintf_chk(buf, 1, 1, "%u", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%x", 9); + __builtin___sprintf_chk(buf, 1, 1, "%x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%X", 9); + __builtin___sprintf_chk(buf, 1, 1, "%X", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%hhd", (char)9); + __builtin___sprintf_chk(buf, 1, 1, "%hhd", (char)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%hd", (short)9); + __builtin___sprintf_chk(buf, 1, 1, "%hd", (short)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%ld", 9l); + __builtin___sprintf_chk(buf, 1, 1, "%ld", 9l); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%lld", 9ll); + __builtin___sprintf_chk(buf, 1, 1, "%lld", 9ll); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 2, "%%"); + __builtin___sprintf_chk(buf, 1, 1, "%%"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 4, "%#x", 9); + __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} + __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9); + __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} + __builtin___sprintf_chk(buf, 1, 3, "%+d", 9); + __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}} + __builtin___sprintf_chk(buf, 1, 3, "% i", 9); + __builtin___sprintf_chk(buf, 1, 2, "% i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}} + __builtin___sprintf_chk(buf, 1, 6, "%5d", 9); + __builtin___sprintf_chk(buf, 1, 5, "%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}} + __builtin___sprintf_chk(buf, 1, 9, "%f", 9.f); + __builtin___sprintf_chk(buf, 1, 8, "%f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 8, but format string expands to at least 9}} + __builtin___sprintf_chk(buf, 1, 9, "%Lf", (long double)9.); + __builtin___sprintf_chk(buf, 1, 8, "%Lf", (long double)9.); // expected-warning {{'sprintf' will always overflow; destination buffer has size 8, but format string expands to at least 9}} + __builtin___sprintf_chk(buf, 1, 10, "%+f", 9.f); + __builtin___sprintf_chk(buf, 1, 9, "%+f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 9, but format string expands to at least 10}} + __builtin___sprintf_chk(buf, 1, 12, "%e", 9.f); + __builtin___sprintf_chk(buf, 1, 11, "%e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 11, but format string expands to at least 12}} +} + +void call_sprintf() { + char buf[6]; + sprintf(buf, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} + sprintf(buf, "hello b\0y"); // expected-warning {{format string contains '\0' within the string body}} + // expected-warning@-1 {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}} + sprintf(buf, "hello"); + sprintf(buf, "hello!"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "1234%%"); + sprintf(buf, "12345%%"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "1234%c", '9'); + sprintf(buf, "12345%c", '9'); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "1234%d", 9); + sprintf(buf, "12345%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "1234%lld", 9ll); + sprintf(buf, "12345%lld", 9ll); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "12%#x", 9); + sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "12%p", (void *)9); + sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "123%+d", 9); + sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "123% i", 9); + sprintf(buf, "1234% i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "%5d", 9); + sprintf(buf, "1%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "%.3f", 9.f); + sprintf(buf, "5%.3f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "%+.2f", 9.f); + sprintf(buf, "%+.3f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "%.0e", 9.f); + sprintf(buf, "5%.1e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}} +} + #ifdef __cplusplus template struct S { void mf() const {