diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -833,6 +833,10 @@ " but format string expands to at least %2">, InGroup; +def warn_fortify_scanf_overflow : Warning< + "'%0' may overflow; destination buffer in argument %1 has size " + "%2, but the corresponding field width plus NUL byte is %3">, + InGroup; /// main() // static main() is not an error in C, just in C++. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -408,6 +408,50 @@ namespace { +class ScanfDiagnosticFormatHandler + : public analyze_format_string::FormatStringHandler { + // Accepts the argument index (relative to the first destination index) of the + // argument whose size we want. + using ComputeSizeFunction = + llvm::function_ref(unsigned)>; + + // Accepts the argument index (relative to the first destination index), the + // destination size, and the source size). + using DiagnoseFunction = + llvm::function_ref; + + ComputeSizeFunction ComputeSizeArgument; + DiagnoseFunction Diagnose; + +public: + ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument, + DiagnoseFunction Diagnose) + : ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {} + + bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, + const char *StartSpecifier, + unsigned specifierLen) override { + auto OptionalFW = FS.getFieldWidth(); + if (OptionalFW.getHowSpecified() != + analyze_format_string::OptionalAmount::HowSpecified::Constant) + return true; + + // We have to write the data plus a NUL byte. + unsigned SourceSize = OptionalFW.getConstantAmount() + 1; + + auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex()); + if (!DestSizeAPS) + return true; + + unsigned DestSize = DestSizeAPS->getZExtValue(); + + if (DestSize < SourceSize) + Diagnose(FS.getArgIndex(), DestSize, SourceSize); + + return true; + } +}; + class EstimateSizeFormatHandler : public analyze_format_string::FormatStringHandler { size_t Size; @@ -615,9 +659,12 @@ // (potentially) more strict checking mode. Otherwise, conservatively assume // type 0. int BOSType = 0; - if (const auto *POS = - FD->getParamDecl(Index)->getAttr()) - BOSType = POS->getType(); + // This check can fail for variadic functions. + if (Index < FD->getNumParams()) { + if (const auto *POS = + FD->getParamDecl(Index)->getAttr()) + BOSType = POS->getType(); + } const Expr *ObjArg = TheCall->getArg(Index); uint64_t Result; @@ -642,6 +689,20 @@ unsigned DiagID = 0; bool IsChkVariant = false; + auto GetFunctionName = [&]() { + StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); + // Skim off the details of whichever builtin was called to produce a better + // diagnostic, as it's unlikely that the user wrote the __builtin + // explicitly. + if (IsChkVariant) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); + FunctionName = FunctionName.drop_back(std::strlen("_chk")); + } else if (FunctionName.startswith("__builtin_")) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); + } + return FunctionName; + }; + switch (BuiltinID) { default: return; @@ -661,6 +722,59 @@ break; } + case Builtin::BIscanf: + case Builtin::BIfscanf: + case Builtin::BIsscanf: { + unsigned FormatIndex = 1; + unsigned DataIndex = 2; + if (BuiltinID == Builtin::BIscanf) { + FormatIndex = 0; + DataIndex = 1; + } + + auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); + + auto *Format = dyn_cast(FormatExpr); + if (!Format) + return; + + if (!Format->isAscii() && !Format->isUTF8()) + return; + + auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize, + unsigned SourceSize) { + DiagID = diag::warn_fortify_scanf_overflow; + StringRef FunctionName = GetFunctionName(); + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(DiagID) + << FunctionName << (ArgIndex + DataIndex + 1) + << DestSize << SourceSize); + }; + + StringRef FormatStrRef = Format->getString(); + ScanfDiagnosticFormatHandler H( + [&](unsigned Index) { return ComputeSizeArgument(Index + DataIndex); }, + Diagnose); + const char *FormatBytes = FormatStrRef.data(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(Format->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + + // In case there's a null byte somewhere. + size_t StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); + + analyze_format_string::ParseScanfString(H, FormatBytes, + FormatBytes + StrLen, getLangOpts(), + Context.getTargetInfo()); + + // Unlike the other cases, in this one we have already issued the diagnostic + // here, so no need to continue (because unlike the other cases, here the + // diagnostic refers to the argument number). + return; + } + case Builtin::BIsprintf: case Builtin::BI__builtin___sprintf_chk: { size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; @@ -771,15 +885,7 @@ SourceSize.getValue().ule(DestinationSize.getValue())) return; - StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); - // Skim off the details of whichever builtin was called to produce a better - // diagnostic, as it's unlikely that the user wrote the __builtin explicitly. - if (IsChkVariant) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); - FunctionName = FunctionName.drop_back(std::strlen("_chk")); - } else if (FunctionName.startswith("__builtin_")) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); - } + StringRef FunctionName = GetFunctionName(); SmallString<16> DestinationStr; SmallString<16> SourceStr; diff --git a/clang/test/Sema/warn-fortify-scanf.c b/clang/test/Sema/warn-fortify-scanf.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/warn-fortify-scanf.c @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify + +typedef struct _FILE FILE; +extern int scanf(const char *format, ...); +extern int fscanf(FILE *f, const char *format, ...); +extern int sscanf(const char *input, const char *format, ...); + +void call_scanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + scanf("%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding field width plus NUL byte is 11}} + scanf("%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding field width plus NUL byte is 12}} + scanf("%4s %5s %9s", buf20, buf30, buf10); + scanf("%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding field width plus NUL byte is 21}} + scanf("%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding field width plus NUL byte is 22}} + scanf("%19s %5s %9s", buf20, buf30, buf10); + scanf("%19s %29s %9s", buf20, buf30, buf10); +} + +void call_sscanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + sscanf("a b c", "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 11}} + sscanf("a b c", "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 12}} + sscanf("a b c", "%4s %5s %9s", buf20, buf30, buf10); + sscanf("a b c", "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 21}} + sscanf("a b c", "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 22}} + sscanf("a b c", "%19s %5s %9s", buf20, buf30, buf10); + sscanf("a b c", "%19s %29s %9s", buf20, buf30, buf10); +} + +void call_fscanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + fscanf(0, "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 11}} + fscanf(0, "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 12}} + fscanf(0, "%4s %5s %9s", buf20, buf30, buf10); + fscanf(0, "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 21}} + fscanf(0, "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 22}} + fscanf(0, "%19s %5s %9s", buf20, buf30, buf10); + fscanf(0, "%19s %29s %9s", buf20, buf30, buf10); +}