diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -740,6 +740,12 @@ bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const; + /// If the current Expr is a pointer, this will try to statically + /// determine the strlen of the string pointed to. + /// Returns true if all of the above holds and we were able to figure out the + /// strlen, false otherwise. + bool tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const; + /// Enumeration used to describe the kind of Null pointer constant /// returned from \c isNullPointerConstant(). enum NullPointerConstantKind { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -816,6 +816,11 @@ "'%0' size argument is too large; destination buffer has size %1," " but size argument is %2">, InGroup; +def warn_fortify_strlen_overflow: Warning< + "'%0' will always overflow; destination buffer has size %1," + " but the source string has length %2 (including NUL byte)">, + InGroup; + def warn_fortify_source_format_overflow : Warning< "'%0' will always overflow; destination buffer has size %1," " but format string expands to at least %2">, diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1826,6 +1826,8 @@ static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, EvalInfo &Info); static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); +static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, + EvalInfo &Info); /// Evaluate an integer or fixed point expression into an APResult. static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, @@ -11836,46 +11838,10 @@ case Builtin::BI__builtin_wcslen: { // As an extension, we support __builtin_strlen() as a constant expression, // and support folding strlen() to a constant. - LValue String; - if (!EvaluatePointer(E->getArg(0), String, Info)) - return false; - - QualType CharTy = E->getArg(0)->getType()->getPointeeType(); - - // Fast path: if it's a string literal, search the string value. - if (const StringLiteral *S = dyn_cast_or_null( - String.getLValueBase().dyn_cast())) { - // The string literal may have embedded null characters. Find the first - // one and truncate there. - StringRef Str = S->getBytes(); - int64_t Off = String.Offset.getQuantity(); - if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() && - S->getCharByteWidth() == 1 && - // FIXME: Add fast-path for wchar_t too. - Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { - Str = Str.substr(Off); - - StringRef::size_type Pos = Str.find(0); - if (Pos != StringRef::npos) - Str = Str.substr(0, Pos); - - return Success(Str.size(), E); - } - - // Fall through to slow path to issue appropriate diagnostic. - } - - // Slow path: scan the bytes of the string looking for the terminating 0. - for (uint64_t Strlen = 0; /**/; ++Strlen) { - APValue Char; - if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || - !Char.isInt()) - return false; - if (!Char.getInt()) - return Success(Strlen, E); - if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) - return false; - } + uint64_t StrLen; + if (EvaluateBuiltinStrLen(E->getArg(0), StrLen, Info)) + return Success(StrLen, E); + return false; } case Builtin::BIstrcmp: @@ -15736,3 +15702,58 @@ EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); } + +static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, + EvalInfo &Info) { + if (!E->getType()->hasPointerRepresentation() || !E->isPRValue()) + return false; + + LValue String; + + if (!EvaluatePointer(E, String, Info)) + return false; + + QualType CharTy = E->getType()->getPointeeType(); + + // Fast path: if it's a string literal, search the string value. + if (const StringLiteral *S = dyn_cast_or_null( + String.getLValueBase().dyn_cast())) { + StringRef Str = S->getBytes(); + int64_t Off = String.Offset.getQuantity(); + if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() && + S->getCharByteWidth() == 1 && + // FIXME: Add fast-path for wchar_t too. + Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { + Str = Str.substr(Off); + + StringRef::size_type Pos = Str.find(0); + if (Pos != StringRef::npos) + Str = Str.substr(0, Pos); + + Result = Str.size(); + return true; + } + + // Fall through to slow path. + } + + // Slow path: scan the bytes of the string looking for the terminating 0. + for (uint64_t Strlen = 0; /**/; ++Strlen) { + APValue Char; + if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || + !Char.isInt()) + return false; + if (!Char.getInt()) { + Result = Strlen; + return true; + } + if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) + return false; + } +} + +bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const { + Expr::EvalStatus Status; + EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); + return EvaluateBuiltinStrLen(this, Result, Info); +} diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -588,14 +588,8 @@ } // namespace -/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a -/// __builtin_*_chk function, then use the object size argument specified in the -/// source. Otherwise, infer the object size using __builtin_object_size. void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { - // FIXME: There are some more useful checks we could be doing here: - // - Evaluate strlen of strcpy arguments, use as object size. - if (TheCall->isValueDependent() || TheCall->isTypeDependent() || isConstantEvaluated()) return; @@ -607,13 +601,66 @@ const TargetInfo &TI = getASTContext().getTargetInfo(); unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType()); + auto ComputeExplicitObjectSizeArgument = + [&](unsigned Index) -> Optional { + Expr::EvalResult Result; + Expr *SizeArg = TheCall->getArg(Index); + if (!SizeArg->EvaluateAsInt(Result, getASTContext())) + return llvm::None; + return Result.Val.getInt(); + }; + + auto ComputeSizeArgument = [&](unsigned Index) -> Optional { + // If the parameter has a pass_object_size attribute, then we should use its + // (potentially) more strict checking mode. Otherwise, conservatively assume + // type 0. + int BOSType = 0; + if (const auto *POS = + FD->getParamDecl(Index)->getAttr()) + BOSType = POS->getType(); + + const Expr *ObjArg = TheCall->getArg(Index); + uint64_t Result; + if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType)) + return llvm::None; + + // Get the object size in the target's size_t width. + return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth); + }; + + auto ComputeStrLenArgument = [&](unsigned Index) -> Optional { + Expr *ObjArg = TheCall->getArg(Index); + uint64_t Result; + if (!ObjArg->tryEvaluateStrLen(Result, getASTContext())) + return llvm::None; + // Add 1 for null byte. + return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth); + }; + + Optional SourceSize; + Optional DestinationSize; unsigned DiagID = 0; bool IsChkVariant = false; - Optional UsedSize; - unsigned SizeIndex, ObjectIndex; + switch (BuiltinID) { default: return; + case Builtin::BI__builtin_strcpy: + case Builtin::BIstrcpy: { + DiagID = diag::warn_fortify_strlen_overflow; + SourceSize = ComputeStrLenArgument(1); + DestinationSize = ComputeSizeArgument(0); + break; + } + + case Builtin::BI__builtin___strcpy_chk: { + DiagID = diag::warn_fortify_strlen_overflow; + SourceSize = ComputeStrLenArgument(1); + DestinationSize = ComputeExplicitObjectSizeArgument(2); + IsChkVariant = true; + break; + } + case Builtin::BIsprintf: case Builtin::BI__builtin___sprintf_chk: { size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; @@ -639,14 +686,13 @@ H, FormatBytes, FormatBytes + StrLen, getLangOpts(), Context.getTargetInfo(), false)) { DiagID = diag::warn_fortify_source_format_overflow; - UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound()) - .extOrTrunc(SizeTypeWidth); + SourceSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound()) + .extOrTrunc(SizeTypeWidth); if (BuiltinID == Builtin::BI__builtin___sprintf_chk) { + DestinationSize = ComputeExplicitObjectSizeArgument(2); IsChkVariant = true; - ObjectIndex = 2; } else { - IsChkVariant = false; - ObjectIndex = 0; + DestinationSize = ComputeSizeArgument(0); } break; } @@ -664,18 +710,19 @@ case Builtin::BI__builtin___memccpy_chk: case Builtin::BI__builtin___mempcpy_chk: { DiagID = diag::warn_builtin_chk_overflow; + SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2); + DestinationSize = + ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); IsChkVariant = true; - SizeIndex = TheCall->getNumArgs() - 2; - ObjectIndex = TheCall->getNumArgs() - 1; break; } case Builtin::BI__builtin___snprintf_chk: case Builtin::BI__builtin___vsnprintf_chk: { DiagID = diag::warn_builtin_chk_overflow; + SourceSize = ComputeExplicitObjectSizeArgument(1); + DestinationSize = ComputeExplicitObjectSizeArgument(3); IsChkVariant = true; - SizeIndex = 1; - ObjectIndex = 3; break; } @@ -691,8 +738,8 @@ // size larger than the destination buffer though; this is a runtime abort // in _FORTIFY_SOURCE mode, and is quite suspicious otherwise. DiagID = diag::warn_fortify_source_size_mismatch; - SizeIndex = TheCall->getNumArgs() - 1; - ObjectIndex = 0; + SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + DestinationSize = ComputeSizeArgument(0); break; } @@ -705,8 +752,8 @@ case Builtin::BImempcpy: case Builtin::BI__builtin_mempcpy: { DiagID = diag::warn_fortify_source_overflow; - SizeIndex = TheCall->getNumArgs() - 1; - ObjectIndex = 0; + SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + DestinationSize = ComputeSizeArgument(0); break; } case Builtin::BIsnprintf: @@ -714,50 +761,14 @@ case Builtin::BIvsnprintf: case Builtin::BI__builtin_vsnprintf: { DiagID = diag::warn_fortify_source_size_mismatch; - SizeIndex = 1; - ObjectIndex = 0; + SourceSize = ComputeExplicitObjectSizeArgument(1); + DestinationSize = ComputeSizeArgument(0); break; } } - llvm::APSInt ObjectSize; - // For __builtin___*_chk, the object size is explicitly provided by the caller - // (usually using __builtin_object_size). Use that value to check this call. - if (IsChkVariant) { - Expr::EvalResult Result; - Expr *SizeArg = TheCall->getArg(ObjectIndex); - if (!SizeArg->EvaluateAsInt(Result, getASTContext())) - return; - ObjectSize = Result.Val.getInt(); - - // Otherwise, try to evaluate an imaginary call to __builtin_object_size. - } else { - // If the parameter has a pass_object_size attribute, then we should use its - // (potentially) more strict checking mode. Otherwise, conservatively assume - // type 0. - int BOSType = 0; - if (const auto *POS = - FD->getParamDecl(ObjectIndex)->getAttr()) - BOSType = POS->getType(); - - Expr *ObjArg = TheCall->getArg(ObjectIndex); - uint64_t Result; - if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType)) - return; - // Get the object size in the target's size_t width. - ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth); - } - - // Evaluate the number of bytes of the object that this call will use. - if (!UsedSize) { - Expr::EvalResult Result; - Expr *UsedSizeArg = TheCall->getArg(SizeIndex); - if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext())) - return; - UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth); - } - - if (UsedSize.getValue().ule(ObjectSize)) + if (!SourceSize || !DestinationSize || + SourceSize.getValue().ule(DestinationSize.getValue())) return; StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); @@ -770,10 +781,13 @@ FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); } + SmallString<16> DestinationStr; + SmallString<16> SourceStr; + DestinationSize->toString(DestinationStr, /*Radix=*/10); + SourceSize->toString(SourceStr, /*Radix=*/10); DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, PDiag(DiagID) - << FunctionName << toString(ObjectSize, /*Radix=*/10) - << toString(UsedSize.getValue(), /*Radix=*/10)); + << FunctionName << DestinationStr << SourceStr); } static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, diff --git a/clang/test/Analysis/security-syntax-checks.m b/clang/test/Analysis/security-syntax-checks.m --- a/clang/test/Analysis/security-syntax-checks.m +++ b/clang/test/Analysis/security-syntax-checks.m @@ -1,37 +1,37 @@ -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \ +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \ +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \ // RUN: -DUSE_BUILTINS \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \ +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \ // RUN: -DVARIANT \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \ +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \ // RUN: -DUSE_BUILTINS -DVARIANT \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \ +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \ +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \ // RUN: -DUSE_BUILTINS \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \ +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \ // RUN: -DVARIANT \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \ +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \ // RUN: -DUSE_BUILTINS -DVARIANT \ // RUN: -analyzer-checker=security.insecureAPI \ // RUN: -analyzer-checker=security.FloatLoopCounter diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -58,6 +58,19 @@ __builtin_stpncpy(s1, s2, 20); // expected-warning {{'stpncpy' size argument is too large; destination buffer has size 10, but size argument is 20}} } +void call_strcpy() { + const char *const src = "abcd"; + char dst[4]; + __builtin_strcpy(dst, src); // expected-warning {{'strcpy' will always overflow; destination buffer has size 4, but the source string has length 5 (including NUL byte)}} +} + +void call_strcpy_nowarn() { + const char *const src = "abcd"; + char dst[5]; + // We should not get a warning here. + __builtin_strcpy(dst, src); +} + void call_memmove() { char s1[10], s2[20]; __builtin_memmove(s2, s1, 20);