Index: clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -12,7 +12,8 @@ BlockInCriticalSectionChecker.cpp BoolAssignmentChecker.cpp BuiltinFunctionChecker.cpp - CStringChecker.cpp + CStringChecker/CStringChecker.cpp + CStringChecker/CStringLengthModeling.cpp CStringSyntaxChecker.cpp CallAndMessageChecker.cpp CastSizeChecker.cpp @@ -139,3 +140,7 @@ DEPENDS omp_gen ) + +target_include_directories(clangStaticAnalyzerCheckers PRIVATE + CStringChecker + ) Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.h =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.h @@ -0,0 +1,226 @@ +//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Models C string related functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGCHECKER_CSTRINGCHECKER_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGCHECKER_CSTRINGCHECKER_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +namespace clang { +namespace ento { +namespace cstring { + +struct AnyArgExpr { + // FIXME: Remove constructor in C++17 to turn it into an aggregate. 
+ AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex) + : Expression{Expression}, ArgumentIndex{ArgumentIndex} {} + const Expr *Expression; + unsigned ArgumentIndex; +}; + +struct SourceArgExpr : AnyArgExpr { + using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17. +}; + +struct DestinationArgExpr : AnyArgExpr { + using AnyArgExpr::AnyArgExpr; // FIXME: Same. +}; + +struct SizeArgExpr : AnyArgExpr { + using AnyArgExpr::AnyArgExpr; // FIXME: Same. +}; + +class CStringChecker + : public Checker, check::LiveSymbols, + check::DeadSymbols, check::RegionChanges> { + mutable std::unique_ptr BT_Null, BT_Bounds, BT_Overlap, + BT_NotCString; + + mutable const char *CurrentFunctionDescription; + + using ErrorMessage = SmallString<128>; + enum class AccessKind { write, read }; + enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; + +public: + /// Models and checks cstring related function pre and post-conditions. + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + + /// Tracks and maintains the associated cstring lengths of memory regions. + static void *getTag(); + void checkPreStmt(const DeclStmt *, CheckerContext &) const; + void checkLiveSymbols(ProgramStateRef, SymbolReaper &) const; + void checkDeadSymbols(SymbolReaper &, CheckerContext &) const; + ProgramStateRef + checkRegionChanges(ProgramStateRef, const InvalidatedSymbols *, + ArrayRef, ArrayRef, + const LocationContext *, const CallEvent *) const; + // TODO: Is it useful? + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const; + + /// The filter is used to filter out the diagnostics which are not enabled by + /// the user. 
+ struct { + DefaultBool CheckCStringNullArg; + DefaultBool CheckCStringOutOfBounds; + DefaultBool CheckCStringBufferOverlap; + DefaultBool CheckCStringNotNullTerm; + + CheckerNameRef CheckNameCStringNullArg; + CheckerNameRef CheckNameCStringOutOfBounds; + CheckerNameRef CheckNameCStringBufferOverlap; + CheckerNameRef CheckNameCStringNotNullTerm; + } Filter; + +private: + typedef void (CStringChecker::*FnCheck)(CheckerContext &, + const CallExpr *) const; + CallDescriptionMap Callbacks = { + {{CDF_MaybeBuiltin, "memcpy", 3}, &CStringChecker::evalMemcpy}, + {{CDF_MaybeBuiltin, "mempcpy", 3}, &CStringChecker::evalMempcpy}, + {{CDF_MaybeBuiltin, "memcmp", 3}, &CStringChecker::evalMemcmp}, + {{CDF_MaybeBuiltin, "memmove", 3}, &CStringChecker::evalMemmove}, + {{CDF_MaybeBuiltin, "memset", 3}, &CStringChecker::evalMemset}, + {{CDF_MaybeBuiltin, "explicit_memset", 3}, &CStringChecker::evalMemset}, + {{CDF_MaybeBuiltin, "strcpy", 2}, &CStringChecker::evalStrcpy}, + {{CDF_MaybeBuiltin, "strncpy", 3}, &CStringChecker::evalStrncpy}, + {{CDF_MaybeBuiltin, "stpcpy", 2}, &CStringChecker::evalStpcpy}, + {{CDF_MaybeBuiltin, "strlcpy", 3}, &CStringChecker::evalStrlcpy}, + {{CDF_MaybeBuiltin, "strcat", 2}, &CStringChecker::evalStrcat}, + {{CDF_MaybeBuiltin, "strncat", 3}, &CStringChecker::evalStrncat}, + {{CDF_MaybeBuiltin, "strlcat", 3}, &CStringChecker::evalStrlcat}, + {{CDF_MaybeBuiltin, "strlen", 1}, &CStringChecker::evalstrLength}, + {{CDF_MaybeBuiltin, "strnlen", 2}, &CStringChecker::evalstrnLength}, + {{CDF_MaybeBuiltin, "strcmp", 2}, &CStringChecker::evalStrcmp}, + {{CDF_MaybeBuiltin, "strncmp", 3}, &CStringChecker::evalStrncmp}, + {{CDF_MaybeBuiltin, "strcasecmp", 2}, &CStringChecker::evalStrcasecmp}, + {{CDF_MaybeBuiltin, "strncasecmp", 3}, &CStringChecker::evalStrncasecmp}, + {{CDF_MaybeBuiltin, "strsep", 2}, &CStringChecker::evalStrsep}, + {{CDF_MaybeBuiltin, "bcopy", 3}, &CStringChecker::evalBcopy}, + {{CDF_MaybeBuiltin, "bcmp", 3}, &CStringChecker::evalMemcmp}, + 
{{CDF_MaybeBuiltin, "bzero", 2}, &CStringChecker::evalBzero}, + {{CDF_MaybeBuiltin, "explicit_bzero", 2}, &CStringChecker::evalBzero}, + }; + + // These require a bit of special handling. + CallDescription StdCopy{{"std", "copy"}, 3}, + StdCopyBackward{{"std", "copy_backward"}, 3}; + + FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; + void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; + void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; + void evalMemmove(CheckerContext &C, const CallExpr *CE) const; + void evalBcopy(CheckerContext &C, const CallExpr *CE) const; + void evalCopyCommon(CheckerContext &C, const CallExpr *CE, + ProgramStateRef state, SizeArgExpr Size, + DestinationArgExpr Dest, SourceArgExpr Source, + bool Restricted, bool IsMempcpy) const; + + void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; + + void evalstrLength(CheckerContext &C, const CallExpr *CE) const; + void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; + void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, + bool IsStrnlen = false) const; + + void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; + void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; + void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; + void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; + void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd, + bool IsBounded, ConcatFnKind appendK, + bool returnPtr = true) const; + + void evalStrcat(CheckerContext &C, const CallExpr *CE) const; + void evalStrncat(CheckerContext &C, const CallExpr *CE) const; + void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; + + void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; + void 
evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, + bool IsBounded = false, bool IgnoreCase = false) const; + + void evalStrsep(CheckerContext &C, const CallExpr *CE) const; + + void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; + void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; + void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; + void evalMemset(CheckerContext &C, const CallExpr *CE) const; + void evalBzero(CheckerContext &C, const CallExpr *CE) const; + + // Utility methods + + static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, + AccessKind Access); + + /// Simply wraps the cstring::getCStringLength function to emit warnings. + SVal getCStringLengthChecked(CheckerContext &Ctx, ProgramStateRef &State, + const Expr *Ex, SVal Buf, + bool hypothetical = false) const; + + std::pair static assumeZero( + CheckerContext &C, ProgramStateRef state, SVal V, QualType Ty); + + static ProgramStateRef InvalidateBuffer(CheckerContext &C, + ProgramStateRef state, const Expr *Ex, + SVal V, bool IsSourceBuffer, + const Expr *Size); + + static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, + const MemRegion *MR); + + static bool memsetAux(const Expr *DstBuffer, SVal CharE, const Expr *Size, + CheckerContext &C, ProgramStateRef &State); + + // Re-usable checks + ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, + AnyArgExpr Arg, SVal l) const; + ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, + AnyArgExpr Buffer, SVal Element, + AccessKind Access) const; + ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, + AnyArgExpr Buffer, SizeArgExpr Size, + AccessKind Access) const; + ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, + SizeArgExpr Size, AnyArgExpr First, + AnyArgExpr Second) const; + void emitOverlapBug(CheckerContext &C, ProgramStateRef state, + const Stmt *First, const Stmt *Second) const; + + 
void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, + StringRef WarningMsg) const; + void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, + const Stmt *S, StringRef WarningMsg) const; + void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, + const Stmt *S, StringRef WarningMsg) const; + void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; + + ProgramStateRef checkAdditionOverflow(CheckerContext &C, + ProgramStateRef state, NonLoc left, + NonLoc right) const; + + // Return true if the destination buffer of the copy function may be in bound. + // Expects SVal of Size to be positive and unsigned. + // Expects SVal of FirstBuf to be a FieldRegion. + static bool IsFirstBufInBound(CheckerContext &C, ProgramStateRef state, + const Expr *FirstBuf, const Expr *Size); +}; + +} // namespace cstring +} // namespace ento +} // namespace clang + +#endif \ No newline at end of file Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp @@ -11,50 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "InterCheckerAPI.h" +#include "CStringChecker.h" +#include "CStringLength.h" #include "clang/Basic/CharInfo.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" -#include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/ADT/STLExtras.h" 
#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" -using namespace clang; -using namespace ento; +namespace clang { +namespace ento { +namespace cstring { -namespace { -struct AnyArgExpr { - // FIXME: Remove constructor in C++17 to turn it into an aggregate. - AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex) - : Expression{Expression}, ArgumentIndex{ArgumentIndex} {} - const Expr *Expression; - unsigned ArgumentIndex; -}; - -struct SourceArgExpr : AnyArgExpr { - using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17. -}; - -struct DestinationArgExpr : AnyArgExpr { - using AnyArgExpr::AnyArgExpr; // FIXME: Same. -}; - -struct SizeArgExpr : AnyArgExpr { - using AnyArgExpr::AnyArgExpr; // FIXME: Same. -}; - -using ErrorMessage = SmallString<128>; -enum class AccessKind { write, read }; - -static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, - AccessKind Access) { +auto CStringChecker::createOutOfBoundErrorMsg(StringRef FunctionDescription, + AccessKind Access) + -> ErrorMessage { ErrorMessage Message; llvm::raw_svector_ostream Os(Message); @@ -71,214 +48,80 @@ return Message; } -enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; -class CStringChecker : public Checker< eval::Call, - check::PreStmt, - check::LiveSymbols, - check::DeadSymbols, - check::RegionChanges - > { - mutable std::unique_ptr BT_Null, BT_Bounds, BT_Overlap, - BT_NotCString, BT_AdditionOverflow; - - mutable const char *CurrentFunctionDescription; - -public: - /// The filter is used to filter out the diagnostics which are not enabled by - /// the user. 
- struct CStringChecksFilter { - DefaultBool CheckCStringNullArg; - DefaultBool CheckCStringOutOfBounds; - DefaultBool CheckCStringBufferOverlap; - DefaultBool CheckCStringNotNullTerm; - - CheckerNameRef CheckNameCStringNullArg; - CheckerNameRef CheckNameCStringOutOfBounds; - CheckerNameRef CheckNameCStringBufferOverlap; - CheckerNameRef CheckNameCStringNotNullTerm; - }; - - CStringChecksFilter Filter; - - static void *getTag() { static int tag; return &tag; } - - bool evalCall(const CallEvent &Call, CheckerContext &C) const; - void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; - void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; - void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; - - ProgramStateRef - checkRegionChanges(ProgramStateRef state, - const InvalidatedSymbols *, - ArrayRef ExplicitRegions, - ArrayRef Regions, - const LocationContext *LCtx, - const CallEvent *Call) const; - - typedef void (CStringChecker::*FnCheck)(CheckerContext &, - const CallExpr *) const; - CallDescriptionMap Callbacks = { - {{CDF_MaybeBuiltin, "memcpy", 3}, &CStringChecker::evalMemcpy}, - {{CDF_MaybeBuiltin, "mempcpy", 3}, &CStringChecker::evalMempcpy}, - {{CDF_MaybeBuiltin, "memcmp", 3}, &CStringChecker::evalMemcmp}, - {{CDF_MaybeBuiltin, "memmove", 3}, &CStringChecker::evalMemmove}, - {{CDF_MaybeBuiltin, "memset", 3}, &CStringChecker::evalMemset}, - {{CDF_MaybeBuiltin, "explicit_memset", 3}, &CStringChecker::evalMemset}, - {{CDF_MaybeBuiltin, "strcpy", 2}, &CStringChecker::evalStrcpy}, - {{CDF_MaybeBuiltin, "strncpy", 3}, &CStringChecker::evalStrncpy}, - {{CDF_MaybeBuiltin, "stpcpy", 2}, &CStringChecker::evalStpcpy}, - {{CDF_MaybeBuiltin, "strlcpy", 3}, &CStringChecker::evalStrlcpy}, - {{CDF_MaybeBuiltin, "strcat", 2}, &CStringChecker::evalStrcat}, - {{CDF_MaybeBuiltin, "strncat", 3}, &CStringChecker::evalStrncat}, - {{CDF_MaybeBuiltin, "strlcat", 3}, &CStringChecker::evalStrlcat}, - {{CDF_MaybeBuiltin, "strlen", 1}, 
&CStringChecker::evalstrLength}, - {{CDF_MaybeBuiltin, "strnlen", 2}, &CStringChecker::evalstrnLength}, - {{CDF_MaybeBuiltin, "strcmp", 2}, &CStringChecker::evalStrcmp}, - {{CDF_MaybeBuiltin, "strncmp", 3}, &CStringChecker::evalStrncmp}, - {{CDF_MaybeBuiltin, "strcasecmp", 2}, &CStringChecker::evalStrcasecmp}, - {{CDF_MaybeBuiltin, "strncasecmp", 3}, &CStringChecker::evalStrncasecmp}, - {{CDF_MaybeBuiltin, "strsep", 2}, &CStringChecker::evalStrsep}, - {{CDF_MaybeBuiltin, "bcopy", 3}, &CStringChecker::evalBcopy}, - {{CDF_MaybeBuiltin, "bcmp", 3}, &CStringChecker::evalMemcmp}, - {{CDF_MaybeBuiltin, "bzero", 2}, &CStringChecker::evalBzero}, - {{CDF_MaybeBuiltin, "explicit_bzero", 2}, &CStringChecker::evalBzero}, - }; - - // These require a bit of special handling. - CallDescription StdCopy{{"std", "copy"}, 3}, - StdCopyBackward{{"std", "copy_backward"}, 3}; - - FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; - void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; - void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; - void evalMemmove(CheckerContext &C, const CallExpr *CE) const; - void evalBcopy(CheckerContext &C, const CallExpr *CE) const; - void evalCopyCommon(CheckerContext &C, const CallExpr *CE, - ProgramStateRef state, SizeArgExpr Size, - DestinationArgExpr Dest, SourceArgExpr Source, - bool Restricted, bool IsMempcpy) const; - - void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; - - void evalstrLength(CheckerContext &C, const CallExpr *CE) const; - void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; - void evalstrLengthCommon(CheckerContext &C, - const CallExpr *CE, - bool IsStrnlen = false) const; - - void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; - void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; - void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; - void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; - void evalStrcpyCommon(CheckerContext &C, 
const CallExpr *CE, bool ReturnEnd, - bool IsBounded, ConcatFnKind appendK, - bool returnPtr = true) const; - - void evalStrcat(CheckerContext &C, const CallExpr *CE) const; - void evalStrncat(CheckerContext &C, const CallExpr *CE) const; - void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; - - void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; - void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; - void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; - void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; - void evalStrcmpCommon(CheckerContext &C, - const CallExpr *CE, - bool IsBounded = false, - bool IgnoreCase = false) const; - - void evalStrsep(CheckerContext &C, const CallExpr *CE) const; - - void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; - void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; - void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; - void evalMemset(CheckerContext &C, const CallExpr *CE) const; - void evalBzero(CheckerContext &C, const CallExpr *CE) const; - - // Utility methods - std::pair - static assumeZero(CheckerContext &C, - ProgramStateRef state, SVal V, QualType Ty); - - static ProgramStateRef setCStringLength(ProgramStateRef state, - const MemRegion *MR, - SVal strLength); - static SVal getCStringLengthForRegion(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - const MemRegion *MR, - bool hypothetical); - SVal getCStringLength(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - SVal Buf, - bool hypothetical = false) const; - - const StringLiteral *getCStringLiteral(CheckerContext &C, - ProgramStateRef &state, - const Expr *expr, - SVal val) const; - - static ProgramStateRef InvalidateBuffer(CheckerContext &C, - ProgramStateRef state, - const Expr *Ex, SVal V, - bool IsSourceBuffer, - const Expr *Size); - - static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, - const MemRegion *MR); - - static 
bool memsetAux(const Expr *DstBuffer, SVal CharE, - const Expr *Size, CheckerContext &C, - ProgramStateRef &State); - - // Re-usable checks - ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, - AnyArgExpr Arg, SVal l) const; - ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, - AnyArgExpr Buffer, SVal Element, - AccessKind Access) const; - ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, - AnyArgExpr Buffer, SizeArgExpr Size, - AccessKind Access) const; - ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, - SizeArgExpr Size, AnyArgExpr First, - AnyArgExpr Second) const; - void emitOverlapBug(CheckerContext &C, - ProgramStateRef state, - const Stmt *First, - const Stmt *Second) const; - - void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, - StringRef WarningMsg) const; - void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, - const Stmt *S, StringRef WarningMsg) const; - void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, - const Stmt *S, StringRef WarningMsg) const; - void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; - - ProgramStateRef checkAdditionOverflow(CheckerContext &C, - ProgramStateRef state, - NonLoc left, - NonLoc right) const; - - // Return true if the destination buffer of the copy function may be in bound. - // Expects SVal of Size to be positive and unsigned. - // Expects SVal of FirstBuf to be a FieldRegion. - static bool IsFirstBufInBound(CheckerContext &C, - ProgramStateRef state, - const Expr *FirstBuf, - const Expr *Size); -}; - -} //end anonymous namespace - -REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) - //===----------------------------------------------------------------------===// // Individual checks and utility methods. 
//===----------------------------------------------------------------------===// +static const StringLiteral *getCStringLiteral(SVal val) { + // Get the memory region pointed to by the val. + const MemRegion *bufRegion = val.getAsRegion(); + if (!bufRegion) + return nullptr; + + // Strip casts off the memory region. + bufRegion = bufRegion->StripCasts(); + + // Cast the memory region to a string region. + const StringRegion *strRegion = dyn_cast(bufRegion); + if (!strRegion) + return nullptr; + + // Return the actual string in the string region. + return strRegion->getStringLiteral(); +} + +SVal CStringChecker::getCStringLengthChecked(CheckerContext &Ctx, + ProgramStateRef &State, + const Expr *Ex, SVal Buf, + bool hypothetical) const { + SVal CStrLen = cstring::getCStringLength(Ctx, State, Ex, Buf, hypothetical); + + // Simply return if everything goes well. + // Otherwise we shall investigate why did it fail. + if (!CStrLen.isUndef()) + return CStrLen; + + // Handle if the buffer was not referring to a memory region. + const MemRegion *MR = Buf.getAsRegion(); + if (!MR) { + // If we can't get a region, see if it's something we /know/ isn't a + // C string. In the context of locations, the only time we can issue such + // a warning is for labels. + if (Optional Label = Buf.getAs()) { + if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription + << " is the address of the label '" << Label->getLabel()->getName() + << "', which is not a null-terminated string"; + + emitNotCStringBug(Ctx, State, Ex, os.str()); + } + return UndefinedVal(); + } + } + + // Other regions (mostly non-data) can't have a reliable C string length. + // In this case, an error is emitted and UndefinedVal is returned. + // The caller should always be prepared to handle this case. 
+ if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription << " is "; + + if (SummarizeRegion(os, Ctx.getASTContext(), MR)) + os << ", which is not a null-terminated string"; + else + os << "not a null-terminated string"; + + emitNotCStringBug(Ctx, State, Ex, os.str()); + } + return UndefinedVal(); +} + std::pair CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, QualType Ty) { @@ -694,181 +537,6 @@ return state; } -ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, - const MemRegion *MR, - SVal strLength) { - assert(!strLength.isUndef() && "Attempt to set an undefined string length"); - - MR = MR->StripCasts(); - - switch (MR->getKind()) { - case MemRegion::StringRegionKind: - // FIXME: This can happen if we strcpy() into a string region. This is - // undefined [C99 6.4.5p6], but we should still warn about it. - return state; - - case MemRegion::SymbolicRegionKind: - case MemRegion::AllocaRegionKind: - case MemRegion::NonParamVarRegionKind: - case MemRegion::ParamVarRegionKind: - case MemRegion::FieldRegionKind: - case MemRegion::ObjCIvarRegionKind: - // These are the types we can currently track string lengths for. - break; - - case MemRegion::ElementRegionKind: - // FIXME: Handle element regions by upper-bounding the parent region's - // string length. - return state; - - default: - // Other regions (mostly non-data) can't have a reliable C string length. - // For now, just ignore the change. - // FIXME: These are rare but not impossible. 
We should output some kind of - // warning for things like strcpy((char[]){'a', 0}, "b"); - return state; - } - - if (strLength.isUnknown()) - return state->remove(MR); - - return state->set(MR, strLength); -} - -SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - const MemRegion *MR, - bool hypothetical) { - if (!hypothetical) { - // If there's a recorded length, go ahead and return it. - const SVal *Recorded = state->get(MR); - if (Recorded) - return *Recorded; - } - - // Otherwise, get a new symbol and update the state. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), - MR, Ex, sizeTy, - C.getLocationContext(), - C.blockCount()); - - if (!hypothetical) { - if (Optional strLn = strLength.getAs()) { - // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 - BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); - const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); - llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); - const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, - fourInt); - NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); - SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, - maxLength, sizeTy); - state = state->assume(evalLength.castAs(), true); - } - state = state->set(MR, strLength); - } - - return strLength; -} - -SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, - const Expr *Ex, SVal Buf, - bool hypothetical) const { - const MemRegion *MR = Buf.getAsRegion(); - if (!MR) { - // If we can't get a region, see if it's something we /know/ isn't a - // C string. In the context of locations, the only time we can issue such - // a warning is for labels. 
- if (Optional Label = Buf.getAs()) { - if (Filter.CheckCStringNotNullTerm) { - SmallString<120> buf; - llvm::raw_svector_ostream os(buf); - assert(CurrentFunctionDescription); - os << "Argument to " << CurrentFunctionDescription - << " is the address of the label '" << Label->getLabel()->getName() - << "', which is not a null-terminated string"; - - emitNotCStringBug(C, state, Ex, os.str()); - } - return UndefinedVal(); - } - - // If it's not a region and not a label, give up. - return UnknownVal(); - } - - // If we have a region, strip casts from it and see if we can figure out - // its length. For anything we can't figure out, just return UnknownVal. - MR = MR->StripCasts(); - - switch (MR->getKind()) { - case MemRegion::StringRegionKind: { - // Modifying the contents of string regions is undefined [C99 6.4.5p6], - // so we can assume that the byte length is the correct C string length. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - const StringLiteral *strLit = cast(MR)->getStringLiteral(); - return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); - } - case MemRegion::SymbolicRegionKind: - case MemRegion::AllocaRegionKind: - case MemRegion::NonParamVarRegionKind: - case MemRegion::ParamVarRegionKind: - case MemRegion::FieldRegionKind: - case MemRegion::ObjCIvarRegionKind: - return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); - case MemRegion::CompoundLiteralRegionKind: - // FIXME: Can we track this? Is it necessary? - return UnknownVal(); - case MemRegion::ElementRegionKind: - // FIXME: How can we handle this? It's not good enough to subtract the - // offset from the base string length; consider "123\x00567" and &a[5]. - return UnknownVal(); - default: - // Other regions (mostly non-data) can't have a reliable C string length. - // In this case, an error is emitted and UndefinedVal is returned. - // The caller should always be prepared to handle this case. 
- if (Filter.CheckCStringNotNullTerm) { - SmallString<120> buf; - llvm::raw_svector_ostream os(buf); - - assert(CurrentFunctionDescription); - os << "Argument to " << CurrentFunctionDescription << " is "; - - if (SummarizeRegion(os, C.getASTContext(), MR)) - os << ", which is not a null-terminated string"; - else - os << "not a null-terminated string"; - - emitNotCStringBug(C, state, Ex, os.str()); - } - return UndefinedVal(); - } -} - -const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, - ProgramStateRef &state, const Expr *expr, SVal val) const { - - // Get the memory region pointed to by the val. - const MemRegion *bufRegion = val.getAsRegion(); - if (!bufRegion) - return nullptr; - - // Strip casts off the memory region. - bufRegion = bufRegion->StripCasts(); - - // Cast the memory region to a string region. - const StringRegion *strRegion= dyn_cast(bufRegion); - if (!strRegion) - return nullptr; - - // Return the actual string in the string region. - return strRegion->getStringLiteral(); -} - bool CStringChecker::IsFirstBufInBound(CheckerContext &C, ProgramStateRef state, const Expr *FirstBuf, @@ -1094,8 +762,8 @@ if (StateNullChar && !StateNonNullChar) { // If the value of the second argument of 'memset()' is zero, set the // string length of destination buffer to 0 directly. 
- State = setCStringLength(State, MR, - svalBuilder.makeZeroVal(Ctx.getSizeType())); + State = cstring::setCStringLength( + State, MR, svalBuilder.makeZeroVal(Ctx.getSizeType())); } else if (!StateNullChar && StateNonNullChar) { SVal NewStrLen = svalBuilder.getMetadataSymbolVal( CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), @@ -1106,7 +774,7 @@ SVal NewStrLenGESize = svalBuilder.evalBinOp( State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); - State = setCStringLength( + State = cstring::setCStringLength( State->assume(NewStrLenGESize.castAs(), true), MR, NewStrLen); } @@ -1394,7 +1062,7 @@ if (!state) return; - SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); + SVal strLength = getCStringLengthChecked(C, state, Arg.Expression, ArgVal); // If the argument isn't a valid C string, there's no valid state to // transition to. @@ -1561,11 +1229,12 @@ return; // Get the string length of the source. - SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal); + SVal strLength = + getCStringLengthChecked(C, state, srcExpr.Expression, srcVal); Optional strLengthNL = strLength.getAs(); // Get the string length of the destination buffer. - SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal); + SVal dstStrLength = getCStringLengthChecked(C, state, Dst.Expression, DstVal); Optional dstStrLengthNL = dstStrLength.getAs(); // If the source isn't a valid C string, give up. @@ -1789,7 +1458,7 @@ if (finalStrLength.isUnknown()) { // Try to get a "hypothetical" string length symbol, which we can later // set as a real value if that turns out to be the case. 
- finalStrLength = getCStringLength(C, state, CE, DstVal, true); + finalStrLength = getCStringLengthChecked(C, state, CE, DstVal, true); assert(!finalStrLength.isUndef()); if (Optional finalStrLengthNL = finalStrLength.getAs()) { @@ -1899,7 +1568,8 @@ if (amountCopied != strLength) finalStrLength = UnknownVal(); } - state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); + state = cstring::setCStringLength(state, dstRegVal->getRegion(), + finalStrLength); } assert(state); @@ -1959,12 +1629,13 @@ return; // Get the string length of the first string or give up. - SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal); + SVal LeftLength = getCStringLengthChecked(C, state, Left.Expression, LeftVal); if (LeftLength.isUndef()) return; // Get the string length of the second string or give up. - SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal); + SVal RightLength = + getCStringLengthChecked(C, state, Right.Expression, RightVal); if (RightLength.isUndef()) return; @@ -1999,10 +1670,8 @@ // For now, we only do this if they're both known string literals. // Attempt to extract string literals from both expressions. 
- const StringLiteral *LeftStrLiteral = - getCStringLiteral(C, state, Left.Expression, LeftVal); - const StringLiteral *RightStrLiteral = - getCStringLiteral(C, state, Right.Expression, RightVal); + const StringLiteral *LeftStrLiteral = getCStringLiteral(LeftVal); + const StringLiteral *RightStrLiteral = getCStringLiteral(RightVal); bool canComputeResult = false; SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); @@ -2314,147 +1983,29 @@ return C.isDifferent(); } -void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { - // Record string length for char a[] = "abc"; - ProgramStateRef state = C.getState(); +} // namespace cstring +} // namespace ento +} // namespace clang - for (const auto *I : DS->decls()) { - const VarDecl *D = dyn_cast(I); - if (!D) - continue; - - // FIXME: Handle array fields of structs. - if (!D->getType()->isArrayType()) - continue; - - const Expr *Init = D->getInit(); - if (!Init) - continue; - if (!isa(Init)) - continue; - - Loc VarLoc = state->getLValue(D, C.getLocationContext()); - const MemRegion *MR = VarLoc.getAsRegion(); - if (!MR) - continue; - - SVal StrVal = C.getSVal(Init); - assert(StrVal.isValid() && "Initializer string is unknown or undefined"); - DefinedOrUnknownSVal strLength = - getCStringLength(C, state, Init, StrVal).castAs(); - - state = state->set(MR, strLength); - } - - C.addTransition(state); +void clang::ento::registerCStringModeling(CheckerManager &Mgr) { + Mgr.registerChecker(); } -ProgramStateRef -CStringChecker::checkRegionChanges(ProgramStateRef state, - const InvalidatedSymbols *, - ArrayRef ExplicitRegions, - ArrayRef Regions, - const LocationContext *LCtx, - const CallEvent *Call) const { - CStringLengthTy Entries = state->get(); - if (Entries.isEmpty()) - return state; - - llvm::SmallPtrSet Invalidated; - llvm::SmallPtrSet SuperRegions; - - // First build sets for the changed regions and their super-regions. 
- for (ArrayRef::iterator - I = Regions.begin(), E = Regions.end(); I != E; ++I) { - const MemRegion *MR = *I; - Invalidated.insert(MR); - - SuperRegions.insert(MR); - while (const SubRegion *SR = dyn_cast(MR)) { - MR = SR->getSuperRegion(); - SuperRegions.insert(MR); - } - } - - CStringLengthTy::Factory &F = state->get_context(); - - // Then loop over the entries in the current state. - for (CStringLengthTy::iterator I = Entries.begin(), - E = Entries.end(); I != E; ++I) { - const MemRegion *MR = I.getKey(); - - // Is this entry for a super-region of a changed region? - if (SuperRegions.count(MR)) { - Entries = F.remove(Entries, MR); - continue; - } - - // Is this entry for a sub-region of a changed region? - const MemRegion *Super = MR; - while (const SubRegion *SR = dyn_cast(Super)) { - Super = SR->getSuperRegion(); - if (Invalidated.count(Super)) { - Entries = F.remove(Entries, MR); - break; - } - } - } - - return state->set(Entries); -} - -void CStringChecker::checkLiveSymbols(ProgramStateRef state, - SymbolReaper &SR) const { - // Mark all symbols in our string length map as valid. 
- CStringLengthTy Entries = state->get(); - - for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - SVal Len = I.getData(); - - for (SymExpr::symbol_iterator si = Len.symbol_begin(), - se = Len.symbol_end(); si != se; ++si) - SR.markInUse(*si); - } -} - -void CStringChecker::checkDeadSymbols(SymbolReaper &SR, - CheckerContext &C) const { - ProgramStateRef state = C.getState(); - CStringLengthTy Entries = state->get(); - if (Entries.isEmpty()) - return; - - CStringLengthTy::Factory &F = state->get_context(); - for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - SVal Len = I.getData(); - if (SymbolRef Sym = Len.getAsSymbol()) { - if (SR.isDead(Sym)) - Entries = F.remove(Entries, I.getKey()); - } - } - - state = state->set(Entries); - C.addTransition(state); -} - -void ento::registerCStringModeling(CheckerManager &Mgr) { - Mgr.registerChecker(); -} - -bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) { +bool clang::ento::shouldRegisterCStringModeling(const CheckerManager &) { return true; } #define REGISTER_CHECKER(name) \ - void ento::register##name(CheckerManager &mgr) { \ - CStringChecker *checker = mgr.getChecker(); \ + void clang::ento::register##name(clang::ento::CheckerManager &mgr) { \ + auto *checker = mgr.getChecker(); \ checker->Filter.Check##name = true; \ checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ } \ \ - bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } + bool clang::ento::shouldRegister##name( \ + const clang::ento::CheckerManager &mgr) { \ + return true; \ + } REGISTER_CHECKER(CStringNullArg) REGISTER_CHECKER(CStringOutOfBounds) Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLength.h =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLength.h @@ -0,0 +1,53 @@ +//=== CStringLength.h Query and store the length 
of a cstring. ---*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines an interface for interacting and manipulating the associated cstring +// length of a given memory region. +// You can assign a cstring length to any memory region. +// The represented value is what strlen would return on the given memory region. +// Eg: 3 for both "ABC" and "abc\00def". +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" + +namespace clang { +namespace ento { +class CheckerContext; + +namespace cstring { + +/// Assigns a cstring length to a memory region. +LLVM_NODISCARD ProgramStateRef setCStringLength(ProgramStateRef State, + const MemRegion *MR, + SVal StrLength); + +/// Removes the assigned cstring length from the memory region. +/// It is useful for invalidation. +LLVM_NODISCARD ProgramStateRef removeCStringLength(ProgramStateRef State, + const MemRegion *MR); + +// FIXME: Eventually rework the interface of this function. +// Especially the magic 'Hypothetical' parameter. 
+LLVM_NODISCARD SVal getCStringLength(CheckerContext &Ctx, + ProgramStateRef &State, const Expr *Ex, + SVal Buf, bool Hypothetical = false); + +LLVM_DUMP_METHOD void dumpCStringLengths(ProgramStateRef State, + raw_ostream &Out = llvm::errs(), + const char *NL = "\n", + const char *Sep = " : "); +} // namespace cstring +} // namespace ento +} // namespace clang + +#endif Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLengthModeling.cpp =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLengthModeling.cpp @@ -0,0 +1,313 @@ +//=== CStringLengthModeling.cpp Implementation of CStringLength API C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements the CStringLength API and the CStringChecker bookkeeping parts. +// Updates the associated cstring lengths of memory regions: +// - Infers the cstring length of string literals. +// - Removes cstring length associations of dead symbols. +// - Handles region invalidation. +// +//===----------------------------------------------------------------------===// + +#include "CStringChecker.h" +#include "CStringLength.h" + +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; +using namespace cstring; + +/// Associates an strlen to a memory region. 
+REGISTER_MAP_WITH_PROGRAMSTATE(CStringLengthMap, const MemRegion *, SVal)
+
+// NOTE(review): the angle-bracket template arguments in this section were
+// missing from the reviewed text and have been restored from the surrounding
+// declarations; confirm against the upstream patch.
+
+//===----------------------------------------------------------------------===//
+// Implementation of the public CStringLength API.
+//===----------------------------------------------------------------------===//
+
+/// Records \p StrLength as the cstring length of \p MR (after stripping casts)
+/// and returns the resulting state.
+///
+/// Only symbolic, alloca, variable, parameter, field and ObjC ivar regions are
+/// tracked; for every other region kind \p State is returned unchanged.
+/// An unknown \p StrLength erases any previously recorded length instead of
+/// storing it. \p StrLength must not be undefined.
+ProgramStateRef cstring::setCStringLength(ProgramStateRef State,
+                                          const MemRegion *MR, SVal StrLength) {
+  assert(!StrLength.isUndef() && "Attempt to set an undefined string length");
+
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind:
+    // FIXME: This can happen if we strcpy() into a string region. This is
+    // undefined [C99 6.4.5p6], but we should still warn about it.
+    return State;
+
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::NonParamVarRegionKind:
+  case MemRegion::ParamVarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    // These are the types we can currently track string lengths for.
+    break;
+
+  case MemRegion::ElementRegionKind:
+    // FIXME: Handle element regions by upper-bounding the parent region's
+    // string length.
+    return State;
+
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    // For now, just ignore the change.
+    // FIXME: These are rare but not impossible. We should output some kind of
+    // warning for things like strcpy((char[]){'a', 0}, "b");
+    return State;
+  }
+
+  // Storing "unknown" would carry no information, so drop the entry instead.
+  if (StrLength.isUnknown())
+    return removeCStringLength(State, MR);
+  return State->set<CStringLengthMap>(MR, StrLength);
+}
+
+/// Returns a state in which \p MR has no recorded cstring length.
+/// Unlike setCStringLength(), \p MR is used as the key as-is: casts are not
+/// stripped here.
+ProgramStateRef cstring::removeCStringLength(ProgramStateRef State,
+                                             const MemRegion *MR) {
+  return State->remove<CStringLengthMap>(MR);
+}
+
+/// Returns the cstring length associated with \p MR.
+///
+/// When \p Hypothetical is false, a previously recorded length is returned if
+/// there is one; otherwise a fresh metadata symbol is conjured, constrained to
+/// be at most SIZE_MAX/4, and stored into \p State (which is updated in
+/// place through the reference).
+/// When \p Hypothetical is true, the recorded length is ignored, a fresh
+/// metadata symbol is returned, and \p State is left untouched.
+static SVal getCStringLengthForRegion(CheckerContext &Ctx,
+                                      ProgramStateRef &State, const Expr *Ex,
+                                      const MemRegion *MR, bool Hypothetical) {
+  if (!Hypothetical) {
+    // If there's a recorded length, go ahead and return it.
+    if (const SVal *Recorded = State->get<CStringLengthMap>(MR))
+      return *Recorded;
+  }
+
+  // Otherwise, get a new symbol and update the state.
+  SValBuilder &SVB = Ctx.getSValBuilder();
+  QualType SizeTy = SVB.getContext().getSizeType();
+  NonLoc CStrLen =
+      SVB.getMetadataSymbolVal(CStringChecker::getTag(), MR, Ex, SizeTy,
+                               Ctx.getLocationContext(), Ctx.blockCount())
+          .castAs<NonLoc>();
+
+  if (!Hypothetical) {
+    // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
+    BasicValueFactory &BVF = SVB.getBasicValueFactory();
+    const llvm::APSInt &MaxValue = BVF.getMaxValue(SizeTy);
+    const llvm::APSInt Four = APSIntType(MaxValue).getValue(4);
+    const llvm::APSInt *MaxLength = BVF.evalAPSInt(BO_Div, MaxValue, Four);
+    const NonLoc MaxLengthSVal = SVB.makeIntVal(*MaxLength);
+    // NOTE(review): the comparison is evaluated with SizeTy as its result
+    // type, whereas other comparisons in this checker pass
+    // getConditionType() - confirm this is intended.
+    SVal Constrained =
+        SVB.evalBinOpNN(State, BO_LE, CStrLen, MaxLengthSVal, SizeTy);
+    State = State->assume(Constrained.castAs<DefinedOrUnknownSVal>(), true);
+    State = State->set<CStringLengthMap>(MR, CStrLen);
+  }
+
+  return CStrLen;
+}
+
+/// Returns the cstring length of the buffer denoted by \p Buf.
+///
+/// Result by case:
+///  - \p Buf unknown or undefined: \p Buf itself is returned.
+///  - \p Buf is a label address, or a region kind that cannot hold a cstring:
+///    UndefinedVal.
+///  - \p Buf is not a region, or is a compound literal / element region:
+///    UnknownVal.
+///  - String literal region: the literal's byte length as a size_t constant.
+///  - Trackable regions: see getCStringLengthForRegion(); \p State may be
+///    updated in place unless \p Hypothetical is set.
+SVal cstring::getCStringLength(CheckerContext &Ctx, ProgramStateRef &State,
+                               const Expr *Ex, SVal Buf,
+                               bool Hypothetical /*=false*/) {
+  if (Buf.isUnknownOrUndef())
+    return Buf;
+
+  // NOTE(review): template argument restored as loc::GotoLabel (the only
+  // location kind known not to be a cstring) - confirm against the patch.
+  if (Buf.getAs<loc::GotoLabel>())
+    return UndefinedVal();
+
+  // If it's not a region, give up.
+  const MemRegion *MR = Buf.getAsRegion();
+  if (!MR)
+    return UnknownVal();
+
+  // If we have a region, strip casts from it and see if we can figure out
+  // its length. For anything we can't figure out, just return UnknownVal.
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind: {
+    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
+    // so we can assume that the byte length is the correct C string length.
+    SValBuilder &SVB = Ctx.getSValBuilder();
+    QualType SizeTy = SVB.getContext().getSizeType();
+    const StringLiteral *StrLiteral =
+        cast<StringRegion>(MR)->getStringLiteral();
+    return SVB.makeIntVal(StrLiteral->getByteLength(), SizeTy);
+  }
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::NonParamVarRegionKind:
+  case MemRegion::ParamVarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    return getCStringLengthForRegion(Ctx, State, Ex, MR, Hypothetical);
+  case MemRegion::CompoundLiteralRegionKind:
+    // FIXME: Can we track this? Is it necessary?
+    return UnknownVal();
+  case MemRegion::ElementRegionKind:
+    // FIXME: How can we handle this? It's not good enough to subtract the
+    // offset from the base string length; consider "123\x00567" and &a[5].
+    return UnknownVal();
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    return UndefinedVal();
+  }
+}
+
+/// Prints every (region, length) pair recorded in \p State to \p Out, one per
+/// line, as "<region><Sep><length><NL>". A "CString lengths:" heading is
+/// printed first; nothing at all is printed when the map is empty.
+void cstring::dumpCStringLengths(ProgramStateRef State, raw_ostream &Out,
+                                 const char *NL, const char *Sep) {
+  const CStringLengthMapTy Items = State->get<CStringLengthMap>();
+  if (!Items.isEmpty())
+    Out << "CString lengths:" << NL;
+  for (const auto &Item : Items) {
+    Item.first->dumpToStream(Out);
+    Out << Sep;
+    Item.second.dumpToStream(Out);
+    Out << NL;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the tracking and bookkeeping part of the CStringChecker.
+// Updates the CStringLengthMap.
+// - Infers the cstring length of string literals.
+// - Removes cstring length associations of dead symbols.
+// - Handles region invalidation.
+//===----------------------------------------------------------------------===// + +void *CStringChecker::getTag() { + static int Tag; + return &Tag; +} + +void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + // Record string length for char a[] = "abc"; + ProgramStateRef state = C.getState(); + + for (const auto *I : DS->decls()) { + const VarDecl *D = dyn_cast(I); + if (!D) + continue; + + // FIXME: Handle array fields of structs. + if (!D->getType()->isArrayType()) + continue; + + const Expr *Init = D->getInit(); + if (!Init) + continue; + if (!isa(Init)) + continue; + + Loc VarLoc = state->getLValue(D, C.getLocationContext()); + const MemRegion *MR = VarLoc.getAsRegion(); + if (!MR) + continue; + + SVal StrVal = C.getSVal(Init); + assert(StrVal.isValid() && "Initializer string is unknown or undefined"); + DefinedOrUnknownSVal strLength = + getCStringLength(C, state, Init, StrVal).castAs(); + + state = state->set(MR, strLength); + } + + C.addTransition(state); +} + +void CStringChecker::checkLiveSymbols(ProgramStateRef State, + SymbolReaper &SR) const { + // Mark all symbols in our string length map as valid. 
+ for (const auto &Item : State->get()) { + SVal Len = Item.second; + const auto LenSymbolRange = + llvm::make_range(Len.symbol_begin(), Len.symbol_end()); + for (SymbolRef Symbol : LenSymbolRange) + SR.markInUse(Symbol); + } +} + +void CStringChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + CStringLengthMapTy Entries = State->get(); + if (Entries.isEmpty()) + return; + + CStringLengthMapTy::Factory &F = State->get_context(); + for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + if (SymbolRef Sym = Len.getAsSymbol()) { + if (SR.isDead(Sym)) + Entries = F.remove(Entries, I.getKey()); + } + } + + State = State->set(Entries); + C.addTransition(State); +} + +ProgramStateRef CStringChecker::checkRegionChanges( + ProgramStateRef state, const InvalidatedSymbols *, + ArrayRef ExplicitRegions, + ArrayRef Regions, const LocationContext *, + const CallEvent *) const { + CStringLengthMapTy Entries = state->get(); + if (Entries.isEmpty()) + return state; + + llvm::SmallPtrSet Invalidated; + llvm::SmallPtrSet SuperRegions; + + // First build sets for the changed regions and their super-regions. + for (ArrayRef::iterator I = Regions.begin(), + E = Regions.end(); + I != E; ++I) { + const MemRegion *MR = *I; + Invalidated.insert(MR); + + SuperRegions.insert(MR); + while (const SubRegion *SR = dyn_cast(MR)) { + MR = SR->getSuperRegion(); + SuperRegions.insert(MR); + } + } + + CStringLengthMapTy::Factory &F = state->get_context(); + + // Then loop over the entries in the current state. + for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + const MemRegion *MR = I.getKey(); + + // Is this entry for a super-region of a changed region? + if (SuperRegions.count(MR)) { + Entries = F.remove(Entries, MR); + continue; + } + + // Is this entry for a sub-region of a changed region? 
+ const MemRegion *Super = MR; + while (const SubRegion *SR = dyn_cast(Super)) { + Super = SR->getSuperRegion(); + if (Invalidated.count(Super)) { + Entries = F.remove(Entries, MR); + break; + } + } + } + + return state->set(Entries); +} + +// TODO: Is it useful? +void CStringChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + dumpCStringLengths(State, Out, NL, Sep); +}