diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -260,11 +260,34 @@ const Expr *expr, SVal val) const; - static ProgramStateRef InvalidateBuffer(CheckerContext &C, - ProgramStateRef state, - const Expr *Ex, SVal V, - bool IsSourceBuffer, - const Expr *Size); + /// Invalidate the destination buffer determined by characters copied. + static ProgramStateRef + invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, + const Expr *BufE, SVal BufV, SVal SizeV, + QualType SizeTy); + + /// Operation never overflows, do not invalidate the super region. + static ProgramStateRef invalidateDestinationBufferNeverOverflows( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); + + /// We do not know whether the operation can overflow (e.g. size is unknown), + /// invalidate the super region and escape related pointers. + static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); + + /// Invalidate the source buffer for escaping pointers. + static ProgramStateRef invalidateSourceBuffer(CheckerContext &C, + ProgramStateRef S, + const Expr *BufE, SVal BufV); + + /// @param InvalidationTraitOperations Determine how to invlidate the + /// MemRegion by setting the invalidation traits. Return true to cause pointer + /// escape, or false otherwise. + static ProgramStateRef invalidateBufferAux( + CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V, + llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, + const MemRegion *)> + InvalidationTraitOperations); static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, const MemRegion *MR); @@ -310,10 +333,9 @@ // Return true if the destination buffer of the copy function may be in bound. // Expects SVal of Size to be positive and unsigned. // Expects SVal of FirstBuf to be a FieldRegion. - static bool IsFirstBufInBound(CheckerContext &C, - ProgramStateRef state, - const Expr *FirstBuf, - const Expr *Size); + static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State, + SVal BufVal, QualType BufTy, SVal LengthVal, + QualType LengthTy); }; } //end anonymous namespace @@ -967,43 +989,40 @@ return strRegion->getStringLiteral(); } -bool CStringChecker::IsFirstBufInBound(CheckerContext &C, - ProgramStateRef state, - const Expr *FirstBuf, - const Expr *Size) { +bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, + SVal BufVal, QualType BufTy, + SVal LengthVal, QualType LengthTy) { // If we do not know that the buffer is long enough we return 'true'. // Otherwise the parent region of this field region would also get // invalidated, which would lead to warnings based on an unknown state. + if (LengthVal.isUnknown()) + return false; + // Originally copied from CheckBufferAccess and CheckLocation. - SValBuilder &svalBuilder = C.getSValBuilder(); - ASTContext &Ctx = svalBuilder.getContext(); - const LocationContext *LCtx = C.getLocationContext(); + SValBuilder &SB = C.getSValBuilder(); + ASTContext &Ctx = C.getASTContext(); - QualType sizeTy = Size->getType(); QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); - SVal BufVal = state->getSVal(FirstBuf, LCtx); - SVal LengthVal = state->getSVal(Size, LCtx); std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); if (!Length) return true; // cf top comment. // Compute the offset of the last element to be accessed: size-1. - NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); - SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy); + NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>(); + SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy); if (Offset.isUnknown()) return true; // cf top comment NonLoc LastOffset = Offset.castAs<NonLoc>(); // Check that the first buffer is sufficiently long. - SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); + SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy); std::optional<Loc> BufLoc = BufStart.getAs<Loc>(); if (!BufLoc) return true; // cf top comment. - SVal BufEnd = - svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); + SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); // Check for out of bound array element access. const MemRegion *R = BufEnd.getAsRegion(); @@ -1017,28 +1036,90 @@ // FIXME: Does this crash when a non-standard definition // of a library function is encountered? assert(ER->getValueType() == C.getASTContext().CharTy && - "IsFirstBufInBound should only be called with char* ElementRegions"); + "isFirstBufInBound should only be called with char* ElementRegions"); // Get the size of the array. const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); - DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder); + DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB); // Get the index of the accessed element. DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); - ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true); + ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true); return static_cast<bool>(StInBound); } -ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, - ProgramStateRef state, - const Expr *E, SVal V, - bool IsSourceBuffer, - const Expr *Size) { +ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV, + SVal SizeV, QualType SizeTy) { + auto InvalidationTraitOperations = + [&C, S, BufTy = BufE->getType(), BufV, SizeV, + SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + // If destination buffer is a field region and access is in bound, do + // not invalidate its super region. + if (MemRegion::FieldRegionKind == R->getKind() && + isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + } + return false; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef +CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { + auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &, + const MemRegion *R) { + return isa<FieldRegion>(R); + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { + auto InvalidationTraitOperations = + [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + if (MemRegion::FieldRegionKind == R->getKind()) + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + return false; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C, + ProgramStateRef S, + const Expr *BufE, + SVal BufV) { + auto InvalidationTraitOperations = + [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + ITraits.setTrait( + R->getBaseRegion(), + RegionAndSymbolInvalidationTraits::TK_PreserveContents); + ITraits.setTrait(R, + RegionAndSymbolInvalidationTraits::TK_SuppressEscape); + return true; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateBufferAux( + CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V, + llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, + const MemRegion *)> + InvalidationTraitOperations) { std::optional<Loc> L = V.getAs<Loc>(); if (!L) - return state; + return State; // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes // some assumptions about the value that CFRefCount can't. Even so, it should @@ -1055,29 +1136,10 @@ // Invalidate this region. const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); - - bool CausesPointerEscape = false; RegionAndSymbolInvalidationTraits ITraits; - // Invalidate and escape only indirect regions accessible through the source - // buffer. - if (IsSourceBuffer) { - ITraits.setTrait(R->getBaseRegion(), - RegionAndSymbolInvalidationTraits::TK_PreserveContents); - ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); - CausesPointerEscape = true; - } else { - const MemRegion::Kind& K = R->getKind(); - if (K == MemRegion::FieldRegionKind) - if (Size && IsFirstBufInBound(C, state, E, Size)) { - // If destination buffer is a field region and access is in bound, - // do not invalidate its super region. - ITraits.setTrait( - R, - RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); - } - } + bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R); - return state->invalidateRegions(R, E, C.blockCount(), LCtx, + return State->invalidateRegions(R, E, C.blockCount(), LCtx, CausesPointerEscape, nullptr, nullptr, &ITraits); } @@ -1085,7 +1147,7 @@ // If we have a non-region value by chance, just remove the binding. // FIXME: is this necessary or correct? This handles the non-Region // cases. Is it ever valid to store to these? - return state->killBinding(*L); + return State->killBinding(*L); } bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, @@ -1182,8 +1244,8 @@ } else { // If the destination buffer's extent is not equal to the value of // third argument, just invalidate buffer. - State = InvalidateBuffer(C, State, DstBuffer, MemVal, - /*IsSourceBuffer*/ false, Size); + State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, + SizeVal, Size->getType()); } if (StateNullChar && !StateNonNullChar) { @@ -1208,8 +1270,8 @@ } else { // If the offset is not zero and char value is not concrete, we can do // nothing but invalidate the buffer. - State = InvalidateBuffer(C, State, DstBuffer, MemVal, - /*IsSourceBuffer*/ false, Size); + State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, + SizeVal, Size->getType()); } return true; } @@ -1305,15 +1367,14 @@ // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // copied region, but that's still an improvement over blank invalidation. - state = - InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression), - /*IsSourceBuffer*/ false, Size.Expression); + state = invalidateDestinationBufferBySize( + C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal, + Size.Expression->getType()); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). - state = InvalidateBuffer(C, state, Source.Expression, - C.getSVal(Source.Expression), - /*IsSourceBuffer*/ true, nullptr); + state = invalidateSourceBuffer(C, state, Source.Expression, + C.getSVal(Source.Expression)); C.addTransition(state); } @@ -1985,13 +2046,13 @@ // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // string, but that's still an improvement over blank invalidation. - state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal, - /*IsSourceBuffer*/ false, nullptr); + state = invalidateDestinationBufferBySize(C, state, Dst.Expression, + *dstRegVal, amountCopied, + C.getASTContext().getSizeType()); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). - state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal, - /*IsSourceBuffer*/ true, nullptr); + state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal); // Set the C string length of the destination, if we know it. if (IsBounded && (appendK == ConcatFnKind::none)) { @@ -2206,8 +2267,9 @@ // Invalidate the search string, representing the change of one delimiter // character to NUL. - State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result, - /*IsSourceBuffer*/ false, nullptr); + // As the replacement never overflows, do not invalidate its super region. + State = invalidateDestinationBufferNeverOverflows( + C, State, SearchStrPtr.Expression, Result); // Overwrite the search string pointer. The new value is either an address // further along in the same string, or NULL if there are no more tokens. @@ -2256,8 +2318,10 @@ // Invalidate the destination buffer const Expr *Dst = CE->getArg(2); SVal DstVal = State->getSVal(Dst, LCtx); - State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false, - /*Size=*/nullptr); + // FIXME: As we do not know how many items are copied, we also invalidate the + // super region containing the target location. + State = + invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal); SValBuilder &SVB = C.getSValBuilder(); diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h --- a/clang/test/Analysis/Inputs/system-header-simulator.h +++ b/clang/test/Analysis/Inputs/system-header-simulator.h @@ -63,7 +63,9 @@ char *strcpy(char *restrict, const char *restrict); char *strncpy(char *dst, const char *src, size_t n); +char *strsep(char **stringp, const char *delim); void *memcpy(void *dst, const void *src, size_t n); +void *memset(void *s, int c, size_t n); typedef unsigned long __darwin_pthread_key_t; typedef __darwin_pthread_key_t pthread_key_t; diff --git a/clang/test/Analysis/issue-55019.cpp b/clang/test/Analysis/issue-55019.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/issue-55019.cpp @@ -0,0 +1,89 @@ +// Refer issue 55019 for more details. +// A supplemental test case of pr22954.c for other functions modeled in +// the CStringChecker. + +// RUN: %clang_analyze_cc1 %s -verify \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=unix \ +// RUN: -analyzer-checker=debug.ExprInspection + +#include "Inputs/system-header-simulator.h" +#include "Inputs/system-header-simulator-cxx.h" + +void *malloc(size_t); +void free(void *); + +struct mystruct { + void *ptr; + char arr[4]; +}; + +void clang_analyzer_dump(const void *); + +// CStringChecker::memsetAux +void fmemset() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + memset(x.arr, 0, sizeof(x.arr)); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalCopyCommon +void fmemcpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + memcpy(x.arr, "hi", 2); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStrcpyCommon +void fstrcpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + strcpy(x.arr, "hi"); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +void fstrncpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + strncpy(x.arr, "hi", sizeof(x.arr)); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStrsep +void fstrsep() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + char *p = x.arr; + (void)strsep(&p, "x"); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStdCopyCommon +void fstdcopy() { + mystruct x; + x.ptr = new char; + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + + const char *p = "x"; + std::copy(p, p + 1, x.arr); + + // FIXME: As we currently cannot know whether the copy overflows, the checker + // invalidates the entire `x` object. When the copy size through iterators + // can be correctly modeled, we can then update the verify direction from + // SymRegion to HeapSymRegion as this std::copy call never overflows and + // hence the pointer `x.ptr` shall not be invalidated. + clang_analyzer_dump(x.ptr); // expected-warning {{SymRegion}} + delete static_cast<char*>(x.ptr); // no-leak-warning +} diff --git a/clang/test/Analysis/pr22954.c b/clang/test/Analysis/pr22954.c --- a/clang/test/Analysis/pr22954.c +++ b/clang/test/Analysis/pr22954.c @@ -556,12 +556,13 @@ x263.s2 = strdup("hello"); char input[] = {'a', 'b', 'c', 'd'}; memcpy(x263.s1, input, *(len + n)); - clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}}\ + expected-warning{{Potential leak of memory pointed to by 'x263.s2'}} clang_analyzer_eval(x263.s1[1] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s1[2] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s1[3] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s2 == 0); // expected-warning{{UNKNOWN}} - return 0; // expected-warning{{Potential leak of memory pointed to by 'x263.s2'}} + return 0; }