diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2625,13 +2625,44 @@ """""""""""""""""""""""""""""""""" Check for out-of-bounds access in string functions; applies to:`` strncopy, strncat``. - .. code-block:: c void test() { int y = strlen((char *)&test); // warn } +.. _alpha-unix-cstring-UninitializedRead: + +alpha.unix.cstring.UninitializedRead (C) +"""""""""""""""""""""""""""""""""""""""" +Check for uninitialized reads from common memory copy/manipulation functions such as: + ``memcpy, mempcpy, memmove, memcmp, strcmp, strncmp, strcpy, strlen, strsep`` and many more. + +.. code-block:: c + + void test() { + char src[10]; + char dst[5]; + memcpy(dst,src,sizeof(dst)); // warn: Bytes string function accesses uninitialized/garbage values + } + +Limitations: + + - Due to limitations of the memory modeling in the analyzer, one can likely + observe a lot of false-positive reports like this: + .. code-block:: c + + void false_positive() { + int src[] = {1, 2, 3, 4}; + int dst[5] = {0}; + memcpy(dst, src, 4 * sizeof(int)); // false-positive: + // The 'src' buffer was correctly initialized, yet we cannot conclude + // that since the analyzer could not see a direct initialization of the + // very last byte of the source buffer. + } + + More details at the corresponding `GitHub issue `_. + .. _alpha-nondeterminism-PointerIteration: alpha.nondeterminism.PointerIteration (C++) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -471,7 +471,12 @@ HelpText<"Check for arguments which are not null-terminating strings">, Dependencies<[CStringModeling]>, Documentation; - + +def CStringUninitializedRead : Checker<"UninitializedRead">, + HelpText<"Checks if the string manipulation function would read uninitialized bytes">, + Dependencies<[CStringModeling]>, + Documentation; + } // end "alpha.unix.cstring" let ParentPackage = Unix in { diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -80,7 +80,7 @@ check::RegionChanges > { mutable std::unique_ptr BT_Null, BT_Bounds, BT_Overlap, - BT_NotCString, BT_AdditionOverflow; + BT_NotCString, BT_AdditionOverflow, BT_UninitRead; mutable const char *CurrentFunctionDescription; @@ -92,11 +92,13 @@ DefaultBool CheckCStringOutOfBounds; DefaultBool CheckCStringBufferOverlap; DefaultBool CheckCStringNotNullTerm; + DefaultBool CheckCStringUninitializedRead; CheckerNameRef CheckNameCStringNullArg; CheckerNameRef CheckNameCStringOutOfBounds; CheckerNameRef CheckNameCStringBufferOverlap; CheckerNameRef CheckNameCStringNotNullTerm; + CheckerNameRef CheckNameCStringUninitializedRead; }; CStringChecksFilter Filter; @@ -257,6 +259,8 @@ void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const; void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; + void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, + const Expr *E) const; ProgramStateRef checkAdditionOverflow(CheckerContext &C, ProgramStateRef state, @@ -368,6 +372,14 @@ return nullptr; } + // Ensure that we wouldn't read uninitialized value. + if (Access == AccessKind::read) { + if (StInBound->getSVal(ER).isUndef()) { + emitUninitializedReadBug(C, StInBound, Buffer.Expression); + return nullptr; + } + } + // Array bound check succeeded. From this point forward the array bound // should always succeed. return StInBound; @@ -421,6 +433,7 @@ SVal BufEnd = svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); + State = CheckLocation(C, State, Buffer, BufStart, Access); State = CheckLocation(C, State, Buffer, BufEnd, Access); // If the buffer isn't large enough, abort. @@ -580,6 +593,26 @@ } } +void CStringChecker::emitUninitializedReadBug(CheckerContext &C, + ProgramStateRef State, + const Expr *E) const { + if (ExplodedNode *N = C.generateErrorNode(State)) { + const char *Msg = + "Bytes string function accesses uninitialized/garbage values"; + if (!BT_UninitRead) + BT_UninitRead.reset( + new BuiltinBug(Filter.CheckNameCStringUninitializedRead, + "Accessing unitialized/garbage values", Msg)); + + BuiltinBug *BT = static_cast(BT_UninitRead.get()); + + auto Report = std::make_unique(*BT, Msg, N); + Report->addRange(E->getSourceRange()); + bugreporter::trackExpressionValue(N, E, *Report); + C.emitReport(std::move(Report)); + } +} + void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const { @@ -2460,3 +2493,4 @@ REGISTER_CHECKER(CStringOutOfBounds) REGISTER_CHECKER(CStringBufferOverlap) REGISTER_CHECKER(CStringNotNullTerm) +REGISTER_CHECKER(CStringUninitializedRead) \ No newline at end of file diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c --- a/clang/test/Analysis/bstring.c +++ b/clang/test/Analysis/bstring.c @@ -70,6 +70,11 @@ #endif /* VARIANT */ +void top(char *dst) { + char buf[10]; + memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + (void)buf; +} void memcpy0 () { char src[] = {1, 2, 3, 4}; @@ -297,9 +302,12 @@ int dst[5] = {0}; int *p; - p = mempcpy(dst, src, 4 * sizeof(int)); + p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + // FIXME: This behaviour is actually Unexpected and needs to be fix, + // mempcpy seems to consider the src buffered byte as uninitialized + // and returning undef which is actually not the case It should return something like Unknown . - clang_analyzer_eval(p == &dst[4]); // expected-warning{{TRUE}} + clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal) } struct st { @@ -314,9 +322,10 @@ struct st *p2; p1 = (&s2) + 1; - p2 = mempcpy(&s2, &s1, sizeof(struct st)); - - clang_analyzer_eval(p1 == p2); // expected-warning{{TRUE}} + p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + // FIXME: It seems same as mempcpy14() case. + + clang_analyzer_eval(p1 == p2); // no-warning (above is fatal) } void mempcpy16() {