Index: clang/include/clang/StaticAnalyzer/Checkers/Checkers.td =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -427,22 +427,29 @@ let ParentPackage = CString in { def CStringModeling : Checker<"CStringModeling">, + HelpText<"Responsible for essential modeling of cstring lengths. " + "This is required by all cstring related checkers.">, + Documentation, + Hidden; + +def CStringChecker : Checker<"CStringChecker">, HelpText<"The base of several CString related checkers. On it's own it emits " "no reports, but adds valuable information to the analysis when " "enabled.">, + Dependencies<[CStringModeling]>, Documentation, Hidden; def CStringNullArg : Checker<"NullArg">, HelpText<"Check for null pointers being passed as arguments to C string " "functions">, - Dependencies<[CStringModeling]>, + Dependencies<[CStringChecker]>, Documentation; def CStringSyntaxChecker : Checker<"BadSizeArg">, HelpText<"Check the size argument passed into C string functions for common " "erroneous patterns">, - Dependencies<[CStringModeling]>, + Dependencies<[CStringChecker]>, Documentation; } // end "unix.cstring" @@ -451,17 +458,17 @@ def CStringOutOfBounds : Checker<"OutOfBounds">, HelpText<"Check for out-of-bounds access in string functions">, - Dependencies<[CStringModeling]>, + Dependencies<[CStringChecker]>, Documentation; def CStringBufferOverlap : Checker<"BufferOverlap">, HelpText<"Checks for overlap in two buffer arguments">, - Dependencies<[CStringModeling]>, + Dependencies<[CStringChecker]>, Documentation; def CStringNotNullTerm : Checker<"NotNullTerminated">, HelpText<"Check for arguments which are not null-terminating strings">, - Dependencies<[CStringModeling]>, + Dependencies<[CStringChecker]>, Documentation; } // end "alpha.unix.cstring" @@ -485,7 +492,7 @@ "false", InAlpha> ]>, - Dependencies<[CStringModeling]>, + 
Dependencies<[CStringChecker]>, Documentation, Hidden; Index: clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -13,6 +13,7 @@ BoolAssignmentChecker.cpp BuiltinFunctionChecker.cpp CStringChecker.cpp + CStringLength.cpp CStringSyntaxChecker.cpp CallAndMessageChecker.cpp CastSizeChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "InterCheckerAPI.h" +#include "CStringLength.h" #include "clang/Basic/CharInfo.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -20,7 +20,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -72,12 +71,7 @@ } enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; -class CStringChecker : public Checker< eval::Call, - check::PreStmt, - check::LiveSymbols, - check::DeadSymbols, - check::RegionChanges - > { +class CStringChecker : public Checker { mutable std::unique_ptr BT_Null, BT_Bounds, BT_Overlap, BT_NotCString, BT_AdditionOverflow; @@ -103,17 +97,6 @@ static void *getTag() { static int tag; return &tag; } bool evalCall(const CallEvent &Call, CheckerContext &C) const; - void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; - void 
checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; - void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; - - ProgramStateRef - checkRegionChanges(ProgramStateRef state, - const InvalidatedSymbols *, - ArrayRef ExplicitRegions, - ArrayRef Regions, - const LocationContext *LCtx, - const CallEvent *Call) const; typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *) const; @@ -200,25 +183,6 @@ static assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, QualType Ty); - static ProgramStateRef setCStringLength(ProgramStateRef state, - const MemRegion *MR, - SVal strLength); - static SVal getCStringLengthForRegion(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - const MemRegion *MR, - bool hypothetical); - SVal getCStringLength(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - SVal Buf, - bool hypothetical = false) const; - - const StringLiteral *getCStringLiteral(CheckerContext &C, - ProgramStateRef &state, - const Expr *expr, - SVal val) const; - static ProgramStateRef InvalidateBuffer(CheckerContext &C, ProgramStateRef state, const Expr *Ex, SVal V, @@ -269,16 +233,90 @@ ProgramStateRef state, const Expr *FirstBuf, const Expr *Size); + + // FIXME: refactor the rest of the code, and remove this function. + // Simply wraps the cstring::getCStringLength function and emits warnings. + SVal getCStringLengthChecked(CheckerContext &Ctx, ProgramStateRef &State, + const Expr *Ex, SVal Buf, + bool hypothetical = false) const; }; } //end anonymous namespace -REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) - //===----------------------------------------------------------------------===// // Individual checks and utility methods. //===----------------------------------------------------------------------===// +static const StringLiteral *getCStringLiteral(SVal val) { + // Get the memory region pointed to by the val. 
+ const MemRegion *bufRegion = val.getAsRegion(); + if (!bufRegion) + return nullptr; + + // Strip casts off the memory region. + bufRegion = bufRegion->StripCasts(); + + // Cast the memory region to a string region. + const StringRegion *strRegion = dyn_cast(bufRegion); + if (!strRegion) + return nullptr; + + // Return the actual string in the string region. + return strRegion->getStringLiteral(); +} + +SVal CStringChecker::getCStringLengthChecked(CheckerContext &Ctx, + ProgramStateRef &State, + const Expr *Ex, SVal Buf, + bool hypothetical) const { + SVal CStrLen = cstring::getCStringLength(Ctx, State, Ex, Buf, hypothetical); + + // Simply return if everything goes well. + // Otherwise we shall investigate why did it fail. + if (!CStrLen.isUndef()) + return CStrLen; + + // Handle if the buffer was not referring to a memory region. + const MemRegion *MR = Buf.getAsRegion(); + if (!MR) { + // If we can't get a region, see if it's something we /know/ isn't a + // C string. In the context of locations, the only time we can issue such + // a warning is for labels. + if (Optional Label = Buf.getAs()) { + if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription + << " is the address of the label '" << Label->getLabel()->getName() + << "', which is not a null-terminated string"; + + emitNotCStringBug(Ctx, State, Ex, os.str()); + } + return UndefinedVal(); + } + } + + // Other regions (mostly non-data) can't have a reliable C string length. + // In this case, an error is emitted and UndefinedVal is returned. + // The caller should always be prepared to handle this case. 
+ if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription << " is "; + + if (SummarizeRegion(os, Ctx.getASTContext(), MR)) + os << ", which is not a null-terminated string"; + else + os << "not a null-terminated string"; + + emitNotCStringBug(Ctx, State, Ex, os.str()); + } + return UndefinedVal(); +} + std::pair CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, QualType Ty) { @@ -694,181 +732,6 @@ return state; } -ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, - const MemRegion *MR, - SVal strLength) { - assert(!strLength.isUndef() && "Attempt to set an undefined string length"); - - MR = MR->StripCasts(); - - switch (MR->getKind()) { - case MemRegion::StringRegionKind: - // FIXME: This can happen if we strcpy() into a string region. This is - // undefined [C99 6.4.5p6], but we should still warn about it. - return state; - - case MemRegion::SymbolicRegionKind: - case MemRegion::AllocaRegionKind: - case MemRegion::NonParamVarRegionKind: - case MemRegion::ParamVarRegionKind: - case MemRegion::FieldRegionKind: - case MemRegion::ObjCIvarRegionKind: - // These are the types we can currently track string lengths for. - break; - - case MemRegion::ElementRegionKind: - // FIXME: Handle element regions by upper-bounding the parent region's - // string length. - return state; - - default: - // Other regions (mostly non-data) can't have a reliable C string length. - // For now, just ignore the change. - // FIXME: These are rare but not impossible. 
We should output some kind of - // warning for things like strcpy((char[]){'a', 0}, "b"); - return state; - } - - if (strLength.isUnknown()) - return state->remove(MR); - - return state->set(MR, strLength); -} - -SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, - ProgramStateRef &state, - const Expr *Ex, - const MemRegion *MR, - bool hypothetical) { - if (!hypothetical) { - // If there's a recorded length, go ahead and return it. - const SVal *Recorded = state->get(MR); - if (Recorded) - return *Recorded; - } - - // Otherwise, get a new symbol and update the state. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), - MR, Ex, sizeTy, - C.getLocationContext(), - C.blockCount()); - - if (!hypothetical) { - if (Optional strLn = strLength.getAs()) { - // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 - BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); - const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); - llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); - const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, - fourInt); - NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); - SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, - maxLength, sizeTy); - state = state->assume(evalLength.castAs(), true); - } - state = state->set(MR, strLength); - } - - return strLength; -} - -SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, - const Expr *Ex, SVal Buf, - bool hypothetical) const { - const MemRegion *MR = Buf.getAsRegion(); - if (!MR) { - // If we can't get a region, see if it's something we /know/ isn't a - // C string. In the context of locations, the only time we can issue such - // a warning is for labels. 
- if (Optional Label = Buf.getAs()) { - if (Filter.CheckCStringNotNullTerm) { - SmallString<120> buf; - llvm::raw_svector_ostream os(buf); - assert(CurrentFunctionDescription); - os << "Argument to " << CurrentFunctionDescription - << " is the address of the label '" << Label->getLabel()->getName() - << "', which is not a null-terminated string"; - - emitNotCStringBug(C, state, Ex, os.str()); - } - return UndefinedVal(); - } - - // If it's not a region and not a label, give up. - return UnknownVal(); - } - - // If we have a region, strip casts from it and see if we can figure out - // its length. For anything we can't figure out, just return UnknownVal. - MR = MR->StripCasts(); - - switch (MR->getKind()) { - case MemRegion::StringRegionKind: { - // Modifying the contents of string regions is undefined [C99 6.4.5p6], - // so we can assume that the byte length is the correct C string length. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - const StringLiteral *strLit = cast(MR)->getStringLiteral(); - return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); - } - case MemRegion::SymbolicRegionKind: - case MemRegion::AllocaRegionKind: - case MemRegion::NonParamVarRegionKind: - case MemRegion::ParamVarRegionKind: - case MemRegion::FieldRegionKind: - case MemRegion::ObjCIvarRegionKind: - return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); - case MemRegion::CompoundLiteralRegionKind: - // FIXME: Can we track this? Is it necessary? - return UnknownVal(); - case MemRegion::ElementRegionKind: - // FIXME: How can we handle this? It's not good enough to subtract the - // offset from the base string length; consider "123\x00567" and &a[5]. - return UnknownVal(); - default: - // Other regions (mostly non-data) can't have a reliable C string length. - // In this case, an error is emitted and UndefinedVal is returned. - // The caller should always be prepared to handle this case. 
- if (Filter.CheckCStringNotNullTerm) { - SmallString<120> buf; - llvm::raw_svector_ostream os(buf); - - assert(CurrentFunctionDescription); - os << "Argument to " << CurrentFunctionDescription << " is "; - - if (SummarizeRegion(os, C.getASTContext(), MR)) - os << ", which is not a null-terminated string"; - else - os << "not a null-terminated string"; - - emitNotCStringBug(C, state, Ex, os.str()); - } - return UndefinedVal(); - } -} - -const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, - ProgramStateRef &state, const Expr *expr, SVal val) const { - - // Get the memory region pointed to by the val. - const MemRegion *bufRegion = val.getAsRegion(); - if (!bufRegion) - return nullptr; - - // Strip casts off the memory region. - bufRegion = bufRegion->StripCasts(); - - // Cast the memory region to a string region. - const StringRegion *strRegion= dyn_cast(bufRegion); - if (!strRegion) - return nullptr; - - // Return the actual string in the string region. - return strRegion->getStringLiteral(); -} - bool CStringChecker::IsFirstBufInBound(CheckerContext &C, ProgramStateRef state, const Expr *FirstBuf, @@ -1094,8 +957,8 @@ if (StateNullChar && !StateNonNullChar) { // If the value of the second argument of 'memset()' is zero, set the // string length of destination buffer to 0 directly. 
- State = setCStringLength(State, MR, - svalBuilder.makeZeroVal(Ctx.getSizeType())); + State = cstring::setCStringLength( + State, MR, svalBuilder.makeZeroVal(Ctx.getSizeType())); } else if (!StateNullChar && StateNonNullChar) { SVal NewStrLen = svalBuilder.getMetadataSymbolVal( CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), @@ -1106,7 +969,7 @@ SVal NewStrLenGESize = svalBuilder.evalBinOp( State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); - State = setCStringLength( + State = cstring::setCStringLength( State->assume(NewStrLenGESize.castAs(), true), MR, NewStrLen); } @@ -1394,7 +1257,7 @@ if (!state) return; - SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); + SVal strLength = getCStringLengthChecked(C, state, Arg.Expression, ArgVal); // If the argument isn't a valid C string, there's no valid state to // transition to. @@ -1561,11 +1424,12 @@ return; // Get the string length of the source. - SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal); + SVal strLength = + getCStringLengthChecked(C, state, srcExpr.Expression, srcVal); Optional strLengthNL = strLength.getAs(); // Get the string length of the destination buffer. - SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal); + SVal dstStrLength = getCStringLengthChecked(C, state, Dst.Expression, DstVal); Optional dstStrLengthNL = dstStrLength.getAs(); // If the source isn't a valid C string, give up. @@ -1789,7 +1653,7 @@ if (finalStrLength.isUnknown()) { // Try to get a "hypothetical" string length symbol, which we can later // set as a real value if that turns out to be the case. 
- finalStrLength = getCStringLength(C, state, CE, DstVal, true); + finalStrLength = getCStringLengthChecked(C, state, CE, DstVal, true); assert(!finalStrLength.isUndef()); if (Optional finalStrLengthNL = finalStrLength.getAs()) { @@ -1899,7 +1763,8 @@ if (amountCopied != strLength) finalStrLength = UnknownVal(); } - state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); + state = cstring::setCStringLength(state, dstRegVal->getRegion(), + finalStrLength); } assert(state); @@ -1959,12 +1824,13 @@ return; // Get the string length of the first string or give up. - SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal); + SVal LeftLength = getCStringLengthChecked(C, state, Left.Expression, LeftVal); if (LeftLength.isUndef()) return; // Get the string length of the second string or give up. - SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal); + SVal RightLength = + getCStringLengthChecked(C, state, Right.Expression, RightVal); if (RightLength.isUndef()) return; @@ -1999,10 +1865,8 @@ // For now, we only do this if they're both known string literals. // Attempt to extract string literals from both expressions. - const StringLiteral *LeftStrLiteral = - getCStringLiteral(C, state, Left.Expression, LeftVal); - const StringLiteral *RightStrLiteral = - getCStringLiteral(C, state, Right.Expression, RightVal); + const StringLiteral *LeftStrLiteral = getCStringLiteral(LeftVal); + const StringLiteral *RightStrLiteral = getCStringLiteral(RightVal); bool canComputeResult = false; SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); @@ -2314,138 +2178,11 @@ return C.isDifferent(); } -void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { - // Record string length for char a[] = "abc"; - ProgramStateRef state = C.getState(); - - for (const auto *I : DS->decls()) { - const VarDecl *D = dyn_cast(I); - if (!D) - continue; - - // FIXME: Handle array fields of structs. 
- if (!D->getType()->isArrayType()) - continue; - - const Expr *Init = D->getInit(); - if (!Init) - continue; - if (!isa(Init)) - continue; - - Loc VarLoc = state->getLValue(D, C.getLocationContext()); - const MemRegion *MR = VarLoc.getAsRegion(); - if (!MR) - continue; - - SVal StrVal = C.getSVal(Init); - assert(StrVal.isValid() && "Initializer string is unknown or undefined"); - DefinedOrUnknownSVal strLength = - getCStringLength(C, state, Init, StrVal).castAs(); - - state = state->set(MR, strLength); - } - - C.addTransition(state); -} - -ProgramStateRef -CStringChecker::checkRegionChanges(ProgramStateRef state, - const InvalidatedSymbols *, - ArrayRef ExplicitRegions, - ArrayRef Regions, - const LocationContext *LCtx, - const CallEvent *Call) const { - CStringLengthTy Entries = state->get(); - if (Entries.isEmpty()) - return state; - - llvm::SmallPtrSet Invalidated; - llvm::SmallPtrSet SuperRegions; - - // First build sets for the changed regions and their super-regions. - for (ArrayRef::iterator - I = Regions.begin(), E = Regions.end(); I != E; ++I) { - const MemRegion *MR = *I; - Invalidated.insert(MR); - - SuperRegions.insert(MR); - while (const SubRegion *SR = dyn_cast(MR)) { - MR = SR->getSuperRegion(); - SuperRegions.insert(MR); - } - } - - CStringLengthTy::Factory &F = state->get_context(); - - // Then loop over the entries in the current state. - for (CStringLengthTy::iterator I = Entries.begin(), - E = Entries.end(); I != E; ++I) { - const MemRegion *MR = I.getKey(); - - // Is this entry for a super-region of a changed region? - if (SuperRegions.count(MR)) { - Entries = F.remove(Entries, MR); - continue; - } - - // Is this entry for a sub-region of a changed region? 
- const MemRegion *Super = MR; - while (const SubRegion *SR = dyn_cast(Super)) { - Super = SR->getSuperRegion(); - if (Invalidated.count(Super)) { - Entries = F.remove(Entries, MR); - break; - } - } - } - - return state->set(Entries); -} - -void CStringChecker::checkLiveSymbols(ProgramStateRef state, - SymbolReaper &SR) const { - // Mark all symbols in our string length map as valid. - CStringLengthTy Entries = state->get(); - - for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - SVal Len = I.getData(); - - for (SymExpr::symbol_iterator si = Len.symbol_begin(), - se = Len.symbol_end(); si != se; ++si) - SR.markInUse(*si); - } -} - -void CStringChecker::checkDeadSymbols(SymbolReaper &SR, - CheckerContext &C) const { - ProgramStateRef state = C.getState(); - CStringLengthTy Entries = state->get(); - if (Entries.isEmpty()) - return; - - CStringLengthTy::Factory &F = state->get_context(); - for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - SVal Len = I.getData(); - if (SymbolRef Sym = Len.getAsSymbol()) { - if (SR.isDead(Sym)) - Entries = F.remove(Entries, I.getKey()); - } - } - - state = state->set(Entries); - C.addTransition(state); -} - -void ento::registerCStringModeling(CheckerManager &Mgr) { +void ento::registerCStringChecker(CheckerManager &Mgr) { Mgr.registerChecker(); } -bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) { - return true; -} +bool ento::shouldRegisterCStringChecker(const CheckerManager &) { return true; } #define REGISTER_CHECKER(name) \ void ento::register##name(CheckerManager &mgr) { \ Index: clang/lib/StaticAnalyzer/Checkers/CStringLength.h =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/CStringLength.h @@ -0,0 +1,52 @@ +//=== CStringLength.h Stores the length of a cstring. 
------------*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines an interface for interacting and manipulating the associated cstring +// length of a given memory region. +// You can assign a cstring length to any memory region, representing the first +// zero terminator in that region. +// Eg: "abc\00def" -> 4 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +namespace clang { +namespace ento { +class CheckerContext; + +namespace cstring { + +/// Assigns a cstring length to a memory region. +LLVM_NODISCARD ProgramStateRef setCStringLength(ProgramStateRef State, + const MemRegion *MR, + SVal StrLength); + +/// Removes the assigned cstring length from the memory region. +/// It is useful for invalidation. +LLVM_NODISCARD ProgramStateRef removeCStringLength(ProgramStateRef State, + const MemRegion *MR); + +// FIXME: Eventually rework the interface of this function. +// Especially the magic 'Hypothetical' parameter. 
+LLVM_NODISCARD SVal getCStringLength(CheckerContext &Ctx, + ProgramStateRef &State, const Expr *Ex, + SVal Buf, bool Hypothetical = false); + +LLVM_DUMP_METHOD void dumpCStringLengths(ProgramStateRef State, + raw_ostream &Out = llvm::errs(), + const char *NL = "\n", + const char *Sep = " : "); +} // namespace cstring +} // namespace ento +} // namespace clang + +#endif Index: clang/lib/StaticAnalyzer/Checkers/CStringLength.cpp =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/CStringLength.cpp @@ -0,0 +1,331 @@ +//=== CStringLength.cpp ------------------------------------------*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Also defines the CStringModeling checker, which is responsible for tracking +// and maintaining the associated cstring lengths. Such information is tracked +// in the CStringLengthMap. +// +// +//===----------------------------------------------------------------------===// + +#include "CStringLength.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +/// Stores the size of the zero terminated string in the memory region. +/// In other words, the inclusive range between the begin of the region and the +/// zero terminator.
+REGISTER_MAP_WITH_PROGRAMSTATE(CStringLengthMap, const MemRegion *, SVal) + +namespace { +using namespace cstring; + +/// Updates the CStringLengthMap. +/// - Infers the cstring length of string literals. +/// - Removes cstring length associations of dead symbols. +/// - Handles region invalidation. +class CStringModeling + : public Checker, check::LiveSymbols, + check::DeadSymbols, check::RegionChanges> { +public: + static void *getTag() { + static int Tag; + return &Tag; + } + + void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + // Record string length for char a[] = "abc"; + ProgramStateRef state = C.getState(); + + for (const auto *I : DS->decls()) { + const VarDecl *D = dyn_cast(I); + if (!D) + continue; + + // FIXME: Handle array fields of structs. + if (!D->getType()->isArrayType()) + continue; + + const Expr *Init = D->getInit(); + if (!Init) + continue; + if (!isa(Init)) + continue; + + Loc VarLoc = state->getLValue(D, C.getLocationContext()); + const MemRegion *MR = VarLoc.getAsRegion(); + if (!MR) + continue; + + SVal StrVal = C.getSVal(Init); + assert(StrVal.isValid() && "Initializer string is unknown or undefined"); + DefinedOrUnknownSVal strLength = getCStringLength(C, state, Init, StrVal) + .castAs(); + + state = state->set(MR, strLength); + } + + C.addTransition(state); + } + + void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SR) const { + // Mark all symbols in our string length map as valid.
+ for (const auto &Item : State->get()) { + SVal Len = Item.second; + const auto LenSymbolRange = + llvm::make_range(Len.symbol_begin(), Len.symbol_end()); + for (SymbolRef Symbol : LenSymbolRange) + SR.markInUse(Symbol); + } + } + + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + CStringLengthMapTy Entries = State->get(); + if (Entries.isEmpty()) + return; + + CStringLengthMapTy::Factory &F = State->get_context(); + for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + if (SymbolRef Sym = Len.getAsSymbol()) { + if (SR.isDead(Sym)) + Entries = F.remove(Entries, I.getKey()); + } + } + + State = State->set(Entries); + C.addTransition(State); + } + + ProgramStateRef + checkRegionChanges(ProgramStateRef state, const InvalidatedSymbols *, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const LocationContext *, const CallEvent *) const { + CStringLengthMapTy Entries = state->get(); + if (Entries.isEmpty()) + return state; + + llvm::SmallPtrSet Invalidated; + llvm::SmallPtrSet SuperRegions; + + // First build sets for the changed regions and their super-regions. + for (ArrayRef::iterator I = Regions.begin(), + E = Regions.end(); + I != E; ++I) { + const MemRegion *MR = *I; + Invalidated.insert(MR); + + SuperRegions.insert(MR); + while (const SubRegion *SR = dyn_cast(MR)) { + MR = SR->getSuperRegion(); + SuperRegions.insert(MR); + } + } + + CStringLengthMapTy::Factory &F = state->get_context(); + + // Then loop over the entries in the current state. + for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + const MemRegion *MR = I.getKey(); + + // Is this entry for a super-region of a changed region? + if (SuperRegions.count(MR)) { + Entries = F.remove(Entries, MR); + continue; + } + + // Is this entry for a sub-region of a changed region? 
+ const MemRegion *Super = MR; + while (const SubRegion *SR = dyn_cast(Super)) { + Super = SR->getSuperRegion(); + if (Invalidated.count(Super)) { + Entries = F.remove(Entries, MR); + break; + } + } + } + + return state->set(Entries); + } + + // TODO: Is it useful? + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const { + dumpCStringLengths(State, Out, NL, Sep); + } +}; // class CStringModeling +} // namespace + +void ento::registerCStringModeling(CheckerManager &Mgr) { + Mgr.registerChecker(); +} + +bool ento::shouldRegisterCStringModeling(const CheckerManager &) { + return true; +} + +//===----------------------------------------------------------------------===// +// Implementation of the public API. +//===----------------------------------------------------------------------===// + +namespace clang { +namespace ento { +namespace cstring { + +ProgramStateRef setCStringLength(ProgramStateRef State, const MemRegion *MR, + SVal StrLength) { + assert(!StrLength.isUndef() && "Attempt to set an undefined string length"); + + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + // FIXME: This can happen if we strcpy() into a string region. This is + // undefined [C99 6.4.5p6], but we should still warn about it. + return State; + + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::NonParamVarRegionKind: + case MemRegion::ParamVarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + // These are the types we can currently track string lengths for. + break; + + case MemRegion::ElementRegionKind: + // FIXME: Handle element regions by upper-bounding the parent region's + // string length. + return State; + + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // For now, just ignore the change. + // FIXME: These are rare but not impossible. 
We should output some kind of + // warning for things like strcpy((char[]){'a', 0}, "b"); + return State; + } + + if (StrLength.isUnknown()) + return removeCStringLength(State, MR); + + return State->set(MR, StrLength); +} + +ProgramStateRef removeCStringLength(ProgramStateRef State, + const MemRegion *MR) { + return State->remove(MR); +} + +static SVal getCStringLengthForRegion(CheckerContext &Ctx, + ProgramStateRef &State, const Expr *Ex, + const MemRegion *MR, bool Hypothetical) { + if (!Hypothetical) { + // If there's a recorded length, go ahead and return it. + if (const SVal *Recorded = State->get(MR)) + return *Recorded; + } + + // Otherwise, get a new symbol and update the state. + SValBuilder &SVB = Ctx.getSValBuilder(); + QualType SizeTy = SVB.getContext().getSizeType(); + NonLoc CStrLen = + SVB.getMetadataSymbolVal(CStringModeling::getTag(), MR, Ex, SizeTy, + Ctx.getLocationContext(), Ctx.blockCount()) + .castAs(); + + if (!Hypothetical) { + // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + const llvm::APSInt &MaxValue = BVF.getMaxValue(SizeTy); + const llvm::APSInt Four = APSIntType(MaxValue).getValue(4); + const llvm::APSInt *MaxLength = BVF.evalAPSInt(BO_Div, MaxValue, Four); + const NonLoc MaxLengthSVal = SVB.makeIntVal(*MaxLength); + SVal Constrained = + SVB.evalBinOpNN(State, BO_LE, CStrLen, MaxLengthSVal, SizeTy); + State = State->assume(Constrained.castAs(), true); + State = State->set(MR, CStrLen); + } + + return CStrLen; +} + +SVal getCStringLength(CheckerContext &Ctx, ProgramStateRef &State, + const Expr *Ex, SVal Buf, bool Hypothetical /*=false*/) { + if (Buf.isUnknownOrUndef()) + return Buf; + + if (Buf.getAs()) + return UndefinedVal(); + + // If it's not a region, give up. + const MemRegion *MR = Buf.getAsRegion(); + if (!MR) + return UnknownVal(); + + // If we have a region, strip casts from it and see if we can figure out + // its length. 
For anything we can't figure out, just return UnknownVal. + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: { + // Modifying the contents of string regions is undefined [C99 6.4.5p6], + // so we can assume that the byte length is the correct C string length. + SValBuilder &SVB = Ctx.getSValBuilder(); + QualType SizeTy = SVB.getContext().getSizeType(); + const StringLiteral *StrLiteral = + cast(MR)->getStringLiteral(); + return SVB.makeIntVal(StrLiteral->getByteLength(), SizeTy); + } + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::NonParamVarRegionKind: + case MemRegion::ParamVarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + return getCStringLengthForRegion(Ctx, State, Ex, MR, Hypothetical); + case MemRegion::CompoundLiteralRegionKind: + // FIXME: Can we track this? Is it necessary? + return UnknownVal(); + case MemRegion::ElementRegionKind: + // FIXME: How can we handle this? It's not good enough to subtract the + // offset from the base string length; consider "123\x00567" and &a[5]. + return UnknownVal(); + default: + // Other regions (mostly non-data) can't have a reliable C string length. 
+ return UndefinedVal(); + } +} + +void dumpCStringLengths(ProgramStateRef State, raw_ostream &Out, const char *NL, + const char *Sep) { + const CStringLengthMapTy Items = State->get(); + if (!Items.isEmpty()) + Out << "CString lengths:" << NL; + for (const auto &Item : Items) { + Item.first->dumpToStream(Out); + Out << Sep; + Item.second.dumpToStream(Out); + Out << NL; + } +} + +} // namespace cstring +} // namespace ento +} // namespace clang Index: clang/test/Analysis/analyzer-enabled-checkers.c =================================================================== --- clang/test/Analysis/analyzer-enabled-checkers.c +++ clang/test/Analysis/analyzer-enabled-checkers.c @@ -41,6 +41,7 @@ // CHECK-NEXT: security.insecureAPI.vfork // CHECK-NEXT: unix.API // CHECK-NEXT: unix.cstring.CStringModeling +// CHECK-NEXT: unix.cstring.CStringChecker // CHECK-NEXT: unix.DynamicMemoryModeling // CHECK-NEXT: unix.Malloc // CHECK-NEXT: unix.MallocSizeof