diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -30,8 +30,7 @@ using namespace taint; namespace { -class ArrayBoundCheckerV2 : - public Checker { +class ArrayBoundCheckerV2 : public Checker> { mutable std::unique_ptr BT; mutable std::unique_ptr TaintBT; @@ -45,8 +44,7 @@ static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); public: - void checkLocation(SVal l, bool isLoad, const Stmt *S, - CheckerContext &C) const; + void checkPreStmt(const ArraySubscriptExpr *ASE, CheckerContext &C) const; }; // FIXME: Eventually replace RegionRawOffset with this class. @@ -63,7 +61,8 @@ const SubRegion *getRegion() const { return baseRegion; } static std::optional - computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location); + computeOffset(ProgramStateRef State, SValBuilder &SVB, + const LocationContext *LCtx, const ArraySubscriptExpr *ASE); void dump() const; void dumpToStream(raw_ostream &os) const; @@ -86,8 +85,8 @@ APSIntType(extent.getValue()).convert(SIE->getRHS()); switch (SIE->getOpcode()) { case BO_Mul: - // The constant should never be 0 here, since it the result of scaling - // based on the size of a type which is never 0. + // In a SymIntExpr multiplication the constant cannot be 0, because + // then the enginge would've replaced it with a constant 0 value. if ((extent.getValue() % constant) != 0) return std::pair(offset, extent); else @@ -140,44 +139,41 @@ return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, - const Stmt* LoadS, - CheckerContext &checkerContext) const { - +void ArrayBoundCheckerV2::checkPreStmt(const ArraySubscriptExpr *ASE, + CheckerContext &C) const { // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping // some new logic here that reasons directly about memory region extents. // Once that logic is more mature, we can bring it back to assumeInBound() // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) // and incomplete analysis of these leads to false positives. As even // accurate reports would be confusing for the users, just disable reports // from these macros: - if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) + if (isFromCtypeMacro(ASE, C.getASTContext())) return; - ProgramStateRef state = checkerContext.getState(); + ProgramStateRef state = C.getState(); + + SValBuilder &svalBuilder = C.getSValBuilder(); + + const LocationContext *LCtx = C.getLocationContext(); - SValBuilder &svalBuilder = checkerContext.getSValBuilder(); const std::optional &RawOffset = - RegionRawOffsetV2::computeOffset(state, svalBuilder, location); + RegionRawOffsetV2::computeOffset(state, svalBuilder, LCtx, ASE); if (!RawOffset) return; NonLoc ByteOffset = RawOffset->getByteOffset(); + const SubRegion *Reg = RawOffset->getRegion(); // CHECK LOWER BOUND - const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace(); - if (!llvm::isa(SR)) { - // A pointer to UnknownSpaceRegion may point to the middle of - // an allocated region. + const MemSpaceRegion *Space = Reg->getMemorySpace(); + if (!(isa(Reg) && isa(Space))) { + // Symbolic regions in unknown spaces are used for modelling unknown + // pointers, so they may point to the middle of an array. auto [state_precedesLowerBound, state_withinLowerBound] = compareValueToThreshold(state, ByteOffset, @@ -185,7 +181,7 @@ if (state_precedesLowerBound && !state_withinLowerBound) { // We know that the index definitely precedes the lower bound. - reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); + reportOOB(C, state_precedesLowerBound, OOB_Precedes); return; } @@ -194,8 +190,7 @@ } // CHECK UPPER BOUND - DefinedOrUnknownSVal Size = - getDynamicExtent(state, RawOffset->getRegion(), svalBuilder); + DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder); if (auto KnownSize = Size.getAs()) { auto [state_withinUpperBound, state_exceedsUpperBound] = compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder); @@ -203,12 +198,12 @@ if (state_exceedsUpperBound) { if (!state_withinUpperBound) { // We know that the index definitely exceeds the upper bound. - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + reportOOB(C, state_exceedsUpperBound, OOB_Excedes); return; } if (isTainted(state, ByteOffset)) { // Both cases are possible, but the index is tainted, so report. - reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset); + reportTaintOOB(C, state_exceedsUpperBound, ByteOffset); return; } } @@ -217,7 +212,7 @@ state = state_withinUpperBound; } - checkerContext.addTransition(state); + C.addTransition(state); } void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext, @@ -303,53 +298,50 @@ } #endif -/// For a given Location that can be represented as a symbolic expression -/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block -/// Arr and the distance of Location from the beginning of Arr (expressed in a -/// NonLoc that specifies the number of CharUnits). Returns nullopt when these -/// cannot be determined. +/// For a given ArraySubscriptExpr 'Ptr[Idx]', calculate a (Region, Offset) +/// pair where Region is the memory area that may be legitimately accessed via +/// Ptr and Offset is the distance of Ptr[Idx] from the beginning of Region +/// (expressed as a NonLoc that specifies the number of CharUnits). Returns +/// nullopt when these cannot be determined. std::optional RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB, - SVal Location) { - QualType T = SVB.getArrayIndexType(); - auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { - // We will use this utility to add and multiply values. - return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs(); - }; - - const MemRegion *Region = Location.getAsRegion(); - NonLoc Offset = SVB.makeZeroArrayIndex(); - - while (Region) { - if (const auto *ERegion = dyn_cast(Region)) { - if (const auto Index = ERegion->getIndex().getAs()) { - QualType ElemType = ERegion->getElementType(); - // If the element is an incomplete type, go no further. - if (ElemType->isIncompleteType()) - return std::nullopt; - - // Perform Offset += Index * sizeof(ElemType); then continue the offset - // calculations with SuperRegion: - NonLoc Size = SVB.makeArrayIndex( - SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); - if (auto Delta = Calc(BO_Mul, *Index, Size)) { - if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) { - Offset = *NewOffset; - Region = ERegion->getSuperRegion(); - continue; - } - } + const LocationContext *LCtx, + const ArraySubscriptExpr *ASE) { + QualType ElemType = ASE->getType(); + assert(!ElemType->isIncompleteType()); + + QualType IdxType = SVB.getArrayIndexType(); + + if (auto Idx = State->getSVal(ASE->getIdx(), LCtx).getAs()) { + const MemRegion *R = State->getSVal(ASE->getBase(), LCtx).getAsRegion(); + const ElementRegion *ER; + while ((ER = dyn_cast_or_null(R)) && + ER->getElementType() == ElemType) { + // Special case to strip ElementRegion layers that represent pointer + // arithmetics. For example in code like + // char Text[] = "Hello world!", *Word = Text+6; + // the area *Word is represented as Element{, 6 S64B, char} which + // is nominally a single byte area, but logically we want to handle + // Word[X] equivalently to Text[X + 6]. On the other hand if we have + // int Matrix[10][10]; + // we want to warn when we see Matrix[0][20] because this is logically + // invalid and technically invokes undefined behavior when it takes the + // 20th element of the int[10] value Matrix[0]. + R = ER->getSuperRegion(); + Idx = SVB.evalBinOpNN(State, BO_Add, *Idx, ER->getIndex(), IdxType) + .getAs(); + if (!Idx) + return std::nullopt; + } + + if (const auto *SRegion = dyn_cast_or_null(R)) { + NonLoc ESize = SVB.makeArrayIndex( + SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); + SVal Offset = SVB.evalBinOpNN(State, BO_Mul, ESize, *Idx, IdxType); + if (const auto OffsetNL = Offset.getAs()) { + return RegionRawOffsetV2(SRegion, *OffsetNL); } - } else if (const auto *SRegion = dyn_cast(Region)) { - // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising - // to see a MemSpaceRegion at this point. - // FIXME: We may return with {, 0} even if we didn't handle any - // ElementRegion layers. I think that this behavior was introduced - // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so - // it may be useful to review it in the future. - return RegionRawOffsetV2(SRegion, Offset); } - return std::nullopt; } return std::nullopt; }