diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -21,210 +21,309 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" using namespace clang; using namespace ento; using namespace taint; +using ConcreteInt = nonloc::ConcreteInt; +using SymbolVal = nonloc::SymbolVal; namespace { -class ArrayBoundCheckerV2 : - public Checker { - mutable std::unique_ptr BT; +// FIXME: Eventually replace RegionRawOffset with this class. +class RegionRawOffsetV2 { +private: + const SubRegion *BaseRegion = nullptr; + SVal ByteOffset = UnknownVal(); + + RegionRawOffsetV2() = default; + + /// Compute a raw byte offset from a base region. + /// Flatten the nested ElementRegion structure into a byte-offset in a + /// SubRegion. E.g: + /// dereferenced location: + /// &Element{Element{Sym{reg_p}, conj{n, int}, char}, 3, int} + /// Finds the base region: + /// Region: Sym{reg_p} + /// Builds the corresponding symbolic offset expression: + /// Offset: SymIntExpr{conj{n, int}, +, 12, long long} + class RawOffsetCalculator final + : public MemRegionVisitor { + ProgramStateRef State; + SValBuilder &SVB; + + RegionRawOffsetV2 Leaf(const SubRegion *R) const { + return {R, SVB.makeArrayIndex(0)}; + } - enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; + public: + RawOffsetCalculator(ProgramStateRef State, SValBuilder &SVB) + : State(State), SVB(SVB) {} + using MemRegionVisitor::Visit; - void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor = nullptr) const; + auto VisitMemRegion(const MemRegion *R) { + return Leaf(dyn_cast(R)); + } -public: - void checkLocation(SVal l, bool isLoad, const Stmt*S, - CheckerContext &C) const; -}; + RegionRawOffsetV2 VisitElementRegion(const ElementRegion *ER) { + // For: Elem{SuperReg, ElemTy, ElemIdx} + // 1) Calculate the raw offset of the SuperReg. + // 2) Handle the current level. + // Offset := Offset + sizeof(ElemTy) * ElemIdx + const RegionRawOffsetV2 RawOffset = Visit(ER->getSuperRegion()); -// FIXME: Eventually replace RegionRawOffset with this class. -class RegionRawOffsetV2 { -private: - const SubRegion *baseRegion; - SVal byteOffset; + const QualType ElemTy = ER->getElementType(); + const NonLoc Index = ER->getIndex(); + + // If we can not calculate the sizeof ElemTy, erase result and give up. + if (ElemTy->isIncompleteType()) + return Leaf(nullptr); + + const NonLoc SizeofElemTy = SVB.makeArrayIndex( + SVB.getContext().getTypeSizeInChars(ElemTy).getQuantity()); - RegionRawOffsetV2() - : baseRegion(nullptr), byteOffset(UnknownVal()) {} + const QualType ArrayIndexTy = SVB.getArrayIndexType(); + const NonLoc ByteElementOffset = + SVB.evalBinOpNN(State, BO_Mul, Index, SizeofElemTy, ArrayIndexTy) + .castAs(); + + SVal NewByteOffset = SVB.evalBinOpNN( + State, BO_Add, RawOffset.getByteOffset().castAs(), + ByteElementOffset, ArrayIndexTy); + return {RawOffset.getRegion(), NewByteOffset}; + } + }; public: - RegionRawOffsetV2(const SubRegion* base, SVal offset) - : baseRegion(base), byteOffset(offset) {} + RegionRawOffsetV2(const SubRegion *BaseRegion, SVal ByteOffset) + : BaseRegion(BaseRegion), ByteOffset(ByteOffset) {} - NonLoc getByteOffset() const { return byteOffset.castAs(); } - const SubRegion *getRegion() const { return baseRegion; } + NonLoc getByteOffset() const { return ByteOffset.castAs(); } + const SubRegion *getRegion() const { return BaseRegion; } - static RegionRawOffsetV2 computeOffset(ProgramStateRef state, - SValBuilder &svalBuilder, - SVal location); + static RegionRawOffsetV2 computeOffset(ProgramStateRef State, + SValBuilder &SVB, + loc::MemRegionVal Location) { + return RawOffsetCalculator(State, SVB).Visit(Location.getRegion()); + } void dump() const; void dumpToStream(raw_ostream &os) const; }; -} - -static SVal computeExtentBegin(SValBuilder &svalBuilder, - const MemRegion *region) { - const MemSpaceRegion *SR = region->getMemorySpace(); - if (SR->getKind() == MemRegion::UnknownSpaceRegionKind) - return UnknownVal(); - else - return svalBuilder.makeZeroArrayIndex(); -} - -// TODO: once the constraint manager is smart enough to handle non simplified -// symbolic expressions remove this function. Note that this can not be used in -// the constraint manager as is, since this does not handle overflows. It is -// safe to assume, however, that memory offsets will not overflow. -static std::pair -getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, - SValBuilder &svalBuilder) { - Optional SymVal = offset.getAs(); - if (SymVal && SymVal->isExpression()) { - if (const SymIntExpr *SIE = dyn_cast(SymVal->getSymbol())) { - llvm::APSInt constant = - APSIntType(extent.getValue()).convert(SIE->getRHS()); - switch (SIE->getOpcode()) { - case BO_Mul: - // The constant should never be 0 here, since it the result of scaling - // based on the size of a type which is never 0. - if ((extent.getValue() % constant) != 0) - return std::pair(offset, extent); - else - return getSimplifiedOffsets( - nonloc::SymbolVal(SIE->getLHS()), - svalBuilder.makeIntVal(extent.getValue() / constant), - svalBuilder); - case BO_Add: - return getSimplifiedOffsets( - nonloc::SymbolVal(SIE->getLHS()), - svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); - default: - break; - } - } - } - return std::pair(offset, extent); -} +/// Check if the pointer dereference would access (read or write) a memory +/// region outside of an array. +/// It has much smarter logic dealing with constant addition and multiplication +/// in the indexer expression, such as: For 'buf[x+3]' it would check if 'x' is +/// either **below** '-3' or 'x' is greater then or equal to 'extentOf(buf)' to +/// emit a bug report. Otherwise, we will assume that the indexer expression +/// **in bound of** the valid range. +class ArrayBoundCheckerV2 : public Checker { + mutable std::unique_ptr BT; -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, - const Stmt* LoadS, - CheckerContext &checkerContext) const { + enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; - // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping - // some new logic here that reasons directly about memory region extents. - // Once that logic is more mature, we can bring it back to assumeInBound() - // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. - ProgramStateRef state = checkerContext.getState(); + void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, + std::unique_ptr Visitor = nullptr) const; - SValBuilder &svalBuilder = checkerContext.getSValBuilder(); - const RegionRawOffsetV2 &rawOffset = - RegionRawOffsetV2::computeOffset(state, svalBuilder, location); + // Returns null state if reported bug, non-null otherwise. + ProgramStateRef checkLowerBound(CheckerContext &Ctx, SValBuilder &SVB, + const ProgramStateRef State, + RegionRawOffsetV2 RawOffset) const; - if (!rawOffset.getRegion()) - return; + // Returns null state if reported bug, non-null otherwise. + ProgramStateRef checkUpperBound(CheckerContext &Ctx, SValBuilder &SVB, + const ProgramStateRef State, + RegionRawOffsetV2 RawOffset) const; - NonLoc rawOffsetVal = rawOffset.getByteOffset(); +public: + void checkLocation(SVal l, bool isLoad, const Stmt *S, + CheckerContext &Ctx) const; +}; - // CHECK LOWER BOUND: Is byteOffset < extent begin? - // If so, we are doing a load/store - // before the first valid offset in the memory region. +std::pair simplify(SValBuilder &SVB, NonLoc Root, + ConcreteInt Index) { + /// Peel of SymIntExprs one by one while folding the constants on the right. + /// + /// It reorders the expression `Root` and `Index` at the same time. + /// Eg: if `Root` is: + /// `x + 5` then `Index := Index - (typeof(Index))5` + /// `x * 5` then `Index := Index / (typeof(Index))5` + /// Then recurse on `x`. + class SymExprSimplifier final : public SymExprVisitor { + SValBuilder &SVB; + const SymExpr *RootSymbol; + ConcreteInt Index; + + public: + SymExprSimplifier(SValBuilder &SVB, const SymExpr *RootSymbol, + ConcreteInt Index) + : SVB(SVB), RootSymbol(RootSymbol), Index(Index) {} + using SymExprVisitor::Visit; + + void VisitSymIntExpr(const SymIntExpr *E) { + const BinaryOperator::Opcode Op = E->getOpcode(); + if (Op != BO_Add && Op != BO_Mul) + return; - SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion()); + llvm::APSInt RHSConstant = + APSIntType(Index.getValue()).convert(E->getRHS()); - if (Optional NV = extentBegin.getAs()) { - if (NV->getAs()) { - std::pair simplifiedOffsets = - getSimplifiedOffsets(rawOffset.getByteOffset(), - NV->castAs(), - svalBuilder); - rawOffsetVal = simplifiedOffsets.first; - *NV = simplifiedOffsets.second; - } + if (Op == BO_Add) { + Index = SVB.makeIntVal(Index.getValue() - RHSConstant); + RootSymbol = E->getLHS(); + Visit(RootSymbol); + return; + } - SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV, - svalBuilder.getConditionType()); + assert(Op == BO_Mul); - Optional lowerBoundToCheck = lowerBound.getAs(); - if (!lowerBoundToCheck) - return; + // The constant should never be 0 here, since it the result of scaling + // based on the size of a type which is never 0. + assert(!RHSConstant.isNullValue()); - ProgramStateRef state_precedesLowerBound, state_withinLowerBound; - std::tie(state_precedesLowerBound, state_withinLowerBound) = - state->assume(*lowerBoundToCheck); + // If the NewExtent is not divisible, we can not further simplify + // expressions. + if ((Index.getValue() % RHSConstant) != 0) + return; - // Are we constrained enough to definitely precede the lower bound? - if (state_precedesLowerBound && !state_withinLowerBound) { - reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); - return; + Index = SVB.makeIntVal(Index.getValue() / RHSConstant); + RootSymbol = E->getLHS(); + Visit(RootSymbol); } - // Otherwise, assume the constraint of the lower bound. - assert(state_withinLowerBound); - state = state_withinLowerBound; + SymbolVal getRootSymbol() const { return RootSymbol; } + ConcreteInt getFoldedIndex() const { return Index; } + }; + if (const auto SymbolicRoot = Root.getAs()) { + SymExprSimplifier Visitor(SVB, SymbolicRoot->getSymbol(), Index); + Visitor.Visit(SymbolicRoot->getSymbol()); + return {Visitor.getRootSymbol(), Visitor.getFoldedIndex()}; + } + assert(Root.getAs() && "Root must be either int or symbol."); + return {Root, Index}; +} +} // namespace + +ProgramStateRef +ArrayBoundCheckerV2::checkLowerBound(CheckerContext &Ctx, SValBuilder &SVB, + const ProgramStateRef State, + RegionRawOffsetV2 RawOffset) const { + // If we don't know that the pointer points to the beginning of the region, + // skip lower-bound check. + if (isa(RawOffset.getRegion()->getMemorySpace())) + return State; + + ConcreteInt Zero = SVB.makeZeroArrayIndex().castAs(); + SVal RootNonLoc; + SVal ConstantFoldedRHS; + std::tie(RootNonLoc, ConstantFoldedRHS) = + simplify(SVB, RawOffset.getByteOffset(), Zero); + + NonLoc LowerBoundCheck = + SVB.evalBinOpNN(State, BO_LT, RootNonLoc.castAs(), + ConstantFoldedRHS.castAs(), + SVB.getConditionType()) + .castAs(); + + ProgramStateRef PrecedesLowerBound, WithinLowerBound; + std::tie(PrecedesLowerBound, WithinLowerBound) = + State->assume(LowerBoundCheck); + + if (PrecedesLowerBound && !WithinLowerBound) { + reportOOB(Ctx, PrecedesLowerBound, OOB_Precedes); + return nullptr; } - do { - // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so, - // we are doing a load/store after the last valid offset. - const MemRegion *MR = rawOffset.getRegion(); - DefinedOrUnknownSVal Size = getDynamicSize(state, MR, svalBuilder); - if (!Size.getAs()) - break; - - if (Size.getAs()) { - std::pair simplifiedOffsets = - getSimplifiedOffsets(rawOffset.getByteOffset(), - Size.castAs(), svalBuilder); - rawOffsetVal = simplifiedOffsets.first; - Size = simplifiedOffsets.second; - } + // Otherwise, assume the constraint of the lower bound. + assert(WithinLowerBound); + return WithinLowerBound; +} - SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal, - Size.castAs(), - svalBuilder.getConditionType()); +// Returns null state if reported bug, non-null otherwise. +ProgramStateRef +ArrayBoundCheckerV2::checkUpperBound(CheckerContext &Ctx, SValBuilder &SVB, + const ProgramStateRef State, + RegionRawOffsetV2 RawOffset) const { + DefinedOrUnknownSVal Extent = + getDynamicSize(State, RawOffset.getRegion(), SVB); + + if (!Extent.getAs()) + return State; + + NonLoc RawByteOffset = RawOffset.getByteOffset(); + if (const auto ExtentInt = Extent.getAs()) { + std::tie(RawByteOffset, Extent) = + simplify(SVB, RawOffset.getByteOffset(), *ExtentInt); + } - Optional upperboundToCheck = upperbound.getAs(); - if (!upperboundToCheck) - break; + NonLoc UpperBoundCheck = + SVB.evalBinOpNN(State, BO_GE, RawByteOffset, Extent.castAs(), + SVB.getConditionType()) + .castAs(); + + ProgramStateRef ExceedsUpperBound, WithinUpperBound; + std::tie(ExceedsUpperBound, WithinUpperBound) = + State->assume(UpperBoundCheck); + + // If we are under constrained and the index variables are tainted, report. + if (ExceedsUpperBound && WithinUpperBound) { + SVal ByteOffset = RawOffset.getByteOffset(); + if (isTainted(State, ByteOffset)) { + reportOOB(Ctx, ExceedsUpperBound, OOB_Tainted, + std::make_unique(ByteOffset)); + return nullptr; + } + } else if (ExceedsUpperBound) { + // If we are constrained enough to definitely exceed the upper bound, + // report. + assert(!WithinUpperBound); + reportOOB(Ctx, ExceedsUpperBound, OOB_Excedes); + return nullptr; + } - ProgramStateRef state_exceedsUpperBound, state_withinUpperBound; - std::tie(state_exceedsUpperBound, state_withinUpperBound) = - state->assume(*upperboundToCheck); + assert(WithinUpperBound); + return WithinUpperBound; +} - // If we are under constrained and the index variables are tainted, report. - if (state_exceedsUpperBound && state_withinUpperBound) { - SVal ByteOffset = rawOffset.getByteOffset(); - if (isTainted(state, ByteOffset)) { - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, - std::make_unique(ByteOffset)); - return; - } - } else if (state_exceedsUpperBound) { - // If we are constrained enough to definitely exceed the upper bound, - // report. - assert(!state_withinUpperBound); - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); - return; - } +void ArrayBoundCheckerV2::checkLocation(SVal Location, bool, const Stmt *, + CheckerContext &Ctx) const { + // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping + // some new logic here that reasons directly about memory region extents. + // Once that logic is more mature, we can bring it back to assumeInBound() + // for all clients to use. + // + // The algorithm we are using here for bounds checking is to see if the + // memory access is within the extent of the base region. Since we + // have some flexibility in defining the base region, we can achieve + // various levels of conservatism in our buffer overflow checking. + // TODO: Check if these comments are still valid: + // - Why would anyone use this 'experimental logic' if it does not handle + // overflows? + // - "we can achieve various levels of conservatism" What? + ProgramStateRef State = Ctx.getState(); + SValBuilder &SVB = Ctx.getSValBuilder(); + const RegionRawOffsetV2 RawOffset = RegionRawOffsetV2::computeOffset( + State, SVB, Location.castAs()); + assert(RawOffset.getRegion() && "It should be a valid region."); + + // Is byteOffset < extent begin? + // If so, we are doing a load/store before the first valid offset in the + // memory region. + State = checkLowerBound(Ctx, SVB, State, RawOffset); + if (!State) + return; - assert(state_withinUpperBound); - state = state_withinUpperBound; - } - while (false); + // Is byteOffset >= extentOf(BaseRegion)? + // If so, we are doing a load/store after the last valid offset. + State = checkUpperBound(Ctx, SVB, State, RawOffset); + if (!State) + return; - checkerContext.addTransition(state); + // Continue the analysis while we know that this access must be valid. + Ctx.addTransition(State); } void ArrayBoundCheckerV2::reportOOB( @@ -271,87 +370,6 @@ } #endif -// Lazily computes a value to be used by 'computeOffset'. If 'val' -// is unknown or undefined, we lazily substitute '0'. Otherwise, -// return 'val'. -static inline SVal getValue(SVal val, SValBuilder &svalBuilder) { - return val.getAs() ? svalBuilder.makeArrayIndex(0) : val; -} - -// Scale a base value by a scaling factor, and return the scaled -// value as an SVal. Used by 'computeOffset'. -static inline SVal scaleValue(ProgramStateRef state, - NonLoc baseVal, CharUnits scaling, - SValBuilder &sb) { - return sb.evalBinOpNN(state, BO_Mul, baseVal, - sb.makeArrayIndex(scaling.getQuantity()), - sb.getArrayIndexType()); -} - -// Add an SVal to another, treating unknown and undefined values as -// summing to UnknownVal. Used by 'computeOffset'. -static SVal addValue(ProgramStateRef state, SVal x, SVal y, - SValBuilder &svalBuilder) { - // We treat UnknownVals and UndefinedVals the same here because we - // only care about computing offsets. - if (x.isUnknownOrUndef() || y.isUnknownOrUndef()) - return UnknownVal(); - - return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs(), - y.castAs(), - svalBuilder.getArrayIndexType()); -} - -/// Compute a raw byte offset from a base region. Used for array bounds -/// checking. -RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state, - SValBuilder &svalBuilder, - SVal location) -{ - const MemRegion *region = location.getAsRegion(); - SVal offset = UndefinedVal(); - - while (region) { - switch (region->getKind()) { - default: { - if (const SubRegion *subReg = dyn_cast(region)) { - offset = getValue(offset, svalBuilder); - if (!offset.isUnknownOrUndef()) - return RegionRawOffsetV2(subReg, offset); - } - return RegionRawOffsetV2(); - } - case MemRegion::ElementRegionKind: { - const ElementRegion *elemReg = cast(region); - SVal index = elemReg->getIndex(); - if (!index.getAs()) - return RegionRawOffsetV2(); - QualType elemType = elemReg->getElementType(); - // If the element is an incomplete type, go no further. - ASTContext &astContext = svalBuilder.getContext(); - if (elemType->isIncompleteType()) - return RegionRawOffsetV2(); - - // Update the offset. - offset = addValue(state, - getValue(offset, svalBuilder), - scaleValue(state, - index.castAs(), - astContext.getTypeSizeInChars(elemType), - svalBuilder), - svalBuilder); - - if (offset.isUnknownOrUndef()) - return RegionRawOffsetV2(); - - region = elemReg->getSuperRegion(); - continue; - } - } - } - return RegionRawOffsetV2(); -} - void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { mgr.registerChecker(); }