diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2122,6 +2122,8 @@ """"""""""""""""""""""""""""""" Warn about buffer overflows (newer checker). +TODO: Update the documentation. + .. code-block:: c void test() { diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Taint.h" -#include "clang/AST/CharUnits.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -23,15 +22,77 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +// Enable dumps using the following flag combination: +// '-mllvm -debug-only=clang-analyzer-array-bound-checker-v2' +#define DEBUG_TYPE "clang-analyzer-array-bound-checker-v2" + using namespace clang; using namespace ento; using namespace taint; + using ConcreteInt = nonloc::ConcreteInt; using SymbolVal = nonloc::SymbolVal; +using APSInt = llvm::APSInt; namespace { +/// Returns true if the integer can be casted to signed with the same bit width +/// without wrapping. +static bool representableBySigned(const APSInt &X) { + return X.isSigned() || X.isSignBitClear(); +} + +/// Gets the minimal bit width signed type to be able to represent both integers +/// without losing precision or wrapping. +static APSIntType commonSignedTypeToFit(const APSInt &Lhs, const APSInt &Rhs) { + unsigned GreaterBitWidth = std::max(Lhs.getBitWidth(), Rhs.getBitWidth()); + APSIntType Signed = APSIntType(GreaterBitWidth, /*Unsigned=*/false); + APSIntType ExtendedSigned = + APSIntType(GreaterBitWidth + 1, /*Unsigned=*/false); + + const bool NeedsExtraBitToPreserveSigness = + Signed.testInRange(Lhs, /*AllowMixedSign=*/false) != + clang::ento::APSIntType::RTR_Within || + Signed.testInRange(Rhs, /*AllowMixedSign=*/false) != + clang::ento::APSIntType::RTR_Within; + + return NeedsExtraBitToPreserveSigness ? Signed : ExtendedSigned; +} + +/// Evaluates the plus/minus operator of two integers. +/// Uses a wide enough bit width to preserve the values of the operands and the +/// result as well. The returned integer will be a large enough **signed** +/// integer. Note that the bit width of the result can be larger then the +/// minimal bit width of the stored value. +template +static std::enable_if_t>::value || + std::is_same>::value, + APSInt> +applyOperatorWithoutWrapping(APSInt Lhs, Operator Op, APSInt Rhs) { + unsigned BitWidth = commonSignedTypeToFit(Lhs, Rhs).getBitWidth(); + // Use a greater type to prevent overflow/underflow. + APSIntType CommonType(BitWidth + 1, /*Unsigned=*/false); + CommonType.apply(Lhs); + CommonType.apply(Rhs); + return Op(Lhs, Rhs); +} + +/// Returns the absolut value of the integer. It handles the case when the value +/// is MIN, whose negated likely to not have a positive representation. The +/// returned integer will be a large enough **signed** integer. Note that the +/// bit width of the result can be larger then the minimal bit width of the +/// stored value. +static APSInt absWithoutWrapping(const llvm::APSInt &Value) { + // If unsigned, we might need a sign bit. + // If the value is MIN, then we can not simply take the abs, we need another + // extra bit. + APSIntType ExtendedSigned(Value.getBitWidth() + 2, /*Unsigned=*/false); + return APSInt(ExtendedSigned.convert(Value).abs(), + /*isUnsigned=*/false); +} + // FIXME: Eventually replace RegionRawOffset with this class. class RegionRawOffsetV2 { private: @@ -130,103 +191,363 @@ // Returns null state if reported bug, non-null otherwise. ProgramStateRef checkLowerBound(CheckerContext &Ctx, SValBuilder &SVB, - const ProgramStateRef State, - RegionRawOffsetV2 RawOffset) const; + ProgramStateRef State, + const APSInt &InclusiveLowerBound, + NonLoc RootExpr) const; // Returns null state if reported bug, non-null otherwise. ProgramStateRef checkUpperBound(CheckerContext &Ctx, SValBuilder &SVB, - const ProgramStateRef State, - RegionRawOffsetV2 RawOffset) const; + ProgramStateRef State, NonLoc RootExpr, + const APSInt &ExclusiveUpperBound) const; public: void checkLocation(SVal l, bool isLoad, const Stmt *S, CheckerContext &Ctx) const; }; -std::pair simplify(SValBuilder &SVB, NonLoc Root, - ConcreteInt Index) { - /// Peel of SymIntExprs one by one while folding the constants on the right. - /// - /// It reorders the expression `Root` and `Index` at the same time. - /// Eg: if `Root` is: - /// `x + 5` then `Index := Index - (typeof(Index))5` - /// `x * 5` then `Index := Index / (typeof(Index))5` - /// Then recurse on `x`. - class SymExprSimplifier final : public SymExprVisitor { - SValBuilder &SVB; - const SymExpr *RootSymbol; - ConcreteInt Index; +/// Peel of SymIntExprs and IntSymExprs one by one while folding the constants +/// into the LowerBound and UpperBound. +/// Both bounds treated as signed integers of large enough bit width to prevent +/// accidental overflow/underflow during simplification. +/// +/// This rearrangement of the inequality holds only if the symbolic expression +/// did not overflow/underflow. We enforce this assumption by having a State +/// which extended during each step of the simplification with the necessary +/// constraint to prove this. +/// This is a heuristic to make this rearrangement (simplification) process +/// valid. +/// +/// SimplificationFailed represents if an aforementioned constraint is proven to +/// be unsatisfiable, in other words the symbolic expression (RootSymbol) must +/// wrap to satisfy the inequality. +class BestEffortSymExprSimplifier final + : public SymExprVisitor { + friend class SymExprVisitor; + /// The aggregated state which has all the constraints made by the heuristic. + ProgramStateRef LastValidState; + SValBuilder &SVB; + SymbolRef RootSymbol; /// The symbol we want to simplify. + /// The inclusive lower bound (signed integer of large enough bit width). + APSInt LowerBound; + /// The exclusive upper bound (signed integer of large enough bit width). + APSInt UpperBound; + bool SimplificationFailed = false; - public: - SymExprSimplifier(SValBuilder &SVB, const SymExpr *RootSymbol, - ConcreteInt Index) - : SVB(SVB), RootSymbol(RootSymbol), Index(Index) {} - using SymExprVisitor::Visit; - - void VisitSymIntExpr(const SymIntExpr *E) { - const BinaryOperator::Opcode Op = E->getOpcode(); - if (Op != BO_Add && Op != BO_Mul) - return; - - llvm::APSInt RHSConstant = - APSIntType(Index.getValue()).convert(E->getRHS()); - - if (Op == BO_Add) { - Index = SVB.makeIntVal(Index.getValue() - RHSConstant); - RootSymbol = E->getLHS(); - Visit(RootSymbol); - return; - } - - assert(Op == BO_Mul); - - // The constant should never be 0 here, since it the result of scaling - // based on the size of a type which is never 0. - assert(!RHSConstant.isNullValue()); - - // If the NewExtent is not divisible, we can not further simplify - // expressions. - if ((Index.getValue() % RHSConstant) != 0) - return; - - Index = SVB.makeIntVal(Index.getValue() / RHSConstant); - RootSymbol = E->getLHS(); - Visit(RootSymbol); - } +public: + BestEffortSymExprSimplifier(ProgramStateRef State, SValBuilder &SVB, + const APSInt &InclusiveLowerBound, + SymbolRef RootSymbol, + const APSInt &ExclusiveUpperBound) + : LastValidState(std::move(State)), SVB(SVB), RootSymbol(RootSymbol) { + assert(LastValidState); + + // Store InclusiveLowerBound as a signed value, extend the bit width if + // necessary to preserve the value. + const unsigned LowNeededBitWidth = + InclusiveLowerBound.getBitWidth() + + (representableBySigned(InclusiveLowerBound) ? 0 : 1); + LowerBound = APSIntType(LowNeededBitWidth, /*Unsigned=*/false) + .convert(InclusiveLowerBound); + + // Do the same for the ExclusiveUpperBound. + const unsigned HighNeededBitWidth = + ExclusiveUpperBound.getBitWidth() + + (representableBySigned(ExclusiveUpperBound) ? 0 : 1); + UpperBound = APSIntType(HighNeededBitWidth, /*Unsigned=*/false) + .convert(ExclusiveUpperBound); + } - SymbolVal getRootSymbol() const { return RootSymbol; } - ConcreteInt getFoldedIndex() const { return Index; } + struct SimplificationResult { + ProgramStateRef State; + APSInt InclusiveLowerBound; + APSInt ExclusiveUpperBound; + NonLoc RootSymbol; + bool SimplificationFailed; }; - if (const auto SymbolicRoot = Root.getAs()) { - SymExprSimplifier Visitor(SVB, SymbolicRoot->getSymbol(), Index); - Visitor.Visit(SymbolicRoot->getSymbol()); - return {Visitor.getRootSymbol(), Visitor.getFoldedIndex()}; + + /// Starts the simplification process. + SimplificationResult Simplify() { + LLVM_DEBUG(llvm::dbgs() << __func__ << ": initially: '"; + ConcreteInt(LowerBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << " <= " << RootSymbol << " < "; + ConcreteInt(UpperBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << "'\n";); + SymExprVisitor::Visit(RootSymbol); + assert(LastValidState); + LLVM_DEBUG(llvm::dbgs() + << __func__ << ": simplification " + << (SimplificationFailed ? "failed" : "succeeded") << ": '"; + ConcreteInt(LowerBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << " <= " << RootSymbol << " < "; + ConcreteInt(UpperBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << "'\nLastValidState: "; + LastValidState->printJson(llvm::dbgs()); llvm::dbgs() << '\n';); + return {std::move(LastValidState), std::move(LowerBound), + std::move(UpperBound), SymbolVal(RootSymbol), SimplificationFailed}; + } + +private: + void VisitSymIntExpr(const SymIntExpr *E) { + assert(!SimplificationFailed); + + switch (E->getOpcode()) { + default: + return; + case BO_Sub: + return VisitSymSubIntExpr(E->getLHS(), E->getRHS()); + case BO_Add: + return VisitSymAddIntExpr(E->getLHS(), E->getRHS()); + case BO_Mul: + return VisitSymMulIntExpr(E->getLHS(), E->getRHS()); + } + } + + void VisitIntSymExpr(const IntSymExpr *E) { + return; // TODO: Handle this as well. + } + + // Simplify: LowerBound <= sym - C < UpperBound + // Into: LowerBound + C <= sym < UpperBound + C + void VisitSymSubIntExpr(SymbolRef LHS, const APSInt &C) { + assert(!SimplificationFailed); + assert(LowerBound.isSigned()); + assert(UpperBound.isSigned()); + + // Subtracting a negative number is like adding it. + if (C.isNegative()) { + VisitSymAddIntExpr(LHS, absWithoutWrapping(C)); + return; + } + assert(C.isNonNegative()); + + const APSInt &SymMin = + SVB.getBasicValueFactory().getMinValue(LHS->getType()); + // Type of SymMin can represent C, this conversion is fine. + ConcreteInt SmallestPossibleSymbolValue = + SVB.makeIntVal(SymMin + APSIntType(SymMin).convert(C)); + const NonLoc UnderflowCheck = + SVB.evalBinOpNN(LastValidState, BO_GE, SymbolVal(LHS), + SmallestPossibleSymbolValue, SVB.getConditionType()) + .castAs(); + if (ProgramStateRef NoUnderflowHappened = + LastValidState->assume(UnderflowCheck, true)) { + LastValidState = NoUnderflowHappened; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": assuming that '" << LHS + << " >= " << SmallestPossibleSymbolValue + << "' holds (aka. 'sym - C' don't underflow)\n";); + } else { + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '" << LHS + << " >= " << SmallestPossibleSymbolValue + << "' doesn't hold, could not simplify " + ", simplification stopped\n";); + SimplificationFailed = true; + return; + } + + // Otherwise safe to do the folding, and continue the simplification. + LowerBound = applyOperatorWithoutWrapping(LowerBound, std::plus<>(), C); + UpperBound = applyOperatorWithoutWrapping(UpperBound, std::plus<>(), C); + RootSymbol = LHS; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '"; + ConcreteInt(LowerBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << " <= " << RootSymbol << " < "; + ConcreteInt(UpperBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << "'\n";); + Visit(RootSymbol); // Continue the simplification. } - assert(Root.getAs() && "Root must be either int or symbol."); - return {Root, Index}; + + // Simplify: LowerBound <= sym + C < UpperBound + // Into: LowerBound - C <= sym < UpperBound - C + void VisitSymAddIntExpr(SymbolRef LHS, const APSInt &C) { + assert(!SimplificationFailed); + assert(LowerBound.isSigned()); + assert(UpperBound.isSigned()); + + // Adding a negative number is like subtracting it. + if (C.isNegative()) { + VisitSymSubIntExpr(LHS, absWithoutWrapping(C)); + return; + } + assert(C.isNonNegative()); + + const APSInt &SymMax = + SVB.getBasicValueFactory().getMaxValue(LHS->getType()); + + // Type of SymMax can represent C, this conversion is fine. + ConcreteInt GreatestPossibleSymbolValue = + SVB.makeIntVal(SymMax - APSIntType(SymMax).convert(C)); + const NonLoc OverflowCheck = + SVB.evalBinOpNN(LastValidState, BO_LE, SymbolVal(LHS), + GreatestPossibleSymbolValue, SVB.getConditionType()) + .castAs(); + if (ProgramStateRef NoOverflowHappened = + LastValidState->assume(OverflowCheck, true)) { + LastValidState = NoOverflowHappened; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": assuming that '" << LHS + << " <= " << GreatestPossibleSymbolValue + << "' holds (aka. 'sym + C' don't overflow)\n";); + } else { + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '" << LHS + << " <= " << GreatestPossibleSymbolValue + << "' doesn't hold, could not simplify " + ", simplification stopped\n";); + SimplificationFailed = true; + return; + } + + // Otherwise safe to do the folding, and continue the simplification. + LowerBound = applyOperatorWithoutWrapping(LowerBound, std::minus<>(), C); + UpperBound = applyOperatorWithoutWrapping(UpperBound, std::minus<>(), C); + RootSymbol = LHS; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '"; + ConcreteInt(LowerBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << " <= " << RootSymbol << " < "; + ConcreteInt(UpperBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << "'\n";); + Visit(RootSymbol); // Continue the simplification. + } + + // Simplify: LowerBound <= sym * C < UpperBound + // Into: floor(LowerBound / C) <= sym < ceil(UpperBound - C) + void VisitSymMulIntExpr(SymbolRef LHS, const APSInt &C) { + assert(!SimplificationFailed); + assert(LowerBound.isSigned()); + assert(UpperBound.isSigned()); + + const QualType SymTy = LHS->getType(); + const APSInt &SymMin = SVB.getBasicValueFactory().getMinValue(SymTy); + const APSInt &SymMax = SVB.getBasicValueFactory().getMaxValue(SymTy); + assert(!C.isNullValue() && "How can it be zero?"); + + // Check if 'sym * C' overflows. + // Type of SymMax can represent C, this conversion is fine. + ConcreteInt GreatestPossibleSymbolValue = + SVB.makeIntVal(SymMax / APSIntType(SymMax).convert(C)); + const NonLoc NoOverflowCheck = + SVB.evalBinOpNN(LastValidState, BO_LE, SymbolVal(LHS), + GreatestPossibleSymbolValue, SVB.getConditionType()) + .castAs(); + if (ProgramStateRef NoOverflowHappened = + LastValidState->assume(NoOverflowCheck, true)) { + LastValidState = NoOverflowHappened; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": assuming that '" << LHS + << " <= " << GreatestPossibleSymbolValue + << "' holds (aka. 'sym * C' don't overflow)\n"; + llvm::dbgs() << "LastValidState: "; + LastValidState->printJson(llvm::dbgs()); + llvm::dbgs() << '\n';); + } else { + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '" << LHS + << " <= " << GreatestPossibleSymbolValue + << "' doesn't hold, could not simplify " + ", simplification stopped\n";); + SimplificationFailed = true; + return; + } + + // Check if 'sym * C' underflows. + // Type of SymMin can represent C, this conversion is fine. + ConcreteInt SmallestPossibleSymbolValue = + SVB.makeIntVal(SymMin / APSIntType(SymMin).convert(C)); + const NonLoc UnderflowCheck = + SVB.evalBinOpNN(LastValidState, BO_GE, SymbolVal(LHS), + SmallestPossibleSymbolValue, SVB.getConditionType()) + .castAs(); + if (ProgramStateRef NoUnderflowHappened = + LastValidState->assume(UnderflowCheck, true)) { + LastValidState = NoUnderflowHappened; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": assuming that '" << LHS + << " >= " << SmallestPossibleSymbolValue + << "' holds (aka. 'sym * C' don't underflow)\n";); + } else { + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '" << LHS + << " >= " << SmallestPossibleSymbolValue + << "' doesn't hold, could not simplify " + ", simplification stopped\n";); + SimplificationFailed = true; + return; + } + + const auto Floor = [](const APSInt &X, const APSInt &Y) -> APSInt { + return X / Y; + }; + + const auto Ceil = [](const APSInt &X, const APSInt &Y) -> APSInt { + APSInt Quotient, Remainder; // Default constructs signed integers. + llvm::APInt::sdivrem(X, Y, Quotient, Remainder); + if (Remainder.isNullValue()) + return Quotient; + APSInt One = (APSInt(Quotient.getBitWidth(), false) = 1); + return Quotient + One; + }; + + // Fold the constants carefully. + APSIntType LowerTy = commonSignedTypeToFit(LowerBound, C); + APSIntType LowerTy2x = APSIntType(LowerTy.getBitWidth() * 2, false); + LowerBound = Floor(LowerTy2x.convert(LowerBound), LowerTy2x.convert(C)); + + APSIntType UpperTy = commonSignedTypeToFit(UpperBound, C); + APSIntType UpperTy2x = APSIntType(UpperTy.getBitWidth() * 2, false); + UpperBound = Ceil(UpperTy2x.convert(UpperBound), UpperTy2x.convert(C)); + + RootSymbol = LHS; + LLVM_DEBUG(llvm::dbgs() << __func__ << ": '"; + ConcreteInt(LowerBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << " <= " << RootSymbol << " < "; + ConcreteInt(UpperBound).dumpToStream(llvm::dbgs()); + llvm::dbgs() << "'\n";); + Visit(RootSymbol); // Continue the simplification. + } +}; // class BestEffortSymExprSimplifier + +using SimplificationResult = BestEffortSymExprSimplifier::SimplificationResult; + +SimplificationResult simplify(ProgramStateRef State, SValBuilder &SVB, + const APSInt &InclusiveLowerBound, + NonLoc RootExpr, + const APSInt &ExclusiveUpperBound) { + if (const auto SymbolicRoot = RootExpr.getAs()) { + return BestEffortSymExprSimplifier(State, SVB, InclusiveLowerBound, + SymbolicRoot->getSymbol(), + ExclusiveUpperBound) + .Simplify(); + } + assert(RootExpr.getAs() && + "Root must be either SymbolVal or a ConcreteInt."); + return {State, InclusiveLowerBound, ExclusiveUpperBound, RootExpr, false}; } } // namespace -ProgramStateRef -ArrayBoundCheckerV2::checkLowerBound(CheckerContext &Ctx, SValBuilder &SVB, - const ProgramStateRef State, - RegionRawOffsetV2 RawOffset) const { - // If we don't know that the pointer points to the beginning of the region, - // skip lower-bound check. - if (isa(RawOffset.getRegion()->getMemorySpace())) +ProgramStateRef ArrayBoundCheckerV2::checkLowerBound( + CheckerContext &Ctx, SValBuilder &SVB, ProgramStateRef State, + const APSInt &InclusiveLowerBound, NonLoc RootExpr) const { + if (const auto Index = RootExpr.getAs()) { + APSIntType STy = + commonSignedTypeToFit(Index->getValue(), InclusiveLowerBound); + + // If we index below the first valid index, report. + if (STy.convert(Index->getValue()) < STy.convert(InclusiveLowerBound)) { + reportOOB(Ctx, State, OOB_Precedes); + return nullptr; + } return State; + } - ConcreteInt Zero = SVB.makeZeroArrayIndex().castAs(); - SVal RootNonLoc; - SVal ConstantFoldedRHS; - std::tie(RootNonLoc, ConstantFoldedRHS) = - simplify(SVB, RawOffset.getByteOffset(), Zero); + const bool IsRootTyUnsigned = RootExpr.castAs() + .getSymbol() + ->getType() + ->isUnsignedIntegerOrEnumerationType(); + // Unsigned expression is always greater then any negative value. + // We need this check before the evalBinOp call, since that would potentially + // promote the negative value to a huge unsigned value before the actual + // comparison would happen. + if (IsRootTyUnsigned && InclusiveLowerBound.isNegative()) + return State; - NonLoc LowerBoundCheck = - SVB.evalBinOpNN(State, BO_LT, RootNonLoc.castAs(), - ConstantFoldedRHS.castAs(), - SVB.getConditionType()) - .castAs(); + NonLoc LowerBoundCheck = SVB.evalBinOpNN(State, BO_LT, RootExpr, + SVB.makeIntVal(InclusiveLowerBound), + SVB.getConditionType()) + .castAs(); ProgramStateRef PrecedesLowerBound, WithinLowerBound; std::tie(PrecedesLowerBound, WithinLowerBound) = @@ -239,30 +560,45 @@ // Otherwise, assume the constraint of the lower bound. assert(WithinLowerBound); + LLVM_DEBUG(llvm::dbgs() << __func__ << ": State at the end: "; + WithinLowerBound->printJson(llvm::dbgs()); llvm::dbgs() << '\n';); return WithinLowerBound; } // Returns null state if reported bug, non-null otherwise. ProgramStateRef ArrayBoundCheckerV2::checkUpperBound(CheckerContext &Ctx, SValBuilder &SVB, - const ProgramStateRef State, - RegionRawOffsetV2 RawOffset) const { - DefinedOrUnknownSVal Extent = - getDynamicSize(State, RawOffset.getRegion(), SVB); - - if (!Extent.getAs()) + ProgramStateRef State, NonLoc RootExpr, + const APSInt &ExclusiveUpperBound) const { + if (const auto Index = RootExpr.getAs()) { + APSIntType STy = + commonSignedTypeToFit(Index->getValue(), ExclusiveUpperBound); + + // If we index above the last valid index, report. + if (STy.convert(Index->getValue()) >= STy.convert(ExclusiveUpperBound)) { + reportOOB(Ctx, State, OOB_Excedes); + return nullptr; + } return State; + } - NonLoc RawByteOffset = RawOffset.getByteOffset(); - if (const auto ExtentInt = Extent.getAs()) { - std::tie(RawByteOffset, Extent) = - simplify(SVB, RawOffset.getByteOffset(), *ExtentInt); + const bool IsRootTyUnsigned = RootExpr.castAs() + .getSymbol() + ->getType() + ->isUnsignedIntegerType(); + // Unsigned expression is always greater then any non-positive value. + // We need this check before the evalBinOp call, since that would potentially + // promote the negative value to a huge unsigned value before the actual + // comparison would happen. + if (IsRootTyUnsigned && ExclusiveUpperBound.isNonPositive()) { + reportOOB(Ctx, State, OOB_Excedes); + return nullptr; } - NonLoc UpperBoundCheck = - SVB.evalBinOpNN(State, BO_GE, RawByteOffset, Extent.castAs(), - SVB.getConditionType()) - .castAs(); + NonLoc UpperBoundCheck = SVB.evalBinOpNN(State, BO_GE, RootExpr, + SVB.makeIntVal(ExclusiveUpperBound), + SVB.getConditionType()) + .castAs(); ProgramStateRef ExceedsUpperBound, WithinUpperBound; std::tie(ExceedsUpperBound, WithinUpperBound) = @@ -270,10 +606,9 @@ // If we are under constrained and the index variables are tainted, report. if (ExceedsUpperBound && WithinUpperBound) { - SVal ByteOffset = RawOffset.getByteOffset(); - if (isTainted(State, ByteOffset)) { + if (isTainted(State, RootExpr)) { reportOOB(Ctx, ExceedsUpperBound, OOB_Tainted, - std::make_unique(ByteOffset)); + std::make_unique(RootExpr)); return nullptr; } } else if (ExceedsUpperBound) { @@ -285,6 +620,8 @@ } assert(WithinUpperBound); + LLVM_DEBUG(llvm::dbgs() << __func__ << ": State at the end: "; + WithinUpperBound->printJson(llvm::dbgs()); llvm::dbgs() << '\n';); return WithinUpperBound; } @@ -299,29 +636,51 @@ // memory access is within the extent of the base region. Since we // have some flexibility in defining the base region, we can achieve // various levels of conservatism in our buffer overflow checking. - // TODO: Check if these comments are still valid: - // - Why would anyone use this 'experimental logic' if it does not handle - // overflows? - // - "we can achieve various levels of conservatism" What? + // TODO: Update the comment above. + LLVM_DEBUG(llvm::dbgs() << __func__ << ": Called with Location: " << Location + << '\n';); ProgramStateRef State = Ctx.getState(); SValBuilder &SVB = Ctx.getSValBuilder(); const RegionRawOffsetV2 RawOffset = RegionRawOffsetV2::computeOffset( State, SVB, Location.castAs()); assert(RawOffset.getRegion() && "It should be a valid region."); - // Is byteOffset < extent begin? - // If so, we are doing a load/store before the first valid offset in the - // memory region. - State = checkLowerBound(Ctx, SVB, State, RawOffset); - if (!State) + // If we don't know that the pointer points to the beginning of the region, + // skip check. + if (isa(RawOffset.getRegion()->getMemorySpace())) return; - // Is byteOffset >= extentOf(BaseRegion)? - // If so, we are doing a load/store after the last valid offset. - State = checkUpperBound(Ctx, SVB, State, RawOffset); + const ConcreteInt Zero = SVB.makeZeroArrayIndex().castAs(); + + const llvm::APSInt *ExtentValue = SVB.getKnownValue( + State, getDynamicSize(State, RawOffset.getRegion(), SVB)); + + // If we could not get the concrete extent, check only the lower bound. + // Use a dummy upper bound just to be able to do simplification. + const bool CheckUpperBoundToo = ExtentValue != nullptr; + const APSInt &ExclusiveUpperBound = + ExtentValue ? *ExtentValue : Zero.getValue(); + + SimplificationResult Simplified = + simplify(State, SVB, Zero.getValue(), RawOffset.getByteOffset(), + ExclusiveUpperBound); + if (Simplified.SimplificationFailed) + return; + + // Save the collected assumptions about overflow/underflow. + State = Simplified.State; + State = checkLowerBound(Ctx, SVB, State, Simplified.InclusiveLowerBound, + Simplified.RootSymbol); if (!State) return; + if (CheckUpperBoundToo) { + State = checkUpperBound(Ctx, SVB, State, Simplified.RootSymbol, + Simplified.ExclusiveUpperBound); + if (!State) + return; + } + // Continue the analysis while we know that this access must be valid. Ctx.addTransition(State); } @@ -335,12 +694,12 @@ return; if (!BT) - BT.reset(new BuiltinBug(this, "Out-of-bound access")); + BT = std::make_unique(this, "Out-of-bound access"); // FIXME: This diagnostics are preliminary. We should get far better // diagnostics for explaining buffer overruns. - SmallString<256> buf; + SmallString<128> buf; llvm::raw_svector_ostream os(buf); os << "Out of bound memory access "; switch (kind) { diff --git a/clang/test/Analysis/out-of-bounds-false-positive.c b/clang/test/Analysis/out-of-bounds-false-positive.c --- a/clang/test/Analysis/out-of-bounds-false-positive.c +++ b/clang/test/Analysis/out-of-bounds-false-positive.c @@ -8,12 +8,11 @@ const char a[] = "abcd"; // extent: 5 bytes void symbolic_size_t_and_int0(size_t len) { - // FIXME: Should not warn for this. - (void)a[len + 1]; // expected-warning {{Out of bound memory access}} + (void)a[len + 1]; // no-warning // We infered that the 'len' must be in a specific range to make the previous indexing valid. // len: [0,3] - clang_analyzer_eval(len <= 3); // expected - warning {{TRUE}} - clang_analyzer_eval(len <= 2); // expected - warning {{UNKNOWN}} + clang_analyzer_eval(len <= 3); // expected-warning {{TRUE}} + clang_analyzer_eval(len <= 2); // expected-warning {{UNKNOWN}} } void symbolic_size_t_and_int1(size_t len) { diff --git a/clang/test/Analysis/out-of-bounds-new.cpp b/clang/test/Analysis/out-of-bounds-new.cpp --- a/clang/test/Analysis/out-of-bounds-new.cpp +++ b/clang/test/Analysis/out-of-bounds-new.cpp @@ -1,4 +1,8 @@ -// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,alpha.security.ArrayBoundV2 -verify %s +// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds \ +// RUN: -analyzer-checker=unix,core,alpha.security.ArrayBoundV2,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false -verify %s + +void clang_analyzer_eval(int); // Tests doing an out-of-bounds access after the end of an array using: // - constant integer index @@ -120,14 +124,26 @@ // Tests normal access of // a multi-dimensional array -void test2_multi_ok(int x) { +void test2_multi_ok(int x, int y, int z) { auto buf = new int[100][100]; - buf[0][0] = 1; // no-warning + buf[0][0] = 1; // no-warning + + // The single symbol gets the constraint. + buf[x][99] = 1; // no-warning + clang_analyzer_eval(0 <= x && x <= 99); // expected-warning{{TRUE}} + + // The lexically present complex symbol gets the constraint: + buf[x+y][99] = 1; // no-warning + clang_analyzer_eval(0 <= x+y && x+y <= 99); // expected-warning{{TRUE}} + + // The lexically equivalent pair of the expression is still unknown. + // We are not smart enough to deal with such cases... + clang_analyzer_eval(0 <= y+x); // expected-warning{{UNKNOWN}} } // Tests over-indexing using different types // array -void test_diff_types(int x) { +void test_diff_types() { int *buf = new int[10]; //10*sizeof(int) Bytes allocated char *cptr = (char *)buf; cptr[sizeof(int) * 9] = 1; // no-warning @@ -136,7 +152,7 @@ // Tests over-indexing //if the allocated area is non-array -void test_non_array(int x) { +void test_non_array() { int *ip = new int; ip[0] = 1; // no-warning ip[1] = 2; // expected-warning{{Out of bound memory access}} @@ -146,11 +162,38 @@ //if the allocated area size is a runtime parameter void test_dynamic_size(int s) { int *buf = new int[s]; - buf[0] = 1; // no-warning + buf[0] = 1; // no-warning + buf[s - 1] = 1; // no-warning + buf[s] = 1; // no-warning +} + +void test_dynamic_size2(unsigned s) { + int *buf = new int[s]; + buf[0] = 1; // no-warning + buf[s - 1] = 1; // no-warning + buf[s] = 1; // no-warning } + //Tests complex arithmetic -//in new expression -void test_dynamic_size2(unsigned m,unsigned n){ - unsigned *U = nullptr; - U = new unsigned[m + n + 1]; +//in new expression, signed +void test_dynamic_size3(int m, int n, int z){ + // The extent is symbolic, thus we check the lower bound... + unsigned *U = new unsigned[m + 1]; + U[99999] = 1; // no-warning + U[n] = 1; // no-warning + // We don't constrain the acces with the symbolic access. + // FIXME: With some smart, we could cover this case as well. + clang_analyzer_eval(n <= m + 1); // expected-warning{{UNKNOWN}} + + // Constrain z to be negative. + if (z >= 0) + return; + + // We know that we access an element at a negative index. + U[z] = 1; // expected-warning {{Out of bound memory access (accessed memory precedes memory block)}} } + +// TODO: Add tests covering VLAs, mutlidim VLAs, partially known VLAs. +// TODO: Add tests showing that the given symbolic expression gets the proper constraints. +// TODO: Add test using taint. + diff --git a/clang/test/Analysis/out-of-bounds-v2-false-positive-hunter.cpp b/clang/test/Analysis/out-of-bounds-v2-false-positive-hunter.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/out-of-bounds-v2-false-positive-hunter.cpp @@ -0,0 +1,329 @@ +// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -std=c++17 \ +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false -verify %s + +void clang_analyzer_eval(int); +void clang_analyzer_printState(); +template void clang_analyzer_dump(T); + + +template struct is_same { + static constexpr bool value = false; +}; +template struct is_same { + static constexpr bool value = true; +}; + +template constexpr bool is_unsigned() { + return T(0) < T(-1); +}; + +template constexpr int array_size(const T (&)[N]) { + return N; +} + +/// Expression tree +struct sym {}; +template struct cons {}; +template struct plus {}; +template struct minus {}; +template struct mul {}; +template struct div {}; + +/// Count syms. +template struct count_syms; +template <> struct count_syms { + static constexpr unsigned value = 1; +}; +template struct count_syms> { + static constexpr unsigned value = 0; +}; +template struct count_syms> { + static constexpr unsigned value = count_syms::value + count_syms::value; +}; +template struct count_syms> { + static constexpr unsigned value = count_syms::value + count_syms::value; +}; +template struct count_syms> { + static constexpr unsigned value = count_syms::value + count_syms::value; +}; +template struct count_syms> { + static constexpr unsigned value = count_syms::value + count_syms::value; +}; + +/// Expression tree evaluator +template struct eval; +template <> struct eval { + template static constexpr auto f(T x) { return x; } +}; +template struct eval> { + template static constexpr auto f(T x) { return Value; } +}; +template struct eval> { + static_assert(count_syms::value + count_syms::value <= 1, "Single symbolic expression allowed currently."); + template static constexpr auto f(T x) { return eval::f(x) + eval::f(x); } +}; +template struct eval> { + static_assert(count_syms::value + count_syms::value <= 1, "Single symbolic expression allowed currently."); + template static constexpr auto f(T x) { return eval::f(x) - eval::f(x); } +}; +template struct eval> { + static_assert(count_syms::value + count_syms::value <= 1, "Single symbolic expression allowed currently."); + template static constexpr auto f(T x) { return eval::f(x) * eval::f(x); } +}; +template struct eval> { + static_assert(count_syms::value + count_syms::value <= 1, "Single symbolic expression allowed currently."); + template static constexpr auto f(T x) { return eval::f(x) / eval::f(x); } +}; + +void test_expression_tree_evaluator(int x) { + // The analyzer reorders the expression to have the symbol on the left. + clang_analyzer_dump(eval>, cons<3>>>::f(x)); + clang_analyzer_dump(eval, sym>, cons<3>>>::f(x)); + clang_analyzer_dump(eval, plus, sym>>>::f(x)); + // expected-warning-re@-3 {{((reg_${{[0-9]+}}) + 1) * 3}} + // expected-warning-re@-3 {{((reg_${{[0-9]+}}) + 1) * 3}} + // expected-warning-re@-3 {{((reg_${{[0-9]+}}) + 1) * 3}} + + // The analyzer constant folds the expression. + clang_analyzer_dump(eval, plus, sym>>>::f(x)); + // expected-warning-re@-1 {{(reg_${{[0-9]+}}) + 4}} + clang_analyzer_dump(eval, cons<2>>, sym>>::f(x)); + // expected-warning-re@-1 {{(reg_${{[0-9]+}}) + 6}} + + // Can not constant fold everything. + clang_analyzer_dump(eval, plus, sym>>>::f(x)); + // expected-warning-re@-1 {{((reg_${{[0-9]+}}) + 2) * 3}} +} + +template +struct range { + static_assert(is_same::value); + static_assert(Lowerbound <= UpperBound); + static constexpr auto lowerbound = Lowerbound; + static constexpr auto upperbound = UpperBound; +}; + +template +void valid(SymbolType x) { + static_assert(ExpectedRange::upperbound - ExpectedRange::lowerbound > 1, "Ranges require at least 3 elements for automatic testing."); + + // Using constexpr to prevent this from compilation if underflow/overflow UB happens. + constexpr auto after_lowerbound = ExpectedRange::lowerbound + 1; + constexpr auto before_upperbound = ExpectedRange::upperbound - 1; + + // Just to make sure that the test is relevant, aka. no overflow/underflow happened during the calculation. + static_assert(ExpectedRange::lowerbound < after_lowerbound, "The expected lowerbound is too large for this test."); + static_assert(before_upperbound < ExpectedRange::upperbound, "The expected upperbound is too small for this test."); + + // Test-code: + constexpr ArrayT buf = {}; + + // Sanity check if lowerbound and upperbound are valid elements of the array. + // We utilize the constexpr engine of clang to check the given access. + static_assert(1 + buf[eval::f(ExpectedRange::lowerbound)]); + static_assert(1 + buf[eval::f(ExpectedRange::upperbound)]); + if constexpr (is_unsigned()) { + static_assert(0 <= ExpectedRange::lowerbound); + } + + // Evaluate the expression tree with the symbolic variable. + const auto subscipt_expr = eval::f(x); + // clang_analyzer_dump(subscipt_expr); + + // Access the element at that index. + const auto tmp = buf[subscipt_expr]; // no-warning + //clang_analyzer_printState(); + + // Verify that we infered that the symbolic variable must be in the [lowerbound, upperbound] range to be valid. + clang_analyzer_eval(ExpectedRange::lowerbound <= x); // expected-warning {{TRUE}} + clang_analyzer_eval(x <= ExpectedRange::upperbound); // expected-warning {{TRUE}} + + // Verify that the ends of the range is sharp. + clang_analyzer_eval(after_lowerbound <= x); // expected-warning {{UNKNOWN}} + clang_analyzer_eval(x < before_upperbound); // expected-warning {{UNKNOWN}} +} + +// Valid array access test-cases: +// Each row is an explicit template function instantiation of the given test-case. + +//------- buf[x+1] ------ +// 0 <= x + 1 < 5 [ sub 1 ] +// -1 <= x < 4 +// x in [-1, 3] +using x_plus_1 = plus>; +template void valid, int[5]>(char x); +template void valid, int[5]>(signed char x); +template void valid, int[5]>(short x); +template void valid, int[5]>(int x); +template void valid, int[5]>(long x); +template void valid, int[5]>(long long x); +template void valid, int[5]>(unsigned char x); +template void valid, int[5]>(unsigned short x); +template void valid, int[5]>(unsigned int x); +template void valid, int[5]>(unsigned long x); +template void valid, int[5]>(unsigned long long x); + +//------- buf[x-1] ------ +// 0 <= x - 1 < 5 [ add 1 ] +// 1 <= x < 6 +// x in [1, 5] +using x_minus_1 = minus>; +template void valid, int[5]>(char x); +template void valid, int[5]>(signed char x); +template void valid, int[5]>(short x); +template void valid, int[5]>(int x); +template void valid, int[5]>(long x); +template void valid, int[5]>(long long x); +template void valid, int[5]>(unsigned char x); +template void valid, int[5]>(unsigned short x); +template void valid, int[5]>(unsigned int x); +template void valid, int[5]>(unsigned long x); +template void valid, int[5]>(unsigned long long x); + +//------- buf[x+1ull] ------ +// 0 <= x + 1ull < 5 [ sub 1 ] +// -1 <= x < 4 +// x in [-1, 3] +using x_plus_1ull = plus>; +template void valid, int[5]>(char x); +template void valid, int[5]>(signed char x); +template void valid, int[5]>(short x); +template void valid, int[5]>(int x); +template void valid, int[5]>(long x); +template void valid, int[5]>(long long x); +template void valid, int[5]>(unsigned char x); +template void valid, int[5]>(unsigned short x); +template void valid, int[5]>(unsigned int x); +template void valid, int[5]>(unsigned long x); +template void valid, int[5]>(unsigned long long x); + +//------- buf[x-1ull] ------ +// 0 <= x - 1ull < 5 [ add 1 ] +// 1 <= x < 6 +// x in [1, 5] +using x_minus_1ull = minus>; +template void valid, int[5]>(char x); +template void valid, int[5]>(signed char x); +template void valid, int[5]>(short x); +template void valid, int[5]>(int x); +template void valid, int[5]>(long x); +template void valid, int[5]>(long long x); +template void valid, int[5]>(unsigned char x); +template void valid, int[5]>(unsigned short x); +template void valid, int[5]>(unsigned int x); +template void valid, int[5]>(unsigned long x); +template void valid, int[5]>(unsigned long long x); + +//------- buf[2x+1] ------ +// 0 <= 2x + 1 < 9 [ sub 1 ] +// -1 <= 2x < 8 [ div 2 ] +// -0.5 <= x < 4 +// x in [0, 3] +using x2_plus_1 = plus>, cons<1>>; +template void valid, int[9]>(char x); +template void valid, int[9]>(signed char x); +template void valid, int[9]>(short x); +template void valid, int[9]>(int x); +template void valid, int[9]>(long x); +template void valid, int[9]>(long long x); +template void valid, int[9]>(unsigned char x); +template void valid, int[9]>(unsigned short x); +template void valid, int[9]>(unsigned int x); +template void valid, int[9]>(unsigned long x); +template void valid, int[9]>(unsigned long long x); + +//------- buf[2x-4ull] ------ +// 0 <= 2x - 4ull < 9 [ add 4 ] +// 4 <= 2x < 13 [ div 2 ] +// 2 <= x < 6.5 +// x in [2, 6] +using x2_minus_1ull = minus>, cons<4ull>>; +template void valid, int[9]>(char x); +template void valid, int[9]>(signed char x); +template void valid, int[9]>(short x); +template void valid, int[9]>(int x); +template void valid, int[9]>(long x); +template void valid, int[9]>(long long x); +template void valid, int[9]>(unsigned char x); +template void valid, int[9]>(unsigned short x); +template void valid, int[9]>(unsigned int x); +template void valid, int[9]>(unsigned long x); +template void valid, int[9]>(unsigned long long x); + +//------- buf[2-4ull] ------ +// 0 <= 2ulx - 4ull < 9 [ add 4 ] +// 4 <= 2ulx < 13 [ div 2ul ] +// 2 <= x < 6.5 +// x in [2, 6] +using x2ul_minus_1ull = minus>, cons<4ull>>; +template void valid, int[9]>(char x); +template void valid, int[9]>(signed char x); +template void valid, int[9]>(short x); +template void valid, int[9]>(int x); +template void valid, int[9]>(long x); +template void valid, int[9]>(long long x); +template void valid, int[9]>(unsigned char x); +template void valid, int[9]>(unsigned short x); +template void valid, int[9]>(unsigned int x); +template void valid, int[9]>(unsigned long x); +template void valid, int[9]>(unsigned long long x); + +//------- buf[x+-3] ------ +// 0 <= x + -3 < 9 [ add 3 ] +// 3 <= x < 12 +// x in [3, 11] +using x_plus_minus_3 = plus>; +template void valid, int[9]>(char x); +template void valid, int[9]>(signed char x); +template void valid, int[9]>(short x); +template void valid, int[9]>(int x); +template void valid, int[9]>(long x); +template void valid, int[9]>(long long x); +template void valid, int[9]>(unsigned char x); +template void valid, int[9]>(unsigned short x); +// The uint, ulong, ulonglong, needs spacial attention: +// Integer promotion kicks in and the unary negated value wraps around. +// +// 0 <= x + -3 < 9 [ equivalent, due to integer promotion ] +// 0 <= x + 4294967293 < 9 [ subtract 4294967293 aka. unsigned(-3) ] +// While rearranging, we will assume that the expression 'x + 4294967293' did not overflow, +// thus x must be in [0, 2] at this point. +// +// -4294967293 (33 bits) <= x < -4294967284 (33 bits) +// There is no possible x value to be in the given range, without considering wrapping, +// thus the simplification fails. +// The same happens for ulong and ulonglong as well. + + +//------- buf[x- -6] ------ +// 0 <= x - -6 < 9 +// 0 <= x - -6 < 9 +// Which is the same as: +// 0 <= x + 6 < 9 [ sub 6 ] +// -6 <= x < 3 +// x in [-6, 2] +using x_minus_minus_6 = minus>; +template void valid, int[9]>(char x); +template void valid, int[9]>(signed char x); +template void valid, int[9]>(short x); +template void valid, int[9]>(int x); +template void valid, int[9]>(long x); +template void valid, int[9]>(long long x); +template void valid, int[9]>(unsigned char x); +template void valid, int[9]>(unsigned short x); +// The uint, ulong, ulonglong, needs spacial attention: +// Integer promotion kicks in and the unary negated value wraps around. +// +// 0 <= x - -6 < 9 [ equivalent, due to integer promotion ] +// 0 <= x - 4294967290 < 9 [ add 4294967290 aka. unsigned(-6) ] +// While rearranging, we will assume that the expression 'x - 4294967290' did not underflow, +// thus x must be in [4294967290, 4294967295] at this point. +// +// 4294967290 (33 bits) <= x < 4294967299 (33 bits) +// x in [0xfffffffa, 0xffffffff] +// The same happens for ulong and ulonglong as well, but with their bit width. +template void valid, int[9]>(unsigned int x); +template void valid, int[9]>(unsigned long x); +template void valid, int[9]>(unsigned long long x); +