Index: cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp =================================================================== --- cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp +++ cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp @@ -13,55 +13,329 @@ //===----------------------------------------------------------------------===// #include "ClangSACheckers.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/SmallVector.h" using namespace clang; using namespace ento; namespace { +enum class AllocKind { + SingleObject, + Array, + Unknown, + Reinterpreted // Single object interpreted as an array. +}; +} // end namespace + +namespace llvm { +template <> struct FoldingSetTrait { + static inline void Profile(AllocKind X, FoldingSetNodeID &ID) { + ID.AddInteger(static_cast(X)); + } +}; +} // end namespace llvm + +namespace { class PointerArithChecker - : public Checker< check::PreStmt > { - mutable std::unique_ptr BT; + : public Checker< + check::PreStmt, check::PreStmt, + check::PreStmt, check::PreStmt, + check::PostStmt, check::PostStmt, + check::PostStmt, check::DeadSymbols> { + AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const; + const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic, + AllocKind &AKind, CheckerContext &C) const; + const MemRegion *getPointedRegion(const MemRegion *Region, + CheckerContext &C) const; + void reportPointerArithMisuse(const Expr *E, CheckerContext &C, + bool PointedNeeded = false) const; + void initAllocIdentifiers(ASTContext &C) const; + + mutable std::unique_ptr BT_pointerArith; + mutable std::unique_ptr BT_polyArray; + mutable llvm::SmallSet AllocFunctions; public: - void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; + void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const; + void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const; + void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const; + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const; + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; }; +} // end namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind) + +void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + // TODO: intentional leak. Some information is garbage collected too early, + // see http://reviews.llvm.org/D14203 for further information. + /*ProgramStateRef State = C.getState(); + RegionStateTy RegionStates = State->get(); + for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end(); + I != E; ++I) { + if (!SR.isLiveRegion(I->first)) + State = State->remove(I->first); + } + C.addTransition(State);*/ } -void PointerArithChecker::checkPreStmt(const BinaryOperator *B, - CheckerContext &C) const { - if (B->getOpcode() != BO_Sub && B->getOpcode() != BO_Add) - return; +AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE, + const FunctionDecl *FD) const { + // This checker try not to assume anything about placement and overloaded + // new to avoid false positives. + if (isa(FD)) + return AllocKind::Unknown; + if (FD->getNumParams() != 1 || FD->isVariadic()) + return AllocKind::Unknown; + if (NE->isArray()) + return AllocKind::Array; - ProgramStateRef state = C.getState(); - const LocationContext *LCtx = C.getLocationContext(); - SVal LV = state->getSVal(B->getLHS(), LCtx); - SVal RV = state->getSVal(B->getRHS(), LCtx); + return AllocKind::SingleObject; +} - const MemRegion *LR = LV.getAsRegion(); +const MemRegion * +PointerArithChecker::getPointedRegion(const MemRegion *Region, + CheckerContext &C) const { + assert(Region); + ProgramStateRef State = C.getState(); + SVal S = State->getSVal(Region); + return S.getAsRegion(); +} + +/// Checks whether a region is the part of an array. +/// In case there is a dericed to base cast above the array element, the +/// Polymorphic output value is set to true. AKind output value is set to the +/// allocation kind of the inspected region. +const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region, + bool &Polymorphic, + AllocKind &AKind, + CheckerContext &C) const { + assert(Region); + while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) { + Region = Region->getAs()->getSuperRegion(); + Polymorphic = true; + } + if (Region->getKind() == MemRegion::Kind::ElementRegionKind) { + Region = Region->getAs()->getSuperRegion(); + } + + ProgramStateRef State = C.getState(); + if (const AllocKind *Kind = State->get(Region)) { + AKind = *Kind; + if (*Kind == AllocKind::Array) + return Region; + else + return nullptr; + } + // When the region is symbolic and we do not have any information about it, + // assume that this is an array to avoid false positives. + if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind) + return Region; + + // No AllocKind stored and not symbolic, assume that it points to a single + // object. + return nullptr; +} - if (!LR || !RV.isConstant()) +void PointerArithChecker::reportPointerArithMisuse(const Expr *E, + CheckerContext &C, + bool PointedNeeded) const { + SourceRange SR = E->getSourceRange(); + if (SR.isInvalid()) return; - // If pointer arithmetic is done on variables of non-array type, this often - // means behavior rely on memory organization, which is dangerous. - if (isa(LR) || isa(LR) || - isa(LR)) { + ProgramStateRef State = C.getState(); + const MemRegion *Region = + State->getSVal(E, C.getLocationContext()).getAsRegion(); + if (!Region) + return; + if (PointedNeeded) + Region = getPointedRegion(Region, C); + if (!Region) + return; + bool IsPolymorphic = false; + AllocKind Kind = AllocKind::Unknown; + if (const MemRegion *ArrayRegion = + getArrayRegion(Region, IsPolymorphic, Kind, C)) { + if (!IsPolymorphic) + return; if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!BT) - BT.reset( - new BuiltinBug(this, "Dangerous pointer arithmetic", - "Pointer arithmetic done on non-array variables " - "means reliance on memory layout, which is " - "dangerous.")); - auto R = llvm::make_unique(*BT, BT->getDescription(), N); - R->addRange(B->getSourceRange()); + if (!BT_polyArray) + BT_polyArray.reset(new BuiltinBug( + this, "Dangerous pointer arithmetic", + "Pointer arithmetic on a pointer to base class is dangerous " + "because derived and base class may have different size.")); + auto R = llvm::make_unique(*BT_polyArray, + BT_polyArray->getDescription(), N); + R->addRange(E->getSourceRange()); + R->markInteresting(ArrayRegion); C.emitReport(std::move(R)); } + return; + } + + if (Kind == AllocKind::Reinterpreted) + return; + + // We might not have enough information about symbolic regions. + if (Kind != AllocKind::SingleObject && + Region->getKind() == MemRegion::Kind::SymbolicRegionKind) + return; + + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT_pointerArith) + BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic", + "Pointer arithmetic on non-array " + "variables relies on memory layout, " + "which is dangerous.")); + auto R = llvm::make_unique(*BT_pointerArith, + BT_pointerArith->getDescription(), N); + R->addRange(SR); + R->markInteresting(Region); + C.emitReport(std::move(R)); + } +} + +void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const { + if (!AllocFunctions.empty()) + return; + AllocFunctions.insert(&C.Idents.get("alloca")); + AllocFunctions.insert(&C.Idents.get("malloc")); + AllocFunctions.insert(&C.Idents.get("realloc")); + AllocFunctions.insert(&C.Idents.get("calloc")); + AllocFunctions.insert(&C.Idents.get("valloc")); +} + +void PointerArithChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + IdentifierInfo *FunI = FD->getIdentifier(); + initAllocIdentifiers(C.getASTContext()); + if (AllocFunctions.count(FunI) == 0) + return; + + SVal SV = State->getSVal(CE, C.getLocationContext()); + const MemRegion *Region = SV.getAsRegion(); + if (!Region) + return; + // Assume that C allocation functions allocate arrays to avoid false + // positives. + // TODO: Add heuristics to distinguish alloc calls that allocates single + // objecs. + State = State->set(Region, AllocKind::Array); + C.addTransition(State); +} + +void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE, + CheckerContext &C) const { + const FunctionDecl *FD = NE->getOperatorNew(); + if (!FD) + return; + + AllocKind Kind = getKindOfNewOp(NE, FD); + + ProgramStateRef State = C.getState(); + SVal AllocedVal = State->getSVal(NE, C.getLocationContext()); + const MemRegion *Region = AllocedVal.getAsRegion(); + if (!Region) + return; + State = State->set(Region, Kind); + C.addTransition(State); +} + +void PointerArithChecker::checkPostStmt(const CastExpr *CE, + CheckerContext &C) const { + if (CE->getCastKind() != CastKind::CK_BitCast) + return; + + const Expr *CastedExpr = CE->getSubExpr(); + ProgramStateRef State = C.getState(); + SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext()); + + const MemRegion *Region = CastedVal.getAsRegion(); + if (!Region) + return; + + // Suppress reinterpret casted hits. + State = State->set(Region, AllocKind::Reinterpreted); + C.addTransition(State); +} + +void PointerArithChecker::checkPreStmt(const CastExpr *CE, + CheckerContext &C) const { + if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay) + return; + + const Expr *CastedExpr = CE->getSubExpr(); + ProgramStateRef State = C.getState(); + SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext()); + + const MemRegion *Region = CastedVal.getAsRegion(); + if (!Region) + return; + + if (const AllocKind *Kind = State->get(Region)) { + if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted) + return; + } + State = State->set(Region, AllocKind::Array); + C.addTransition(State); +} + +void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp, + CheckerContext &C) const { + if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType()) + return; + reportPointerArithMisuse(UOp->getSubExpr(), C, true); +} + +void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext()); + + // Indexing with 0 is OK. + if (Idx.isZeroConstant()) + return; + reportPointerArithMisuse(SubsExpr->getBase(), C); +} + +void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp, + CheckerContext &C) const { + BinaryOperatorKind OpKind = BOp->getOpcode(); + if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign) + return; + + const Expr *Lhs = BOp->getLHS(); + const Expr *Rhs = BOp->getRHS(); + ProgramStateRef State = C.getState(); + + if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) { + SVal RHSVal = State->getSVal(Rhs, C.getLocationContext()); + if (State->isNull(RHSVal).isConstrainedTrue()) + return; + reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp()); + } + // The int += ptr; case is not valid C++. + if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) { + SVal LHSVal = State->getSVal(Lhs, C.getLocationContext()); + if (State->isNull(LHSVal).isConstrainedTrue()) + return; + reportPointerArithMisuse(Rhs, C); } } Index: cfe/trunk/test/Analysis/PR24184.cpp =================================================================== --- cfe/trunk/test/Analysis/PR24184.cpp +++ cfe/trunk/test/Analysis/PR24184.cpp @@ -12,7 +12,7 @@ typedef int *vcreate_t(int *, DATA_TYPE, int, int); void fn1(unsigned, unsigned) { char b = 0; - for (; 1; a++, &b + a * 0) // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}} + for (; 1; a++, &b + a * 0) ; } @@ -55,7 +55,7 @@ void fn2_1(uint32_t *p1, unsigned char *p2, uint32_t p3) { unsigned i = 0; for (0; i < p3; i++) - fn1_1(p1 + i, p2 + i * 0); // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}} + fn1_1(p1 + i, p2 + i * 0); } struct A_1 { Index: cfe/trunk/test/Analysis/fields.c =================================================================== --- cfe/trunk/test/Analysis/fields.c +++ cfe/trunk/test/Analysis/fields.c @@ -16,7 +16,7 @@ void f() { struct s a; - int *p = &(a.n) + 1; + int *p = &(a.n) + 1; // expected-warning{{Pointer arithmetic on}} } typedef struct { Index: cfe/trunk/test/Analysis/ptr-arith.c =================================================================== --- cfe/trunk/test/Analysis/ptr-arith.c +++ cfe/trunk/test/Analysis/ptr-arith.c @@ -52,7 +52,7 @@ void f5() { int x, y; int *p; - p = &x + 1; // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}} + p = &x + 1; // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}} int a[10]; p = a + 1; // no-warning @@ -75,7 +75,7 @@ clang_analyzer_eval(&a != 0); // expected-warning{{TRUE}} clang_analyzer_eval(&a >= 0); // expected-warning{{TRUE}} clang_analyzer_eval(&a > 0); // expected-warning{{TRUE}} - clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}} expected-warning{{Pointer arithmetic done on non-array variables}} + clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}} // LHS is NULL, RHS is non-symbolic // The same code is used for labels and non-symbolic values. Index: cfe/trunk/test/Analysis/ptr-arith.cpp =================================================================== --- cfe/trunk/test/Analysis/ptr-arith.cpp +++ cfe/trunk/test/Analysis/ptr-arith.cpp @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -analyze -analyzer-checker=core,debug.ExprInspection -verify %s -// expected-no-diagnostics +// RUN: %clang_cc1 -Wno-unused-value -std=c++14 -analyze -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s struct X { int *p; int zero; @@ -20,3 +19,82 @@ return 5/littleX.zero; // no-warning } + +class Base {}; +class Derived : public Base {}; + +void checkPolymorphicUse() { + Derived d[10]; + + Base *p = d; + ++p; // expected-warning{{Pointer arithmetic on a pointer to base class is dangerous}} +} + +void checkBitCasts() { + long l; + char *p = (char*)&l; + p = p+2; +} + +void checkBasicarithmetic(int i) { + int t[10]; + int *p = t; + ++p; + int a = 5; + p = &a; + ++p; // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}} + p = p + 2; // expected-warning{{}} + p = 2 + p; // expected-warning{{}} + p += 2; // expected-warning{{}} + a += p[2]; // expected-warning{{}} + p = i*0 + p; + p = p + i*0; + p += i*0; +} + +void checkArithOnSymbolic(int*p) { + ++p; + p = p + 2; + p = 2 + p; + p += 2; + (void)p[2]; +} + +struct S { + int t[10]; +}; + +void arrayInStruct() { + S s; + int * p = s.t; + ++p; + S *sp = new S; + p = sp->t; + ++p; + delete sp; +} + +void checkNew() { + int *p = new int; + p[1] = 1; // expected-warning{{}} +} + +void InitState(int* state) { + state[1] = 1; // expected-warning{{}} +} + +int* getArray(int size) { + if (size == 0) + return new int; + return new int[5]; +} + +void checkConditionalArray() { + int* maybeArray = getArray(0); + InitState(maybeArray); +} + +void checkMultiDimansionalArray() { + int a[5][5]; + *(*(a+1)+2) = 2; +} Index: cfe/trunk/test/Analysis/rdar-6442306-1.m =================================================================== --- cfe/trunk/test/Analysis/rdar-6442306-1.m +++ cfe/trunk/test/Analysis/rdar-6442306-1.m @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core %s -analyzer-store=region -verify +// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core -analyzer-disable-checker=alpha.core.PointerArithm %s -analyzer-store=region -verify // expected-no-diagnostics typedef int bar_return_t;