diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -63,21 +63,28 @@ return BlockReachable[B.getBlockID()]; } + /// Returns the parent of `S` or null if the parent could not be found. + /// `S` must be a descendant of the function or statement passed to `build()`. + const Stmt *getParent(const Stmt *S) const { return StmtToParent.lookup(S); } + private: // FIXME: Once the deprecated `build` method is removed, mark `D` as "must not // be null" and add an assertion. ControlFlowContext(const Decl *D, std::unique_ptr Cfg, llvm::DenseMap StmtToBlock, - llvm::BitVector BlockReachable) + llvm::BitVector BlockReachable, + llvm::DenseMap StmtToParent) : ContainingDecl(D), Cfg(std::move(Cfg)), StmtToBlock(std::move(StmtToBlock)), - BlockReachable(std::move(BlockReachable)) {} + BlockReachable(std::move(BlockReachable)), + StmtToParent(std::move(StmtToParent)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl *ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; llvm::BitVector BlockReachable; + llvm::DenseMap StmtToParent; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -573,12 +573,16 @@ /// Returns the `DeclContext` of the block being analysed, if any. Otherwise, /// returns null. - const DeclContext *getDeclCtx() const { return CallStack.back(); } + const DeclContext *getDeclCtx() const { + if (CallStack.empty()) + return nullptr; + return CallStack.back(); + } /// Returns the function currently being analyzed, or null if the code being /// analyzed isn't part of a function. const FunctionDecl *getCurrentFunc() const { - return dyn_cast(getDeclCtx()); + return dyn_cast_or_null(getDeclCtx()); } /// Returns whether this `Environment` can be extended to analyze the given diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -67,6 +67,27 @@ return BlockReachable; } +static llvm::DenseMap +buildStmtToParentMap(const Stmt &S) { + llvm::DenseMap Result; + + llvm::SmallVector StmtsToVisit = {&S}; + while (!StmtsToVisit.empty()) { + const Stmt *Parent = StmtsToVisit.back(); + StmtsToVisit.pop_back(); + + for (const Stmt *Child : Parent->children()) { + if (Child == nullptr) + continue; + + StmtsToVisit.push_back(Child); + Result[Child] = Parent; + } + } + + return Result; +} + llvm::Expected ControlFlowContext::build(const FunctionDecl &Func) { if (!Func.hasBody()) @@ -106,7 +127,7 @@ llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); return ControlFlowContext(&D, std::move(Cfg), std::move(StmtToBlock), - std::move(BlockReachable)); + std::move(BlockReachable), buildStmtToParentMap(S)); } llvm::Expected diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -414,6 +414,90 @@ } } +/// Returns whether locations `Loc1` and `Loc2` associated with expression `E` +/// in two different environments `Env1` and `Env2` should be considered +/// equivalent. +/// The idea is that the location of certain expressions is almost certainly not +/// relevant for the result of the block, but changes in these locations may +/// nevertheless hinder convergence. +static bool exprLocsEquivalent(const Expr *E, StorageLocation *Loc1, + StorageLocation *Loc2, const Environment &Env1, + const Environment &Env2, + Environment::ValueModel &Model, + const ControlFlowContext *CFCtx) { + // prvalues have stable storage locations, so we don't need to compare them. + if (E->isPRValue()) + return true; + + // If we have no `ControlFlowContext`, we can't determine the parent of `E`, + // so we have to assume that location matters. + if (CFCtx == nullptr) + return Loc1 == Loc2; + + const Stmt *Parent = CFCtx->getParent(E); + // An expression should always have a parent within the body of the function. + assert(Parent != nullptr); + if (Parent == nullptr) + return Loc1 == Loc2; + + // If this an expression statement, the location is discarded. + if (isa(Parent)) + return true; + + if (auto *Cast = dyn_cast(Parent)) { + // If the expression is cast to void, its location is discarded. + if (Cast->getCastKind() == CK_ToVoid) + return true; + + // If the expression is cast from an lvalue to an rvalue, its location is + // unlikely to matter. Instead, just compare the values. + if (Cast->getCastKind() == CK_LValueToRValue) { + if (Loc1 == nullptr || Loc2 == nullptr) + return Loc1 == Loc2; + + Value *Val1 = Env1.getValue(*Loc1); + Value *Val2 = Env2.getValue(*Loc2); + + if (Val1 == nullptr || Val2 == nullptr) + return Val1 == Val2; + + return areEquivalentValues(*Val1, *Val2) || + compareDistinctValues(E->getType(), *Val1, Env1, *Val2, Env2, + Model); + } + } + + return Loc1 == Loc2; +} + +/// Returns whether two `ExprToLoc` maps should be considered equivalent. The +/// comparison ignores certain classes of expressions; see comments in +/// `exprLocsEquivalent()`. +/// FIXME: Ignoring certain classes of expressions is a temporary fix to aid +/// convergence. As discussed in https://reviews.llvm.org/D156658, the real fix +/// is to add widening for `PointerValue`s. +static bool exprToLocEquivalent( + const llvm::DenseMap &ExprToLoc1, + const llvm::DenseMap &ExprToLoc2, + const Environment &Env1, const Environment &Env2, + Environment::ValueModel &Model) { + if (ExprToLoc1.size() != ExprToLoc2.size()) + return false; + + const ControlFlowContext *CFCtx = nullptr; + if (const FunctionDecl *Func = Env1.getCurrentFunc()) + CFCtx = Env1.getDataflowAnalysisContext().getControlFlowContext(Func); + + for (auto [E, Loc1] : ExprToLoc1) { + StorageLocation *Loc2 = ExprToLoc2.lookup(E); + + if (!exprLocsEquivalent(E, Loc1, Loc2, Env1, Env2, Model, CFCtx)) + return false; + } + + return true; +} + bool Environment::equivalentTo(const Environment &Other, Environment::ValueModel &Model) const { assert(DACtx == Other.DACtx); @@ -430,7 +514,7 @@ if (DeclToLoc != Other.DeclToLoc) return false; - if (ExprToLoc != Other.ExprToLoc) + if (!exprToLocEquivalent(ExprToLoc, Other.ExprToLoc, *this, Other, Model)) return false; // Compare the contents for the intersection of their domains. diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3836,6 +3836,68 @@ }); } +TEST(TransferTest, LoopDereferencingChangingPointerConverges) { + std::string Code = R"cc( + bool some_condition(); + + void target(int i1, int i2) { + int *p = &i1; + while (true) { + // Dereference the pointer, producing an lvalue, in various ways where + // the location of the lvalue is not important. + *p; + (void)*p; + int unused = *p; + + if (some_condition()) + p = &i1; + else + p = &i2; + } + } + )cc"; + // The key property that we are verifying is implicit in `runDataflow` -- + // namely, that the analysis succeeds, rather than hitting the maximum number + // of iterations. + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) {}); +} + +TEST(TransferTest, LoopDereferencingChangingRecordPointerConverges) { + std::string Code = R"cc( + struct Lookup { + int x; + }; + + bool some_condition(); + + void target(Lookup l1, Lookup l2) { + Lookup *l = &l1; + while (true) { + // Access the member variable, producing an lvalue, in various ways + // where the location of the lvalue is not important. + l->x; + (void)l->x; + int unused = l->x; + + if (some_condition()) + l = &l1; + else + l = &l2; + } + } + )cc"; + // The key property that we are verifying is implicit in `runDataflow` -- + // namely, that the analysis succeeds, rather than hitting the maximum number + // of iterations. + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) {}); +} + TEST(TransferTest, DoesNotCrashOnUnionThisExpr) { std::string Code = R"( union Union {