Index: clang-tidy/misc/UseAfterMoveCheck.cpp =================================================================== --- clang-tidy/misc/UseAfterMoveCheck.cpp +++ clang-tidy/misc/UseAfterMoveCheck.cpp @@ -11,13 +11,12 @@ #include "clang/Analysis/CFG.h" #include "clang/Lex/Lexer.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include +#include "../utils/ExprSequence.h" using namespace clang::ast_matchers; +using namespace clang::tidy::utils; + namespace clang { namespace tidy { @@ -25,101 +24,6 @@ namespace { -/// Provides information about the evaluation order of (sub-)expressions within -/// a `CFGBlock`. -/// -/// While a `CFGBlock` does contain individual `CFGElement`s for some -/// sub-expressions, the order in which those `CFGElement`s appear reflects -/// only one possible order in which the sub-expressions may be evaluated. -/// However, we want to warn if any of the potential evaluation orders can lead -/// to a use-after-move, not just the one contained in the `CFGBlock`. -/// -/// This class implements only a simplified version of the C++ sequencing rules -/// that is, however, sufficient for the purposes of this check. The main -/// limitation is that we do not distinguish between value computation and side -/// effect -- see the "Implementation" section for more details. -/// -/// Note: `SequenceChecker` from SemaChecking.cpp does a similar job (and much -/// more thoroughly), but using it would require -/// - Pulling `SequenceChecker` out into a header file (i.e. making it part of -/// the API), -/// - Removing the dependency of `SequenceChecker` on `Sema`, and -/// - (Probably) modifying `SequenceChecker` to make it suitable to be used in -/// this context. -/// For the moment, it seems preferable to re-implement our own version of -/// sequence checking that is special-cased to what we need here. -/// -/// Implementation -/// -------------- -/// -/// `ExprSequence` uses two types of sequencing edges between nodes in the AST: -/// -/// - Every `Stmt` is assumed to be sequenced after its children. This is -/// overly optimistic because the standard only states that value computations -/// of operands are sequenced before the value computation of the operator, -/// making no guarantees about side effects (in general). -/// -/// For our purposes, this rule is sufficient, however, because this check is -/// interested in operations on objects, which are generally performed through -/// function calls (whether explicit and implicit). Function calls guarantee -/// that the value computations and side effects for all function arguments -/// are sequenced before the execution fo the function. -/// -/// - In addition, some `Stmt`s are known to be sequenced before or after -/// their siblings. For example, the `Stmt`s that make up a `CompoundStmt`are -/// all sequenced relative to each other. The function -/// `getSequenceSuccessor()` implements these sequencing rules. -class ExprSequence { -public: - /// Initializes this `ExprSequence` with sequence information for the given - /// `CFG`. - ExprSequence(const CFG *TheCFG, ASTContext *TheContext); - - /// Returns whether \p Before is sequenced before \p After. - bool inSequence(const Stmt *Before, const Stmt *After) const; - - /// Returns whether \p After can potentially be evaluated after \p Before. - /// This is exactly equivalent to `!inSequence(After, Before)` but makes some - /// conditions read more naturally. - bool potentiallyAfter(const Stmt *After, const Stmt *Before) const; - -private: - // Returns the sibling of \p S (if any) that is directly sequenced after \p S, - // or nullptr if no such sibling exists. For example, if \p S is the child of - // a `CompoundStmt`, this would return the Stmt that directly follows \p S in - // the `CompoundStmt`. - // - // As the sequencing of many constructs that change control flow is already - // encoded in the `CFG`, this function only implements the sequencing rules - // for those constructs where sequencing cannot be inferred from the `CFG`. - const Stmt *getSequenceSuccessor(const Stmt *S) const; - - const Stmt *resolveSyntheticStmt(const Stmt *S) const; - - ASTContext *Context; - - llvm::DenseMap SyntheticStmtSourceMap; -}; - -/// Maps `Stmt`s to the `CFGBlock` that contains them. Some `Stmt`s may be -/// contained in more than one `CFGBlock`; in this case, they are mapped to the -/// innermost block (i.e. the one that is furthest from the root of the tree). -class StmtToBlockMap { -public: - /// Initializes the map for the given `CFG`. - StmtToBlockMap(const CFG *TheCFG, ASTContext *TheContext); - - /// Returns the block that \p S is contained in. Some `Stmt`s may be contained - /// in more than one `CFGBlock`; in this case, this function returns the - /// innermost block (i.e. the one that is furthest from the root of the tree). - const CFGBlock *blockContainingStmt(const Stmt *S) const; - -private: - ASTContext *Context; - - llvm::DenseMap Map; -}; - /// Contains information about a use-after-move. struct UseAfterMove { // The DeclRefExpr that constituted the use of the object. @@ -163,168 +67,6 @@ } // namespace -// Returns the Stmt nodes that are parents of 'S', skipping any potential -// intermediate non-Stmt nodes. -// -// In almost all cases, this function returns a single parent or no parents at -// all. -// -// The case that a Stmt has multiple parents is rare but does actually occur in -// the parts of the AST that we're interested in. Specifically, InitListExpr -// nodes cause ASTContext::getParent() to return multiple parents for certain -// nodes in their subtree because RecursiveASTVisitor visits both the syntactic -// and semantic forms of InitListExpr, and the parent-child relationships are -// different between the two forms. -static SmallVector getParentStmts(const Stmt *S, - ASTContext *Context) { - SmallVector Result; - - ASTContext::DynTypedNodeList Parents = Context->getParents(*S); - - SmallVector NodesToProcess(Parents.begin(), - Parents.end()); - - while (!NodesToProcess.empty()) { - ast_type_traits::DynTypedNode Node = NodesToProcess.back(); - NodesToProcess.pop_back(); - - if (const auto *S = Node.get()) { - Result.push_back(S); - } else { - Parents = Context->getParents(Node); - NodesToProcess.append(Parents.begin(), Parents.end()); - } - } - - return Result; -} - -bool isDescendantOrEqual(const Stmt *Descendant, const Stmt *Ancestor, - ASTContext *Context) { - if (Descendant == Ancestor) - return true; - for (const Stmt *Parent : getParentStmts(Descendant, Context)) { - if (isDescendantOrEqual(Parent, Ancestor, Context)) - return true; - } - - return false; -} - -ExprSequence::ExprSequence(const CFG *TheCFG, ASTContext *TheContext) - : Context(TheContext) { - for (const auto &SyntheticStmt : TheCFG->synthetic_stmts()) { - SyntheticStmtSourceMap[SyntheticStmt.first] = SyntheticStmt.second; - } -} - -bool ExprSequence::inSequence(const Stmt *Before, const Stmt *After) const { - Before = resolveSyntheticStmt(Before); - After = resolveSyntheticStmt(After); - - // If 'After' is in the subtree of the siblings that follow 'Before' in the - // chain of successors, we know that 'After' is sequenced after 'Before'. - for (const Stmt *Successor = getSequenceSuccessor(Before); Successor; - Successor = getSequenceSuccessor(Successor)) { - if (isDescendantOrEqual(After, Successor, Context)) - return true; - } - - // If 'After' is a parent of 'Before' or is sequenced after one of these - // parents, we know that it is sequenced after 'Before'. - for (const Stmt *Parent : getParentStmts(Before, Context)) { - if (Parent == After || inSequence(Parent, After)) - return true; - } - - return false; -} - -bool ExprSequence::potentiallyAfter(const Stmt *After, - const Stmt *Before) const { - return !inSequence(After, Before); -} - -const Stmt *ExprSequence::getSequenceSuccessor(const Stmt *S) const { - for (const Stmt *Parent : getParentStmts(S, Context)) { - if (const auto *BO = dyn_cast(Parent)) { - // Comma operator: Right-hand side is sequenced after the left-hand side. - if (BO->getLHS() == S && BO->getOpcode() == BO_Comma) - return BO->getRHS(); - } else if (const auto *InitList = dyn_cast(Parent)) { - // Initializer list: Each initializer clause is sequenced after the - // clauses that precede it. - for (unsigned I = 1; I < InitList->getNumInits(); ++I) { - if (InitList->getInit(I - 1) == S) - return InitList->getInit(I); - } - } else if (const auto *Compound = dyn_cast(Parent)) { - // Compound statement: Each sub-statement is sequenced after the - // statements that precede it. - const Stmt *Previous = nullptr; - for (const auto *Child : Compound->body()) { - if (Previous == S) - return Child; - Previous = Child; - } - } else if (const auto *TheDeclStmt = dyn_cast(Parent)) { - // Declaration: Every initializer expression is sequenced after the - // initializer expressions that precede it. - const Expr *PreviousInit = nullptr; - for (const Decl *TheDecl : TheDeclStmt->decls()) { - if (const auto *TheVarDecl = dyn_cast(TheDecl)) { - if (const Expr *Init = TheVarDecl->getInit()) { - if (PreviousInit == S) - return Init; - PreviousInit = Init; - } - } - } - } else if (const auto *ForRange = dyn_cast(Parent)) { - // Range-based for: Loop variable declaration is sequenced before the - // body. (We need this rule because these get placed in the same - // CFGBlock.) - if (S == ForRange->getLoopVarStmt()) - return ForRange->getBody(); - } else if (const auto *TheIfStmt = dyn_cast(Parent)) { - // If statement: If a variable is declared inside the condition, the - // expression used to initialize the variable is sequenced before the - // evaluation of the condition. - if (S == TheIfStmt->getConditionVariableDeclStmt()) - return TheIfStmt->getCond(); - } - } - - return nullptr; -} - -const Stmt *ExprSequence::resolveSyntheticStmt(const Stmt *S) const { - if (SyntheticStmtSourceMap.count(S)) - return SyntheticStmtSourceMap.lookup(S); - else - return S; -} - -StmtToBlockMap::StmtToBlockMap(const CFG *TheCFG, ASTContext *TheContext) - : Context(TheContext) { - for (const auto *B : *TheCFG) { - for (const auto &Elem : *B) { - if (Optional S = Elem.getAs()) - Map[S->getStmt()] = B; - } - } -} - -const CFGBlock *StmtToBlockMap::blockContainingStmt(const Stmt *S) const { - while (!Map.count(S)) { - SmallVector Parents = getParentStmts(S, Context); - if (Parents.empty()) - return nullptr; - S = Parents[0]; - } - - return Map.lookup(S); -} // Matches nodes that are // - Part of a decltype argument or class template argument (we check this by Index: clang-tidy/utils/CMakeLists.txt =================================================================== --- clang-tidy/utils/CMakeLists.txt +++ clang-tidy/utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_clang_library(clangTidyUtils ASTUtils.cpp DeclRefExprUtils.cpp + ExprSequence.cpp FixItHintUtils.cpp HeaderFileExtensionsUtils.cpp HeaderGuard.cpp Index: clang-tidy/utils/ExprSequence.h =================================================================== --- /dev/null +++ clang-tidy/utils/ExprSequence.h @@ -0,0 +1,126 @@ +//===------------- ExprSequence.h - clang-tidy ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H + +#include "clang/Analysis/CFG.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" + +#include "../ClangTidy.h" + +namespace clang { +namespace tidy { +namespace utils { + +/// Provides information about the evaluation order of (sub-)expressions within +/// a `CFGBlock`. +/// +/// While a `CFGBlock` does contain individual `CFGElement`s for some +/// sub-expressions, the order in which those `CFGElement`s appear reflects +/// only one possible order in which the sub-expressions may be evaluated. +/// However, we want to warn if any of the potential evaluation orders can lead +/// to a use-after-move, not just the one contained in the `CFGBlock`. +/// +/// This class implements only a simplified version of the C++ sequencing +/// rules. The main limitation is that we do not distinguish between value +/// computation and side effect -- see the "Implementation" section for more +/// details. +/// +/// Note: `SequenceChecker` from SemaChecking.cpp does a similar job (and much +/// more thoroughly), but using it would require +/// - Pulling `SequenceChecker` out into a header file (i.e. making it part of +/// the API), +/// - Removing the dependency of `SequenceChecker` on `Sema`, and +/// - (Probably) modifying `SequenceChecker` to make it suitable to be used in +/// this context. +/// For the moment, it seems preferable to re-implement our own version of +/// sequence checking that is special-cased to what we need here. +/// +/// Implementation +/// -------------- +/// +/// `ExprSequence` uses two types of sequencing edges between nodes in the AST: +/// +/// - Every `Stmt` is assumed to be sequenced after its children. This is +/// overly optimistic because the standard only states that value computations +/// of operands are sequenced before the value computation of the operator, +/// making no guarantees about side effects (in general). +/// +/// For our purposes, this rule is sufficient, however, because this check is +/// interested in operations on objects, which are generally performed through +/// function calls (whether explicit and implicit). Function calls guarantee +/// that the value computations and side effects for all function arguments +/// are sequenced before the execution of the function. +/// +/// - In addition, some `Stmt`s are known to be sequenced before or after +/// their siblings. For example, the `Stmt`s that make up a `CompoundStmt`are +/// all sequenced relative to each other. The function +/// `getSequenceSuccessor()` implements these sequencing rules. +class ExprSequence { +public: + /// Initializes this `ExprSequence` with sequence information for the given + /// `CFG`. + ExprSequence(const CFG *TheCFG, ASTContext *TheContext); + + /// Returns whether \p Before is sequenced before \p After. + bool inSequence(const Stmt *Before, const Stmt *After) const; + + /// Returns whether \p After can potentially be evaluated after \p Before. + /// This is exactly equivalent to `!inSequence(After, Before)` but makes some + /// conditions read more naturally. + bool potentiallyAfter(const Stmt *After, const Stmt *Before) const; + +private: + // Returns the sibling of \p S (if any) that is directly sequenced after \p S, + // or nullptr if no such sibling exists. For example, if \p S is the child of + // a `CompoundStmt`, this would return the Stmt that directly follows \p S in + // the `CompoundStmt`. + // + // As the sequencing of many constructs that change control flow is already + // encoded in the `CFG`, this function only implements the sequencing rules + // for those constructs where sequencing cannot be inferred from the `CFG`. + const Stmt *getSequenceSuccessor(const Stmt *S) const; + + const Stmt *resolveSyntheticStmt(const Stmt *S) const; + + ASTContext *Context; + + llvm::DenseMap SyntheticStmtSourceMap; +}; + +/// Maps `Stmt`s to the `CFGBlock` that contains them. Some `Stmt`s may be +/// contained in more than one `CFGBlock`; in this case, they are mapped to the +/// innermost block (i.e. the one that is furthest from the root of the tree). +class StmtToBlockMap { +public: + /// Initializes the map for the given `CFG`. + StmtToBlockMap(const CFG *TheCFG, ASTContext *TheContext); + + /// Returns the block that \p S is contained in. Some `Stmt`s may be contained + /// in more than one `CFGBlock`; in this case, this function returns the + /// innermost block (i.e. the one that is furthest from the root of the tree). + const CFGBlock *blockContainingStmt(const Stmt *S) const; + +private: + ASTContext *Context; + + llvm::DenseMap Map; +}; + + +} +} +} + + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H Index: clang-tidy/utils/ExprSequence.cpp =================================================================== --- /dev/null +++ clang-tidy/utils/ExprSequence.cpp @@ -0,0 +1,182 @@ +//===---------- ExprSequence.cpp - clang-tidy -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ExprSequence.h" + +namespace clang { +namespace tidy { +namespace utils { + +// Returns the Stmt nodes that are parents of 'S', skipping any potential +// intermediate non-Stmt nodes. +// +// In almost all cases, this function returns a single parent or no parents at +// all. +// +// The case that a Stmt has multiple parents is rare but does actually occur in +// the parts of the AST that we're interested in. Specifically, InitListExpr +// nodes cause ASTContext::getParent() to return multiple parents for certain +// nodes in their subtree because RecursiveASTVisitor visits both the syntactic +// and semantic forms of InitListExpr, and the parent-child relationships are +// different between the two forms. +static SmallVector getParentStmts(const Stmt *S, + ASTContext *Context) { + SmallVector Result; + + ASTContext::DynTypedNodeList Parents = Context->getParents(*S); + + SmallVector NodesToProcess(Parents.begin(), + Parents.end()); + + while (!NodesToProcess.empty()) { + ast_type_traits::DynTypedNode Node = NodesToProcess.back(); + NodesToProcess.pop_back(); + + if (const auto *S = Node.get()) { + Result.push_back(S); + } else { + Parents = Context->getParents(Node); + NodesToProcess.append(Parents.begin(), Parents.end()); + } + } + + return Result; +} + +bool isDescendantOrEqual(const Stmt *Descendant, const Stmt *Ancestor, + ASTContext *Context) { + if (Descendant == Ancestor) + return true; + for (const Stmt *Parent : getParentStmts(Descendant, Context)) { + if (isDescendantOrEqual(Parent, Ancestor, Context)) + return true; + } + + return false; +} + +ExprSequence::ExprSequence(const CFG *TheCFG, ASTContext *TheContext) + : Context(TheContext) { + for (const auto &SyntheticStmt : TheCFG->synthetic_stmts()) { + SyntheticStmtSourceMap[SyntheticStmt.first] = SyntheticStmt.second; + } +} + +bool ExprSequence::inSequence(const Stmt *Before, const Stmt *After) const { + Before = resolveSyntheticStmt(Before); + After = resolveSyntheticStmt(After); + + // If 'After' is in the subtree of the siblings that follow 'Before' in the + // chain of successors, we know that 'After' is sequenced after 'Before'. + for (const Stmt *Successor = getSequenceSuccessor(Before); Successor; + Successor = getSequenceSuccessor(Successor)) { + if (isDescendantOrEqual(After, Successor, Context)) + return true; + } + + // If 'After' is a parent of 'Before' or is sequenced after one of these + // parents, we know that it is sequenced after 'Before'. + for (const Stmt *Parent : getParentStmts(Before, Context)) { + if (Parent == After || inSequence(Parent, After)) + return true; + } + + return false; +} + +bool ExprSequence::potentiallyAfter(const Stmt *After, + const Stmt *Before) const { + return !inSequence(After, Before); +} + +const Stmt *ExprSequence::getSequenceSuccessor(const Stmt *S) const { + for (const Stmt *Parent : getParentStmts(S, Context)) { + if (const auto *BO = dyn_cast(Parent)) { + // Comma operator: Right-hand side is sequenced after the left-hand side. + if (BO->getLHS() == S && BO->getOpcode() == BO_Comma) + return BO->getRHS(); + } else if (const auto *InitList = dyn_cast(Parent)) { + // Initializer list: Each initializer clause is sequenced after the + // clauses that precede it. + for (unsigned I = 1; I < InitList->getNumInits(); ++I) { + if (InitList->getInit(I - 1) == S) + return InitList->getInit(I); + } + } else if (const auto *Compound = dyn_cast(Parent)) { + // Compound statement: Each sub-statement is sequenced after the + // statements that precede it. + const Stmt *Previous = nullptr; + for (const auto *Child : Compound->body()) { + if (Previous == S) + return Child; + Previous = Child; + } + } else if (const auto *TheDeclStmt = dyn_cast(Parent)) { + // Declaration: Every initializer expression is sequenced after the + // initializer expressions that precede it. + const Expr *PreviousInit = nullptr; + for (const Decl *TheDecl : TheDeclStmt->decls()) { + if (const auto *TheVarDecl = dyn_cast(TheDecl)) { + if (const Expr *Init = TheVarDecl->getInit()) { + if (PreviousInit == S) + return Init; + PreviousInit = Init; + } + } + } + } else if (const auto *ForRange = dyn_cast(Parent)) { + // Range-based for: Loop variable declaration is sequenced before the + // body. (We need this rule because these get placed in the same + // CFGBlock.) + if (S == ForRange->getLoopVarStmt()) + return ForRange->getBody(); + } else if (const auto *TheIfStmt = dyn_cast(Parent)) { + // If statement: If a variable is declared inside the condition, the + // expression used to initialize the variable is sequenced before the + // evaluation of the condition. + if (S == TheIfStmt->getConditionVariableDeclStmt()) + return TheIfStmt->getCond(); + } + } + + return nullptr; +} + +const Stmt *ExprSequence::resolveSyntheticStmt(const Stmt *S) const { + if (SyntheticStmtSourceMap.count(S)) + return SyntheticStmtSourceMap.lookup(S); + else + return S; +} + +StmtToBlockMap::StmtToBlockMap(const CFG *TheCFG, ASTContext *TheContext) + : Context(TheContext) { + for (const auto *B : *TheCFG) { + for (const auto &Elem : *B) { + if (Optional S = Elem.getAs()) + Map[S->getStmt()] = B; + } + } +} + +const CFGBlock *StmtToBlockMap::blockContainingStmt(const Stmt *S) const { + while (!Map.count(S)) { + SmallVector Parents = getParentStmts(S, Context); + if (Parents.empty()) + return nullptr; + S = Parents[0]; + } + + return Map.lookup(S); +} + + +} +} +} \ No newline at end of file