Index: include/clang/Analysis/CloneDetection.h =================================================================== --- include/clang/Analysis/CloneDetection.h +++ include/clang/Analysis/CloneDetection.h @@ -254,20 +254,37 @@ /// Searches all children of the given clones for type II clones (i.e. they are /// identical in every aspect beside the used variable names). +/// +/// This constraint is also available to be executed in two phases, see +/// RecursiveCloneTypeIIHashConstraint and RecursiveCloneTypeIIVerifyConstraint +/// for more. class RecursiveCloneTypeIIConstraint { +public: + void constrain(std::vector &Sequences); +}; - /// Generates and saves a hash code for the given Stmt. - /// \param S The given Stmt. - /// \param D The Decl containing S. - /// \param StmtsByHash Output parameter that will contain the hash codes for - /// each StmtSequence in the given Stmt. - /// \return The hash code of the given Stmt. - /// - /// If the given Stmt is a CompoundStmt, this method will also generate - /// hashes for all possible StmtSequences in the children of this Stmt. - size_t saveHash(const Stmt *S, const Decl *D, - std::vector> &StmtsByHash); +/// This constraint performs only the hashing part of the +/// RecursiveCloneTypeIIConstraint. +/// +/// It is supposed to be fast and can be used at the front of the constraint +/// chain. However, it has a tiny chance to generate false-positives where the +/// clones in a clone group are not actually type II clones of each other. +/// This happens only due to hash collisions and they can be removed by the +/// RecursiveCloneTypeIIVerifyConstraint. +class RecursiveCloneTypeIIHashConstraint { +public: + void constrain(std::vector &Sequences); +}; +/// This constraint performs only the verification part of the +/// RecursiveCloneTypeIIConstraint. +/// +/// It is supposed to be used behind the RecursiveCloneTypeIIHashConstraint +/// and verifies that all clones in a group are actually type II clones of +/// each other. However, this constraint is quite slow, so if you have faster +/// constraints that can handle false-positives generated by hash collisions, +/// then prepend those constraints to this one for optimal performance. +class RecursiveCloneTypeIIVerifyConstraint { public: void constrain(std::vector &Sequences); }; Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -381,9 +381,17 @@ return HashCode; } -size_t RecursiveCloneTypeIIConstraint::saveHash( - const Stmt *S, const Decl *D, - std::vector> &StmtsByHash) { +/// Generates and saves a hash code for the given Stmt. +/// \param S The given Stmt. +/// \param D The Decl containing S. +/// \param StmtsByHash Output parameter that will contain the hash codes for +/// each StmtSequence in the given Stmt. +/// \return The hash code of the given Stmt. +/// +/// If the given Stmt is a CompoundStmt, this method will also generate +/// hashes for all possible StmtSequences in the children of this Stmt. +size_t saveHash(const Stmt *S, const Decl *D, + std::vector> &StmtsByHash) { llvm::MD5 Hash; ASTContext &Context = D->getASTContext(); @@ -474,6 +482,14 @@ void RecursiveCloneTypeIIConstraint::constrain( std::vector &Sequences) { + RecursiveCloneTypeIIHashConstraint Hash; + Hash.constrain(Sequences); + RecursiveCloneTypeIIVerifyConstraint Verify; + Verify.constrain(Sequences); +} + +void RecursiveCloneTypeIIHashConstraint::constrain( + std::vector &Sequences) { // FIXME: Maybe we can do this in-place and don't need this additional vector. std::vector Result; @@ -513,8 +529,7 @@ for (; i < StmtsByHash.size(); ++i) { // A different hash value means we have reached the end of the sequence. - if (PrototypeHash != StmtsByHash[i].first || - !areSequencesClones(StmtsByHash[i].second, Current.second)) { + if (PrototypeHash != StmtsByHash[i].first) { // The current sequence could be the start of a new CloneGroup. So we // decrement i so that we visit it again in the outer loop. // Note: i can never be 0 at this point because we are just comparing @@ -537,6 +552,14 @@ Sequences = Result; } +void RecursiveCloneTypeIIVerifyConstraint::constrain( + std::vector &Sequences) { + CloneConstraint::splitCloneGroups( + Sequences, [](const StmtSequence &A, const StmtSequence &B) { + return areSequencesClones(A, B); + }); +} + size_t MinComplexityConstraint::calculateStmtComplexity( const StmtSequence &Seq, const std::string &ParentMacroStack) { if (Seq.empty()) Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/CloneChecker.cpp +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -78,9 +78,10 @@ // because reportSuspiciousClones() wants to search them for errors. std::vector AllCloneGroups; - Detector.findClones(AllCloneGroups, RecursiveCloneTypeIIConstraint(), - MinComplexityConstraint(MinComplexity), - MinGroupSizeConstraint(2), OnlyLargestCloneConstraint()); + Detector.findClones( + AllCloneGroups, RecursiveCloneTypeIIHashConstraint(), + MinGroupSizeConstraint(2), MinComplexityConstraint(MinComplexity), + RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); if (ReportSuspiciousClones) reportSuspiciousClones(BR, Mgr, AllCloneGroups);