Index: cfe/trunk/include/clang/Analysis/CloneDetection.h =================================================================== --- cfe/trunk/include/clang/Analysis/CloneDetection.h +++ cfe/trunk/include/clang/Analysis/CloneDetection.h @@ -252,22 +252,25 @@ std::function Compare); }; -/// Searches all children of the given clones for type II clones (i.e. they are -/// identical in every aspect beside the used variable names). -class RecursiveCloneTypeIIConstraint { - - /// Generates and saves a hash code for the given Stmt. - /// \param S The given Stmt. - /// \param D The Decl containing S. - /// \param StmtsByHash Output parameter that will contain the hash codes for - /// each StmtSequence in the given Stmt. - /// \return The hash code of the given Stmt. - /// - /// If the given Stmt is a CompoundStmt, this method will also generate - /// hashes for all possible StmtSequences in the children of this Stmt. - size_t saveHash(const Stmt *S, const Decl *D, - std::vector> &StmtsByHash); +/// This constraint moves clones into clone groups of type II via hashing. +/// +/// Clones with different hash values are moved into separate clone groups. +/// Collisions are possible, and this constraint does nothing to address this +/// them. Add the slower RecursiveCloneTypeIIVerifyConstraint later in the +/// constraint chain, not necessarily immediately, to eliminate hash collisions +/// through a more detailed analysis. +class RecursiveCloneTypeIIHashConstraint { +public: + void constrain(std::vector &Sequences); +}; +/// This constraint moves clones into clone groups of type II by comparing them. +/// +/// Clones that aren't type II clones are moved into separate clone groups. +/// In contrast to the RecursiveCloneTypeIIHashConstraint, all clones in a clone +/// group are guaranteed to be be type II clones of each other, but it is too +/// slow to efficiently handle large amounts of clones. +class RecursiveCloneTypeIIVerifyConstraint { public: void constrain(std::vector &Sequences); }; Index: cfe/trunk/lib/Analysis/CloneDetection.cpp =================================================================== --- cfe/trunk/lib/Analysis/CloneDetection.cpp +++ cfe/trunk/lib/Analysis/CloneDetection.cpp @@ -238,9 +238,18 @@ return HashCode; } -size_t RecursiveCloneTypeIIConstraint::saveHash( - const Stmt *S, const Decl *D, - std::vector> &StmtsByHash) { +/// Generates and saves a hash code for the given Stmt. +/// \param S The given Stmt. +/// \param D The Decl containing S. +/// \param StmtsByHash Output parameter that will contain the hash codes for +/// each StmtSequence in the given Stmt. +/// \return The hash code of the given Stmt. +/// +/// If the given Stmt is a CompoundStmt, this method will also generate +/// hashes for all possible StmtSequences in the children of this Stmt. +static size_t +saveHash(const Stmt *S, const Decl *D, + std::vector> &StmtsByHash) { llvm::MD5 Hash; ASTContext &Context = D->getASTContext(); @@ -340,7 +349,7 @@ return DataLHS == DataRHS; } -void RecursiveCloneTypeIIConstraint::constrain( +void RecursiveCloneTypeIIHashConstraint::constrain( std::vector &Sequences) { // FIXME: Maybe we can do this in-place and don't need this additional vector. std::vector Result; @@ -381,8 +390,7 @@ for (; i < StmtsByHash.size(); ++i) { // A different hash value means we have reached the end of the sequence. - if (PrototypeHash != StmtsByHash[i].first || - !areSequencesClones(StmtsByHash[i].second, Current.second)) { + if (PrototypeHash != StmtsByHash[i].first) { // The current sequence could be the start of a new CloneGroup. So we // decrement i so that we visit it again in the outer loop. // Note: i can never be 0 at this point because we are just comparing @@ -405,6 +413,14 @@ Sequences = Result; } +void RecursiveCloneTypeIIVerifyConstraint::constrain( + std::vector &Sequences) { + CloneConstraint::splitCloneGroups( + Sequences, [](const StmtSequence &A, const StmtSequence &B) { + return areSequencesClones(A, B); + }); +} + size_t MinComplexityConstraint::calculateStmtComplexity( const StmtSequence &Seq, const std::string &ParentMacroStack) { if (Seq.empty()) Index: cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp =================================================================== --- cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp +++ cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -81,11 +81,11 @@ // because reportSuspiciousClones() wants to search them for errors. std::vector AllCloneGroups; - Detector.findClones(AllCloneGroups, - FilenamePatternConstraint(IgnoredFilesPattern), - RecursiveCloneTypeIIConstraint(), - MinComplexityConstraint(MinComplexity), - MinGroupSizeConstraint(2), OnlyLargestCloneConstraint()); + Detector.findClones( + AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern), + RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2), + MinComplexityConstraint(MinComplexity), + RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); if (ReportSuspiciousClones) reportSuspiciousClones(BR, Mgr, AllCloneGroups); Index: cfe/trunk/unittests/Analysis/CloneDetectionTest.cpp =================================================================== --- cfe/trunk/unittests/Analysis/CloneDetectionTest.cpp +++ cfe/trunk/unittests/Analysis/CloneDetectionTest.cpp @@ -69,8 +69,9 @@ // all statements from functions which names start with "bar". std::vector CloneGroups; Detector.findClones(CloneGroups, NoBarFunctionConstraint(), - RecursiveCloneTypeIIConstraint(), + RecursiveCloneTypeIIHashConstraint(), MinComplexityConstraint(2), MinGroupSizeConstraint(2), + RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); ASSERT_EQ(CloneGroups.size(), 1u); @@ -86,8 +87,9 @@ // Retry above's example without the filter... CloneGroups.clear(); - Detector.findClones(CloneGroups, RecursiveCloneTypeIIConstraint(), + Detector.findClones(CloneGroups, RecursiveCloneTypeIIHashConstraint(), MinComplexityConstraint(2), MinGroupSizeConstraint(2), + RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); ASSERT_EQ(CloneGroups.size(), 1u); ASSERT_EQ(CloneGroups.front().size(), 4u);