Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -542,6 +542,108 @@ } } // end anonymous namespace +namespace { +/// \brief Collects the data from all statements and their children. +/// +/// The data is collected by StmtDataCollector, which means that the collected +/// data is equal for StmtSequences that are considered equal by the +/// StmtDataCollector class. +class SequenceDataCollector + : public RecursiveASTVisitor { + + ASTContext &Context; + llvm::FoldingSetNodeID &D; + +public: + SequenceDataCollector(const StmtSequence& Sequence, ASTContext &Context, + llvm::FoldingSetNodeID &D) + : Context(Context), D(D) { + // We want that sub-sequences inside CompoundStmts have the same data + // as a normal CompoundStmt containing the same children. For this we have + // to manually add the data of the surrounding CompoundStmt to the result. + if (Sequence.holdsSequence()) + D.AddInteger(CompoundStmt::StmtClass::CompoundStmtClass); + + for (Stmt const * S : Sequence) { + // We cast away the const because RecursiveASTVisitor doesn't support + // traversing non-const statements. As we don't modify the statement, + // we shouldn't run in any trouble with this. + TraverseStmt(const_cast(S)); + } + } + + bool VisitStmt(const Stmt *S) { + // Append all the data in S to our data set. + StmtDataCollector(S, Context, D); + return true; + } +}; +} + +/// \brief Returns true if both sequences are clones of each other. +static bool areSequencesEqual(const StmtSequence &LHS, + const StmtSequence &RHS) { + // We collect the data from all statements in the sequence as we did before + // when generating a hash value for each sequence. But this time we don't + // hash the collected data and compare the whole data set instead. This + // prevents any false-positives due to hash code collisions. + llvm::FoldingSetNodeID DataLHS, DataRHS; + + SequenceDataCollector(LHS, LHS.getASTContext(), DataLHS); + SequenceDataCollector(RHS, RHS.getASTContext(), DataRHS); + + return DataLHS == DataRHS; +} + +/// \brief Finds all actual CloneGroups in a given CloneGroup. +/// \param Result Output parameter to which found CloneGroups are added. +/// \param HashGroup A CloneGroup that should be searched. It is recommended to +/// keep this group as small as possible. +/// +/// This function is intended to be a false-positive filter behind a fast +/// heuristic for finding CloneGroups. +static void findCloneGroups(std::vector &Result, + const CloneDetector::CloneGroup &HashGroup) { + // Contains all indexes in HashGroup that were already added to a CloneGroup. + std::vector Indexes; + Indexes.reserve(HashGroup.size()); + + for (unsigned i = 0; i < HashGroup.size(); ++i) { + // Skip indexes that are already part of a CloneGroup. + if (std::find(Indexes.begin(), Indexes.end(), i) != Indexes.end()) + continue; + + // Pick the first unhandled StmtSequence and consider it as the beginning + // of a new CloneGroup for now. + // We don't add i to Indexes because we never iterate back. + StmtSequence PrototypeStmt = HashGroup[i]; + CloneDetector::CloneGroup PotentialGroup = {PrototypeStmt}; + + // Check all following StmtSequences for clones. + for (unsigned j = i + 1; j < HashGroup.size(); ++j) { + // Skip indexes that are already part of a CloneGroup. + if (std::find(Indexes.begin(), Indexes.end(), j) + != Indexes.end()) + continue; + + // If a following StmtSequence belongs to our CloneGroup, we add it to it. + const StmtSequence &Candidate = HashGroup[j]; + if (!areSequencesEqual(Candidate, PrototypeStmt)) + continue; + PotentialGroup.push_back(Candidate); + // Make sure we never visit this StmtSequence again. + Indexes.push_back(j); + } + + // CloneGroups with only one member don't make any sense, so we don't at + // them to the result. + if (PotentialGroup.size() == 1) + continue; + // Otherwise, add it to the result and continue searching for more groups. + Result.push_back(PotentialGroup); + } +} + void CloneDetector::findClones(std::vector &Result, unsigned MinGroupComplexity) { // For creating hash groups, we need to find CloneSignatures with the same @@ -594,7 +696,7 @@ if (Group.size() <= 1) continue; - Result.push_back(Group); + findCloneGroups(Result, Group); } std::vector IndexesToRemove;