Index: include/clang/Analysis/CloneDetection.h =================================================================== --- /dev/null +++ include/clang/Analysis/CloneDetection.h @@ -0,0 +1,216 @@ +//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// /file +/// This file defines classes for searching and anlyzing source code clones. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_CLONEDETECTION_H +#define LLVM_CLANG_AST_CLONEDETECTION_H + +#include "clang/Basic/SourceLocation.h" + +#include + +namespace clang { + +class Stmt; +class ASTContext; +class CompoundStmt; +class TranslationUnitDecl; + +/// \brief Identifies a list of statements. +/// +/// Can either identify a single arbitrary Stmt object, a continuous sequence of +/// child statements inside a CompoundStmt or no statements at all. +class StmtSequence { + /// If this object identifies a sequence of statements inside a CompoundStmt, + /// S points to this CompoundStmt. If this object only identifies a single + /// Stmt, then S is a pointer to this Stmt. + Stmt const *S; + + /// The related ASTContext for S. + ASTContext *Context; + + /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence + /// instance is representing the CompoundStmt children inside the array + /// [StartIndex, EndIndex). + unsigned StartIndex; + unsigned EndIndex; + +public: + /// \brief Constructs a StmtSequence holding multiple statements. + /// + /// The resulting StmtSequence identifies a continuous sequence of statements + /// in the body of the given CompoundStmt. Which statements of the body should + /// be identified needs to be specified by providing a start and end index + /// that describe a non-empty sub-array in the body of the given CompoundStmt. + /// + /// \param Stmt A CompoundStmt that contains all statements in its body. + /// \param Context The ASTContext for the given CompoundStmt. + /// \param StartIndex The inclusive start index in the children array of + /// \p Stmt + /// \param EndIndex The exclusive end index in the children array of \p Stmt. + StmtSequence(CompoundStmt const *Stmt, ASTContext &Context, + unsigned StartIndex, unsigned EndIndex); + + /// \brief Constructs a StmtSequence holding a single statement. + /// + /// \param Stmt An arbitrary Stmt. + /// \param Context The ASTContext for the given Stmt. + StmtSequence(Stmt const *Stmt, ASTContext &Context); + + /// \brief Constructs an empty StmtSequence. + StmtSequence(); + + typedef Stmt const *const *iterator; + + /// Returns an iterator pointing to the first statement in this sequence. + iterator begin() const; + + /// Returns an iterator pointing behind the last statement in this sequence. + iterator end() const; + + /// Returns the first statement in this sequence. + /// + /// This method should only be called on a non-empty StmtSequence object. + Stmt const *front() const { + assert(!empty()); + return begin()[0]; + } + + /// Returns the last statement in this sequence. + /// + /// This method should only be called on a non-empty StmtSequence object. + Stmt const *back() const { + assert(!empty()); + return begin()[size() - 1]; + } + + /// Returns the number of statements this object holds. + unsigned size() const { + if (holdsSequence()) + return EndIndex - StartIndex; + if (S == nullptr) + return 0; + return 1; + } + + /// Returns true if and only if this StmtSequence contains no statements. + bool empty() const { return size() == 0; } + + /// Returns the related ASTContext for the stored Stmts. + ASTContext &getASTContext() const { + assert(Context); + return *Context; + } + + /// Returns true if this objects holds a list of statements. + bool holdsSequence() const { return EndIndex != 0; } + + /// Returns the start sourcelocation of the first statement in this sequence. + /// + /// This method should only be called on a non-empty StmtSequence object. + SourceLocation getStartLoc() const; + + /// Returns the end sourcelocation of the last statement in this sequence. + /// + /// This method should only be called on a non-empty StmtSequence object. + SourceLocation getEndLoc() const; + + bool operator==(const StmtSequence &Other) const { + return std::tie(S, StartIndex, EndIndex) == + std::tie(Other.S, Other.StartIndex, Other.EndIndex); + } + + bool operator!=(const StmtSequence &Other) const { + return std::tie(S, StartIndex, EndIndex) != + std::tie(Other.S, Other.StartIndex, Other.EndIndex); + } + + /// Returns true if and only if this sequence covers a source range that + /// contains the source range of the given sequence \p Other. + /// + /// This method should only be called on a non-empty StmtSequence object + /// and passed a non-empty StmtSequence object. + bool contains(const StmtSequence &Other) const; +}; + +/// \brief Searches for clones in source code. +/// +/// First, this class needs a translation unit which is passed via +/// \p analyzeTranslationUnit . It will then generate and store search data +/// for all statements inside the given translation unit. +/// Afterwards the generated data can be used to find code clones by calling +/// \p findClones . +/// +/// The generates search data is needed for efficiently finding +/// clones. This is done by hashing all statements using a locality-sensitive +/// hash function that generates identical hash values for similar statement +/// trees. +/// +/// This class only searches for clones in exectuable source code +/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations) +/// are not supported. +class CloneDetector { +public: + /// Holds the data about a StmtSequence that is needed during the search for + /// code clones. + struct CloneSignature { + /// The StmtSequence that is described by this CloneSignature. + StmtSequence Sequence; + + /// The generated hash value for the StmtSequence. Two CloneSignature + /// objects with the same hash value will probably contain two StmtSequence + /// objects that are clones of each other. However, false positives are + /// possible here due to unintended hash function collisions. + /// + /// Two CloneSignature objects with a different hash value will never + /// contain two StmtSequence objects that are clones of each other. + unsigned Hash; + + /// \brief The complexity of the StmtSequence. + /// + /// This scalar value serves as a simple way of filtering clones that are + /// too small to be reported. A greater value indicates that the related + /// StmtSequence is probably more interesting to the user. + unsigned Complexity; + CloneSignature() : Hash(0), Complexity(0) {} + + CloneSignature(const StmtSequence &S, unsigned Hash, unsigned Complexity) + : Sequence(S), Hash(Hash), Complexity(Complexity) {} + }; + + /// \brief Generates and stores search data for all statements in the given + /// translation unit. + void analyzeTranslationUnit(TranslationUnitDecl *TU); + + /// \brief Stores the CloneSignature to allow future querying. + void add(const CloneSignature &Data); + + typedef llvm::SmallVector CloneGroup; + + /// \brief Searches the provided statements for clones. + /// + /// \param Result Output parameter that is filled with a list of found + /// clone groups. Each group contains multiple StmtSequences + /// that were identified to be clones of each other. + /// \param MinGroupComplexity Only return clones which have at least this + /// complexity value. + void findClones(std::vector &Result, unsigned MinGroupComplexity); + +private: + /// Stores all provided statements and their associated search data. + std::vector Signatures; +}; + +} // end namespace clang + +#endif // LLVM_CLANG_AST_CLONEDETECTION_H Index: include/clang/StaticAnalyzer/Checkers/Checkers.td =================================================================== --- include/clang/StaticAnalyzer/Checkers/Checkers.td +++ include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -77,6 +77,8 @@ def LLVM : Package<"llvm">; def Debug : Package<"debug">; +def CloneDetectionAlpha : Package<"clone">, InPackage, Hidden; + //===----------------------------------------------------------------------===// // Core Checkers. //===----------------------------------------------------------------------===// @@ -657,3 +659,17 @@ DescFile<"DebugCheckers.cpp">; } // end "debug" + + +//===----------------------------------------------------------------------===// +// Clone Detection +//===----------------------------------------------------------------------===// + +let ParentPackage = CloneDetectionAlpha in { + +def CloneChecker : Checker<"CloneChecker">, + HelpText<"Reports similar pieces of code.">, + DescFile<"CloneChecker.cpp">; + +} // end "clone" + Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -9,6 +9,7 @@ CFGReachabilityAnalysis.cpp CFGStmtMap.cpp CallGraph.cpp + CloneDetection.cpp CocoaConventions.cpp Consumed.cpp CodeInjector.cpp Index: lib/Analysis/CloneDetection.cpp =================================================================== --- /dev/null +++ lib/Analysis/CloneDetection.cpp @@ -0,0 +1,433 @@ +//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This file implements classes for searching and anlyzing source code clones. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/CloneDetection.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" + +using namespace clang; + +StmtSequence::StmtSequence(CompoundStmt const *Stmt, ASTContext &Context, + unsigned StartIndex, unsigned EndIndex) + : S(Stmt), Context(&Context), StartIndex(StartIndex), EndIndex(EndIndex) { + assert(Stmt && "Stmt must not be a nullptr"); + assert(StartIndex < EndIndex && "Given array should not be empty"); + assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt"); +} + +StmtSequence::StmtSequence(Stmt const *Stmt, ASTContext &Context) + : S(Stmt), Context(&Context), StartIndex(0), EndIndex(0) {} + +StmtSequence::StmtSequence() + : S(nullptr), Context(nullptr), StartIndex(0), EndIndex(0) {} + +bool StmtSequence::contains(const StmtSequence &Other) const { + // If both sequences reside in different translation units, they can never + // contain each other. + if (Context != Other.Context) + return false; + + const SourceManager &SM = Context->getSourceManager(); + + // Otherwise check if the start and end locations of the current sequence + // surround the other sequence. + bool StartIsInBounds = + SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) || + getStartLoc() == Other.getStartLoc(); + if (!StartIsInBounds) + return false; + + bool EndIsInBounds = + SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) || + Other.getEndLoc() == getEndLoc(); + return EndIsInBounds; +} + +StmtSequence::iterator StmtSequence::begin() const { + if (!holdsSequence()) { + return &S; + } + auto CS = cast(S); + return CS->body_begin() + StartIndex; +} + +StmtSequence::iterator StmtSequence::end() const { + if (!holdsSequence()) { + return &S + 1; + } + auto CS = cast(S); + return CS->body_begin() + EndIndex; +} + +SourceLocation StmtSequence::getStartLoc() const { + return front()->getLocStart(); +} + +SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } + +namespace { +/// \brief A cache for temporarily storing a small amount of CloneSignatures. +/// +/// This cache should be manually cleaned from CloneSignatures that are +/// no longer needed by calling \p remove or \p removeChildren . Otherwise the +/// cache has to allocate memory to store the increased amount of signatures. +/// However, the lookup time for the most recently added CloneSignatures won't +/// deteriorate even if the cache is not properly cleaned. +class CloneSignatureCache { + llvm::SmallVector Cache; + +public: + /// \brief Adds the Signature into the cache. + void add(CloneDetector::CloneSignature const &Signature) { + Cache.push_back(Signature); + } + + /// \brief Removes the CloneSignature that describes the given Sequence. + void remove(StmtSequence const &Sequence) { + Cache.erase( + std::remove_if( + Cache.begin(), Cache.end(), + [&Sequence](CloneDetector::CloneSignature const &Signature) { + return Signature.Sequence == Sequence; + }), + Cache.end()); + } + + /// \brief Removes the associated signature for each child of the given Stmt. + /// \param Parent The given Stmt. + /// \param Context The ASTContext that contains Parent. + void removeChildren(Stmt const *Parent, ASTContext &Context) { + for (Stmt const *Child : Parent->children()) { + StmtSequence ChildSequence(Child, Context); + remove(ChildSequence); + } + } + + /// \brief Retrieves the CloneSignature that describes the given Sequence. + CloneDetector::CloneSignature get(StmtSequence const &S) const { + // This cache promises good lookup time for recently added CloneSignatures + // even if not properly cleaned. As the most recently added CloneSignature + // is at the end, we start searching from back of the cache to the front to + // keep that promise. + for (auto I = Cache.rbegin(); I != Cache.rend(); ++I) { + if (I->Sequence == S) { + return *I; + } + } + llvm_unreachable("Couldn't find CloneSignature for StmtSequence"); + } +}; +} // end anonymous namespace + +namespace { +/// Traverses the AST and calculates hash values and complexity values for all +/// statements. The results are stored in a CloneDetector object. +/// +/// This calculation happens in linear time and each statement is only visited a +/// fixed amount of times during this process. The hash value for a statement is +/// first calculated from the data stored directly in the statement object. +/// Afterwards, the hash values of the children are calculated into the +/// computed hash value. +class HashVisitor : public RecursiveASTVisitor { + + /// \brief Defines a placeholder hash value for missing child statements. + /// + /// The value of this constant should be a integer that is unlikely to be + /// generated by normally hashing to prevent unintended hash value collisions. + static const unsigned NullChildHashValue = 313; + + CloneDetector &CD; + ASTContext &Context; + + /// \brief Stores the signatures of visited nodes with an unvisited parent. + /// + /// This cache is necessary as the signature of a node is partly computed + /// from the data of its children. It's the reponsibility of the child node to + /// insert its own signature into the cache and the parents responsibility to + /// remove the signatures of its children. + CloneSignatureCache SignatureCache; + +public: + explicit HashVisitor(CloneDetector &CD, ASTContext &Context) + : CD(CD), Context(Context) {} + + /// \brief Overwritten function that this visitor will traverse in postorder. + /// + /// We need to traverse postorder over the AST for our algorithm + /// to work as each parent expects that its children were already processed. + bool shouldTraversePostOrder() const { return true; } + + bool VisitStmt(Stmt *S); + + // Some statements have no parent that could remove their CloneSignatures from + // the cache. We manually clean such orphaned clones in the Visit* methods + // below. It's not a critical error if a certain declaration isn't handled + // here and it only slighly affects the memory consumption during traversal. + + bool VisitFunctionDecl(FunctionDecl *D) { + if (D->hasBody()) + SignatureCache.removeChildren(D->getBody(), Context); + return true; + } + bool VisitVarDecl(VarDecl *D) { + if (D->hasInit()) + SignatureCache.removeChildren(D->getInit(), Context); + return true; + } + bool VisitParmVarDecl(ParmVarDecl *D) { + if (D->hasDefaultArg()) + SignatureCache.removeChildren(D->getDefaultArg(), Context); + return true; + } + bool VisitCXXCtorInitializer(CXXCtorInitializer *D) { + SignatureCache.removeChildren(D->getInit(), Context); + return true; + } + bool VisitFieldDecl(FieldDecl *D) { + if (D->hasInClassInitializer()) + SignatureCache.removeChildren(D->getInClassInitializer(), Context); + return true; + } + +private: + void HandleChildHashes(llvm::FoldingSetNodeID &Hash, unsigned &Complexity, + Stmt::const_child_iterator Iterator, unsigned StartPos, + unsigned Length) { + auto StartIterator = Iterator; + for (unsigned i = 0; i < StartPos; ++i) + ++StartIterator; + auto EndIterator = StartIterator; + for (unsigned i = 0; i < Length; ++i) + ++EndIterator; + Stmt::const_child_range Children(StartIterator, EndIterator); + HandleChildHashes(Hash, Complexity, Children); + } + + /// Iterates over the specified Stmt array and computes the hash values of all + /// statements into the given hash value. Also, the complexity of all + /// statements is added to the given complexity value. + void HandleChildHashes(llvm::FoldingSetNodeID &Hash, unsigned &Complexity, + Stmt::const_child_range Children) { + // Iterate over each child in the sub-sequence. + for (Stmt const *Child : Children) { + if (Child == nullptr) { + ++Complexity; + // We use an placeholder hash value for null children. + Hash.AddInteger(NullChildHashValue); + } else { + auto Signature = SignatureCache.get(StmtSequence(Child, Context)); + Complexity += Signature.Complexity; + Hash.AddInteger(Signature.Hash); + } + } + } + + /// \brief Creates and saves the CloneSignatures for all sub-sequences in the + /// given + /// CompoundStmt. + /// + /// A sub-sequence is a continuous sequence of statements inside the child + /// array of the CompoundStmt. + void SaveAllSubSequences(CompoundStmt const *CS); +}; +} // end anonymous namespace + +bool HashVisitor::VisitStmt(Stmt *S) { + StmtSequence CurrentStmt(S, Context); + llvm::FoldingSetNodeID Hash; + unsigned Complexity = 1; + + // The only relevant data for now is the class of the statement, so we + // calculate the hash class into the current hash value. + // TODO: Handle statement class specific data. + Hash.AddInteger(S->getStmtClass()); + + // Incorporate the hash values of all child Stmts into the current hash value. + HandleChildHashes(Hash, Complexity, S->children()); + + CloneDetector::CloneSignature Signature(CurrentStmt, Hash.ComputeHash(), + Complexity); + // Add the signature to the cache that the parent can use it. + SignatureCache.add(Signature); + + // Save the signature for the current sequence in the CloneDetector object. + CD.add(Signature); + + // If the currently hashed statement is a CompoundStmt, we also need to handle + // the sub-sequences inside the CompoundStmt. + if (auto CS = dyn_cast(S)) + SaveAllSubSequences(CS); + + // We no longer need the signatures of the children, so we remove them + // from the cache. + SignatureCache.removeChildren(S, Context); + return true; +} + +void HashVisitor::SaveAllSubSequences(CompoundStmt const *CS) { + // The length of the sub-sequence. We don't need to hash sequences with the + // length 1 as they are already handled when we hashed the children. We also + // don't need to hash the sub-sequence that contains the whole body as this + // would be equal to the hash of the CompoundStmt itself. + for (unsigned Length = 2; Length < CS->size(); ++Length) { + // The start index in the body of the CompoundStmt. + // We increase the position and rehash until the end of the sub-sequence + // reaches the end of the CompoundStmt body. + for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) { + llvm::FoldingSetNodeID SubHash; + unsigned SubComplexity = 1; + StmtSequence Sequence(CS, Context, Pos, Pos + Length); + + // We consider a sub-sequence to be CompoundStmt that only contains + // the children in the sub-sequence. This is necessary for matching + // sub-sequences with full CompoundStmts. + SubHash.AddInteger(CS->getStmtClass()); + + HandleChildHashes(SubHash, SubComplexity, CS->child_begin(), Pos, Length); + + CloneDetector::CloneSignature SubData(Sequence, SubHash.ComputeHash(), + SubComplexity); + CD.add(SubData); + } + } +} + +void CloneDetector::analyzeTranslationUnit(TranslationUnitDecl *TU) { + HashVisitor visitor(*this, TU->getASTContext()); + visitor.TraverseDecl(TU); +} + +void CloneDetector::add(const CloneSignature &Data) { + Signatures.push_back(Data); +} + +namespace { +/// \brief Returns true if and only if \p Stmt contains at least one other +/// sequence in the \p Group. +bool StmtContainsAnyInGroup(StmtSequence &Stmt, + CloneDetector::CloneGroup &Group) { + for (StmtSequence &GroupStmt : Group) { + if (Stmt.contains(GroupStmt)) + return true; + } + return false; +} + +/// \brief Returns true if and only if all sequences in \p OtherGroup are +/// contained by a sequence in \p Group. +bool GroupContains(CloneDetector::CloneGroup &Group, + CloneDetector::CloneGroup &OtherGroup) { + // We have less sequences in the current group than we have in the other, + // so we will never fulfill the requirement for returning true. This is only + // possible because we know that a sequence in Group can contain at most + // one sequence in OtherGroup. + if (Group.size() < OtherGroup.size()) + return false; + + for (StmtSequence &Stmt : Group) { + if (!StmtContainsAnyInGroup(Stmt, OtherGroup)) + return false; + } + return true; +} +} // end anonymous namespace + +void CloneDetector::findClones(std::vector &Result, + unsigned MinGroupComplexity) { + // For creating hash groups, we need to find CloneSignatures with the same + // hash value, so we first sort all CloneSignatures by their hash value. Then + // we search for sequences of CloneSignatures with the same hash value. + std::sort(Signatures.begin(), Signatures.end(), + [](const CloneSignature &A, const CloneSignature &B) { + return A.Hash < B.Hash; + }); + + // Check for each CloneSignature if its successor has the same hash value. + // We don't check the last CloneSignature as it has no successor. + for (unsigned i = 0; i < Signatures.size() - 1; ++i) { + CloneSignature const &Current = Signatures[i]; + CloneSignature const &Next = Signatures[i + 1]; + + if (Current.Hash != Next.Hash) + continue; + + // It's likely that we just found an sequence of CloneSignatures that + // represent a CloneGroup, so we create a new group and start checking and + // adding the CloneSignatures in this sequence. + CloneGroup Group; + + for (; i < Signatures.size(); ++i) { + CloneSignature const &Signature = Signatures[i]; + + // A different hash value means we have reached the end of the sequence. + if (Current.Hash != Signature.Hash) { + // The current Signature could be the start of a new CloneGroup. So we + // decrement i so that we visit it again in the outer loop. + // Note: i can never be 0 at this point because we are just comparing + // the hash of the Current CloneSignature with itself in the 'if' above. + assert(i != 0); + --i; + break; + } + + // Skip CloneSignatures that won't pass the complexity requirement. + if (Signature.Complexity < MinGroupComplexity) + continue; + + Group.push_back(Signature.Sequence); + } + + // There is a chance that we haven't found more than two fitting + // CloneSignature because not enough CloneSignatures passed the complexity + // requirement. As a CloneGroup with less than two members makes no sense, + // we ignore this CloneGroup and won't add it to the result. + if (Group.size() <= 1) + continue; + + Result.push_back(Group); + } + + std::vector IndexesToRemove; + + // Compare every group in the result with the rest. If one groups contains + // another group, we only need to return the bigger group. + // Note: This doesn't scale well, so if possible avoid calling any heavy + // function from this loop to reduce the negative performance impact. + for (unsigned i = 0; i < Result.size(); ++i) { + for (unsigned j = 0; j < Result.size(); ++j) { + // Don't compare a group with itself. + if (i == j) + continue; + + if (GroupContains(Result[j], Result[i])) { + IndexesToRemove.push_back(i); + break; + } + } + } + + // We need to erase invalid results from the result vector with decreasing + // unique indexes. For this we sort all indexes in descending order and then + // remove any duplicates. + std::sort(IndexesToRemove.begin(), IndexesToRemove.end(), + std::greater()); + + auto UniqueEnd = std::unique(IndexesToRemove.begin(), IndexesToRemove.end()); + IndexesToRemove.erase(UniqueEnd, IndexesToRemove.end()); + + for (unsigned i : IndexesToRemove) { + Result.erase(Result.begin() + i); + } +} Index: lib/StaticAnalyzer/Checkers/CMakeLists.txt =================================================================== --- lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -22,6 +22,7 @@ CheckerDocumentation.cpp ChrootChecker.cpp ClangCheckers.cpp + CloneChecker.cpp DeadStoresChecker.cpp DebugCheckers.cpp DereferenceChecker.cpp Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp =================================================================== --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,80 @@ +//===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/Analysis/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + + int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger( + "MinimumCloneComplexity", 10, this); + + assert(MinComplexity >= 0); + + SourceManager &SM = BR.getSourceManager(); + + CloneDetector CloneDetector; + CloneDetector.analyzeTranslationUnit(const_cast(TU)); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups, MinComplexity); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { + std::sort(Group.begin(), Group.end(), [&SM](const StmtSequence &LHS, + const StmtSequence &RHS) { + return SM.isBeforeInTranslationUnit(LHS.getStartLoc(), + RHS.getStartLoc()) && + SM.isBeforeInTranslationUnit(LHS.getEndLoc(), RHS.getEndLoc()); + }); + DiagEngine.Report(Group.front().getStartLoc(), WarnID); + for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); + } + } +} + +//===----------------------------------------------------------------------===// +// Register CloneChecker +//===----------------------------------------------------------------------===// + +void ento::registerCloneChecker(CheckerManager &Mgr) { + Mgr.registerChecker(); +} Index: test/Analysis/copypaste/test-min-max.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) + return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) + return x; + return y; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) + return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) + return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/test-sub-sequences.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) + return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) + return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +}