Index: cfe/trunk/include/clang/Analysis/CloneDetection.h =================================================================== --- cfe/trunk/include/clang/Analysis/CloneDetection.h +++ cfe/trunk/include/clang/Analysis/CloneDetection.h @@ -157,6 +157,8 @@ /// are not supported. class CloneDetector { public: + typedef unsigned DataPiece; + /// Holds the data about a StmtSequence that is needed during the search for /// code clones. struct CloneSignature { @@ -164,7 +166,7 @@ /// /// If this variable is equal for two different StmtSequences, then they can /// be considered clones of each other. - std::vector Data; + std::vector Data; /// \brief The complexity of the StmtSequence. /// Index: cfe/trunk/lib/Analysis/CloneDetection.cpp =================================================================== --- cfe/trunk/lib/Analysis/CloneDetection.cpp +++ cfe/trunk/lib/Analysis/CloneDetection.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" #include "llvm/ADT/StringRef.h" using namespace clang; @@ -79,6 +80,168 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { +/// \brief Collects the data of a single Stmt. +/// +/// This class defines what a code clone is: If it collects for two statements +/// the same data, then those two statements are considered to be clones of each +/// other. +class StmtDataCollector : public ConstStmtVisitor { + + ASTContext &Context; + std::vector &CollectedData; + +public: + /// \brief Collects data of the given Stmt. + /// \param S The given statement. + /// \param Context The ASTContext of S. + /// \param D The given data vector to which all collected data is appended. + StmtDataCollector(const Stmt *S, ASTContext &Context, + std::vector &D) + : Context(Context), CollectedData(D) { + Visit(S); + } + + // Below are utility methods for appending different data to the vector. + + void addData(CloneDetector::DataPiece Integer) { + CollectedData.push_back(Integer); + } + + // FIXME: The functions below add long strings to the data vector which are + // probably not good for performance. Replace the strings with pointer values + // or a some other unique integer. + + void addData(llvm::StringRef Str) { + if (Str.empty()) + return; + + const size_t OldSize = CollectedData.size(); + + const size_t PieceSize = sizeof(CloneDetector::DataPiece); + // Calculate how many vector units we need to accomodate all string bytes. + size_t RoundedUpPieceNumber = (Str.size() + PieceSize - 1) / PieceSize; + // Allocate space for the string in the data vector. + CollectedData.resize(CollectedData.size() + RoundedUpPieceNumber); + + // Copy the string to the allocated space at the end of the vector. + std::memcpy(CollectedData.data() + OldSize, Str.data(), Str.size()); + } + + void addData(const QualType &QT) { addData(QT.getAsString()); } + +// The functions below collect the class specific data of each Stmt subclass. + +// Utility macro for defining a visit method for a given class. This method +// calls back to the ConstStmtVisitor to visit all parent classes. +#define DEF_ADD_DATA(CLASS, CODE) \ + void Visit##CLASS(const CLASS *S) { \ + CODE; \ + ConstStmtVisitor::Visit##CLASS(S); \ + } + + DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); }) + DEF_ADD_DATA(Expr, { addData(S->getType()); }) + + //--- Builtin functionality ----------------------------------------------// + DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) + DEF_ADD_DATA(TypeTraitExpr, { + addData(S->getTrait()); + for (unsigned i = 0; i < S->getNumArgs(); ++i) + addData(S->getArg(i)->getType()); + }) + + //--- Calls --------------------------------------------------------------// + DEF_ADD_DATA(CallExpr, + { addData(S->getDirectCallee()->getQualifiedNameAsString()); }) + + //--- Exceptions ---------------------------------------------------------// + DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + + //--- C++ OOP Stmts ------------------------------------------------------// + DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); + }) + + //--- Casts --------------------------------------------------------------// + DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + + //--- Miscellaneous Exprs ------------------------------------------------// + DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) + DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + + //--- Control flow -------------------------------------------------------// + DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) + DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); + }) + DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) + DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) + DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + + //--- Objective-C --------------------------------------------------------// + DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) + DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); + }) + DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + + //--- Miscellaneous Stmts ------------------------------------------------// + DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); + }) + DEF_ADD_DATA(GenericSelectionExpr, { + for (unsigned i = 0; i < S->getNumAssocs(); ++i) { + addData(S->getAssocType(i)); + } + }) + DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); + }) + DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (const Decl *D : S->decls()) { + if (const VarDecl *VD = dyn_cast(D)) { + addData(VD->getType()); + } + } + }) + DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } + }) + DEF_ADD_DATA(AttributedStmt, { + for (const Attr *A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } + }) +}; +} // end anonymous namespace + +namespace { /// Generates CloneSignatures for a set of statements and stores the results in /// a CloneDetector object. class CloneSignatureGenerator { @@ -95,9 +258,8 @@ // Create an empty signature that will be filled in this method. CloneDetector::CloneSignature Signature; - // The only relevant data for now is the class of the statement. - // TODO: Collect statement class specific data. - Signature.Data.push_back(S->getStmtClass()); + // Collect all relevant data from S and put it into the empty signature. + StmtDataCollector(S, Context, Signature.Data); // Storage for the signatures of the direct child statements. This is only // needed if the current statement is a CompoundStmt. Index: cfe/trunk/test/Analysis/copypaste/asm.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/asm.cpp +++ cfe/trunk/test/Analysis/copypaste/asm.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that it adds two instead of one, so it's no clone. +int foo2(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $2, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that its a volatile asm statement, so it's no clone. +int foo3(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm volatile ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} Index: cfe/trunk/test/Analysis/copypaste/attributes.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/attributes.cpp +++ cfe/trunk/test/Analysis/copypaste/attributes.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + [[clang::fallthrough]]; + case 44: + result += 44; + } + return result; +} + +// Identical to foo1 except the missing attribute. +int foo2(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + ; + case 44: + result += 44; + } + return result; +} Index: cfe/trunk/test/Analysis/copypaste/call.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/call.cpp +++ cfe/trunk/test/Analysis/copypaste/call.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool a(); +bool b(); + +// Calls method a with some extra code to pass the minimum complexity +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return a(); + return true; +} + +// Calls method b with some extra code to pass the minimum complexity +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return b(); + return true; +} Index: cfe/trunk/test/Analysis/copypaste/catch.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/catch.cpp +++ cfe/trunk/test/Analysis/copypaste/catch.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (int i) {} + return true; +} + +// Uses parenthesis instead of type +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (...) {} + return true; +} + +// Catches a different type (long instead of int) +bool foo3(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (long i) {} + return true; +} Index: cfe/trunk/test/Analysis/copypaste/delete.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/delete.cpp +++ cfe/trunk/test/Analysis/copypaste/delete.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete a; + return true; +} + +// Explicit global delete +bool foo2(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + ::delete a; + return true; +} + +// Array delete +bool foo3(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete[] a; + return true; +} Index: cfe/trunk/test/Analysis/copypaste/dependent-exist.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/dependent-exist.cpp +++ cfe/trunk/test/Analysis/copypaste/dependent-exist.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -analyze -fms-extensions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x < 0) { + __if_exists(x) { return false; } + } + return true; +} + +// Same as above, but __if_not_exists +bool foo2(int x) { + if (x < 0) { + __if_not_exists(x) { return false; } + } + return true; +} Index: cfe/trunk/test/Analysis/copypaste/expr-types.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/expr-types.cpp +++ cfe/trunk/test/Analysis/copypaste/expr-types.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +int foo1(int a, int b) { + if (a > b) + return a; + return b; +} + +// Different types, so not a clone +int foo2(long a, long b) { + if (a > b) + return a; + return b; +} Index: cfe/trunk/test/Analysis/copypaste/false-positives.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/false-positives.cpp +++ cfe/trunk/test/Analysis/copypaste/false-positives.cpp @@ -12,14 +12,6 @@ return b; } -// FIXME: Detect different binary operator kinds. -int min1(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (a < b) - return a; - return b; -} - // FIXME: Detect different variable patterns. int min2(int a, int b) { // expected-note{{Related code clone is here.}} log(); Index: cfe/trunk/test/Analysis/copypaste/fold.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/fold.cpp +++ cfe/trunk/test/Analysis/copypaste/fold.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global = 0; + +template +int foo1(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args + ...); + return 1; +} + +// Different opeator in fold expression. +template +int foo2(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args - ...); + return 1; +} + +// Parameter pack on a different side +template +int foo3(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return -1; + return (... + args); +return 1; +} Index: cfe/trunk/test/Analysis/copypaste/function-try-block.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/function-try-block.cpp +++ cfe/trunk/test/Analysis/copypaste/function-try-block.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// Tests if function try blocks are correctly handled. + +void nonCompoundStmt1(int& x) + try { x += 1; } catch(...) { x -= 1; } // expected-warning{{Detected code clone.}} + +void nonCompoundStmt2(int& x) + try { x += 1; } catch(...) { x -= 1; } // expected-note{{Related code clone is here.}} Index: cfe/trunk/test/Analysis/copypaste/functions.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/functions.cpp +++ cfe/trunk/test/Analysis/copypaste/functions.cpp @@ -20,6 +20,31 @@ // Functions below are not clones and should not be reported. +// The next two functions test that statement classes are still respected when +// checking for clones in expressions. This will show that the statement +// specific data of all base classes is collected, and not just the data of the +// first base class. +int testBaseClass(int a, int b) { // no-warning + log(); + if (a > b) + return true ? a : b; + return b; +} +int testBaseClass2(int a, int b) { // no-warning + log(); + if (a > b) + return __builtin_choose_expr(true, a, b); + return b; +} + + +int min1(int a, int b) { // no-warning + log(); + if (a < b) + return a; + return b; +} + int foo(int a, int b) { // no-warning return a + b; } Index: cfe/trunk/test/Analysis/copypaste/generic.c =================================================================== --- cfe/trunk/test/Analysis/copypaste/generic.c +++ cfe/trunk/test/Analysis/copypaste/generic.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -analyze -std=c11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global; + +int foo1() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, float: 2, default: 3); + return 1; +} + +// Different associated type (int instead of float) +int foo2() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, int: 2, default: 4); + return 1; +} + +// Different number of associated types. +int foo3() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, default: 4); + return 1; +} Index: cfe/trunk/test/Analysis/copypaste/labels.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/labels.cpp +++ cfe/trunk/test/Analysis/copypaste/labels.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -analyze -std=gnu++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +bool foo1(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto start; + } + end: + return false; +} + +// Targeting a different label with the address-of-label operator. +bool foo2(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&end; + goto start; + } + end: + return false; +} + +// Different target label in goto +bool foo3(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto end; + } + end: + return false; +} + +// FIXME: Can't detect same algorithm as in foo1 but with different label names. +bool foo4(int x) { + foo: + if (x != 3) { + ++x; + void *ptr = &&foo; + goto foo; + } + end: + return false; +} Index: cfe/trunk/test/Analysis/copypaste/lambda.cpp =================================================================== --- cfe/trunk/test/Analysis/copypaste/lambda.cpp +++ cfe/trunk/test/Analysis/copypaste/lambda.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +void foo1(int a, long b) { + auto l = [a, b](){}; +} + +void foo2(int a, long b) { + auto l = [&a, b](){}; +} + +void foo3(int a, long b) { + auto l = [a](){}; +} + +void foo4(int a, long b) { + auto l = [=](){}; +} + +void foo5(int a, long b) { + auto l = [&](){}; +} +