Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" using namespace clang; @@ -78,6 +79,157 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { +/// \brief Collects the data of a single Stmt. +/// +/// The data of a statement that isn't collected by this class is considered to +/// be unimportant when comparing two statements. This means that this class +/// defines what a 'similar' clone is. For example, this class doesn't collect +/// names of variables, which makes statements that only differ in the names of +/// the referenced variables clones of each other. +class StmtDataCollector : public ConstStmtVisitor { + + ASTContext &Context; + llvm::FoldingSetNodeID &D; + +public: + + /// \brief Collects data from the given Stmt and saves it into the given + /// FoldingSetNodeID. + StmtDataCollector(Stmt const *S, ASTContext &Context, + llvm::FoldingSetNodeID &D) : Context(Context), D(D) { + Visit(S); + } + + // Below are utility methods for adding generic data to the FoldingSetNodeID. + + void addData(unsigned Data) { + D.AddInteger(Data); + } + + void addData(llvm::StringRef const &String) { + D.AddString(String); + } + + void addData(std::string const &String) { + D.AddString(String); + } + + void addData(QualType QT) { + addData(QT.getAsString()); + } + + // The functions below collect the class specific data of each Stmt subclass. + + // Utility macro for collecting statement specific data. + #define DEF_ADD_DATA(CLASS, CODE) \ + void Visit##CLASS(CLASS const *S) { \ + CODE; \ + } + + DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); }) + DEF_ADD_DATA(Expr, { addData(S->getType()); }) + + //--- Builtin functionality ----------------------------------------------// + DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) + DEF_ADD_DATA(TypeTraitExpr, { + addData(S->getTrait()); + for (unsigned i = 0; i < S->getNumArgs(); ++i) + addData(S->getArg(i)->getType()); + }) + + //--- Calls --------------------------------------------------------------// + DEF_ADD_DATA(CallExpr, { + addData(S->getDirectCallee()->getQualifiedNameAsString()); + }) + + //--- Exceptions ---------------------------------------------------------// + DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + + //--- C++ OOP Stmts ------------------------------------------------------// + DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); + }) + + //--- Casts --------------------------------------------------------------// + DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + + //--- Miscellaneous Exprs ------------------------------------------------// + DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) + DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + + //--- Control flow -------------------------------------------------------// + DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) + DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); + }) + DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) + DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) + DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + + //--- Objective-C --------------------------------------------------------// + DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) + DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); + }) + DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + + //--- Miscellaneous Stmts ------------------------------------------------// + DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); + }) + DEF_ADD_DATA(GenericSelectionExpr, { + for (unsigned i = 0; i < S->getNumAssocs(); ++i) { + addData(S->getAssocType(i)); + } + }) + DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); + }) + DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (Decl *D : S->decls()) { + if (VarDecl* VD = dyn_cast(D)) { + addData(VD->getType()); + } + } + }) + DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } + }) + DEF_ADD_DATA(AttributedStmt, { + for (Attr const * A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } + }) +}; +} // end anonymous namespace + +namespace { /// \brief A cache for temporarily storing a small amount of CloneSignatures. /// /// This cache should be manually cleaned from CloneSignatures that are @@ -110,6 +262,8 @@ /// \param Context The ASTContext that contains Parent. void removeChildren(Stmt const *Parent, ASTContext &Context) { for (Stmt const *Child : Parent->children()) { + if (!Child) + continue; StmtSequence ChildSequence(Child, Context); remove(ChildSequence); } @@ -126,7 +280,13 @@ return *I; } } - llvm_unreachable("Couldn't find CloneSignature for StmtSequence"); + // FIXME: We return an empty signature on a cache miss. This isn't a perfect + // solution, but it's better than crashing when RecursiveASTVisitor is + // skipping some generated statements when generating signatures (which + // leads to this situation where signatures are missing in the cache). + // Unless RecursiveASTVisitor starts skipping important nodes, this won't + // influence the false-positive rate. + return CloneDetector::CloneSignature(); } }; } // end anonymous namespace @@ -249,10 +409,8 @@ llvm::FoldingSetNodeID Hash; unsigned Complexity = 1; - // The only relevant data for now is the class of the statement, so we - // calculate the hash class into the current hash value. - // TODO: Handle statement class specific data. - Hash.AddInteger(S->getStmtClass()); + // Collect all relevant data from the current statement. + StmtDataCollector(S, Context, Hash); // Incorporate the hash values of all child Stmts into the current hash value. HandleChildHashes(Hash, Complexity, S->children()); Index: test/Analysis/copypaste/test-asm.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-asm.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that it adds two instead of one, so it's no clone. +int foo2(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $2, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that its a volatile asm statement, so it's no clone. +int foo3(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm volatile ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} Index: test/Analysis/copypaste/test-attributes.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-attributes.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + [[clang::fallthrough]]; + case 44: + result += 44; + } + return result; +} + +// Identical to foo1 except the missing attribute. +int foo2(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + ; + case 44: + result += 44; + } + return result; +} Index: test/Analysis/copypaste/test-call.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-call.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool a(); +bool b(); + +// Calls method a with some extra code to pass the minimum complexity +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return a(); + return true; +} + +// Calls method b with some extra code to pass the minimum complexity +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return b(); + return true; +} Index: test/Analysis/copypaste/test-catch.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-catch.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (int i) {} + return true; +} + +// Uses parenthesis instead of type +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (...) {} + return true; +} + +// Catches a different type (long instead of int) +bool foo3(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (long i) {} + return true; +} Index: test/Analysis/copypaste/test-delete.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-delete.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete a; + return true; +} + +// Explicit global delete +bool foo2(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + ::delete a; + return true; +} + +// Array delete +bool foo3(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete[] a; + return true; +} Index: test/Analysis/copypaste/test-dependent-exist.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-dependent-exist.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -analyze -fms-extensions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) { + __if_exists(x) { return false; } + } + return true; +} + +// Same as above, but __if_not_exists +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) { + __if_not_exists(x) { return false; } + } + return true; +} Index: test/Analysis/copypaste/test-expr-types.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-expr-types.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +int foo1(int a, int b) { + if (a > b) + return a; + return b; +} + +// Different types, so not a clone +int foo2(long a, long b) { + if (a > b) + return a; + return b; +} Index: test/Analysis/copypaste/test-fold.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-fold.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global = 0; + +template +int foo1(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args + ...); + return 1; +} + +// Different opeator in fold expression. +template +int foo2(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args - ...); + return 1; +} + +// Parameter pack on a different side +template +int foo3(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return -1; + return (... + args); +return 1; +} Index: test/Analysis/copypaste/test-generic.c =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-generic.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -analyze -std=c11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global; + +int foo1() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, float: 2, default: 3); + return 1; +} + +// Different associated type (int instead of float) +int foo2() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, int: 2, default: 4); + return 1; +} + +// Different number of associated types. +int foo3() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, default: 4); + return 1; +} Index: test/Analysis/copypaste/test-labels.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-labels.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -analyze -std=gnu++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +bool foo1(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto start; + } + end: + return false; +} + +// Targeting a different label with the address-of-label operator. +bool foo2(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&end; + goto start; + } + end: + return false; +} + +// Different target label in goto +bool foo3(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto end; + } + end: + return false; +} + +// FIXME: Can't detect same algorithm as in foo1 but with different label names. +bool foo4(int x) { + foo: + if (x != 3) { + ++x; + void *ptr = &&foo; + goto foo; + } + end: + return false; +} Index: test/Analysis/copypaste/test-lambda.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-lambda.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +void foo1(int a, long b) { + auto l = [a, b](){}; +} + +void foo2(int a, long b) { + auto l = [&a, b](){}; +} + +void foo3(int a, long b) { + auto l = [a](){}; +} + +void foo4(int a, long b) { + auto l = [=](){}; +} + +void foo5(int a, long b) { + auto l = [&](){}; +} + Index: test/Analysis/copypaste/test-min-max.cpp =================================================================== --- test/Analysis/copypaste/test-min-max.cpp +++ test/Analysis/copypaste/test-min-max.cpp @@ -18,14 +18,6 @@ // False positives below. These clones should not be reported. -// FIXME: Detect different binary operator kinds. -int min1(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (a < b) - return a; - return b; -} - // FIXME: Detect different variable patterns. int min2(int a, int b) { // expected-note{{Related code clone is here.}} log(); @@ -36,6 +28,13 @@ // Functions below are not clones and should not be reported. +int min1(int a, int b) { // no-warning + log(); + if (a < b) + return a; + return b; +} + int foo(int a, int b) { // no-warning return a + b; } Index: test/Analysis/copypaste/test-operators.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-operators.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return +x; + return true; +} + +// Identical to foo1 except the different unary operator kind. +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return -x; + return true; +} + +bool foo3(int x) { + if (x > 0) + return false; + else if (x < 0) + return x - x; + return true; +} + +// Identical to foo3 except the different binary operator kind. +bool foo4(int x) { + if (x > 0) + return false; + else if (x < 0) + return x + x; + return true; +} Index: test/Analysis/copypaste/test-predefined.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-predefined.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return sizeof(__PRETTY_FUNCTION__); + return true; +} + +// Identical to foo1 except the different predefined __func__ instead of __PRETTY_FUNCTION__ +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return sizeof(__func__); + return true; +} Index: test/Analysis/copypaste/test-trait.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/test-trait.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_trivially_constructible(long, int*, int*); + return true; +} + +// Identical to foo1 except that long was replaced by int in the TypeTraitExpr. +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_trivially_constructible(int, int*, int*); + return true; +} + +// Identical to foo1 except the different number of types in the TypeTraitExpr. +bool foo3(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_trivially_constructible(long, int*); + return true; +} + +bool foo4(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_enum(long); + return true; +} + +// Identical to foo4 except the different TypeTraitExpr kind. +bool foo5(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_pod(long); + return true; +} + +bool foo6(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_lvalue_expr(x); + return true; +} + +// Identical to foo6 except the different ExpressionTraitExpr kind. +bool foo7(int x) { + if (x > 0) + return false; + else if (x < 0) + return __is_rvalue_expr(x); + return true; +}