Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -78,6 +78,157 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { +/// \brief Collects the data of a single Stmt. +/// +/// The data of a statement that isn't collected by this class is considered to +/// be unimportant when comparing two statements. This means that this class +/// defines what a 'similar' clone is. For example, this class doesn't collect +/// names of variables, which makes statements that only differ in the names of +/// the referenced variables clones of each other. +class StmtDataCollector { + ASTContext &Context; + llvm::FoldingSetNodeID &D; + +public: + + /// \brief Collects data from the given Stmt and saves it into the given + /// FoldingSetNodeID. + StmtDataCollector(Stmt const *S, ASTContext &Context, + llvm::FoldingSetNodeID &D) : Context(Context), D(D) { + collectData(S); + } + +private: + // Below are utility methods for adding generic data to the FoldingSetNodeID. + + void addData(unsigned Data) { + D.AddInteger(Data); + } + + void addData(llvm::StringRef const &String) { + D.AddString(String); + } + + void addData(std::string const &String) { + D.AddString(String); + } + + void addData(QualType QT) { + addData(QT.getAsString()); + } + + // Utility macro for collecting and adding statement specific data. + #define DEF_ADD_DATA(CLASS, CODE) \ + if (auto S = dyn_cast(GivenStmt)) { \ + CODE; \ + } + + /// \brief Collects all node-specific data from the given Stmt. + void collectData(Stmt const *GivenStmt) { + addData(GivenStmt->getStmtClass()); + + DEF_ADD_DATA(Expr, { addData(S->getType()); }) + + //--- Builtin functionality ----------------------------------------------// + DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(AtomicExpr, { + addData(S->isVolatile()); + addData(S->getOp()); + }) + DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) + DEF_ADD_DATA(TypeTraitExpr, { addData(S->getTrait()); }) + + //--- Calls --------------------------------------------------------------// + DEF_ADD_DATA(CXXOperatorCallExpr, { addData(S->getOperator()); }) + DEF_ADD_DATA(CallExpr, { + addData(S->getDirectCallee()->getQualifiedNameAsString()); + }) + + //--- Exceptions ---------------------------------------------------------// + DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + + //--- C++ OOP Stmts ------------------------------------------------------// + DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); + }) + + //--- Casts --------------------------------------------------------------// + DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + + //--- Miscellaneous Exprs ------------------------------------------------// + DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) + DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + + //--- Control flow -------------------------------------------------------// + DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) + DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); + }) + DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) + DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) + DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + + //--- Objective-C --------------------------------------------------------// + DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) + DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); + }) + DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + + //--- Miscellaneous Stmts ------------------------------------------------// + DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); + }) + DEF_ADD_DATA(GenericSelectionExpr, { addData(S->getNumAssocs()); }) + DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); + addData(S->getCallOperator()->param_size()); + }) + DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (Decl *D : S->decls()) { + if (VarDecl* VD = dyn_cast(D)) { + addData(VD->getType()); + } + } + }) + DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } + }) + DEF_ADD_DATA(AttributedStmt, { + for (Attr const * A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } + }) + } +}; +} // end anonymous namespace + +namespace { /// \brief A cache for temporarily storing a small amount of CloneSignatures. /// /// This cache should be manually cleaned from CloneSignatures that are @@ -110,6 +261,8 @@ /// \param Context The ASTContext that contains Parent. void removeChildren(Stmt const *Parent, ASTContext &Context) { for (Stmt const *Child : Parent->children()) { + if (!Child) + continue; StmtSequence ChildSequence(Child, Context); remove(ChildSequence); } @@ -126,7 +279,13 @@ return *I; } } - llvm_unreachable("Couldn't find CloneSignature for StmtSequence"); + // FIXME: We return an empty signature on a cache miss. This isn't a perfect + // solution, but it's better than crashing when RecursiveASTVisitor is + // skipping some generated statements when generating signatures (which + // leads to this situation where signatures are missing in the cache). + // Unless RecursiveASTVisitor starts skipping important nodes, this won't + // influence the false-positive rate. + return CloneDetector::CloneSignature(); } }; } // end anonymous namespace @@ -249,10 +408,7 @@ llvm::FoldingSetNodeID Hash; unsigned Complexity = 1; - // The only relevant data for now is the class of the statement, so we - // calculate the hash class into the current hash value. - // TODO: Handle statement class specific data. - Hash.AddInteger(S->getStmtClass()); + StmtDataCollector(S, Context, Hash); // Incorporate the hash values of all child Stmts into the current hash value. HandleChildHashes(Hash, Complexity, S->children()); Index: test/Analysis/copypaste/test-min-max.cpp =================================================================== --- test/Analysis/copypaste/test-min-max.cpp +++ test/Analysis/copypaste/test-min-max.cpp @@ -18,14 +18,6 @@ // False positives below. These clones should not be reported. -// FIXME: Detect different binary operator kinds. -int min1(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (a < b) - return a; - return b; -} - // FIXME: Detect different variable patterns. int min2(int a, int b) { // expected-note{{Related code clone is here.}} log(); @@ -36,6 +28,13 @@ // Functions below are not clones and should not be reported. +int min1(int a, int b) { // no-warning + log(); + if (a < b) + return a; + return b; +} + int foo(int a, int b) { // no-warning return a + b; }