Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -78,6 +78,199 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { +/// \brief Defines empty addData methods for each Stmt class. +/// +/// StmtDataCollector inherits from this class so that each Stmt has an empty +/// fallback addData method. This prevents compilation failures from missing +/// methods if a new Stmt subclass is added to clang. Also, many subclasses +/// don't have any special data attached to them, so we don't need to manually +/// define empty addData methods in StmtDataCollector if they are already +/// defined here. +class StmtDataCollectorDefiner { +protected: + #define STMT(CLASS, PARENT) \ + void addData##CLASS(CLASS const *S) { \ + } + #include "clang/AST/StmtNodes.inc" +}; +} // end anonymous namespace + +namespace { +/// \brief Collects the data of a single Stmt. +/// +/// The data of a statement that isn't collected by this class is considered to +/// be unimportant when comparing two statements. This means that this class +/// defines what a 'similar' clone is. For example, this class doesn't collect +/// names of variables for example, which makes statements that only differ in +/// the names of the referenced variables clones of each other. +class StmtDataCollector : StmtDataCollectorDefiner { + ASTContext &Context; + llvm::FoldingSetNodeID &D; + +public: + + /// \brief Collects data from the given Stmt and saves it into the given + /// FoldingSetNodeID. + StmtDataCollector(Stmt const *S, ASTContext &Context, + llvm::FoldingSetNodeID &D) : Context(Context), D(D) { + collectData(S); + } + +protected: + // Below are utility methods for adding generic data to the FoldingSetNodeID. + + void addData(unsigned Data) { + D.AddInteger(Data); + } + + void addData(llvm::StringRef const &String) { + D.AddString(String); + } + + void addData(std::string const &String) { + D.AddString(String); + } + + void addData(QualType QT) { + addData(QT.getAsString()); + } + + // Utility functions for calling the addData method for each parent class + // of a given Stmt class. + #define STMT(CLASS, PARENT) \ + void callAddData##CLASS(CLASS const *S) { \ + callAddData##PARENT(S); \ + addData##CLASS(S); \ + } + #include "clang/AST/StmtNodes.inc" + + // Above code won't define callAddDataStmt, so we have to manually define it. + void callAddDataStmt(Stmt const *S) { + addDataStmt(S); + } + + /// \brief Calls the appropriate callAddData method for the actual class of + /// the given Stmt. + void collectData(Stmt const *S) { + switch(S->getStmtClass()) { + case Stmt::NoStmtClass: + break; + #define ABSTRACT_STMT(STMT) + #define STMT(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + callAddData##CLASS(cast(S)); break; + #include "clang/AST/StmtNodes.inc" + } + } + + // Below are functions that collect the specific data for each Stmt subclass. + #define DEF_ADD_DATA(CLASS, CODE) \ + void addData##CLASS(CLASS const *S) { \ + CODE; \ + } + + DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); }) + DEF_ADD_DATA(Expr, { addData(S->getType()); }) + + //--- Builtin functionality ------------------------------------------------// + DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(AtomicExpr, { + addData(S->isVolatile()); + addData(S->getOp()); + }) + DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) + DEF_ADD_DATA(TypeTraitExpr, { addData(S->getTrait()); }) + + //--- Calls ----------------------------------------------------------------// + DEF_ADD_DATA(CXXOperatorCallExpr, { addData(S->getOperator()); }) + DEF_ADD_DATA(CallExpr, { + addData(S->getDirectCallee()->getQualifiedNameAsString()); + }) + + //--- Exceptions -----------------------------------------------------------// + DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + + //--- C++ OOP Stmts --------------------------------------------------------// + DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); + }) + + //--- Casts ----------------------------------------------------------------// + DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + + //--- Miscellaneous Exprs --------------------------------------------------// + DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) + DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + + //--- Control flow ---------------------------------------------------------// + DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) + DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); + }) + DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) + DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) + DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + + //--- Objective-C ----------------------------------------------------------// + DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) + DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); + }) + DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + + //--- Miscellaneous Stmts --------------------------------------------------// + DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); + }) + DEF_ADD_DATA(GenericSelectionExpr, { addData(S->getNumAssocs()); }) + DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); + addData(S->getCallOperator()->param_size()); + }) + DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (Decl *D : S->decls()) { + if (VarDecl* VD = dyn_cast(D)) { + addData(VD->getType()); + } + } + }) + DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } + }) + DEF_ADD_DATA(AttributedStmt, { + for (Attr const * A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } + }) +}; +} // end anonymous namespace + +namespace { /// \brief A cache for temporarily storing a small amount of CloneSignatures. /// /// This cache should be manually cleaned from CloneSignatures that are @@ -110,6 +303,8 @@ /// \param Context The ASTContext that contains Parent. void removeChildren(Stmt const *Parent, ASTContext &Context) { for (Stmt const *Child : Parent->children()) { + if (!Child) + continue; StmtSequence ChildSequence(Child, Context); remove(ChildSequence); } @@ -126,7 +321,13 @@ return *I; } } - llvm_unreachable("Couldn't find CloneSignature for StmtSequence"); + // We return an empty signature on a cache miss. This isn't a perfect + // solution, but it's better than crashing when RecursiveASTVisitor is + // skipping some generated statements when generating signatures (which + // leads to this situation where signatures are missing in the cache). + // Unless RecursiveASTVisitor starts skipping important nodes, this won't + // influence the false-positive rate. + return CloneDetector::CloneSignature(); } }; } // end anonymous namespace @@ -249,10 +450,7 @@ llvm::FoldingSetNodeID Hash; unsigned Complexity = 1; - // The only relevant data for now is the class of the statement, so we - // calculate the hash class into the current hash value. - // TODO: Handle statement class specific data. - Hash.AddInteger(S->getStmtClass()); + StmtDataCollector(S, Context, Hash); // Incorporate the hash values of all child Stmts into the current hash value. HandleChildHashes(Hash, Complexity, S->children()); Index: test/Analysis/copypaste/test-min-max.cpp =================================================================== --- test/Analysis/copypaste/test-min-max.cpp +++ test/Analysis/copypaste/test-min-max.cpp @@ -18,14 +18,6 @@ // False positives below. These clones should not be reported. -// FIXME: Detect different binary operator kinds. -int min1(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (a < b) - return a; - return b; -} - // FIXME: Detect different variable patterns. int min2(int a, int b) { // expected-note{{Related code clone is here.}} log(); @@ -36,6 +28,13 @@ // Functions below are not clones and should not be reported. +int min1(int a, int b) { // no-warning + log(); + if (a < b) + return a; + return b; +} + int foo(int a, int b) { // no-warning return a + b; }