Index: cfe/trunk/include/clang/AST/DataCollection.h =================================================================== --- cfe/trunk/include/clang/AST/DataCollection.h +++ cfe/trunk/include/clang/AST/DataCollection.h @@ -0,0 +1,65 @@ +//===--- DataCollection.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// \brief This file declares helper methods for collecting data from AST nodes. +/// +/// To collect data from Stmt nodes, subclass ConstStmtVisitor and include +/// StmtDataCollectors.inc after defining the macros that you need. This +/// provides data collection implementations for most Stmt kinds. Note +/// that this code requires some conditions to be met: +/// +/// - There must be a method addData(const T &Data) that accepts strings, +/// integral types as well as QualType. All data is forwarded +/// to this method. +/// - The ASTContext of the Stmt must be accessible by the name Context. +/// +/// It is also possible to override individual visit methods. Have a look at +/// the DataCollector in lib/Analysis/CloneDetection.cpp for a usage example. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_DATACOLLECTION_H +#define LLVM_CLANG_AST_DATACOLLECTION_H + +#include "clang/AST/ASTContext.h" + +namespace clang { +namespace data_collection { + +/// Returns a string that represents all macro expansions that expanded into the +/// given SourceLocation. +/// +/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations +/// A and B are expanded from the same macros in the same order.
+std::string getMacroStack(SourceLocation Loc, ASTContext &Context); + +/// Utility functions for implementing addData() for a consumer that has a +/// method update(StringRef) +template +void addDataToConsumer(T &DataConsumer, llvm::StringRef Str) { + DataConsumer.update(Str); +} + +template void addDataToConsumer(T &DataConsumer, const QualType &QT) { + addDataToConsumer(DataConsumer, QT.getAsString()); +} + +template +typename std::enable_if< + std::is_integral::value || std::is_enum::value || + std::is_convertible::value // for llvm::hash_code + >::type +addDataToConsumer(T &DataConsumer, Type Data) { + DataConsumer.update(StringRef(reinterpret_cast(&Data), sizeof(Data))); +} + +} // end namespace data_collection +} // end namespace clang + +#endif // LLVM_CLANG_AST_DATACOLLECTION_H Index: cfe/trunk/include/clang/Analysis/CloneDetection.h =================================================================== --- cfe/trunk/include/clang/Analysis/CloneDetection.h +++ cfe/trunk/include/clang/Analysis/CloneDetection.h @@ -15,11 +15,7 @@ #ifndef LLVM_CLANG_AST_CLONEDETECTION_H #define LLVM_CLANG_AST_CLONEDETECTION_H -#include "clang/AST/DeclTemplate.h" #include "clang/AST/StmtVisitor.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/Regex.h" #include @@ -31,192 +27,6 @@ class ASTContext; class CompoundStmt; -namespace clone_detection { - -/// Returns a string that represents all macro expansions that expanded into the -/// given SourceLocation. -/// -/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations -/// A and B are expanded from the same macros in the same order. -std::string getMacroStack(SourceLocation Loc, ASTContext &Context); - -/// Collects the data of a single Stmt. -/// -/// This class defines what a code clone is: If it collects for two statements -/// the same data, then those two statements are considered to be clones of each -/// other. 
-/// -/// All collected data is forwarded to the given data consumer of the type T. -/// The data consumer class needs to provide a member method with the signature: -/// update(StringRef Str) -template -class StmtDataCollector : public ConstStmtVisitor> { - - ASTContext &Context; - /// The data sink to which all data is forwarded. - T &DataConsumer; - -public: - /// Collects data of the given Stmt. - /// \param S The given statement. - /// \param Context The ASTContext of S. - /// \param DataConsumer The data sink to which all data is forwarded. - StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer) - : Context(Context), DataConsumer(DataConsumer) { - this->Visit(S); - } - - typedef unsigned DataPiece; - - // Below are utility methods for appending different data to the vector. - - void addData(DataPiece Integer) { - DataConsumer.update( - StringRef(reinterpret_cast(&Integer), sizeof(Integer))); - } - - void addData(llvm::StringRef Str) { DataConsumer.update(Str); } - - void addData(const QualType &QT) { addData(QT.getAsString()); } - -// The functions below collect the class specific data of each Stmt subclass. - -// Utility macro for defining a visit method for a given class. This method -// calls back to the ConstStmtVisitor to visit all parent classes. -#define DEF_ADD_DATA(CLASS, CODE) \ - void Visit##CLASS(const CLASS *S) { \ - CODE; \ - ConstStmtVisitor::Visit##CLASS(S); \ - } - - DEF_ADD_DATA(Stmt, { - addData(S->getStmtClass()); - // This ensures that macro generated code isn't identical to macro-generated - // code. 
- addData(getMacroStack(S->getLocStart(), Context)); - addData(getMacroStack(S->getLocEnd(), Context)); - }) - DEF_ADD_DATA(Expr, { addData(S->getType()); }) - - //--- Builtin functionality ----------------------------------------------// - DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) - DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) - DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) - DEF_ADD_DATA(TypeTraitExpr, { - addData(S->getTrait()); - for (unsigned i = 0; i < S->getNumArgs(); ++i) - addData(S->getArg(i)->getType()); - }) - - //--- Calls --------------------------------------------------------------// - DEF_ADD_DATA(CallExpr, { - // Function pointers don't have a callee and we just skip hashing it. - if (const FunctionDecl *D = S->getDirectCallee()) { - // If the function is a template specialization, we also need to handle - // the template arguments as they are not included in the qualified name. - if (auto Args = D->getTemplateSpecializationArgs()) { - std::string ArgString; - - // Print all template arguments into ArgString - llvm::raw_string_ostream OS(ArgString); - for (unsigned i = 0; i < Args->size(); ++i) { - Args->get(i).print(Context.getLangOpts(), OS); - // Add a padding character so that 'foo()' != 'foo()'. 
- OS << '\n'; - } - OS.flush(); - - addData(ArgString); - } - addData(D->getQualifiedNameAsString()); - } - }) - - //--- Exceptions ---------------------------------------------------------// - DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) - - //--- C++ OOP Stmts ------------------------------------------------------// - DEF_ADD_DATA(CXXDeleteExpr, { - addData(S->isArrayFormAsWritten()); - addData(S->isGlobalDelete()); - }) - - //--- Casts --------------------------------------------------------------// - DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) - - //--- Miscellaneous Exprs ------------------------------------------------// - DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) - DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) - - //--- Control flow -------------------------------------------------------// - DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) - DEF_ADD_DATA(IndirectGotoStmt, { - if (S->getConstantTarget()) - addData(S->getConstantTarget()->getName()); - }) - DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) - DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) - DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) - - //--- Objective-C --------------------------------------------------------// - DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) - DEF_ADD_DATA(ObjCPropertyRefExpr, { - addData(S->isSuperReceiver()); - addData(S->isImplicitProperty()); - }) - DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) - - //--- Miscellaneous Stmts ------------------------------------------------// - DEF_ADD_DATA(CXXFoldExpr, { - addData(S->isRightFold()); - addData(S->getOperator()); - }) - DEF_ADD_DATA(GenericSelectionExpr, { - for (unsigned i = 0; i < S->getNumAssocs(); ++i) { - addData(S->getAssocType(i)); - } - }) - DEF_ADD_DATA(LambdaExpr, { - for (const LambdaCapture &C : S->captures()) { - 
addData(C.isPackExpansion()); - addData(C.getCaptureKind()); - if (C.capturesVariable()) - addData(C.getCapturedVar()->getType()); - } - addData(S->isGenericLambda()); - addData(S->isMutable()); - }) - DEF_ADD_DATA(DeclStmt, { - auto numDecls = std::distance(S->decl_begin(), S->decl_end()); - addData(static_cast(numDecls)); - for (const Decl *D : S->decls()) { - if (const VarDecl *VD = dyn_cast(D)) { - addData(VD->getType()); - } - } - }) - DEF_ADD_DATA(AsmStmt, { - addData(S->isSimple()); - addData(S->isVolatile()); - addData(S->generateAsmString(Context)); - for (unsigned i = 0; i < S->getNumInputs(); ++i) { - addData(S->getInputConstraint(i)); - } - for (unsigned i = 0; i < S->getNumOutputs(); ++i) { - addData(S->getOutputConstraint(i)); - } - for (unsigned i = 0; i < S->getNumClobbers(); ++i) { - addData(S->getClobber(i)); - } - }) - DEF_ADD_DATA(AttributedStmt, { - for (const Attr *A : S->getAttrs()) { - addData(std::string(A->getSpelling())); - } - }) -}; -} // namespace clone_detection - /// Identifies a list of statements. /// /// Can either identify a single arbitrary Stmt object, a continuous sequence of Index: cfe/trunk/lib/AST/CMakeLists.txt =================================================================== --- cfe/trunk/lib/AST/CMakeLists.txt +++ cfe/trunk/lib/AST/CMakeLists.txt @@ -20,6 +20,7 @@ CommentLexer.cpp CommentParser.cpp CommentSema.cpp + DataCollection.cpp Decl.cpp DeclarationName.cpp DeclBase.cpp Index: cfe/trunk/lib/AST/DataCollection.cpp =================================================================== --- cfe/trunk/lib/AST/DataCollection.cpp +++ cfe/trunk/lib/AST/DataCollection.cpp @@ -0,0 +1,50 @@ +//===-- DataCollection.cpp --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DataCollection.h" + +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace data_collection { + +/// Prints the macro name that contains the given SourceLocation into the given +/// raw_string_ostream. +static void printMacroName(llvm::raw_string_ostream &MacroStack, + ASTContext &Context, SourceLocation Loc) { + MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), + Context.getLangOpts()); + + // Add an empty space at the end as a padding to prevent + // that macro names concatenate to the names of other macros. + MacroStack << " "; +} + +/// Returns a string that represents all macro expansions that expanded into the +/// given SourceLocation. +/// +/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations +/// A and B are expanded from the same macros in the same order. +std::string getMacroStack(SourceLocation Loc, ASTContext &Context) { + std::string MacroStack; + llvm::raw_string_ostream MacroStackStream(MacroStack); + SourceManager &SM = Context.getSourceManager(); + + // Iterate over all macros that expanded into the given SourceLocation. + while (Loc.isMacroID()) { + // Add the macro name to the stream. + printMacroName(MacroStackStream, Context, Loc); + Loc = SM.getImmediateMacroCallerLoc(Loc); + } + MacroStackStream.flush(); + return MacroStack; +} + +} // end namespace data_collection +} // end namespace clang Index: cfe/trunk/lib/AST/StmtDataCollectors.inc =================================================================== --- cfe/trunk/lib/AST/StmtDataCollectors.inc +++ cfe/trunk/lib/AST/StmtDataCollectors.inc @@ -0,0 +1,141 @@ +// The functions below collect the class specific data of each Stmt subclass. + +DEF_ADD_DATA(Stmt, { + addData(S->getStmtClass()); + // This ensures that non-macro-generated code isn't identical to + // macro-generated code. 
+ addData(data_collection::getMacroStack(S->getLocStart(), Context)); + addData(data_collection::getMacroStack(S->getLocEnd(), Context)); +}) +DEF_ADD_DATA(Expr, { addData(S->getType()); }) + +//--- Builtin functionality ----------------------------------------------// +DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) +DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) +DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) +DEF_ADD_DATA(TypeTraitExpr, { + addData(S->getTrait()); + for (unsigned i = 0; i < S->getNumArgs(); ++i) + addData(S->getArg(i)->getType()); +}) + +//--- Calls --------------------------------------------------------------// +DEF_ADD_DATA(CallExpr, { + // Function pointers don't have a callee and we just skip hashing it. + if (const FunctionDecl *D = S->getDirectCallee()) { + // If the function is a template specialization, we also need to handle + // the template arguments as they are not included in the qualified name. + if (auto Args = D->getTemplateSpecializationArgs()) { + std::string ArgString; + + // Print all template arguments into ArgString + llvm::raw_string_ostream OS(ArgString); + for (unsigned i = 0; i < Args->size(); ++i) { + Args->get(i).print(Context.getLangOpts(), OS); + // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
+ OS << '\n'; + } + OS.flush(); + + addData(ArgString); + } + addData(D->getQualifiedNameAsString()); + } +}) + +//--- Value references ---------------------------------------------------// +DEF_ADD_DATA(DeclRefExpr, + { addData(S->getDecl()->getQualifiedNameAsString()); }) +DEF_ADD_DATA(MemberExpr, + { addData(S->getMemberDecl()->getName()); }) + +//--- Literals -----------------------------------------------------------// +DEF_ADD_DATA(IntegerLiteral, { addData(llvm::hash_value(S->getValue())); }) +DEF_ADD_DATA(FloatingLiteral, { addData(llvm::hash_value(S->getValue())); }) +DEF_ADD_DATA(StringLiteral, { addData(S->getString()); }) +DEF_ADD_DATA(CXXBoolLiteralExpr, { addData(S->getValue()); }) +DEF_ADD_DATA(CharacterLiteral, { addData(S->getValue()); }) + +//--- Exceptions ---------------------------------------------------------// +DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + +//--- C++ OOP Stmts ------------------------------------------------------// +DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); +}) + +//--- Casts --------------------------------------------------------------// +DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + +//--- Miscellaneous Exprs ------------------------------------------------// +DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) +DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + +//--- Control flow -------------------------------------------------------// +DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) +DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); +}) +DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) +DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) +DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + +//--- Objective-C --------------------------------------------------------// 
+DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) +DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); +}) +DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + +//--- Miscellaneous Stmts ------------------------------------------------// +DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); +}) +DEF_ADD_DATA(GenericSelectionExpr, { + for (unsigned i = 0; i < S->getNumAssocs(); ++i) { + addData(S->getAssocType(i)); + } +}) +DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); +}) +DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (const Decl *D : S->decls()) { + if (const VarDecl *VD = dyn_cast(D)) { + addData(VD->getType()); + } + } +}) +DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } +}) +DEF_ADD_DATA(AttributedStmt, { + for (const Attr *A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } +}) +#undef DEF_ADD_DATA Index: cfe/trunk/lib/Analysis/CloneDetection.cpp =================================================================== --- cfe/trunk/lib/Analysis/CloneDetection.cpp +++ cfe/trunk/lib/Analysis/CloneDetection.cpp @@ -13,16 +13,12 @@ #include "clang/Analysis/CloneDetection.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/Stmt.h" 
-#include "clang/Lex/Lexer.h" +#include "clang/AST/DataCollection.h" +#include "clang/AST/DeclTemplate.h" #include "llvm/Support/MD5.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" using namespace clang; -using namespace clang::clone_detection; StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D, unsigned StartIndex, unsigned EndIndex) @@ -91,34 +87,6 @@ return SourceRange(getStartLoc(), getEndLoc()); } -/// Prints the macro name that contains the given SourceLocation into the given -/// raw_string_ostream. -static void printMacroName(llvm::raw_string_ostream &MacroStack, - ASTContext &Context, SourceLocation Loc) { - MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), - Context.getLangOpts()); - - // Add an empty space at the end as a padding to prevent - // that macro names concatenate to the names of other macros. - MacroStack << " "; -} - -std::string clone_detection::getMacroStack(SourceLocation Loc, - ASTContext &Context) { - std::string MacroStack; - llvm::raw_string_ostream MacroStackStream(MacroStack); - SourceManager &SM = Context.getSourceManager(); - - // Iterate over all macros that expanded into the given SourceLocation. - while (Loc.isMacroID()) { - // Add the macro name to the stream. 
- printMacroName(MacroStackStream, Context, Loc); - Loc = SM.getImmediateMacroCallerLoc(Loc); - } - MacroStackStream.flush(); - return MacroStack; -} - void CloneDetector::analyzeCodeBody(const Decl *D) { assert(D); assert(D->hasBody()); @@ -184,16 +152,17 @@ } } -bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) { +bool FilenamePatternConstraint::isAutoGenerated( + const CloneDetector::CloneGroup &Group) { std::string Error; - if (IgnoredFilesPattern.empty() || Group.empty() || + if (IgnoredFilesPattern.empty() || Group.empty() || !IgnoredFilesRegex->isValid(Error)) return false; for (const StmtSequence &S : Group) { const SourceManager &SM = S.getASTContext().getSourceManager(); - StringRef Filename = llvm::sys::path::filename(SM.getFilename( - S.getContainingDecl()->getLocation())); + StringRef Filename = llvm::sys::path::filename( + SM.getFilename(S.getContainingDecl()->getLocation())); if (IgnoredFilesRegex->match(Filename)) return true; } @@ -201,6 +170,59 @@ return false; } +/// This class defines what a type II code clone is: If it collects for two +/// statements the same data, then those two statements are considered to be +/// clones of each other. +/// +/// All collected data is forwarded to the given data consumer of the type T. +/// The data consumer class needs to provide a member method with the signature: +/// update(StringRef Str) +namespace { +template +class CloneTypeIIStmtDataCollector + : public ConstStmtVisitor> { + ASTContext &Context; + /// The data sink to which all data is forwarded. + T &DataConsumer; + + template void addData(const Ty &Data) { + data_collection::addDataToConsumer(DataConsumer, Data); + } + +public: + CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context, + T &DataConsumer) + : Context(Context), DataConsumer(DataConsumer) { + this->Visit(S); + } + +// Define a visit method for each class to collect data and subsequently visit +// all parent classes. 
This uses a template so that custom visit methods by us +// take precedence. +#define DEF_ADD_DATA(CLASS, CODE) \ + template void Visit##CLASS(const CLASS *S) { \ + CODE; \ + ConstStmtVisitor>::Visit##CLASS(S); \ + } + +#include "../AST/StmtDataCollectors.inc" + +// Type II clones ignore variable names and literals, so let's skip them. +#define SKIP(CLASS) \ + void Visit##CLASS(const CLASS *S) { \ + ConstStmtVisitor>::Visit##CLASS(S); \ + } + SKIP(DeclRefExpr) + SKIP(MemberExpr) + SKIP(IntegerLiteral) + SKIP(FloatingLiteral) + SKIP(StringLiteral) + SKIP(CXXBoolLiteralExpr) + SKIP(CharacterLiteral) +#undef SKIP +}; +} // end anonymous namespace + static size_t createHash(llvm::MD5 &Hash) { size_t HashCode; @@ -222,7 +244,7 @@ llvm::MD5 Hash; ASTContext &Context = D->getASTContext(); - StmtDataCollector(S, Context, Hash); + CloneTypeIIStmtDataCollector(S, Context, Hash); auto CS = dyn_cast(S); SmallVector ChildHashes; @@ -288,8 +310,8 @@ static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData) { for (const Stmt *S : Sequence) { - StmtDataCollector(S, Sequence.getASTContext(), - OutputData); + CloneTypeIIStmtDataCollector( + S, Sequence.getASTContext(), OutputData); for (const Stmt *Child : S->children()) { if (!Child) @@ -339,7 +361,7 @@ // Sort hash_codes in StmtsByHash. std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(), [](std::pair LHS, - std::pair RHS) { + std::pair RHS) { return LHS.first < RHS.first; }); @@ -393,8 +415,10 @@ ASTContext &Context = Seq.getASTContext(); // Look up what macros expanded into the current statement. 
- std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context); - std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context); + std::string StartMacroStack = + data_collection::getMacroStack(Seq.getStartLoc(), Context); + std::string EndMacroStack = + data_collection::getMacroStack(Seq.getEndLoc(), Context); // First, check if ParentMacroStack is not empty which means we are currently // dealing with a parent statement which was expanded from a macro. Index: cfe/trunk/unittests/AST/CMakeLists.txt =================================================================== --- cfe/trunk/unittests/AST/CMakeLists.txt +++ cfe/trunk/unittests/AST/CMakeLists.txt @@ -9,6 +9,7 @@ ASTVectorTest.cpp CommentLexer.cpp CommentParser.cpp + DataCollectionTest.cpp DeclPrinterTest.cpp DeclTest.cpp EvaluateAsRValueTest.cpp Index: cfe/trunk/unittests/AST/DataCollectionTest.cpp =================================================================== --- cfe/trunk/unittests/AST/DataCollectionTest.cpp +++ cfe/trunk/unittests/AST/DataCollectionTest.cpp @@ -0,0 +1,173 @@ +//===- unittests/AST/DataCollectionTest.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains tests for the DataCollection module. +// +// They work by hashing the collected data of two nodes and asserting that the +// hash values are equal iff the nodes are considered equal. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DataCollection.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Tooling/Tooling.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace tooling; +using namespace ast_matchers; + +namespace { +class StmtDataCollector : public ConstStmtVisitor { + ASTContext &Context; + llvm::MD5 &DataConsumer; + + template void addData(const T &Data) { + data_collection::addDataToConsumer(DataConsumer, Data); + } + +public: + StmtDataCollector(const Stmt *S, ASTContext &Context, llvm::MD5 &DataConsumer) + : Context(Context), DataConsumer(DataConsumer) { + this->Visit(S); + } + +#define DEF_ADD_DATA(CLASS, CODE) \ + template Dummy Visit##CLASS(const CLASS *S) { \ + CODE; \ + ConstStmtVisitor::Visit##CLASS(S); \ + } + +#include "../../lib/AST/StmtDataCollectors.inc" +}; +} // end anonymous namespace + +namespace { +struct StmtHashMatch : public MatchFinder::MatchCallback { + unsigned NumFound; + llvm::MD5::MD5Result &Hash; + StmtHashMatch(llvm::MD5::MD5Result &Hash) : NumFound(0), Hash(Hash) {} + + void run(const MatchFinder::MatchResult &Result) override { + const Stmt *S = Result.Nodes.getNodeAs("id"); + if (!S) + return; + ++NumFound; + if (NumFound > 1) + return; + llvm::MD5 MD5; + StmtDataCollector(S, *Result.Context, MD5); + MD5.final(Hash); + } +}; +} // end anonymous namespace + +static testing::AssertionResult hashStmt(llvm::MD5::MD5Result &Hash, + const StatementMatcher &StmtMatch, + StringRef Code) { + StmtHashMatch Hasher(Hash); + MatchFinder Finder; + Finder.addMatcher(StmtMatch, &Hasher); + std::unique_ptr Factory( + newFrontendActionFactory(&Finder)); + if (!runToolOnCode(Factory->create(), Code)) + return testing::AssertionFailure() + << "Parsing error in \"" << Code.str() << "\""; + if (Hasher.NumFound == 0) + return testing::AssertionFailure() << "Matcher 
didn't find any statements"; + if (Hasher.NumFound > 1) + return testing::AssertionFailure() + << "Matcher should match only one statement " + "(found " + << Hasher.NumFound << ")"; + return testing::AssertionSuccess(); +} + +static testing::AssertionResult +isStmtHashEqual(const StatementMatcher &StmtMatch, StringRef Code1, + StringRef Code2) { + llvm::MD5::MD5Result Hash1, Hash2; + testing::AssertionResult Result = hashStmt(Hash1, StmtMatch, Code1); + if (!Result) + return Result; + if (!(Result = hashStmt(Hash2, StmtMatch, Code2))) + return Result; + + return testing::AssertionResult(Hash1 == Hash2); +} + +TEST(StmtDataCollector, TestDeclRefExpr) { + ASSERT_TRUE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;", + "int x, r = x;")); + ASSERT_FALSE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;", + "int y, r = y;")); + ASSERT_FALSE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;", + "namespace n { int x, r = x; };")); +} + +TEST(StmtDataCollector, TestMemberExpr) { + ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"), + "struct { int x; } X; int r = X.x;", + "struct { int x; } X; int r = (&X)->x;")); + ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"), + "struct { int x; } X; int r = X.x;", + "struct { int x; } Y; int r = Y.x;")); + ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"), + "struct { int x; } X; int r = X.x;", + "struct C { int x; } X; int r = X.C::x;")); + ASSERT_FALSE(isStmtHashEqual(memberExpr().bind("id"), + "struct { int x; } X; int r = X.x;", + "struct { int y; } X; int r = X.y;")); +} + +TEST(StmtDataCollector, TestIntegerLiteral) { + ASSERT_TRUE( + isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x = 0;")); + ASSERT_TRUE( + isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x =00;")); + ASSERT_FALSE( + isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x = 1;")); +} + +TEST(StmtDataCollector, TestFloatingLiteral) { + ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), 
"double x = .0;", + "double x = .0;")); + ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .10;", + "double x = .1;")); + ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .1;", + "double x = 1e-1;")); + ASSERT_FALSE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .0;", + "double x = .1;")); +} + +TEST(StmtDataCollector, TestStringLiteral) { + ASSERT_TRUE(isStmtHashEqual(stringLiteral().bind("id"), R"(char x[] = "0";)", + R"(char x[] = "0";)")); + ASSERT_FALSE(isStmtHashEqual(stringLiteral().bind("id"), R"(char x[] = "0";)", + R"(char x[] = "1";)")); +} + +TEST(StmtDataCollector, TestCXXBoolLiteral) { + ASSERT_TRUE(isStmtHashEqual(cxxBoolLiteral().bind("id"), "bool x = false;", + "bool x = false;")); + ASSERT_FALSE(isStmtHashEqual(cxxBoolLiteral().bind("id"), "bool x = false;", + "bool x = true;")); +} + +TEST(StmtDataCollector, TestCharacterLiteral) { + ASSERT_TRUE(isStmtHashEqual(characterLiteral().bind("id"), "char x = '0';", + "char x = '0';")); + ASSERT_TRUE(isStmtHashEqual(characterLiteral().bind("id"), + R"(char x = '\0';)", + R"(char x = '\x00';)")); + ASSERT_FALSE(isStmtHashEqual(characterLiteral().bind("id"), "char x = '0';", + "char x = '1';")); +}