Index: include/clang/Tooling/ASTDiff/ASTDiff.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiff.h +++ include/clang/Tooling/ASTDiff/ASTDiff.h @@ -25,41 +25,28 @@ namespace clang { namespace diff { -/// This represents a match between two nodes in the source and destination -/// trees, meaning that they are likely to be related. -struct Match { - NodeId Src, Dst; -}; - enum ChangeKind { - Delete, // (Src): delete node Src. - Update, // (Src, Dst): update the value of node Src to match Dst. - Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. - Move // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. -}; - -struct Change { - ChangeKind Kind; - NodeId Src, Dst; - size_t Position; - - Change(ChangeKind Kind, NodeId Src, NodeId Dst, size_t Position) - : Kind(Kind), Src(Src), Dst(Dst), Position(Position) {} - Change(ChangeKind Kind, NodeId Src) : Kind(Kind), Src(Src) {} - Change(ChangeKind Kind, NodeId Src, NodeId Dst) - : Kind(Kind), Src(Src), Dst(Dst) {} + None, + Delete, // (Src): delete node Src. + Update, // (Src, Dst): update the value of node Src to match Dst. + Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. + Move, // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. + UpdateMove // Same as Move plus Update. }; /// Represents a Clang AST node, alongside some additional information. 
struct Node { NodeId Parent, LeftMostDescendant, RightMostDescendant; - int Depth, Height; + int Depth, Height, Shift; ast_type_traits::DynTypedNode ASTNode; SmallVector Children; + ChangeKind ChangeKind = None; - ast_type_traits::ASTNodeKind getType() const { return ASTNode.getNodeKind(); } - const StringRef getTypeLabel() const { return getType().asStringRef(); } + ast_type_traits::ASTNodeKind getType() const; + StringRef getTypeLabel() const; bool isLeaf() const { return Children.empty(); } + llvm::Optional getIdentifier() const; + llvm::Optional getQualifiedIdentifier() const; }; class ASTDiff { @@ -67,15 +54,8 @@ ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options); ~ASTDiff(); - // Returns a list of matches. - std::vector getMatches(); - /// Returns an edit script. - std::vector getChanges(); - - // Prints an edit action. - void printChange(raw_ostream &OS, const Change &Chg) const; - // Prints a match between two nodes. - void printMatch(raw_ostream &OS, const Match &M) const; + // Returns the ID of the node that is mapped to the given node in SourceTree. + NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const; class Impl; @@ -88,21 +68,33 @@ class SyntaxTree { public: /// Constructs a tree from a translation unit. - SyntaxTree(const ASTContext &AST); + SyntaxTree(ASTContext &AST); /// Constructs a tree from any AST node. 
template - SyntaxTree(T *Node, const ASTContext &AST) + SyntaxTree(T *Node, ASTContext &AST) : TreeImpl(llvm::make_unique(this, Node, AST)) {} + SyntaxTree(const SyntaxTree &Tree) = delete; ~SyntaxTree(); + ASTContext &getASTContext() const; + StringRef getFilename() const; + + int getSize() const; + NodeId getRootId() const; + using PreorderIterator = NodeId; + PreorderIterator begin() const; + PreorderIterator end() const; + const Node &getNode(NodeId Id) const; + int findPositionInParent(NodeId Id) const; + + std::pair getFileOffsets(const Node &N) const; /// Serialize the node attributes to a string representation. This should /// uniquely distinguish nodes of the same kind. Note that this function just /// returns a representation of the node value, not considering descendants. - std::string getNodeValue(const DynTypedNode &DTN) const; - - void printAsJson(raw_ostream &OS); + std::string getNodeValue(NodeId Id) const; + std::string getNodeValue(const Node &Node) const; class Impl; std::unique_ptr TreeImpl; @@ -114,19 +106,17 @@ /// During bottom-up matching, match only nodes with at least this value as /// the ratio of their common descendants. - double MinSimilarity = 0.2; + double MinSimilarity = 0.5; /// Whenever two subtrees are matched in the bottom-up phase, the optimal /// mapping is computed, unless the size of either subtrees exceeds this. int MaxSize = 100; - /// If this is set to true, nodes that have parents that must not be matched - /// (see NodeComparison) will be allowed to be matched. - bool EnableMatchingWithUnmatchableParents = false; + bool StopAfterTopDown = false; /// Returns false if the nodes should never be matched. 
- bool isMatchingAllowed(const DynTypedNode &N1, const DynTypedNode &N2) const { - return N1.getNodeKind().isSame(N2.getNodeKind()); + bool isMatchingAllowed(const Node &N1, const Node &N2) const { + return N1.getType().isSame(N2.getType()); } }; Index: include/clang/Tooling/ASTDiff/ASTDiffInternal.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiffInternal.h +++ include/clang/Tooling/ASTDiff/ASTDiffInternal.h @@ -11,8 +11,6 @@ #ifndef LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFFINTERNAL_H #define LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFFINTERNAL_H -#include - #include "clang/AST/ASTTypeTraits.h" namespace clang { @@ -38,6 +36,8 @@ operator int() const { return Id; } NodeId &operator++() { return ++Id, *this; } NodeId &operator--() { return --Id, *this; } + // Support defining iterators on NodeId. + NodeId &operator*() { return *this; } bool isValid() const { return Id != InvalidNodeId; } bool isInvalid() const { return Id == InvalidNodeId; } Index: include/clang/Tooling/CommonOptionsParser.h =================================================================== --- include/clang/Tooling/CommonOptionsParser.h +++ include/clang/Tooling/CommonOptionsParser.h @@ -27,6 +27,7 @@ #ifndef LLVM_CLANG_TOOLING_COMMONOPTIONSPARSER_H #define LLVM_CLANG_TOOLING_COMMONOPTIONSPARSER_H +#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/Support/CommandLine.h" @@ -111,6 +112,29 @@ std::vector ExtraArgsAfter; }; +class ArgumentsAdjustingCompilations : public CompilationDatabase { +public: + ArgumentsAdjustingCompilations( + std::unique_ptr Compilations) + : Compilations(std::move(Compilations)) {} + + void appendArgumentsAdjuster(ArgumentsAdjuster Adjuster); + + std::vector + getCompileCommands(StringRef FilePath) const override; + + std::vector getAllFiles() const override; + + std::vector getAllCompileCommands() const override; + +private: + std::unique_ptr Compilations; + std::vector 
Adjusters; + + std::vector + adjustCommands(std::vector Commands) const; +}; + } // namespace tooling } // namespace clang Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -27,92 +27,63 @@ namespace clang { namespace diff { +namespace { /// Maps nodes of the left tree to ones on the right, and vice versa. class Mapping { public: Mapping() = default; Mapping(Mapping &&Other) = default; Mapping &operator=(Mapping &&Other) = default; - Mapping(int Size1, int Size2) { - // Maximum possible size after patching one tree. - int Size = Size1 + Size2; - SrcToDst = llvm::make_unique[]>(Size); - DstToSrc = llvm::make_unique[]>(Size); + + Mapping(size_t Size) { + SrcToDst = llvm::make_unique(Size); + DstToSrc = llvm::make_unique(Size); } void link(NodeId Src, NodeId Dst) { - SrcToDst[Src].push_back(Dst); - DstToSrc[Dst].push_back(Src); - } - - NodeId getDst(NodeId Src) const { - if (hasSrc(Src)) - return SrcToDst[Src][0]; - return NodeId(); - } - NodeId getSrc(NodeId Dst) const { - if (hasDst(Dst)) - return DstToSrc[Dst][0]; - return NodeId(); - } - const SmallVector &getAllDsts(NodeId Src) const { - return SrcToDst[Src]; - } - const SmallVector &getAllSrcs(NodeId Dst) const { - return DstToSrc[Dst]; - } - bool hasSrc(NodeId Src) const { return !SrcToDst[Src].empty(); } - bool hasDst(NodeId Dst) const { return !DstToSrc[Dst].empty(); } - bool hasSrcDst(NodeId Src, NodeId Dst) const { - for (NodeId DstId : SrcToDst[Src]) - if (DstId == Dst) - return true; - for (NodeId SrcId : DstToSrc[Dst]) - if (SrcId == Src) - return true; - return false; + SrcToDst[Src] = Dst, DstToSrc[Dst] = Src; } + NodeId getDst(NodeId Src) const { return SrcToDst[Src]; } + NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; } + bool hasSrc(NodeId Src) const { return getDst(Src).isValid(); } + bool hasDst(NodeId Dst) const { return getSrc(Dst).isValid(); } + private: - 
std::unique_ptr[]> SrcToDst, DstToSrc; + std::unique_ptr SrcToDst, DstToSrc; }; +} // end anonymous namespace class ASTDiff::Impl { public: SyntaxTree::Impl &T1, &T2; - bool IsMappingDone = false; Mapping TheMapping; Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, - const ComparisonOptions &Options) - : T1(T1), T2(T2), Options(Options) {} + const ComparisonOptions &Options); /// Matches nodes one-by-one based on their similarity. void computeMapping(); - std::vector getMatches(Mapping &M); - - /// Finds an edit script that converts T1 to T2. - std::vector computeChanges(Mapping &M); + // Compute ChangeKind for each node based on similarity. + void computeChangeKinds(Mapping &M); - void printChangeImpl(raw_ostream &OS, const Change &Chg) const; - void printMatchImpl(raw_ostream &OS, const Match &M) const; - - // Returns a mapping of identical subtrees. - Mapping matchTopDown() const; + NodeId getMapped(const SyntaxTree::Impl &Tree, NodeId Id) const { + if (&Tree == &T1) + return TheMapping.getDst(Id); + assert(&Tree == &T2 && "Invalid tree."); + return TheMapping.getSrc(Id); + } private: // Returns true if the two subtrees are identical. bool identical(NodeId Id1, NodeId Id2) const; - bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const; - // Returns false if the nodes must not be mached. bool isMatchingPossible(NodeId Id1, NodeId Id2) const; - // Adds all corresponding subtrees of the two nodes to the mapping. - // The two nodes must be identical. - void addIsomorphicSubTrees(Mapping &M, NodeId Id1, NodeId Id2) const; + // Returns true if the nodes' parents are matched. + bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const; // Uses an optimal albeit slow algorithm to compute a mapping between two // subtrees, but only if both have fewer nodes than MaxSize. @@ -120,11 +91,14 @@ // Computes the ratio of common descendants between the two nodes. // Descendants are only considered to be equal when they are mapped in M. 
- double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; + double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns the node that has the highest degree of similarity. NodeId findCandidate(const Mapping &M, NodeId Id1) const; + // Returns a mapping of identical subtrees. + Mapping matchTopDown() const; + // Tries to match any yet unmapped nodes, in a bottom-up fashion. void matchBottomUp(Mapping &M) const; @@ -137,29 +111,32 @@ class SyntaxTree::Impl { public: /// Constructs a tree from the entire translation unit. - Impl(SyntaxTree *Parent, const ASTContext &AST); + Impl(SyntaxTree *Parent, ASTContext &AST); /// Constructs a tree from an AST node. - Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST); - Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST); + Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST); + Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); template Impl(SyntaxTree *Parent, typename std::enable_if::value, T>::type *Node, - const ASTContext &AST) + ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} template Impl(SyntaxTree *Parent, typename std::enable_if::value, T>::type *Node, - const ASTContext &AST) + ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} SyntaxTree *Parent; - const ASTContext &AST; + ASTContext &AST; std::vector Leaves; // Maps preorder indices to postorder ones. 
std::vector PostorderIds; + std::vector NodesBfs; int getSize() const { return Nodes.size(); } NodeId getRootId() const { return 0; } + PreorderIterator begin() const { return getRootId(); } + PreorderIterator end() const { return getSize(); } const Node &getNode(NodeId Id) const { return Nodes[Id]; } Node &getMutableNode(NodeId Id) { return Nodes[Id]; } @@ -167,19 +144,22 @@ void addNode(Node &N) { Nodes.push_back(N); } int getNumberOfDescendants(NodeId Id) const; bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const; + int findPositionInParent(NodeId Id, bool Shifted = false) const; std::string getNodeValue(NodeId Id) const; - std::string getNodeValue(const DynTypedNode &DTN) const; - /// Prints the node as "[: ](getLocStart(); - return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc); + if (!SLoc.isValid()) + return false; + // Ignore everything from other files. + if (!SrcMgr.isInMainFile(SLoc)) + return true; + // Ignore macros. + if (N->getLocStart() != SrcMgr.getSpellingLoc(N->getLocStart())) + return true; + return false; } +static bool isDeclExcluded(const Decl *D) { return D->isImplicit(); } +static bool isStmtExcluded(const Stmt *S) { return false; } + namespace { /// Counts the number of nodes that will be compared. 
struct NodeCountVisitor : public RecursiveASTVisitor { @@ -204,14 +195,16 @@ const SyntaxTree::Impl &Tree; NodeCountVisitor(const SyntaxTree::Impl &Tree) : Tree(Tree) {} bool TraverseDecl(Decl *D) { - if (isNodeExcluded(Tree.AST.getSourceManager(), D)) + if (isNodeExcluded(Tree.AST.getSourceManager(), D) || isDeclExcluded(D)) return true; ++Count; RecursiveASTVisitor::TraverseDecl(D); return true; } bool TraverseStmt(Stmt *S) { - if (isNodeExcluded(Tree.AST.getSourceManager(), S)) + if (S) + S = S->IgnoreImplicit(); + if (isNodeExcluded(Tree.AST.getSourceManager(), S) || isStmtExcluded(S)) return true; ++Count; RecursiveASTVisitor::TraverseStmt(S); @@ -254,7 +247,7 @@ Parent = PreviousParent; --Depth; Node &N = Tree.getMutableNode(MyId); - N.RightMostDescendant = Id; + N.RightMostDescendant = Id - 1; if (N.isLeaf()) Tree.Leaves.push_back(MyId); N.Height = 1; @@ -262,7 +255,7 @@ N.Height = std::max(N.Height, 1 + Tree.getNode(Child).Height); } bool TraverseDecl(Decl *D) { - if (isNodeExcluded(Tree.AST.getSourceManager(), D)) + if (isNodeExcluded(Tree.AST.getSourceManager(), D) || isDeclExcluded(D)) return true; auto SavedState = PreTraverse(D); RecursiveASTVisitor::TraverseDecl(D); @@ -270,7 +263,9 @@ return true; } bool TraverseStmt(Stmt *S) { - if (isNodeExcluded(Tree.AST.getSourceManager(), S)) + if (S) + S = S->IgnoreImplicit(); + if (isNodeExcluded(Tree.AST.getSourceManager(), S) || isStmtExcluded(S)) return true; auto SavedState = PreTraverse(S); RecursiveASTVisitor::TraverseStmt(S); @@ -281,10 +276,10 @@ }; } // end anonymous namespace -SyntaxTree::Impl::Impl(SyntaxTree *Parent, const ASTContext &AST) +SyntaxTree::Impl::Impl(SyntaxTree *Parent, ASTContext &AST) : Impl(Parent, AST.getTranslationUnitDecl(), AST) {} -SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST) +SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST) : Parent(Parent), AST(AST) { NodeCountVisitor NodeCounter(*this); NodeCounter.TraverseDecl(N); @@ -294,7 
+289,7 @@ initTree(); } -SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST) +SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST) : Parent(Parent), AST(AST) { NodeCountVisitor NodeCounter(*this); NodeCounter.TraverseStmt(N); @@ -304,6 +299,30 @@ initTree(); } +static std::vector getSubtreePostorder(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Postorder; + std::function Traverse = [&](NodeId Id) { + const Node &N = Tree.getNode(Id); + for (NodeId Child : N.Children) + Traverse(Child); + Postorder.push_back(Id); + }; + Traverse(Root); + return Postorder; +} + +static std::vector getSubtreeBfs(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Ids; + size_t Expanded = 0; + Ids.push_back(Root); + while (Expanded < Ids.size()) + for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) + Ids.push_back(Child); + return Ids; +} + void SyntaxTree::Impl::initTree() { setLeftMostDescendants(); int PostorderId = 0; @@ -315,6 +334,7 @@ ++PostorderId; }; PostorderTraverse(getRootId()); + NodesBfs = getSubtreeBfs(*this, getRootId()); } void SyntaxTree::Impl::setLeftMostDescendants() { @@ -329,136 +349,149 @@ } } -static std::vector getSubtreePostorder(const SyntaxTree::Impl &Tree, - NodeId Root) { - std::vector Postorder; - std::function Traverse = [&](NodeId Id) { - const Node &N = Tree.getNode(Id); - for (NodeId Child : N.Children) - Traverse(Child); - Postorder.push_back(Id); - }; - Traverse(Root); - return Postorder; +int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { + return getNode(Id).RightMostDescendant - Id + 1; } -static std::vector getSubtreeBfs(const SyntaxTree::Impl &Tree, - NodeId Root) { - std::vector Ids; - size_t Expanded = 0; - Ids.push_back(Root); - while (Expanded < Ids.size()) - for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) - Ids.push_back(Child); - return Ids; +bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { + return Id >= SubtreeRoot && 
Id <= getNode(SubtreeRoot).RightMostDescendant; } -int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { - return getNode(Id).RightMostDescendant - Id + 1; +int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const { + NodeId Parent = getNode(Id).Parent; + if (Parent.isInvalid()) + return 0; + const auto &Siblings = getNode(Parent).Children; + int Position = 0; + for (size_t I = 0, E = Siblings.size(); I < E; ++I) { + if (Shifted) + Position += getNode(Siblings[I]).Shift; + if (Siblings[I] == Id) { + Position += I; + return Position; + } + } + llvm_unreachable("Node not found in parent's children."); } -bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { - NodeId Lower = SubtreeRoot; - NodeId Upper = getNode(SubtreeRoot).RightMostDescendant; - return Id >= Lower && Id <= Upper; +// Returns the qualified name of ND. If it is declared in Context then +// the name is made relative with respect to the qualified name of Context. +static std::string getRelativeName(const NamedDecl *ND, + const DeclContext *Context) { + std::string ContextPrefix; + if (auto *Namespace = dyn_cast(Context)) + ContextPrefix = Namespace->getQualifiedNameAsString(); + else if (auto *Tag = dyn_cast(Context)) + ContextPrefix = Tag->getQualifiedNameAsString(); + std::string Val = ND->getQualifiedNameAsString(); + // Strip the qualifier, if Val refers to something in the current scope. + // But leave one leading ':' in place, so that we know that this is a + // relative path. 
+ if (!ContextPrefix.empty() && + Val.substr(0, ContextPrefix.size()) == ContextPrefix) + Val = Val.substr(ContextPrefix.size() + 1); + return Val; } -std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { - return getNodeValue(getNode(Id).ASTNode); +static std::string getRelativeName(const NamedDecl *ND) { + return getRelativeName(ND, ND->getDeclContext()); } -std::string SyntaxTree::Impl::getNodeValue(const DynTypedNode &DTN) const { - if (auto *X = DTN.get()) - return X->getOpcodeStr(); - if (auto *X = DTN.get()) { - CharSourceRange Range(X->getSourceRange(), false); - return Lexer::getSourceText(Range, AST.getSourceManager(), - AST.getLangOpts()); - } - if (auto *X = DTN.get()) { - SmallString<256> Str; - X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); - return Str.str(); +static const DeclContext *getEnclosingDeclContext(ASTContext &AST, + const Stmt *S) { + while (S) { + const auto &Parents = AST.getParents(*S); + if (Parents.empty()) + return nullptr; + const auto &P = Parents[0]; + if (const auto *D = P.get()) + return D->getDeclContext(); + S = P.get(); } - if (auto *X = DTN.get()) - return X->getString(); - if (auto *X = DTN.get()) - return X->getNameAsString() + "(" + X->getType().getAsString() + ")"; - if (DTN.get() || DTN.get()) - return ""; - std::string Value; - if (auto *X = DTN.get()) { - if (X->hasQualifier()) { - llvm::raw_string_ostream OS(Value); - PrintingPolicy PP(AST.getLangOpts()); - X->getQualifier()->print(OS, PP); - } - Value += X->getDecl()->getNameAsString(); - return Value; - } - if (auto *X = DTN.get()) - Value += X->getNameAsString() + ";"; - if (auto *X = DTN.get()) - return Value + X->getUnderlyingType().getAsString() + ";"; - if (DTN.get()) - return Value; - if (auto *X = DTN.get()) - if (X->getTypeForDecl()) - Value += - X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";"; - if (DTN.get()) - return Value; - if (DTN.get()) - return ""; - llvm_unreachable("Fatal: unhandled AST node.\n"); + 
llvm_unreachable("Could not find Decl ancestor."); } -void SyntaxTree::Impl::printTree() const { printTree(getRootId()); } -void SyntaxTree::Impl::printTree(NodeId Root) const { - printTree(llvm::outs(), Root); +std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { + return getNodeValue(getNode(Id)); } -void SyntaxTree::Impl::printTree(raw_ostream &OS, NodeId Root) const { - const Node &N = getNode(Root); - for (int I = 0; I < N.Depth; ++I) - OS << " "; - printNode(OS, Root); - OS << "\n"; - for (NodeId Child : N.Children) - printTree(OS, Child); +std::string SyntaxTree::Impl::getNodeValue(const Node &N) const { + const DynTypedNode &DTN = N.ASTNode; + if (auto *S = DTN.get()) + return getStmtValue(S); + if (auto *D = DTN.get()) + return getDeclValue(D); + llvm_unreachable("Fatal: unhandled AST node.\n"); } -void SyntaxTree::Impl::printNode(raw_ostream &OS, NodeId Id) const { - if (Id.isInvalid()) { - OS << "None"; - return; - } - OS << getNode(Id).getTypeLabel(); - if (getNodeValue(Id) != "") - OS << ": " << getNodeValue(Id); - OS << "(" << PostorderIds[Id] << ")"; -} - -void SyntaxTree::Impl::printNodeAsJson(raw_ostream &OS, NodeId Id) const { - auto N = getNode(Id); - OS << R"({"type":")" << N.getTypeLabel() << R"(")"; - if (getNodeValue(Id) != "") - OS << R"(,"value":")" << getNodeValue(Id) << R"(")"; - OS << R"(,"children":[)"; - if (N.Children.size() > 0) { - printNodeAsJson(OS, N.Children[0]); - for (size_t I = 1, E = N.Children.size(); I < E; ++I) { - OS << ","; - printNodeAsJson(OS, N.Children[I]); +std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const { + std::string Value; + PrintingPolicy TypePP(AST.getLangOpts()); + TypePP.AnonymousTagLocations = false; + + if (auto *X = dyn_cast(D)) { + Value += getRelativeName(X) + "(" + X->getType().getAsString(TypePP) + ")"; + if (auto *X = dyn_cast(D)) { + for (auto *Init : X->inits()) { + if (!Init->isWritten()) + continue; + if (Init->isBaseInitializer()) { + Value += 
Init->getBaseClass()->getCanonicalTypeInternal().getAsString( + TypePP) + + ","; + } else if (Init->isDelegatingInitializer()) { + Value += X->getNameAsString() + ","; + } else { + assert(Init->isAnyMemberInitializer()); + Value += getRelativeName(Init->getMember()) + ","; + } + } } + return Value; } - OS << "]}"; + if (auto *X = dyn_cast(D)) + Value += getRelativeName(X) + ";"; + if (auto *X = dyn_cast(D)) + return Value + X->getUnderlyingType().getAsString(TypePP) + ";"; + if (auto *X = dyn_cast(D)) + if (X->getTypeForDecl()) + Value += + X->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) + + ";"; + if (auto *X = dyn_cast(D)) + return X->getNominatedNamespace()->getName(); + if (auto *X = dyn_cast(D)) { + CharSourceRange Range(X->getSourceRange(), false); + return Lexer::getSourceText(Range, AST.getSourceManager(), + AST.getLangOpts()); + } + return Value; } -void SyntaxTree::Impl::printAsJsonImpl(raw_ostream &OS) const { - OS << R"({"root":)"; - printNodeAsJson(OS, getRootId()); - OS << "}\n"; +std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { + if (auto *X = dyn_cast(S)) + return UnaryOperator::getOpcodeStr(X->getOpcode()); + if (auto *X = dyn_cast(S)) + return X->getOpcodeStr(); + if (auto *X = dyn_cast(S)) + return getRelativeName(X->getMemberDecl()); + if (auto *X = dyn_cast(S)) { + SmallString<256> Str; + X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); + return Str.str(); + } + if (auto *X = dyn_cast(S)) { + SmallString<256> Str; + X->getValue().toString(Str); + return Str.str(); + } + if (auto *X = dyn_cast(S)) + return getRelativeName(X->getDecl(), getEnclosingDeclContext(AST, S)); + if (auto *X = dyn_cast(S)) + return X->getString(); + if (auto *X = dyn_cast(S)) + return X->getValue() ? "true" : "false"; + return ""; } /// Identifies a node in a subtree by its postorder offset, starting at 1. @@ -625,12 +658,11 @@ } private: - /// Simple cost model for edit actions. 
+ /// We use a simple cost model for edit actions. + /// This seems good enough: it makes the matching algorithm perform + /// reasonably well. /// The values range between 0 and 1, or infinity if this edit action should /// always be avoided. - - /// These costs could be modified to better model the estimated cost of / - /// inserting / deleting the current node. static constexpr double DeletionCost = 1; static constexpr double InsertionCost = 1; @@ -676,6 +708,28 @@ } }; +ast_type_traits::ASTNodeKind Node::getType() const { + return ASTNode.getNodeKind(); +} + +StringRef Node::getTypeLabel() const { return getType().asStringRef(); } + +llvm::Optional Node::getQualifiedIdentifier() const { + if (auto *ND = ASTNode.get()) { + if (ND->getDeclName().isIdentifier()) + return ND->getQualifiedNameAsString(); + } + return llvm::None; +} + +llvm::Optional Node::getIdentifier() const { + if (auto *ND = ASTNode.get()) { + if (ND->getDeclName().isIdentifier()) + return ND->getName(); + } + return llvm::None; +} + namespace { // Compares nodes by their depth. struct HeightLess { @@ -687,6 +741,7 @@ }; } // end anonymous namespace +namespace { // Priority queue for nodes, sorted descendingly by their height. class PriorityList { const SyntaxTree::Impl &Tree; @@ -723,6 +778,7 @@ push(Child); } }; +} // end anonymous namespace bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const { const Node &N1 = T1.getNode(Id1); @@ -737,75 +793,58 @@ return true; } -bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1, - NodeId Id2) const { - assert(isMatchingPossible(Id1, Id2) && - "Matching must be possible in the first place."); - if (M.hasSrcDst(Id1, Id2)) - return false; - if (Options.EnableMatchingWithUnmatchableParents) - return true; - const Node &N1 = T1.getNode(Id1); - const Node &N2 = T2.getNode(Id2); - NodeId P1 = N1.Parent; - NodeId P2 = N2.Parent; - // Only allow matching if parents can be matched. 
- return (P1.isInvalid() && P2.isInvalid()) || - (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2)); -} - bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { - return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode, - T2.getNode(Id2).ASTNode); + return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2)); } -void ASTDiff::Impl::addIsomorphicSubTrees(Mapping &M, NodeId Id1, - NodeId Id2) const { - assert(identical(Id1, Id2) && "Can only be called on identical subtrees."); - M.link(Id1, Id2); - const Node &N1 = T1.getNode(Id1); - const Node &N2 = T2.getNode(Id2); - for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) - addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]); +bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, + NodeId Id2) const { + NodeId P1 = T1.getNode(Id1).Parent; + NodeId P2 = T2.getNode(Id2).Parent; + return (P1.isInvalid() && P2.isInvalid()) || + (P1.isValid() && P2.isValid() && M.getDst(P1) == P2); } void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const { - if (std::max(T1.getNumberOfDescendants(Id1), - T2.getNumberOfDescendants(Id2)) >= Options.MaxSize) + if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) > + Options.MaxSize) return; ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); std::vector> R = Matcher.getMatchingNodes(); for (const auto Tuple : R) { NodeId Src = Tuple.first; NodeId Dst = Tuple.second; - if (canBeAddedToMapping(M, Src, Dst)) + if (!M.hasSrc(Src) && !M.hasDst(Dst)) M.link(Src, Dst); } } -double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, - NodeId Id2) const { - if (Id1.isInvalid() || Id2.isInvalid()) - return 0.0; +double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, + NodeId Id2) const { int CommonDescendants = 0; const Node &N1 = T1.getNode(Id1); - for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) - CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); 
- return 2.0 * CommonDescendants / - (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); + for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) { + NodeId Dst = M.getDst(Src); + CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)); + } + double Denominator = T1.getNumberOfDescendants(Id1) - 1 + + T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; + if (Denominator == 0) + return 0; + return CommonDescendants / Denominator; } NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { NodeId Candidate; double HighestSimilarity = 0.0; - for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) { + for (NodeId Id2 : T2) { if (!isMatchingPossible(Id1, Id2)) continue; if (M.hasDst(Id2)) continue; - double Similarity = getSimilarity(M, Id1, Id2); - if (Similarity > HighestSimilarity) { + double Similarity = getJaccardSimilarity(M, Id1, Id2); + if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) { HighestSimilarity = Similarity; Candidate = Id2; } @@ -816,26 +855,22 @@ void ASTDiff::Impl::matchBottomUp(Mapping &M) const { std::vector Postorder = getSubtreePostorder(T1, T1.getRootId()); for (NodeId Id1 : Postorder) { - if (Id1 == T1.getRootId()) { + if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId()) && + !M.hasDst(T2.getRootId())) { if (isMatchingPossible(T1.getRootId(), T2.getRootId())) { M.link(T1.getRootId(), T2.getRootId()); addOptimalMapping(M, T1.getRootId(), T2.getRootId()); } break; } - const Node &N1 = T1.getNode(Id1); bool Matched = M.hasSrc(Id1); - bool MatchedChildren = - std::any_of(N1.Children.begin(), N1.Children.end(), - [&](NodeId Child) { return M.hasSrc(Child); }); - if (Matched || !MatchedChildren) + if (Matched) continue; NodeId Id2 = findCandidate(M, Id1); - if (Id2.isInvalid() || !canBeAddedToMapping(M, Id1, Id2) || - getSimilarity(M, Id1, Id2) < Options.MinSimilarity) - continue; - M.link(Id1, Id2); - addOptimalMapping(M, Id1, Id2); + if (Id2.isValid()) { + 
M.link(Id1, Id2); + addOptimalMapping(M, Id1, Id2); + } } } @@ -843,7 +878,7 @@ PriorityList L1(T1); PriorityList L2(T2); - Mapping M(T1.getSize(), T2.getSize()); + Mapping M(T1.getSize() + T2.getSize()); L1.push(T1.getRootId()); L2.push(T2.getRootId()); @@ -865,9 +900,12 @@ H1 = L1.pop(); H2 = L2.pop(); for (NodeId Id1 : H1) { - for (NodeId Id2 : H2) - if (identical(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2)) - addIsomorphicSubTrees(M, Id1, Id2); + for (NodeId Id2 : H2) { + if (identical(Id1, Id2) && !M.hasSrc(Id1) && !M.hasDst(Id2)) { + for (int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I) + M.link(Id1 + I, Id2 + I); + } + } } for (NodeId Id1 : H1) { if (!M.hasSrc(Id1)) @@ -881,99 +919,62 @@ return M; } +ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, + const ComparisonOptions &Options) + : T1(T1), T2(T2), Options(Options) { + computeMapping(); + computeChangeKinds(TheMapping); +} + void ASTDiff::Impl::computeMapping() { - if (IsMappingDone) - return; TheMapping = matchTopDown(); + if (Options.StopAfterTopDown) + return; matchBottomUp(TheMapping); - IsMappingDone = true; -} - -std::vector ASTDiff::Impl::getMatches(Mapping &M) { - std::vector Matches; - for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1) - if ((Id2 = M.getDst(Id1)).isValid()) - Matches.push_back({Id1, Id2}); - return Matches; } -std::vector ASTDiff::Impl::computeChanges(Mapping &M) { - std::vector Changes; - for (NodeId Id2 : getSubtreeBfs(T2, T2.getRootId())) { - const Node &N2 = T2.getNode(Id2); - NodeId Id1 = M.getSrc(Id2); - if (Id1.isValid()) { - assert(isMatchingPossible(Id1, Id2) && "Invalid matching."); - if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { - Changes.emplace_back(Update, Id1, Id2); - } - continue; +void ASTDiff::Impl::computeChangeKinds(Mapping &M) { + for (NodeId Id1 : T1) { + if (!M.hasSrc(Id1)) { + T1.getMutableNode(Id1).ChangeKind = Delete; + T1.getMutableNode(Id1).Shift -= 1; + } + } + for (NodeId Id2 : T2) { + if (!M.hasDst(Id2)) { 
+ T2.getMutableNode(Id2).ChangeKind = Insert; + T2.getMutableNode(Id2).Shift -= 1; } - NodeId P2 = N2.Parent; - NodeId P1 = M.getSrc(P2); - assert(P1.isValid() && - "Parents must be matched for determining the change type."); - Node &Parent1 = T1.getMutableNode(P1); - const Node &Parent2 = T2.getNode(P2); - auto &Siblings1 = Parent1.Children; - const auto &Siblings2 = Parent2.Children; - size_t Position; - for (Position = 0; Position < Siblings2.size(); ++Position) - if (Siblings2[Position] == Id2 || Position >= Siblings1.size()) - break; - Changes.emplace_back(Insert, Id2, P2, Position); - Node PatchNode; - PatchNode.Parent = P1; - PatchNode.LeftMostDescendant = N2.LeftMostDescendant; - PatchNode.RightMostDescendant = N2.RightMostDescendant; - PatchNode.Depth = N2.Depth; - PatchNode.ASTNode = N2.ASTNode; - // TODO update Depth if needed - NodeId PatchNodeId = T1.getSize(); - // TODO maybe choose a different data structure for Children. - Siblings1.insert(Siblings1.begin() + Position, PatchNodeId); - T1.addNode(PatchNode); - M.link(PatchNodeId, Id2); - } - for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) { + } + for (NodeId Id1 : T1.NodesBfs) { NodeId Id2 = M.getDst(Id1); if (Id2.isInvalid()) - Changes.emplace_back(Delete, Id1, Id2); - } - return Changes; -} - -void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const { - switch (Chg.Kind) { - case Delete: - OS << "Delete "; - T1.printNode(OS, Chg.Src); - OS << "\n"; - break; - case Update: - OS << "Update "; - T1.printNode(OS, Chg.Src); - OS << " to " << T2.getNodeValue(Chg.Dst) << "\n"; - break; - case Insert: - OS << "Insert "; - T2.printNode(OS, Chg.Src); - OS << " into "; - T2.printNode(OS, Chg.Dst); - OS << " at " << Chg.Position << "\n"; - break; - case Move: - llvm_unreachable("TODO"); - break; - }; -} - -void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const { - OS << "Match "; - T1.printNode(OS, M.Src); - OS << " to "; - T2.printNode(OS, M.Dst); - OS << "\n"; + 
continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParent(Id1, true) != + T2.findPositionInParent(Id2, true)) { + T1.getMutableNode(Id1).Shift -= 1; + T2.getMutableNode(Id2).Shift -= 1; + } + } + for (NodeId Id2 : T2.NodesBfs) { + NodeId Id1 = M.getSrc(Id2); + if (Id1.isInvalid()) + continue; + Node &N1 = T1.getMutableNode(Id1); + Node &N2 = T2.getMutableNode(Id2); + if (Id1.isInvalid()) + continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParent(Id1, true) != + T2.findPositionInParent(Id2, true)) { + N1.ChangeKind = N2.ChangeKind = Move; + } + if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { + N1.ChangeKind = N2.ChangeKind = + (N1.ChangeKind == Move ? UpdateMove : Update); + } + } } ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, @@ -982,34 +983,54 @@ ASTDiff::~ASTDiff() = default; -SyntaxTree::SyntaxTree(const ASTContext &AST) +NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const { + return DiffImpl->getMapped(*SourceTree.TreeImpl, Id); +} + +SyntaxTree::SyntaxTree(ASTContext &AST) : TreeImpl(llvm::make_unique( this, AST.getTranslationUnitDecl(), AST)) {} -std::vector ASTDiff::getMatches() { - DiffImpl->computeMapping(); - return DiffImpl->getMatches(DiffImpl->TheMapping); -} +SyntaxTree::~SyntaxTree() = default; -std::vector ASTDiff::getChanges() { - DiffImpl->computeMapping(); - return DiffImpl->computeChanges(DiffImpl->TheMapping); +ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; } + +const Node &SyntaxTree::getNode(NodeId Id) const { + return TreeImpl->getNode(Id); } -void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const { - DiffImpl->printChangeImpl(OS, Chg); +int SyntaxTree::getSize() const { return TreeImpl->getSize(); } +NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); } +SyntaxTree::PreorderIterator SyntaxTree::begin() const { + return TreeImpl->begin(); } +SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); } -void 
ASTDiff::printMatch(raw_ostream &OS, const Match &M) const { - DiffImpl->printMatchImpl(OS, M); +int SyntaxTree::findPositionInParent(NodeId Id) const { + return TreeImpl->findPositionInParent(Id); } -SyntaxTree::~SyntaxTree() = default; +std::pair SyntaxTree::getFileOffsets(const Node &N) const { + const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager(); + SourceRange Range = N.ASTNode.getSourceRange(); + SourceLocation BeginLoc = Range.getBegin(); + SourceLocation EndLoc = Lexer::getLocForEndOfToken( + Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts()); + if (auto *ThisExpr = N.ASTNode.get()) { + if (ThisExpr->isImplicit()) + EndLoc = BeginLoc; + } + unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc)); + unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc)); + return {Begin, End}; +} -void SyntaxTree::printAsJson(raw_ostream &OS) { TreeImpl->printAsJsonImpl(OS); } +std::string SyntaxTree::getNodeValue(NodeId Id) const { + return TreeImpl->getNodeValue(Id); +} -std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const { - return TreeImpl->getNodeValue(DTN); +std::string SyntaxTree::getNodeValue(const Node &N) const { + return TreeImpl->getNodeValue(N); } } // end namespace diff Index: lib/Tooling/CommonOptionsParser.cpp =================================================================== --- lib/Tooling/CommonOptionsParser.cpp +++ lib/Tooling/CommonOptionsParser.cpp @@ -25,7 +25,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/CommandLine.h" -#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" @@ -54,43 +53,33 @@ "\tsuffix of a path in the compile command database.\n" "\n"; -namespace { -class ArgumentsAdjustingCompilations : public CompilationDatabase { -public: - ArgumentsAdjustingCompilations( - std::unique_ptr Compilations) - : 
Compilations(std::move(Compilations)) {} - - void appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) { - Adjusters.push_back(std::move(Adjuster)); - } - - std::vector - getCompileCommands(StringRef FilePath) const override { - return adjustCommands(Compilations->getCompileCommands(FilePath)); - } +void ArgumentsAdjustingCompilations::appendArgumentsAdjuster( + ArgumentsAdjuster Adjuster) { + Adjusters.push_back(std::move(Adjuster)); +} - std::vector getAllFiles() const override { - return Compilations->getAllFiles(); - } +std::vector ArgumentsAdjustingCompilations::getCompileCommands( + StringRef FilePath) const { + return adjustCommands(Compilations->getCompileCommands(FilePath)); +} - std::vector getAllCompileCommands() const override { - return adjustCommands(Compilations->getAllCompileCommands()); - } +std::vector +ArgumentsAdjustingCompilations::getAllFiles() const { + return Compilations->getAllFiles(); +} -private: - std::unique_ptr Compilations; - std::vector Adjusters; +std::vector +ArgumentsAdjustingCompilations::getAllCompileCommands() const { + return adjustCommands(Compilations->getAllCompileCommands()); +} - std::vector - adjustCommands(std::vector Commands) const { - for (CompileCommand &Command : Commands) - for (const auto &Adjuster : Adjusters) - Command.CommandLine = Adjuster(Command.CommandLine, Command.Filename); - return Commands; - } -}; -} // namespace +std::vector ArgumentsAdjustingCompilations::adjustCommands( + std::vector Commands) const { + for (CompileCommand &Command : Commands) + for (const auto &Adjuster : Adjusters) + Command.CommandLine = Adjuster(Command.CommandLine, Command.Filename); + return Commands; +} CommonOptionsParser::CommonOptionsParser( int &argc, const char **argv, cl::OptionCategory &Category, Index: test/Tooling/clang-diff-ast.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-ast.cpp @@ -0,0 +1,59 @@ +// RUN: clang-diff -no-compilation-database 
-ast-dump %s -extra-arg='-std=c++11' | FileCheck %s +// +// This tests the getNodeValue function from Tooling/ASTDiff/ASTDiff.h + +// CHECK: NamespaceDecl: test;( +namespace test { + +// CHECK: FunctionDecl: :f( +// CHECK: CompoundStmt( +void f() { + // CHECK: VarDecl: i(int)( + // CHECK: IntegerLiteral: 1 + auto i = 1; + // CHECK: FloatingLiteral: 1.5( + auto r = 1.5; + // CHECK: CXXBoolLiteralExpr: true( + auto b = true; + // CHECK: CallExpr( + // CHECK: DeclRefExpr: :f( + f(); + // CHECK: UnaryOperator: ++( + ++i; + // CHECK: BinaryOperator: =( + i = i; +} + +} // end namespace test + +// CHECK: UsingDirectiveDecl: test( +using namespace test; + +// CHECK: TypedefDecl: nat;unsigned int;( +typedef unsigned nat; +// CHECK: TypeAliasDecl: real;double;( +using real = double; + +class Base { +}; + +// CHECK: CXXRecordDecl: X;X;( +class X : Base { + int m; + // CHECK: CXXMethodDecl: :foo(const char *(int))( + // CHECK: ParmVarDecl: i(int)( + const char *foo(int i) { + if (i == 0) + // CHECK: StringLiteral: foo( + return "foo"; + return 0; + } + + // CHECK: AccessSpecDecl: public( +public: + // CHECK: CXXConstructorDecl: :X(void (char, int))Base,:m,( + X(char, int) : Base(), m(0) { + // CHECK: MemberExpr: :m( + int x = m; + } +}; Index: test/Tooling/clang-diff-basic.cpp =================================================================== --- test/Tooling/clang-diff-basic.cpp +++ test/Tooling/clang-diff-basic.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -E %s > %T/src.cpp -// RUN: %clang_cc1 -E %s > %T/dst.cpp -DDEST -// RUN: clang-diff -no-compilation-database %T/src.cpp %T/dst.cpp | FileCheck %s +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database %t.src.cpp %t.dst.cpp | FileCheck %s #ifndef DEST namespace src { @@ -31,6 +31,8 @@ int id(int i) { return i; } }; } + +void f1() {{ (void) __func__;;; }} #else // CHECK: Match TranslationUnitDecl{{.*}} to TranslationUnitDecl // CHECK: Match 
NamespaceDecl: src{{.*}} to NamespaceDecl: dst @@ -43,19 +45,19 @@ } } -// CHECK: Match DeclRefExpr: foo{{.*}} to DeclRefExpr: inner::foo +// CHECK: Match DeclRefExpr: :foo{{.*}} to DeclRefExpr: :inner::foo void main() { inner::foo(); } // CHECK: Match StringLiteral: foo{{.*}} to StringLiteral: foo const char *b = "f" "o" "o"; // unsigned is canonicalized to unsigned int -// CHECK: Match TypedefDecl: nat;unsigned int;{{.*}} to TypedefDecl: nat;unsigned int; +// CHECK: Match TypedefDecl: :nat;unsigned int;{{.*}} to TypedefDecl: :nat;unsigned int; typedef unsigned nat; -// CHECK: Match VarDecl: p(int){{.*}} to VarDecl: prod(double) +// CHECK: Match VarDecl: :p(int){{.*}} to VarDecl: :prod(double) +// CHECK: Update VarDecl: :p(int){{.*}} to :prod(double) // CHECK: Match BinaryOperator: *{{.*}} to BinaryOperator: * -// CHECK: Update VarDecl: p(int){{.*}} to prod(double) double prod = 1 * 2 * 10; // CHECK: Update DeclRefExpr int squared = prod * prod; @@ -70,9 +72,15 @@ return "foo"; return 0; } - // CHECK: Delete AccessSpecDecl: public X(){}; - // CHECK: Delete CXXMethodDecl }; } + +namespace { +// match with parents of different type +// CHECK: Match FunctionDecl: f1{{.*}} to FunctionDecl: (anonymous namespace)::f1 +void f1() {{ (void) __func__;;; }} +} #endif +// CHECK: Delete AccessSpecDecl: public +// CHECK: Delete CXXMethodDecl Index: test/Tooling/clang-diff-bottomup.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database -s=0 %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. 
+ +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void ())(1) to FunctionDecl: f1(void ())(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 5 and 6 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +class A { int x; void f() { int a1 = x; } }; + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +class B { + // Only the class name changed; it is not included in the field value, + // therefore there is no update. 
+ // CHECK: Match FieldDecl: :x(int)(9) to FieldDecl: :x(int)(8) + // CHECK-NOT: Update FieldDecl: :x(int)(9) + int x; + void f() { + // CHECK: Match MemberExpr: :x(14) to MemberExpr: :x(13) + // CHECK-NOT: Update MemberExpr: :x(14) + int b2 = B::x; + } +}; + +#endif Index: test/Tooling/clang-diff-topdown.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,66 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database -stop-after=topdown %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. 
+ // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +int x; + +namespace src { + int x; + int x1 = x + 1; + int x2 = ::x + 1; +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +int x; + +namespace dst { + int x; + // CHECK: Match DeclRefExpr: :x(17) to DeclRefExpr: :x(22) + int x1 = x + 1; + // CHECK: Match DeclRefExpr: x(21) to DeclRefExpr: x(26) + int x2 = ::x + 1; +} + +#endif Index: tools/clang-diff/CMakeLists.txt =================================================================== --- tools/clang-diff/CMakeLists.txt +++ tools/clang-diff/CMakeLists.txt @@ -7,6 +7,7 @@ ) target_link_libraries(clang-diff + clangBasic clangFrontend clangTooling clangToolingASTDiff Index: tools/clang-diff/ClangDiff.cpp =================================================================== --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -25,15 +25,28 @@ static cl::opt ASTDump("ast-dump", - cl::desc("Print the internal representation of the AST as JSON."), + cl::desc("Print the internal representation of the AST."), cl::init(false), cl::cat(ClangDiffCategory)); +static cl::opt ASTDumpJson( + "ast-dump-json", + cl::desc("Print the internal representation of the AST as JSON."), + cl::init(false), cl::cat(ClangDiffCategory)); + +static cl::opt + PrintMatches("m", cl::desc("Print the matched nodes (verbose)."), + cl::init(false), cl::cat(ClangDiffCategory)); + static cl::opt NoCompilationDatabase( "no-compilation-database", cl::desc( "Do not attempt to load build settings from a compilation database"), cl::init(false), cl::cat(ClangDiffCategory)); +static cl::opt HtmlDiff("html", + cl::desc("Output a side-by-side diff in HTML."), + 
cl::init(false), cl::cat(ClangDiffCategory)); + static cl::opt SourcePath(cl::Positional, cl::desc(""), cl::Required, cl::cat(ClangDiffCategory)); @@ -43,12 +56,33 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt StopAfter("stop-after", + cl::desc(""), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + +static cl::opt MaxSize("s", cl::desc(""), cl::Optional, + cl::init(-1), cl::cat(ClangDiffCategory)); + +static cl::opt BuildPath("p", cl::desc("Build path"), cl::init(""), + cl::Optional, cl::cat(ClangDiffCategory)); + +static cl::list ArgsAfter( + "extra-arg", + cl::desc("Additional argument to append to the compiler command line"), + cl::cat(ClangDiffCategory)); + +static cl::list ArgsBefore( + "extra-arg-before", + cl::desc("Additional argument to prepend to the compiler command line"), + cl::cat(ClangDiffCategory)); + static std::unique_ptr getAST(const StringRef Filename) { std::string ErrorMessage; std::unique_ptr Compilations; if (!NoCompilationDatabase) - Compilations = - CompilationDatabase::autoDetectFromSource(Filename, ErrorMessage); + Compilations = CompilationDatabase::autoDetectFromSource( + BuildPath.empty() ? Filename : BuildPath, ErrorMessage); if (!Compilations) { if (!NoCompilationDatabase) llvm::errs() @@ -58,6 +92,14 @@ Compilations = llvm::make_unique( ".", std::vector()); } + auto AdjustingCompilations = + llvm::make_unique( + std::move(Compilations)); + AdjustingCompilations->appendArgumentsAdjuster( + getInsertArgumentAdjuster(ArgsBefore, ArgumentInsertPosition::BEGIN)); + AdjustingCompilations->appendArgumentsAdjuster( + getInsertArgumentAdjuster(ArgsAfter, ArgumentInsertPosition::END)); + Compilations = std::move(AdjustingCompilations); std::array Files = {{Filename}}; ClangTool Tool(*Compilations, Files); std::vector> ASTs; @@ -67,6 +109,287 @@ return std::move(ASTs[0]); } +static char hexdigit(int N) { return N &= 0xf, N + (N < 10 ? '0' : 'a' - 10); } + +static const char HtmlDiffHeader[] = R"( + + + + + + + +
+)"; + +static void printHtml(raw_ostream &OS, char C) { + switch (C) { + case '&': + OS << "&"; + break; + case '<': + OS << "<"; + break; + case '>': + OS << ">"; + break; + case '\'': + OS << "'"; + break; + case '"': + OS << """; + break; + default: + OS << C; + } +} + +static void printHtml(raw_ostream &OS, const StringRef Str) { + for (char C : Str) + printHtml(OS, C); +} + +static std::string getChangeKindAbbr(diff::ChangeKind Kind) { + switch (Kind) { + case diff::None: + return ""; + case diff::Delete: + return "d"; + case diff::Update: + return "u"; + case diff::Insert: + return "i"; + case diff::Move: + return "m"; + case diff::UpdateMove: + return "u m"; + } +} + +static unsigned printHtmlForNode(raw_ostream &OS, const diff::ASTDiff &Diff, + diff::SyntaxTree &Tree, bool IsLeft, + diff::NodeId Id, unsigned Offset) { + const diff::Node &Node = Tree.getNode(Id); + char MyTag, OtherTag; + diff::NodeId LeftId, RightId; + diff::NodeId TargetId = Diff.getMapped(Tree, Id); + if (IsLeft) { + MyTag = 'L'; + OtherTag = 'R'; + LeftId = Id; + RightId = TargetId; + } else { + MyTag = 'R'; + OtherTag = 'L'; + LeftId = TargetId; + RightId = Id; + } + unsigned Begin, End; + std::tie(Begin, End) = Tree.getFileOffsets(Node); + const SourceManager &SrcMgr = Tree.getASTContext().getSourceManager(); + auto Code = SrcMgr.getBuffer(SrcMgr.getMainFileID())->getBuffer(); + for (; Offset < Begin; ++Offset) + printHtml(OS, Code[Offset]); + OS << ""; + + for (diff::NodeId Child : Node.Children) + Offset = printHtmlForNode(OS, Diff, Tree, IsLeft, Child, Offset); + + for (; Offset < End; ++Offset) + printHtml(OS, Code[Offset]); + if (Id == Tree.getRootId()) { + End = Code.size(); + for (; Offset < End; ++Offset) + printHtml(OS, Code[Offset]); + } + OS << ""; + return Offset; +} + +static void printJsonString(raw_ostream &OS, const StringRef Str) { + for (char C : Str) { + switch (C) { + case '"': + OS << R"(\")"; + break; + case '\\': + OS << R"(\\)"; + break; + case '\n': + OS << 
R"(\n)"; + break; + case '\t': + OS << R"(\t)"; + break; + default: + if ('\x00' <= C && C <= '\x1f') { + OS << R"(\u00)" << hexdigit(C >> 4) << hexdigit(C); + } else { + OS << C; + } + } + } +} + +static void printNodeAttributes(raw_ostream &OS, diff::SyntaxTree &Tree, + diff::NodeId Id) { + const diff::Node &N = Tree.getNode(Id); + OS << R"("id":)" << int(Id); + OS << R"(,"type":")" << N.getTypeLabel() << '"'; + auto Offsets = Tree.getFileOffsets(N); + OS << R"(,"begin":)" << Offsets.first; + OS << R"(,"end":)" << Offsets.second; + std::string Value = Tree.getNodeValue(N); + if (!Value.empty()) { + OS << R"(,"value":")"; + printJsonString(OS, Value); + OS << '"'; + } +} + +static void printNodeAsJson(raw_ostream &OS, diff::SyntaxTree &Tree, + diff::NodeId Id) { + const diff::Node &N = Tree.getNode(Id); + OS << "{"; + printNodeAttributes(OS, Tree, Id); + auto Identifier = N.getIdentifier(); + auto QualifiedIdentifier = N.getQualifiedIdentifier(); + if (Identifier) { + OS << R"(,"identifier":")"; + printJsonString(OS, *Identifier); + OS << R"(")"; + if (QualifiedIdentifier && *Identifier != *QualifiedIdentifier) { + OS << R"(,"qualified_identifier":")"; + printJsonString(OS, *QualifiedIdentifier); + OS << R"(")"; + } + } + OS << R"(,"children":[)"; + if (N.Children.size() > 0) { + printNodeAsJson(OS, Tree, N.Children[0]); + for (size_t I = 1, E = N.Children.size(); I < E; ++I) { + OS << ","; + printNodeAsJson(OS, Tree, N.Children[I]); + } + } + OS << "]}"; +} + +static void printNode(raw_ostream &OS, diff::SyntaxTree &Tree, + diff::NodeId Id) { + if (Id.isInvalid()) { + OS << "None"; + return; + } + OS << Tree.getNode(Id).getTypeLabel(); + std::string Value = Tree.getNodeValue(Id); + if (!Value.empty()) + OS << ": " << Value; + OS << "(" << Id << ")"; +} + +static void printTree(raw_ostream &OS, diff::SyntaxTree &Tree) { + for (diff::NodeId Id : Tree) { + for (int I = 0; I < Tree.getNode(Id).Depth; ++I) + OS << " "; + printNode(OS, Tree, Id); + OS << "\n"; + } +} + 
+static void printDstChange(raw_ostream &OS, diff::ASTDiff &Diff, + diff::SyntaxTree &SrcTree, diff::SyntaxTree &DstTree, + diff::NodeId Dst) { + const diff::Node &DstNode = DstTree.getNode(Dst); + diff::NodeId Src = Diff.getMapped(DstTree, Dst); + switch (DstNode.ChangeKind) { + case diff::None: + break; + case diff::Delete: + llvm_unreachable("The destination tree can't have deletions."); + case diff::Update: + OS << "Update "; + printNode(OS, SrcTree, Src); + OS << " to " << DstTree.getNodeValue(Dst) << "\n"; + break; + case diff::Insert: + case diff::Move: + case diff::UpdateMove: + if (DstNode.ChangeKind == diff::Insert) + OS << "Insert"; + else if (DstNode.ChangeKind == diff::Move) + OS << "Move"; + else if (DstNode.ChangeKind == diff::UpdateMove) + OS << "Update and Move"; + OS << " "; + printNode(OS, DstTree, Dst); + OS << " into "; + printNode(OS, DstTree, DstNode.Parent); + OS << " at " << DstTree.findPositionInParent(Dst) << "\n"; + break; + } +} + int main(int argc, const char **argv) { cl::HideUnrelatedOptions(ClangDiffCategory); if (!cl::ParseCommandLineOptions(argc, argv)) { @@ -74,7 +397,7 @@ return 1; } - if (ASTDump) { + if (ASTDump || ASTDumpJson) { if (!DestinationPath.empty()) { llvm::errs() << "Error: Please specify exactly one filename.\n"; return 1; @@ -83,7 +406,15 @@ if (!AST) return 1; diff::SyntaxTree Tree(AST->getASTContext()); - Tree.printAsJson(llvm::outs()); + if (ASTDump) { + printTree(llvm::outs(), Tree); + return 0; + } + llvm::outs() << R"({"filename":")"; + printJsonString(llvm::outs(), SourcePath); + llvm::outs() << R"(","root":)"; + printNodeAsJson(llvm::outs(), Tree, Tree.getRootId()); + llvm::outs() << "}\n"; return 0; } @@ -98,13 +429,51 @@ return 1; diff::ComparisonOptions Options; + if (MaxSize != -1) + Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { + if (StopAfter == "topdown") + Options.StopAfterTopDown = true; + else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after"; + 
return 1; + } + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); - diff::ASTDiff DiffTool(SrcTree, DstTree, Options); - for (const auto &Match : DiffTool.getMatches()) - DiffTool.printMatch(llvm::outs(), Match); - for (const auto &Change : DiffTool.getChanges()) - DiffTool.printChange(llvm::outs(), Change); + diff::ASTDiff Diff(SrcTree, DstTree, Options); + + if (HtmlDiff) { + llvm::outs() << HtmlDiffHeader << "
";
+    llvm::outs() << "
"; + printHtmlForNode(llvm::outs(), Diff, SrcTree, true, SrcTree.getRootId(), 0); + llvm::outs() << "
"; + llvm::outs() << "
"; + printHtmlForNode(llvm::outs(), Diff, DstTree, false, DstTree.getRootId(), + 0); + llvm::outs() << "
"; + llvm::outs() << "
\n"; + return 0; + } + + for (diff::NodeId Dst : DstTree) { + diff::NodeId Src = Diff.getMapped(DstTree, Dst); + if (PrintMatches && Src.isValid()) { + llvm::outs() << "Match "; + printNode(llvm::outs(), SrcTree, Src); + llvm::outs() << " to "; + printNode(llvm::outs(), DstTree, Dst); + llvm::outs() << "\n"; + } + printDstChange(llvm::outs(), Diff, SrcTree, DstTree, Dst); + } + for (diff::NodeId Src : SrcTree) { + if (Diff.getMapped(SrcTree, Src).isInvalid()) { + llvm::outs() << "Delete "; + printNode(llvm::outs(), SrcTree, Src); + llvm::outs() << "\n"; + } + } return 0; }