Index: include/clang/Tooling/ASTDiff/ASTDiff.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiff.h +++ include/clang/Tooling/ASTDiff/ASTDiff.h @@ -25,26 +25,39 @@ namespace clang { namespace diff { -class SyntaxTree; +enum ChangeKind { + None, + Delete, // (Src): delete node Src. + Update, // (Src, Dst): update the value of node Src to match Dst. + Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. + Move, // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. + UpdateMove // Same as Move plus Update. +}; + +/// Represents a Clang AST node, alongside some additional information. +struct Node { + NodeId Parent, LeftMostDescendant, RightMostDescendant; + int Depth, Height, Shift; + ast_type_traits::DynTypedNode ASTNode; + SmallVector Children; + ChangeKind ChangeKind = None; + + ast_type_traits::ASTNodeKind getType() const; + const StringRef getTypeLabel() const; + bool isLeaf() const { return Children.empty(); } +}; class ASTDiff { public: - ASTDiff(SyntaxTree &T1, SyntaxTree &T2, const ComparisonOptions &Options); + ASTDiff(SyntaxTree &SrcTree, SyntaxTree &DstTree, const ComparisonOptions &Options); ~ASTDiff(); - // Returns a list of matches. - std::vector getMatches(); - /// Returns an edit script. - std::vector getChanges(); + const SyntaxTree &SrcTree, &DstTree; - // Prints an edit action. - void printChange(raw_ostream &OS, const Change &Chg) const; - // Prints a match between two nodes. - void printMatch(raw_ostream &OS, const Match &M) const; + // Returns the ID of the node that is mapped to the given node in SourceTree. + NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const; class Impl; - -private: std::unique_ptr DiffImpl; }; @@ -58,15 +71,28 @@ template SyntaxTree(T *Node, const ASTContext &AST) : TreeImpl(llvm::make_unique(this, Node, AST)) {} + ~SyntaxTree(); + + const ASTContext &getASTContext() const; + const StringRef getFilename() const; + int getSize() const; + + NodeId getRootId() const; + const Node &getNode(NodeId Id) const; + int findPositionInParent(NodeId Id) const; + + std::pair getFileOffsets(const Node &N) const; /// Serialize the node attributes to a string representation. This should /// uniquely distinguish nodes of the same kind. Note that this function just /// returns a representation of the node value, not considering descendants. - std::string getNodeValue(const DynTypedNode &DTN) const; + std::string getNodeValue(NodeId Id) const; + std::string getNodeValue(const Node &Node) const; - void printAsJson(raw_ostream &OS); + const std::vector &getNodesBreadthFirst() const; - std::unique_ptr TreeImpl; + class Impl; + std::unique_ptr TreeImpl; }; struct ComparisonOptions { @@ -75,28 +101,24 @@ /// During bottom-up matching, match only nodes with at least this value as /// the ratio of their common descendants. - double MinSimilarity = 0.2; + double MinSimilarity = 0.5; /// Whenever two subtrees are matched in the bottom-up phase, the optimal /// mapping is computed, unless the size of either subtrees exceeds this. int MaxSize = 100; - /// If this is set to true, nodes that have parents that must not be matched - /// (see NodeComparison) will be allowed to be matched. - bool EnableMatchingWithUnmatchableParents = false; - /// Returns false if the nodes should never be matched. - bool isMatchingAllowed(const DynTypedNode &N1, const DynTypedNode &N2) const { - return N1.getNodeKind().isSame(N2.getNodeKind()); + bool isMatchingAllowed(const Node &N1, const Node &N2) const { + return N1.getType().isSame(N2.getType()); } /// Returns zero if the nodes are considered to be equal. Returns a value /// indicating the editing distance between the nodes otherwise. /// There is no need to consider nodes that cannot be matched as input for /// this function (see isMatchingAllowed). - double getNodeDistance(const SyntaxTree &T1, const DynTypedNode &N1, - const SyntaxTree &T2, const DynTypedNode &N2) const { - if (T1.getNodeValue(N1) == T2.getNodeValue(N2)) + double getNodeDistance(const SyntaxTree &SrcTree, const Node &N1, + const SyntaxTree &DstTree, const Node &N2) const { + if (SrcTree.getNodeValue(N1) == DstTree.getNodeValue(N2)) return 0; return 1; } Index: include/clang/Tooling/ASTDiff/ASTDiffInternal.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiffInternal.h +++ include/clang/Tooling/ASTDiff/ASTDiffInternal.h @@ -11,17 +11,16 @@ #ifndef LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFFINTERNAL_H #define LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFFINTERNAL_H -#include - #include "clang/AST/ASTTypeTraits.h" namespace clang { namespace diff { -using DynTypedNode = ast_type_traits::DynTypedNode; - -struct ComparisonOptions; class SyntaxTree; +class SyntaxTreeImpl; +struct ComparisonOptions; + +using DynTypedNode = ast_type_traits::DynTypedNode; /// Within a tree, this identifies a node by its preorder offset. struct NodeId { @@ -35,157 +34,13 @@ NodeId(int Id) : Id(Id) {} operator int() const { return Id; } - NodeId &operator++() { return ++Id, *this; } - NodeId &operator--() { return --Id, *this; } + NodeId &operator++() { return ++this->Id, *this; } + NodeId &operator--() { return --this->Id, *this; } bool isValid() const { return Id != InvalidNodeId; } bool isInvalid() const { return Id == InvalidNodeId; } }; -/// This represents a match between two nodes in the source and destination -/// trees, meaning that they are likely to be related. -struct Match { - NodeId Src, Dst; -}; - -enum ChangeKind { - Delete, // (Src): delete node Src. - Update, // (Src, Dst): update the value of node Src to match Dst. - Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. - Move // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. -}; - -struct Change { - ChangeKind Kind; - NodeId Src, Dst; - size_t Position; - - Change(ChangeKind Kind, NodeId Src, NodeId Dst, size_t Position) - : Kind(Kind), Src(Src), Dst(Dst), Position(Position) {} - Change(ChangeKind Kind, NodeId Src) : Kind(Kind), Src(Src) {} - Change(ChangeKind Kind, NodeId Src, NodeId Dst) - : Kind(Kind), Src(Src), Dst(Dst) {} -}; - -/// Represents a Clang AST node, alongside some additional information. -struct Node { - NodeId Parent, LeftMostDescendant, RightMostDescendant; - int Depth, Height; - DynTypedNode ASTNode; - SmallVector Children; - - ast_type_traits::ASTNodeKind getType() const { return ASTNode.getNodeKind(); } - const StringRef getTypeLabel() const { return getType().asStringRef(); } - bool isLeaf() const { return Children.empty(); } -}; - -/// Maps nodes of the left tree to ones on the right, and vice versa. -class Mapping { -public: - Mapping() = default; - Mapping(Mapping &&Other) = default; - Mapping &operator=(Mapping &&Other) = default; - Mapping(int Size1, int Size2) { - // Maximum possible size after patching one tree. - int Size = Size1 + Size2; - SrcToDst = llvm::make_unique[]>(Size); - DstToSrc = llvm::make_unique[]>(Size); - } - - void link(NodeId Src, NodeId Dst) { - SrcToDst[Src].push_back(Dst); - DstToSrc[Dst].push_back(Src); - } - - NodeId getDst(NodeId Src) const { - if (hasSrc(Src)) - return SrcToDst[Src][0]; - return NodeId(); - } - NodeId getSrc(NodeId Dst) const { - if (hasDst(Dst)) - return DstToSrc[Dst][0]; - return NodeId(); - } - const SmallVector &getAllDsts(NodeId Src) const { - return SrcToDst[Src]; - } - const SmallVector &getAllSrcs(NodeId Dst) const { - return DstToSrc[Dst]; - } - bool hasSrc(NodeId Src) const { return !SrcToDst[Src].empty(); } - bool hasDst(NodeId Dst) const { return !DstToSrc[Dst].empty(); } - bool hasSrcDst(NodeId Src, NodeId Dst) const { - for (NodeId DstId : SrcToDst[Src]) - if (DstId == Dst) - return true; - for (NodeId SrcId : DstToSrc[Dst]) - if (SrcId == Src) - return true; - return false; - } - -private: - std::unique_ptr[]> SrcToDst, DstToSrc; -}; - -/// Represents the AST of a TranslationUnit. -class SyntaxTreeImpl { -public: - /// Constructs a tree from the entire translation unit. - SyntaxTreeImpl(SyntaxTree *Parent, const ASTContext &AST); - /// Constructs a tree from an AST node. - SyntaxTreeImpl(SyntaxTree *Parent, Decl *N, const ASTContext &AST); - SyntaxTreeImpl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST); - template - SyntaxTreeImpl( - SyntaxTree *Parent, - typename std::enable_if::value, T>::type *Node, - const ASTContext &AST) - : SyntaxTreeImpl(Parent, dyn_cast(Node), AST) {} - template - SyntaxTreeImpl( - SyntaxTree *Parent, - typename std::enable_if::value, T>::type *Node, - const ASTContext &AST) - : SyntaxTreeImpl(Parent, dyn_cast(Node), AST) {} - - SyntaxTree *Parent; - const ASTContext &AST; - std::vector Leaves; - // Maps preorder indices to postorder ones. - std::vector PostorderIds; - - int getSize() const { return Nodes.size(); } - NodeId root() const { return 0; } - - const Node &getNode(NodeId Id) const { return Nodes[Id]; } - Node &getMutableNode(NodeId Id) { return Nodes[Id]; } - bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); } - void addNode(Node &N) { Nodes.push_back(N); } - int getNumberOfDescendants(NodeId Id) const; - bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const; - - std::string getNodeValueImpl(NodeId Id) const; - std::string getNodeValueImpl(const DynTypedNode &DTN) const; - /// Prints the node as "[: ]( Nodes; - - void initTree(); - void setLeftMostDescendants(); -}; } // end namespace diff } // end namespace clang Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -27,84 +27,199 @@ namespace clang { namespace diff { +/// Maps nodes of the left tree to ones on the right, and vice versa. +class Mapping { +public: + Mapping() = default; + Mapping(Mapping &&Other) = default; + Mapping &operator=(Mapping &&Other) = default; + Mapping(size_t Size); + + void link(NodeId Src, NodeId Dst); + void unlink(NodeId Src, NodeId Dst); + + NodeId getDst(NodeId Src) const; + NodeId getSrc(NodeId Dst) const; + bool hasSrc(NodeId Src) const; + bool hasDst(NodeId Dst) const; + +private: + std::unique_ptr SrcToDst, DstToSrc; +}; + +/// Maps nodes to multiple possible mapping candidates. +class MultiMapping { +public: + size_t Size; + + MultiMapping(int Size1, int Size2); + + void link(NodeId Src, NodeId Dst); + + const SmallVector &getDsts(NodeId Src) const; + const SmallVector &getSrcs(NodeId Dst) const; + bool hasSrc(NodeId Src) const; + bool hasDst(NodeId Dst) const; + bool hasSrcDst(NodeId Src, NodeId Dst) const; + +private: + std::unique_ptr[]> SrcToDst, DstToSrc; +}; + class ASTDiff::Impl { public: - SyntaxTreeImpl &T1, &T2; + SyntaxTree::Impl &T1, &T2; bool IsMappingDone = false; Mapping TheMapping; - Impl(SyntaxTreeImpl &T1, SyntaxTreeImpl &T2, const ComparisonOptions &Options) + Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, + const ComparisonOptions &Options) : T1(T1), T2(T2), Options(Options) {} /// Matches nodes one-by-one based on their similarity. void computeMapping(); - std::vector getMatches(Mapping &M); - - /// Finds an edit script that converts T1 to T2. - std::vector computeChanges(Mapping &M); - - void printChangeImpl(raw_ostream &OS, const Change &Chg) const; - void printMatchImpl(raw_ostream &OS, const Match &M) const; + void computeChangeKinds(Mapping &M); - // Returns a mapping of isomorphic subtrees. - Mapping matchTopDown() const; + NodeId getMapped(const SyntaxTree::Impl &Tree, NodeId Id) const { + if (&Tree == &T1) + return TheMapping.getDst(Id); + assert(&Tree == &T2 && "Invalid tree."); + return TheMapping.getSrc(Id); + } private: // Returns true if the two subtrees are identical. - bool isomorphic(NodeId Id1, NodeId Id2) const; + bool identical(NodeId Id1, NodeId Id2) const; - bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const; + bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns false if the nodes must not be mached. bool isMatchingPossible(NodeId Id1, NodeId Id2) const; - // Adds all corresponding subtrees of the two nodes to the mapping. - // The two nodes must be isomorphic. - void addIsomorphicSubTrees(Mapping &M, NodeId Id1, NodeId Id2) const; - // Uses an optimal albeit slow algorithm to compute a mapping between two // subtrees, but only if both have fewer nodes than MaxSize. void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const; - // Computes the ratio of common descendants between the two nodes. - // Descendants are only considered to be equal when they are mapped in M. double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; + double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns the node that has the highest degree of similarity. - NodeId findCandidate(const Mapping &M, NodeId Id1) const; + std::pair findCandidate(const Mapping &M, NodeId Id1, + NodeId Subtree) const; + + NodeId tryToMap(Mapping &M, NodeId Id1, NodeId Id2, double Similarity) const; + + // Returns a mapping of identical subtrees. + Mapping matchTopDown() const; // Tries to match any yet unmapped nodes, in a bottom-up fashion. void matchBottomUp(Mapping &M) const; + Mapping resolveMultiMapping(const MultiMapping &M) const; + + void printJsonDiffOfTree(raw_ostream &OS, const SyntaxTree::Impl &Tree) const; + void printJsonDiffOfTreeNode(raw_ostream &OS, const SyntaxTree::Impl &Tree, + NodeId Id) const; + const ComparisonOptions &Options; friend class ZhangShashaMatcher; }; +class SyntaxTree::Impl { +public: + Impl(SyntaxTree *Parent, const ASTContext &AST); + Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST); + Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST); + template + Impl(SyntaxTree *Parent, + typename std::enable_if::value, T>::type *Node, + const ASTContext &AST) + : SyntaxTree::Impl(Parent, dyn_cast(Node), AST) {} + template + Impl(SyntaxTree *Parent, + typename std::enable_if::value, T>::type *Node, + const ASTContext &AST) + : SyntaxTree::Impl(Parent, dyn_cast(Node), AST) {} + + SyntaxTree *Parent; + const ASTContext &AST; + std::vector Leaves; + // Maps preorder indices to postorder ones. + std::vector PostorderIds; + std::vector BfsIds; + + const StringRef getFilename() const; + int getSize() const { return Nodes.size(); } + NodeId root() const { return 0; } + + const Node &getNode(NodeId Id) const { return Nodes[Id]; } + Node &getMutableNode(NodeId Id) { return Nodes[Id]; } + bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); } + void addNode(Node &N) { Nodes.push_back(N); } + // Returns the number of nodes in the subtree, including this one. + int getNumberOfDescendants(NodeId Id) const; + bool isDescendantOf(NodeId Child, NodeId Parent) const; + int findPositionInParent(NodeId Id) const; + int findPositionInParentShifted(NodeId Id) const; + + std::string getNodeValue(NodeId Id) const; + std::string getNodeValue(const DynTypedNode &DTN) const; + std::string getDeclValue(const Decl *D) const; + std::string getStmtValue(const Stmt *S) const; + + /// Prints the node as "[: ]( Nodes; + + void initTree(); + void setLeftMostDescendants(); +}; + template static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { if (!N) return true; SourceLocation SLoc = N->getLocStart(); - return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc); + if (!SLoc.isValid()) + return false; + // Ignore everything from other files. + if (!SrcMgr.isInMainFile(SLoc)) + return true; + // Ignore macros. + if (N->getLocStart() != SrcMgr.getSpellingLoc(N->getLocStart())) + return true; + return false; } +static bool isStmtExcluded(const Stmt *S) { return false; } + namespace { /// Counts the number of nodes that will be compared. struct NodeCountVisitor : public RecursiveASTVisitor { int Count = 0; - const SyntaxTreeImpl &Root; - NodeCountVisitor(const SyntaxTreeImpl &Root) : Root(Root) {} + const SyntaxTree::Impl &Root; + NodeCountVisitor(const SyntaxTree::Impl &Root) : Root(Root) {} bool TraverseDecl(Decl *D) { - if (isNodeExcluded(Root.AST.getSourceManager(), D)) + if (isNodeExcluded(Root.AST.getSourceManager(), D) || D->isImplicit()) return true; ++Count; RecursiveASTVisitor::TraverseDecl(D); return true; } bool TraverseStmt(Stmt *S) { - if (isNodeExcluded(Root.AST.getSourceManager(), S)) + if (S) + S = S->IgnoreImplicit(); + if (isNodeExcluded(Root.AST.getSourceManager(), S) || isStmtExcluded(S)) return true; ++Count; RecursiveASTVisitor::TraverseStmt(S); @@ -119,9 +234,9 @@ struct PreorderVisitor : public RecursiveASTVisitor { int Id = 0, Depth = 0; NodeId Parent; - SyntaxTreeImpl &Root; + SyntaxTree::Impl &Root; - PreorderVisitor(SyntaxTreeImpl &Root) : Root(Root) {} + PreorderVisitor(SyntaxTree::Impl &Root) : Root(Root) {} template std::tuple PreTraverse(T *ASTNode) { NodeId MyId = Id; @@ -147,7 +262,7 @@ Parent = PreviousParent; --Depth; Node &N = Root.getMutableNode(MyId); - N.RightMostDescendant = Id; + N.RightMostDescendant = Id - 1; if (N.isLeaf()) Root.Leaves.push_back(MyId); N.Height = 1; @@ -155,7 +270,7 @@ N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height); } bool TraverseDecl(Decl *D) { - if (isNodeExcluded(Root.AST.getSourceManager(), D)) + if (isNodeExcluded(Root.AST.getSourceManager(), D) || D->isImplicit()) return true; auto SavedState = PreTraverse(D); RecursiveASTVisitor::TraverseDecl(D); @@ -163,7 +278,9 @@ return true; } bool TraverseStmt(Stmt *S) { - if (isNodeExcluded(Root.AST.getSourceManager(), S)) + if (S) + S = S->IgnoreImplicit(); + if (isNodeExcluded(Root.AST.getSourceManager(), S) || isStmtExcluded(S)) return true; auto SavedState = PreTraverse(S); RecursiveASTVisitor::TraverseStmt(S); @@ -174,11 +291,10 @@ }; } // end anonymous namespace -SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, const ASTContext &AST) - : SyntaxTreeImpl(Parent, AST.getTranslationUnitDecl(), AST) {} +SyntaxTree::Impl::Impl(SyntaxTree *Parent, const ASTContext &AST) + : Impl(Parent, AST.getTranslationUnitDecl(), AST) {} -SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Decl *N, - const ASTContext &AST) +SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST) : Parent(Parent), AST(AST) { NodeCountVisitor NodeCounter(*this); NodeCounter.TraverseDecl(N); @@ -188,8 +304,7 @@ initTree(); } -SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Stmt *N, - const ASTContext &AST) +SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST) : Parent(Parent), AST(AST) { NodeCountVisitor NodeCounter(*this); NodeCounter.TraverseStmt(N); @@ -199,7 +314,31 @@ initTree(); } -void SyntaxTreeImpl::initTree() { +static std::vector getSubtreePostorder(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Postorder; + std::function Traverse = [&](NodeId Id) { + const Node &N = Tree.getNode(Id); + for (NodeId Child : N.Children) + Traverse(Child); + Postorder.push_back(Id); + }; + Traverse(Root); + return Postorder; +} + +static std::vector getSubtreeBfs(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Ids; + unsigned Expanded = 0; + Ids.push_back(Root); + while (Expanded < Ids.size()) + for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) + Ids.push_back(Child); + return Ids; +} + +void SyntaxTree::Impl::initTree() { setLeftMostDescendants(); int PostorderId = 0; PostorderIds.resize(getSize()); @@ -210,9 +349,10 @@ ++PostorderId; }; PostorderTraverse(root()); + BfsIds = getSubtreeBfs(*this, root()); } -void SyntaxTreeImpl::setLeftMostDescendants() { +void SyntaxTree::Impl::setLeftMostDescendants() { for (NodeId Leaf : Leaves) { getMutableNode(Leaf).LeftMostDescendant = Leaf; NodeId Parent, Cur = Leaf; @@ -224,157 +364,115 @@ } } -static std::vector getSubtreePostorder(const SyntaxTreeImpl &Tree, - NodeId Root) { - std::vector Postorder; - std::function Traverse = [&](NodeId Id) { - const Node &N = Tree.getNode(Id); - for (NodeId Child : N.Children) - Traverse(Child); - Postorder.push_back(Id); - }; - Traverse(Root); - return Postorder; +const StringRef SyntaxTree::Impl::getFilename() const { + SourceLocation RootLoc = getNode(getNode(root()).LeftMostDescendant) + .ASTNode.getSourceRange() + .getBegin(); + return AST.getSourceManager().getFilename(RootLoc); } -static std::vector getSubtreeBfs(const SyntaxTreeImpl &Tree, - NodeId Root) { - std::vector Ids; - size_t Expanded = 0; - Ids.push_back(Root); - while (Expanded < Ids.size()) - for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) - Ids.push_back(Child); - return Ids; +int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { + return getNode(Id).RightMostDescendant - Id + 1; } -int SyntaxTreeImpl::getNumberOfDescendants(NodeId Id) const { - return getNode(Id).RightMostDescendant - Id + 1; +bool SyntaxTree::Impl::isDescendantOf(NodeId Child, NodeId Parent) const { + return Child >= Parent && Child <= getNode(Parent).RightMostDescendant; } -bool SyntaxTreeImpl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { - NodeId Lower = SubtreeRoot; - NodeId Upper = getNode(SubtreeRoot).RightMostDescendant; - return Id >= Lower && Id <= Upper; +std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { + return getNodeValue(getNode(Id).ASTNode); } -std::string SyntaxTreeImpl::getNodeValueImpl(NodeId Id) const { - return getNodeValueImpl(getNode(Id).ASTNode); +std::string SyntaxTree::Impl::getNodeValue(const DynTypedNode &DTN) const { + if (auto *S = DTN.get()) + return getStmtValue(S); + if (auto *D = DTN.get()) + return getDeclValue(D); + llvm_unreachable("Fatal: unhandled AST node.\n"); } -std::string SyntaxTreeImpl::getNodeValueImpl(const DynTypedNode &DTN) const { - if (auto *X = DTN.get()) - return X->getOpcodeStr(); - if (auto *X = DTN.get()) { - CharSourceRange Range(X->getSourceRange(), false); - return Lexer::getSourceText(Range, AST.getSourceManager(), - AST.getLangOpts()); - } - if (auto *X = DTN.get()) { - SmallString<256> Str; - X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); - return Str.str(); - } - if (auto *X = DTN.get()) - return X->getString(); - if (auto *X = DTN.get()) - return X->getNameAsString() + "(" + X->getType().getAsString() + ")"; - if (auto *X = DTN.get()) - return ""; - if (auto *X = DTN.get()) - return ""; +std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const { std::string Value; - if (auto *X = DTN.get()) { - if (X->hasQualifier()) { - llvm::raw_string_ostream OS(Value); - PrintingPolicy PP(AST.getLangOpts()); - X->getQualifier()->print(OS, PP); + PrintingPolicy TPP(AST.getLangOpts()); + TPP.AnonymousTagLocations = false; + + if (auto *X = dyn_cast(D)) { + Value += X->getNameAsString() + "(" + X->getType().getAsString(TPP) + ")"; + if (auto *X = dyn_cast(D)) { + for (auto *Init : X->inits()) { + if (!Init->isWritten()) + continue; + if (Init->isBaseInitializer()) { + Value += + Init->getBaseClass()->getCanonicalTypeInternal().getAsString(TPP); + } else { + assert(Init->isAnyMemberInitializer()); + Value += Init->getMember()->getNameAsString() + ","; + } + } } - Value += X->getDecl()->getNameAsString(); return Value; } - if (auto *X = DTN.get()) + if (auto *X = dyn_cast(D)) Value += X->getNameAsString() + ";"; - if (auto *X = DTN.get()) - return Value + X->getUnderlyingType().getAsString() + ";"; - if (auto *X = DTN.get()) - return Value; - if (auto *X = DTN.get()) + if (auto *X = dyn_cast(D)) + return Value + X->getUnderlyingType().getAsString(TPP) + ";"; + if (auto *X = dyn_cast(D)) if (X->getTypeForDecl()) Value += - X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";"; - if (auto *X = DTN.get()) - return Value; - if (auto *X = DTN.get()) - return ""; - llvm_unreachable("Fatal: unhandled AST node.\n"); -} + X->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TPP) + + ";"; -void SyntaxTreeImpl::printTree() const { printTree(root()); } -void SyntaxTreeImpl::printTree(NodeId Root) const { - printTree(llvm::outs(), Root); -} + if (auto *X = dyn_cast(D)) + return X->getNominatedNamespace()->getNameAsString(); + if (auto *X = dyn_cast(D)) { + CharSourceRange Range(X->getSourceRange(), false); + return Lexer::getSourceText(Range, AST.getSourceManager(), + AST.getLangOpts()); + } -void SyntaxTreeImpl::printTree(raw_ostream &OS, NodeId Root) const { - const Node &N = getNode(Root); - for (int I = 0; I < N.Depth; ++I) - OS << " "; - printNode(OS, Root); - OS << "\n"; - for (NodeId Child : N.Children) - printTree(OS, Child); + return Value; } -void SyntaxTreeImpl::printNode(raw_ostream &OS, NodeId Id) const { - if (Id.isInvalid()) { - OS << "None"; - return; - } - OS << getNode(Id).getTypeLabel(); - if (getNodeValueImpl(Id) != "") - OS << ": " << getNodeValueImpl(Id); - OS << "(" << PostorderIds[Id] << ")"; -} - -void SyntaxTreeImpl::printNodeAsJson(raw_ostream &OS, NodeId Id) const { - auto N = getNode(Id); - OS << R"({"type":")" << N.getTypeLabel() << R"(")"; - if (getNodeValueImpl(Id) != "") - OS << R"(,"value":")" << getNodeValueImpl(Id) << R"(")"; - OS << R"(,"children":[)"; - if (N.Children.size() > 0) { - printNodeAsJson(OS, N.Children[0]); - for (size_t I = 1, E = N.Children.size(); I < E; ++I) { - OS << ","; - printNodeAsJson(OS, N.Children[I]); - } +std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { + if (auto *X = dyn_cast(S)) + return UnaryOperator::getOpcodeStr(X->getOpcode()); + if (auto *X = dyn_cast(S)) + return X->getOpcodeStr(); + if (auto *X = dyn_cast(S)) + return X->getMemberDecl()->getName(); + if (auto *X = dyn_cast(S)) { + SmallString<256> Str; + X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); + return Str.str(); } - OS << "]}"; -} + if (auto *X = dyn_cast(S)) + return X->getDecl()->getNameAsString(); + if (auto *X = dyn_cast(S)) + return X->getMemberDecl()->getNameAsString(); -void SyntaxTreeImpl::printAsJsonImpl(raw_ostream &OS) const { - OS << R"({"root":)"; - printNodeAsJson(OS, root()); - OS << "}\n"; + if (auto *X = dyn_cast(S)) + return X->getString(); + return ""; } /// Identifies a node in a subtree by its postorder offset, starting at 1. struct SNodeId { int Id = 0; - explicit SNodeId(int Id) : Id(Id) {} + explicit SNodeId(int Id) : Id(Id){}; explicit SNodeId() = default; operator int() const { return Id; } - SNodeId &operator++() { return ++Id, *this; } - SNodeId &operator--() { return --Id, *this; } - SNodeId operator+(int Other) const { return SNodeId(Id + Other); } + SNodeId &operator++() { return ++this->Id, *this; } + SNodeId &operator--() { return --this->Id, *this; } + SNodeId operator+(int Other) const { return SNodeId(this->Id + Other); } }; class Subtree { private: /// The parent tree. - const SyntaxTreeImpl &Tree; + const SyntaxTree::Impl &Tree; /// Maps SNodeIds to original ids. std::vector RootIds; /// Maps subtree nodes to their leftmost descendants wtihin the subtree. @@ -383,7 +481,7 @@ public: std::vector KeyRoots; - Subtree(const SyntaxTreeImpl &Tree, NodeId SubtreeRoot) : Tree(Tree) { + Subtree(const SyntaxTree::Impl &Tree, NodeId SubtreeRoot) : Tree(Tree) { RootIds = getSubtreePostorder(Tree, SubtreeRoot); int NumLeaves = setLeftMostDescendants(); computeKeyRoots(NumLeaves); @@ -448,8 +546,8 @@ std::unique_ptr[]> TreeDist, ForestDist; public: - ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTreeImpl &T1, - const SyntaxTreeImpl &T2, NodeId Id1, NodeId Id2) + ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTree::Impl &T1, + const SyntaxTree::Impl &T2, NodeId Id1, NodeId Id2) : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) { TreeDist = llvm::make_unique[]>( size_t(S1.getSize()) + 1); @@ -529,12 +627,11 @@ static constexpr double InsertionCost = 1; double getUpdateCost(SNodeId Id1, SNodeId Id2) { - const DynTypedNode &DTN1 = S1.getNode(Id1).ASTNode, - &DTN2 = S2.getNode(Id2).ASTNode; - if (!DiffImpl.Options.isMatchingAllowed(DTN1, DTN2)) + const Node &N1 = S1.getNode(Id1), N2 = S2.getNode(Id2); + if (!DiffImpl.Options.isMatchingAllowed(N1, N2)) return std::numeric_limits::max(); - return DiffImpl.Options.getNodeDistance(*DiffImpl.T1.Parent, DTN1, - *DiffImpl.T2.Parent, DTN2); + return DiffImpl.Options.getNodeDistance(*DiffImpl.T1.Parent, N1, + *DiffImpl.T2.Parent, N2); } void computeTreeDist() { @@ -573,11 +670,57 @@ } }; +ast_type_traits::ASTNodeKind Node::getType() const { + return ASTNode.getNodeKind(); +} + +const StringRef Node::getTypeLabel() const { return getType().asStringRef(); } + +Mapping::Mapping(size_t Size) { + SrcToDst = llvm::make_unique(Size); + DstToSrc = llvm::make_unique(Size); +} + +void Mapping::link(NodeId Src, NodeId Dst) { + SrcToDst[Src] = Dst; + DstToSrc[Dst] = Src; +} + +void Mapping::unlink(NodeId Src, NodeId Dst) { + SrcToDst[Src] = NodeId(); + DstToSrc[Dst] = NodeId(); +} + +NodeId Mapping::getDst(NodeId Src) const { return SrcToDst[Src]; } +NodeId Mapping::getSrc(NodeId Dst) const { return DstToSrc[Dst]; } +bool Mapping::hasSrc(NodeId Src) const { return getDst(Src).isValid(); } +bool Mapping::hasDst(NodeId Dst) const { return getSrc(Dst).isValid(); } + +MultiMapping::MultiMapping(int Size1, int Size2) : Size(Size1 + Size2) { + SrcToDst = llvm::make_unique[]>(Size); + DstToSrc = llvm::make_unique[]>(Size); +} + +void MultiMapping::link(NodeId Src, NodeId Dst) { + SrcToDst[Src].push_back(Dst); + DstToSrc[Dst].push_back(Src); +} + +bool MultiMapping::hasSrc(NodeId Src) const { return !getDsts(Src).empty(); } +bool MultiMapping::hasDst(NodeId Dst) const { return !getSrcs(Dst).empty(); } + +const SmallVector &MultiMapping::getDsts(NodeId Src) const { + return SrcToDst[Src]; +} +const SmallVector &MultiMapping::getSrcs(NodeId Dst) const { + return DstToSrc[Dst]; +} + namespace { // Compares nodes by their depth. struct HeightLess { - const SyntaxTreeImpl &Tree; - HeightLess(const SyntaxTreeImpl &Tree) : Tree(Tree) {} + const SyntaxTree::Impl &Tree; + HeightLess(const SyntaxTree::Impl &Tree) : Tree(Tree) {} bool operator()(NodeId Id1, NodeId Id2) const { return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height; } @@ -586,13 +729,13 @@ // Priority queue for nodes, sorted descendingly by their height. class PriorityList { - const SyntaxTreeImpl &Tree; + const SyntaxTree::Impl &Tree; HeightLess Cmp; std::vector Container; PriorityQueue, HeightLess> List; public: - PriorityList(const SyntaxTreeImpl &Tree) + PriorityList(const SyntaxTree::Impl &Tree) : Tree(Tree), Cmp(Tree), List(Cmp, Container) {} void push(NodeId id) { List.push(id); } @@ -621,50 +764,29 @@ } }; -bool ASTDiff::Impl::isomorphic(NodeId Id1, NodeId Id2) const { +bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const { const Node &N1 = T1.getNode(Id1); const Node &N2 = T2.getNode(Id2); if (N1.Children.size() != N2.Children.size() || !isMatchingPossible(Id1, Id2) || - Options.getNodeDistance(*T1.Parent, N1.ASTNode, *T2.Parent, N2.ASTNode) != - 0) + Options.getNodeDistance(*T1.Parent, N1, *T2.Parent, N2) != 0) return false; for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) - if (!isomorphic(N1.Children[Id], N2.Children[Id])) + if (!identical(N1.Children[Id], N2.Children[Id])) return false; return true; } -bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1, - NodeId Id2) const { - assert(isMatchingPossible(Id1, Id2) && - "Matching must be possible in the first place."); - if (M.hasSrcDst(Id1, Id2)) - return false; - if (Options.EnableMatchingWithUnmatchableParents) - return true; - const Node &N1 = T1.getNode(Id1); - const Node &N2 = T2.getNode(Id2); - NodeId P1 = N1.Parent; - NodeId P2 = N2.Parent; - // Only allow matching if parents can be matched. +bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, + NodeId Id2) const { + NodeId P1 = T1.getNode(Id1).Parent; + NodeId P2 = T2.getNode(Id2).Parent; return (P1.isInvalid() && P2.isInvalid()) || - (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2)); + (P1.isValid() && P2.isValid() && M.getDst(P1) == P2); } bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { - return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode, - T2.getNode(Id2).ASTNode); -} - -void ASTDiff::Impl::addIsomorphicSubTrees(Mapping &M, NodeId Id1, - NodeId Id2) const { - assert(isomorphic(Id1, Id2) && "Can only be called on isomorphic subtrees."); - M.link(Id1, Id2); - const Node &N1 = T1.getNode(Id1); - const Node &N2 = T2.getNode(Id2); - for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) - addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]); + return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2)); } void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, @@ -677,38 +799,80 @@ for (const auto Tuple : R) { NodeId Src = Tuple.first; NodeId Dst = Tuple.second; - if (canBeAddedToMapping(M, Src, Dst)) + if (!M.hasSrc(Src) && !M.hasDst(Dst)) M.link(Src, Dst); } } double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const { - if (Id1.isInvalid() || Id2.isInvalid()) - return 0.0; + const Node &N1 = T1.getNode(Id1); + const Node &N2 = T2.getNode(Id2); + bool SameValue = T1.getNodeValue(Id1) == T2.getNodeValue(Id2); + + return 0.001 // + + 0.5 * Options.MinSimilarity * haveSameParents(M, Id1, Id2) // + + 0.5 * Options.MinSimilarity * SameValue // + + 1 * getJaccardSimilarity(M, Id1, Id2) // + + Options.MinSimilarity * (N1.isLeaf() && N2.isLeaf()) // + ; +} + +double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, + NodeId Id2) const { int CommonDescendants = 0; const Node &N1 = T1.getNode(Id1); - for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) - CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); - return 2.0 * CommonDescendants / - (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); + for (NodeId Src = Id1; Src <= N1.RightMostDescendant; ++Src) { + NodeId Dst = M.getDst(Src); + CommonDescendants += int(Dst.isValid() && T2.isDescendantOf(Dst, Id2)); + } + double Denominator = T1.getNumberOfDescendants(Id1) + + T2.getNumberOfDescendants(Id2) - CommonDescendants; + return CommonDescendants / Denominator; } -NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { +std::pair ASTDiff::Impl::findCandidate(const Mapping &M, + NodeId Id1, + NodeId Subtree) const { NodeId Candidate; double MaxSimilarity = 0.0; - for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) { + for (NodeId Id2 = Subtree, E = T2.getNode(Subtree).RightMostDescendant + 1; + Id2 < E; ++Id2) { if (!isMatchingPossible(Id1, Id2)) continue; - if (M.hasDst(Id2)) - continue; double Similarity = getSimilarity(M, Id1, Id2); if (Similarity > MaxSimilarity) { + if (M.hasDst(Id2)) { + NodeId OldId1 = M.getSrc(Id2); + if (Similarity <= getSimilarity(M, OldId1, Id2)) + continue; + } MaxSimilarity = Similarity; Candidate = Id2; } } - return Candidate; + return {Candidate, MaxSimilarity}; +} + +NodeId ASTDiff::Impl::tryToMap(Mapping &M, NodeId Id1, NodeId Id2, + double Similarity) const { + if (Id2.isInvalid() || Similarity <= Options.MinSimilarity) + return NodeId(); + assert(isMatchingPossible(Id1, Id2)); + assert(!M.hasSrc(Id1)); + NodeId OldId1 = M.getSrc(Id2); + bool Removed = false; + if (OldId1.isValid()) { + if ((Removed = Similarity > getSimilarity(M, OldId1, Id2))) + M.unlink(OldId1, Id2); + } + if (!M.hasDst(Id2)) { + M.link(Id1, Id2); + addOptimalMapping(M, Id1, Id2); + } + if (Removed) + return OldId1; + return NodeId(); } void ASTDiff::Impl::matchBottomUp(Mapping &M) const { @@ -728,12 +892,27 @@ [&](NodeId Child) { return M.hasSrc(Child); }); if (Matched || !MatchedChildren) continue; - NodeId Id2 = findCandidate(M, Id1); - if (Id2.isInvalid() || !canBeAddedToMapping(M, Id1, Id2) || - getSimilarity(M, Id1, Id2) < Options.MinSimilarity) - continue; - M.link(Id1, Id2); - addOptimalMapping(M, Id1, Id2); + while (Id1.isValid()) { + NodeId Id2; + double Similarity; + std::tie(Id2, Similarity) = findCandidate(M, Id1, T2.root()); + Id1 = tryToMap(M, Id1, Id2, Similarity); + } + } + for (NodeId Id1 = 0, E = T1.getSize(); Id1 < E; ++Id1) { + NodeId Cur = Id1; + while (Cur.isValid()) { + NodeId P1 = T1.getNode(Cur).Parent; + if (P1.isInvalid() || !M.hasSrc(P1)) + break; + if (M.hasSrc(Cur)) + break; + NodeId P2 = M.getDst(P1); + NodeId Id2; + double Similarity; + std::tie(Id2, Similarity) = findCandidate(M, Cur, P2); + Cur = tryToMap(M, Cur, Id2, Similarity); + } } } @@ -741,7 +920,7 @@ PriorityList L1(T1); PriorityList L2(T2); - Mapping M(T1.getSize(), T2.getSize()); + MultiMapping M(T1.getSize(), T2.getSize()); L1.push(T1.root()); L2.push(T2.root()); @@ -763,9 +942,10 @@ H1 = L1.pop(); H2 = L2.pop(); for (NodeId Id1 : H1) { - for (NodeId Id2 : H2) - if (isomorphic(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2)) - addIsomorphicSubTrees(M, Id1, Id2); + for (NodeId Id2 : H2) { + if (identical(Id1, Id2)) + M.link(Id1, Id2); + } } for (NodeId Id1 : H1) { if (!M.hasSrc(Id1)) @@ -776,7 +956,50 @@ L2.open(Id2); } } - return M; + return resolveMultiMapping(M); +} + +Mapping ASTDiff::Impl::resolveMultiMapping(const MultiMapping &M) const { + Mapping UniqueMapping(M.Size); + // Add all unambiguous matches. + for (NodeId Src = 0, E = T1.getSize(); Src < E; ++Src) { + if (M.getDsts(Src).size() != 1) + continue; + NodeId Dst = M.getDsts(Src)[0]; + if (UniqueMapping.hasSrc(Src) || UniqueMapping.hasDst(Dst)) + continue; + UniqueMapping.link(Src, Dst); + } + // For others, choose the destination with the highest similarity to the + // source (the similarity is partly based on the unambiguous matches). + for (NodeId Src = 0, E = T1.getSize(); Src < E; ++Src) { + if (M.getDsts(Src).size() < 2) + continue; + if (UniqueMapping.hasSrc(Src)) + continue; + double MaxSimilarity = 0.0; + NodeId BestDst; + for (NodeId Dst : M.getDsts(Src)) { + double Similarity = getSimilarity(UniqueMapping, Src, Dst); + if (Similarity > MaxSimilarity) { + MaxSimilarity = Similarity; + BestDst = Dst; + } + } + if (BestDst.isValid() && !UniqueMapping.hasDst(BestDst)) + UniqueMapping.link(Src, BestDst); + } + for (NodeId Id1 = 0, Last = T1.getSize(); Id1 < Last; ++Id1) { + NodeId Id2 = UniqueMapping.getDst(Id1); + if (Id2.isInvalid()) + continue; + // Recursively connect all identical subtrees. + for (int I = 1, E = T1.getNumberOfDescendants(Id1); I < E; ++I) { + assert(isMatchingPossible(Id1 + I, Id2 + I)); + UniqueMapping.link(Id1 + I, Id2 + I); + } + } + return UniqueMapping; } void ASTDiff::Impl::computeMapping() { @@ -787,125 +1010,131 @@ IsMappingDone = true; } -std::vector ASTDiff::Impl::getMatches(Mapping &M) { - std::vector Matches; - for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1) - if ((Id2 = M.getDst(Id1)).isValid()) - Matches.push_back({Id1, Id2}); - return Matches; +int SyntaxTree::Impl::findPositionInParent(NodeId Id) const { + NodeId Parent = getNode(Id).Parent; + return Id - Parent - 1; } -std::vector ASTDiff::Impl::computeChanges(Mapping &M) { - std::vector Changes; - for (NodeId Id2 : getSubtreeBfs(T2, T2.root())) { - const Node &N2 = T2.getNode(Id2); - NodeId Id1 = M.getSrc(Id2); - if (Id1.isValid()) { - assert(isMatchingPossible(Id1, Id2) && "Invalid matching."); - if (T1.getNodeValueImpl(Id1) != T2.getNodeValueImpl(Id2)) { - Changes.emplace_back(Update, Id1, Id2); - } - continue; +int SyntaxTree::Impl::findPositionInParentShifted(NodeId Id) const { + NodeId Parent = getNode(Id).Parent; + if (Parent.isInvalid()) + return 0; + const auto &Siblings = getNode(Parent).Children; + int Position = 0; + for (size_t I = 0, E = Siblings.size(); I < E; ++I) { + Position += getNode(Siblings[I]).Shift; + if (Siblings[I] == Id) { + Position += I; + return Position; } - NodeId P2 = N2.Parent; - NodeId P1 = M.getSrc(P2); - assert(P1.isValid() && - "Parents must be matched for determining the change type."); - Node &Parent1 = T1.getMutableNode(P1); - const Node &Parent2 = T2.getNode(P2); - auto &Siblings1 = Parent1.Children; - const auto &Siblings2 = Parent2.Children; - size_t Position; - for (Position = 0; Position < Siblings2.size(); ++Position) - if (Siblings2[Position] == Id2 || Position >= Siblings1.size()) - break; - Changes.emplace_back(Insert, Id2, P2, Position); - Node PatchNode; - PatchNode.Parent = P1; - PatchNode.LeftMostDescendant = N2.LeftMostDescendant; - PatchNode.RightMostDescendant = N2.RightMostDescendant; - PatchNode.Depth = N2.Depth; - PatchNode.ASTNode = N2.ASTNode; - // TODO update Depth if needed - NodeId PatchNodeId = T1.getSize(); - // TODO maybe choose a different data structure for Children. - Siblings1.insert(Siblings1.begin() + Position, PatchNodeId); - T1.addNode(PatchNode); - M.link(PatchNodeId, Id2); } + llvm_unreachable("Node not found in parent's children."); +} + +void ASTDiff::Impl::computeChangeKinds(Mapping &M) { for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) { + if (!M.hasSrc(Id1)) { + T1.getMutableNode(Id1).ChangeKind = Delete; + T1.getMutableNode(Id1).Shift -= 1; + } + } + for (NodeId Id2 = 0; Id2 < T2.getSize(); ++Id2) { + if (!M.hasDst(Id2)) { + T2.getMutableNode(Id2).ChangeKind = Insert; + T2.getMutableNode(Id2).Shift -= 1; + } + } + for (NodeId Id1 : T1.BfsIds) { NodeId Id2 = M.getDst(Id1); if (Id2.isInvalid()) - Changes.emplace_back(Delete, Id1, Id2); - } - return Changes; -} - -void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const { - switch (Chg.Kind) { - case Delete: - OS << "Delete "; - T1.printNode(OS, Chg.Src); - OS << "\n"; - break; - case Update: - OS << "Update "; - T1.printNode(OS, Chg.Src); - OS << " to " << T2.getNodeValueImpl(Chg.Dst) << "\n"; - break; - case Insert: - OS << "Insert "; - T2.printNode(OS, Chg.Src); - OS << " into "; - T2.printNode(OS, Chg.Dst); - OS << " at " << Chg.Position << "\n"; - break; - case Move: - llvm_unreachable("TODO"); - break; - }; + continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParentShifted(Id1) != + T2.findPositionInParentShifted(Id2)) { + T1.getMutableNode(Id1).Shift -= 1; + T2.getMutableNode(Id2).Shift -= 1; + } + } + for (NodeId Id2 : T2.BfsIds) { + NodeId Id1 = M.getSrc(Id2); + if (Id1.isInvalid()) + continue; + Node &N1 = T1.getMutableNode(Id1); + Node &N2 = T2.getMutableNode(Id2); + if (Id1.isInvalid()) + continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParentShifted(Id1) != + T2.findPositionInParentShifted(Id2)) { + N1.ChangeKind = N2.ChangeKind = Move; + } + if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { + N1.ChangeKind = N2.ChangeKind = + (N1.ChangeKind == Move ? UpdateMove : Update); + } + } } -void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const { - OS << "Match "; - T1.printNode(OS, M.Src); - OS << " to "; - T2.printNode(OS, M.Dst); - OS << "\n"; +ASTDiff::ASTDiff(SyntaxTree &SrcTree, SyntaxTree &DstTree, + const ComparisonOptions &Options) + : SrcTree(SrcTree), DstTree(DstTree), + DiffImpl(llvm::make_unique(*SrcTree.TreeImpl, *DstTree.TreeImpl, + Options)) { + DiffImpl->computeMapping(); + DiffImpl->computeChangeKinds(DiffImpl->TheMapping); } -ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, - const ComparisonOptions &Options) - : DiffImpl(llvm::make_unique(*T1.TreeImpl, *T2.TreeImpl, Options)) {} +ASTDiff::~ASTDiff() = default; -ASTDiff::~ASTDiff() {} +NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const { + return DiffImpl->getMapped(*SourceTree.TreeImpl, Id); +} SyntaxTree::SyntaxTree(const ASTContext &AST) - : TreeImpl(llvm::make_unique( + : TreeImpl(llvm::make_unique( this, AST.getTranslationUnitDecl(), AST)) {} -std::vector ASTDiff::getMatches() { - DiffImpl->computeMapping(); - return DiffImpl->getMatches(DiffImpl->TheMapping); +SyntaxTree::~SyntaxTree() = default; + +const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; } + +const StringRef SyntaxTree::getFilename() const { + return TreeImpl->getFilename(); } -std::vector ASTDiff::getChanges() { - DiffImpl->computeMapping(); - return DiffImpl->computeChanges(DiffImpl->TheMapping); +int SyntaxTree::getSize() const { return TreeImpl->getSize(); } + +NodeId SyntaxTree::getRootId() const { return TreeImpl->root(); } + +const Node &SyntaxTree::getNode(NodeId Id) const { + return TreeImpl->getNode(Id); } -void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const { - DiffImpl->printChangeImpl(OS, Chg); +int SyntaxTree::findPositionInParent(NodeId Id) const { + return TreeImpl->findPositionInParent(Id); } -void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const { - DiffImpl->printMatchImpl(OS, M); +std::pair SyntaxTree::getFileOffsets(const Node &N) const { + const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager(); + SourceRange Range = N.ASTNode.getSourceRange(); + SourceLocation BeginLoc = Range.getBegin(); + SourceLocation EndLoc = Lexer::getLocForEndOfToken( + Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts()); + unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc)); + unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc)); + return {Begin, End}; } -void SyntaxTree::printAsJson(raw_ostream &OS) { TreeImpl->printAsJsonImpl(OS); } +std::string SyntaxTree::getNodeValue(NodeId Id) const { + return TreeImpl->getNodeValue(Id); +} + +std::string SyntaxTree::getNodeValue(const Node &Node) const { + return TreeImpl->getNodeValue(Node.ASTNode); +} -std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const { - return TreeImpl->getNodeValueImpl(DTN); +const std::vector &SyntaxTree::getNodesBreadthFirst() const { + return TreeImpl->BfsIds; } } // end namespace diff Index: test/Tooling/clang-diff-basic.cpp =================================================================== --- test/Tooling/clang-diff-basic.cpp +++ test/Tooling/clang-diff-basic.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -E %s > %T/src.cpp // RUN: %clang_cc1 -E %s > %T/dst.cpp -DDEST -// RUN: clang-diff -no-compilation-database %T/src.cpp %T/dst.cpp | FileCheck %s +// RUN: clang-diff -no-compilation-database -m %T/src.cpp %T/dst.cpp | FileCheck %s #ifndef DEST namespace src { @@ -11,6 +11,8 @@ void main() { foo(); }; +int f() { return 1 * 2 * 3 * 4; } + const char *a = "foo"; typedef unsigned int nat; @@ -43,9 +45,13 @@ } } -// CHECK: Match DeclRefExpr: foo{{.*}} to DeclRefExpr: inner::foo +// CHECK: Match DeclRefExpr: foo{{.*}} to DeclRefExpr: foo void main() { inner::foo(); } +// CHECK: Match ReturnStmt +int f1() { return 1 * 2 * 3 * 4; } +int f2() { return 1 * 2 * 3 * 4; } + // CHECK: Match StringLiteral: foo{{.*}} to StringLiteral: foo const char *b = "f" "o" "o"; Index: tools/clang-diff/CMakeLists.txt =================================================================== --- tools/clang-diff/CMakeLists.txt +++ tools/clang-diff/CMakeLists.txt @@ -7,6 +7,7 @@ ) target_link_libraries(clang-diff + clangBasic clangFrontend clangTooling clangToolingASTDiff Index: tools/clang-diff/ClangDiff.cpp =================================================================== --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/ASTDiff/ASTDiff.h" +#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/CommandLine.h" @@ -24,15 +25,20 @@ static cl::OptionCategory ClangDiffCategory("clang-diff options"); static cl::opt - DumpAST("ast-dump", + ASTDump("ast-dump", cl::desc("Print the internal representation of the AST as JSON."), cl::init(false), cl::cat(ClangDiffCategory)); -static cl::opt NoCompilationDatabase( - "no-compilation-database", - cl::desc( - "Do not attempt to load build settings from a compilation database"), - cl::init(false), cl::cat(ClangDiffCategory)); +static cl::opt + PrintMatches("m", cl::desc("Print the matched nodes (verbose)."), + cl::init(false), cl::cat(ClangDiffCategory)); + +static cl::opt JsonDiff("json", cl::desc("Print the diff as JSON."), + cl::init(false), cl::cat(ClangDiffCategory)); + +static cl::opt HtmlDiff("html", + cl::desc("Output a side-by-side diff in HTML."), + cl::init(false), cl::cat(ClangDiffCategory)); static cl::opt SourcePath(cl::Positional, cl::desc(""), cl::Required, @@ -43,12 +49,69 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt NoCompilationDatabase( + "no-compilation-database", + cl::desc( + "Do not attempt to load build settings from a compilation database"), + cl::init(false), cl::cat(ClangDiffCategory)); + +static cl::opt BuildPath("p", cl::desc("Build path"), cl::Optional, + cl::cat(ClangDiffCategory)); + +static cl::list ArgsAfter( + "extra-arg", + cl::desc("Additional argument to append to the compiler command line"), + cl::cat(ClangDiffCategory)); + +static cl::list ArgsBefore( + "extra-arg-before", + cl::desc("Additional argument to prepend to the compiler command line"), + cl::cat(ClangDiffCategory)); + +namespace { +class ArgumentsAdjustingCompilations : public CompilationDatabase { +public: + ArgumentsAdjustingCompilations( + std::unique_ptr Compilations) + : Compilations(std::move(Compilations)) {} + + void appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) { + Adjusters.push_back(std::move(Adjuster)); + } + + std::vector + getCompileCommands(StringRef FilePath) const override { + return adjustCommands(Compilations->getCompileCommands(FilePath)); + } + + std::vector getAllFiles() const override { + return Compilations->getAllFiles(); + } + + std::vector getAllCompileCommands() const override { + return adjustCommands(Compilations->getAllCompileCommands()); + } + +private: + std::unique_ptr Compilations; + std::vector Adjusters; + + std::vector + adjustCommands(std::vector Commands) const { + for (CompileCommand &Command : Commands) + for (const auto &Adjuster : Adjusters) + Command.CommandLine = Adjuster(Command.CommandLine, Command.Filename); + return Commands; + } +}; +} // namespace + static std::unique_ptr getAST(const StringRef Filename) { std::string ErrorMessage; std::unique_ptr Compilations; if (!NoCompilationDatabase) - Compilations = - CompilationDatabase::autoDetectFromSource(Filename, ErrorMessage); + Compilations = CompilationDatabase::autoDetectFromSource( + BuildPath.empty() ? Filename : BuildPath, ErrorMessage); if (!Compilations) { if (!NoCompilationDatabase) llvm::errs() @@ -58,6 +121,14 @@ Compilations = llvm::make_unique( ".", std::vector()); } + auto AdjustingCompilations = + llvm::make_unique( + std::move(Compilations)); + AdjustingCompilations->appendArgumentsAdjuster( + getInsertArgumentAdjuster(ArgsBefore, ArgumentInsertPosition::BEGIN)); + AdjustingCompilations->appendArgumentsAdjuster( + getInsertArgumentAdjuster(ArgsAfter, ArgumentInsertPosition::END)); + Compilations = std::move(AdjustingCompilations); std::array Files = {{Filename}}; ClangTool Tool(*Compilations, Files); std::vector> ASTs; @@ -67,6 +138,339 @@ return std::move(ASTs[0]); } +static const char HtmlDiffHeader[] = R"( + + + + + + + +
+)"; + +static void printHtml(raw_ostream &OS, char C) { + switch (C) { + case '&': + OS << "&"; + break; + case '<': + OS << "<"; + break; + case '>': + OS << ">"; + break; + case '\'': + OS << "'"; + break; + case '"': + OS << """; + break; + default: + OS << C; + } +} + +static void printHtml(raw_ostream &OS, const StringRef Str) { + for (char C : Str) + printHtml(OS, C); +} + +static std::string getChangeKindAbbr(diff::ChangeKind Kind) { + switch (Kind) { + case diff::None: + return ""; + case diff::Delete: + return "d"; + case diff::Update: + return "u"; + case diff::Insert: + return "i"; + case diff::Move: + return "m"; + case diff::UpdateMove: + return "u m"; + } +} + +static unsigned printHtmlForNode(raw_ostream &OS, const diff::ASTDiff &Diff, + const diff::SyntaxTree &Tree, diff::NodeId Id, + unsigned Offset) { + const diff::Node &Node = Tree.getNode(Id); + bool IsLeft = &Tree == &Diff.SrcTree; + char MyTag, OtherTag; + diff::NodeId LeftId, RightId; + diff::NodeId TargetId = Diff.getMapped(Tree, Id); + if (IsLeft) { + MyTag = 'L'; + OtherTag = 'R'; + LeftId = Id; + RightId = TargetId; + } else { + MyTag = 'R'; + OtherTag = 'L'; + LeftId = TargetId; + RightId = Id; + } + unsigned Begin, End; + std::tie(Begin, End) = Tree.getFileOffsets(Node); + const SourceManager &SrcMgr = Tree.getASTContext().getSourceManager(); + auto Code = SrcMgr.getBuffer(SrcMgr.getMainFileID())->getBuffer(); + for (; Offset < Begin; ++Offset) + printHtml(OS, Code[Offset]); + OS << ""; + + for (diff::NodeId Child : Node.Children) + Offset = printHtmlForNode(OS, Diff, Tree, Child, Offset); + + for (; Offset < End; ++Offset) + printHtml(OS, Code[Offset]); + if (Id == Tree.getRootId()) { + End = Code.size(); + for (; Offset < End; ++Offset) + printHtml(OS, Code[Offset]); + } + OS << ""; + return Offset; +} + +static void printJsonString(raw_ostream &OS, const StringRef Str) { + for (char C : Str) { + switch (C) { + case '"': + OS << R"(\")"; + break; + case '\\': + OS << R"(\\)"; + break; + case '\b': + OS << R"(\b)"; + break; + case '\f': + OS << R"(\f)"; + break; + case '\n': + OS << R"(\n)"; + break; + case '\r': + OS << R"(\r)"; + break; + case '\t': + OS << R"(\t)"; + break; + default: + OS << C; + } + } +} + +static void printNodeAttributes(raw_ostream &OS, const diff::SyntaxTree &Tree, + diff::NodeId Id) { + const diff::Node &N = Tree.getNode(Id); + OS << R"("id":)" << int(Id); + OS << R"(,"type":")" << N.getTypeLabel() << '"'; + auto Offsets = Tree.getFileOffsets(N); + OS << R"(,"begin":)" << Offsets.first; + OS << R"(,"end":)" << Offsets.second; + if (N.ChangeKind != diff::None) + OS << R"(,"change":")" << getChangeKindAbbr(N.ChangeKind) << '"'; + std::string Value = Tree.getNodeValue(N); + if (!Value.empty()) { + OS << R"(,"value":")"; + printJsonString(OS, Value); + OS << '"'; + } +} + +static void printNodeAsJson(raw_ostream &OS, const diff::SyntaxTree &Tree, + diff::NodeId Id) { + const diff::Node &N = Tree.getNode(Id); + OS << "{"; + printNodeAttributes(OS, Tree, Id); + OS << R"(,"children":[)"; + if (N.Children.size() > 0) { + printNodeAsJson(OS, Tree, N.Children[0]); + for (size_t I = 1, E = N.Children.size(); I < E; ++I) { + OS << ","; + printNodeAsJson(OS, Tree, N.Children[I]); + } + } + OS << "]}"; +} + +static void printJsonDiffOfTreeNode(raw_ostream &OS, const diff::ASTDiff &Diff, + const diff::SyntaxTree &Tree, + diff::NodeId Id) { + const diff::Node &N = Tree.getNode(Id); + OS << "{"; + printNodeAttributes(OS, Tree, Id); + if (Diff.getMapped(Tree, Id).isValid()) + OS << R"(,"tid":)" << int(Diff.getMapped(Tree, Id)); + OS << R"(,"children":[)"; + if (N.Children.size() > 0) { + printJsonDiffOfTreeNode(OS, Diff, Tree, N.Children[0]); + for (size_t I = 1, E = N.Children.size(); I < E; ++I) { + OS << ","; + printJsonDiffOfTreeNode(OS, Diff, Tree, N.Children[I]); + } + } + OS << "]}"; +} + +static void printJsonDiffOfTree(raw_ostream &OS, const diff::ASTDiff &Diff, + const diff::SyntaxTree &Tree) { + OS << R"({"filename":")"; + printJsonString(OS, Tree.getFilename()); + OS << R"(","root":)"; + printJsonDiffOfTreeNode(OS, Diff, Tree, Tree.getRootId()); + OS << R"(})"; +} + +static void printNode(raw_ostream &OS, const diff::SyntaxTree &Tree, + diff::NodeId Id) { + if (Id.isInvalid()) { + OS << "None"; + return; + } + OS << Tree.getNode(Id).getTypeLabel(); + std::string Value = Tree.getNodeValue(Id); + if (!Value.empty()) + OS << ": " << Value; + OS << "(" << Id << ")"; +} + +struct Change { + diff::ChangeKind Kind; + diff::NodeId Src, Dst; + unsigned Position; + + Change(diff::ChangeKind Kind, diff::NodeId Src, diff::NodeId Dst, + unsigned Position) + : Kind(Kind), Src(Src), Dst(Dst), Position(Position) {} + Change(diff::ChangeKind Kind, diff::NodeId Src, diff::NodeId Dst) + : Kind(Kind), Src(Src), Dst(Dst) {} + Change(diff::ChangeKind Kind, diff::NodeId Src) : Kind(Kind), Src(Src) {} +}; + +static std::vector getChanges(const diff::ASTDiff &Diff) { + std::vector Changes; + for (diff::NodeId Id2 : Diff.DstTree.getNodesBreadthFirst()) { + const diff::Node &N2 = Diff.DstTree.getNode(Id2); + diff::NodeId Id1 = Diff.getMapped(Diff.DstTree, Id2); + diff::NodeId P2 = N2.Parent; + switch (N2.ChangeKind) { + case diff::None: + case diff::Delete: + break; + case diff::Update: + Changes.emplace_back(N2.ChangeKind, Id1, Id2); + break; + case diff::Insert: + case diff::Move: + case diff::UpdateMove: + Changes.emplace_back(N2.ChangeKind, Id2, P2, + Diff.DstTree.findPositionInParent(Id2)); + break; + } + } + for (diff::NodeId Id1 : Diff.SrcTree.getNodesBreadthFirst()) + if (Diff.SrcTree.getNode(Id1).ChangeKind == diff::Delete) + Changes.emplace_back(diff::Delete, Id1); + return Changes; +} + +static void printChange(raw_ostream &OS, const diff::ASTDiff &Diff, + const Change &Chg) { + switch (Chg.Kind) { + case diff::Delete: + OS << "Delete "; + printNode(OS, Diff.SrcTree, Chg.Src); + OS << "\n"; + break; + case diff::Update: + OS << "Update "; + printNode(OS, Diff.SrcTree, Chg.Src); + OS << " to " << Diff.DstTree.getNodeValue(Chg.Dst) << "\n"; + break; + case diff::Insert: + case diff::Move: + case diff::UpdateMove: + switch (Chg.Kind) { + case diff::Insert: + OS << "Insert"; + break; + case diff::Move: + OS << "Move"; + break; + case diff::UpdateMove: + OS << "Update and Move"; + break; + default: + break; + } + OS << " "; + printNode(OS, Diff.DstTree, Chg.Src); + OS << " into "; + printNode(OS, Diff.DstTree, Chg.Dst); + OS << " at " << Chg.Position << "\n"; + break; + case diff::None: + llvm_unreachable("Expected a valid change kind."); + break; + }; +} + int main(int argc, const char **argv) { cl::HideUnrelatedOptions(ClangDiffCategory); if (!cl::ParseCommandLineOptions(argc, argv)) { @@ -74,7 +478,7 @@ return 1; } - if (DumpAST) { + if (ASTDump) { if (!DestinationPath.empty()) { llvm::errs() << "Error: Please specify exactly one filename.\n"; return 1; @@ -83,7 +487,12 @@ if (!AST) return 1; diff::SyntaxTree Tree(AST->getASTContext()); - Tree.printAsJson(llvm::outs()); + llvm::outs() << R"({"filename":")"; + printJsonString(llvm::outs(), Tree.getFilename()); + llvm::outs() << R"(","root":)"; + printNodeAsJson(llvm::outs(), Tree, Tree.getRootId()); + llvm::outs() << "}\n"; + return 0; } @@ -100,11 +509,41 @@ diff::ComparisonOptions Options; diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); - diff::ASTDiff DiffTool(SrcTree, DstTree, Options); - for (const auto &Match : DiffTool.getMatches()) - DiffTool.printMatch(llvm::outs(), Match); - for (const auto &Change : DiffTool.getChanges()) - DiffTool.printChange(llvm::outs(), Change); + diff::ASTDiff Diff(SrcTree, DstTree, Options); + + if (HtmlDiff) { + llvm::outs() << HtmlDiffHeader << "
";
+    llvm::outs() << "
"; + printHtmlForNode(llvm::outs(), Diff, SrcTree, SrcTree.getRootId(), 0); + llvm::outs() << "
"; + llvm::outs() << "
"; + printHtmlForNode(llvm::outs(), Diff, DstTree, DstTree.getRootId(), 0); + llvm::outs() << "
"; + llvm::outs() << "
\n"; + return 0; + } + if (JsonDiff) { + llvm::outs() << R"({"src":)"; + printJsonDiffOfTree(llvm::outs(), Diff, SrcTree); + llvm::outs() << R"(,"dst":)"; + printJsonDiffOfTree(llvm::outs(), Diff, DstTree); + llvm::outs() << R"(})"; + return 0; + } + + if (PrintMatches) { + for (diff::NodeId Src = 0, Dst, E = SrcTree.getSize(); Src < E; ++Src) { + if ((Dst = Diff.getMapped(SrcTree, Src)).isValid()) { + llvm::outs() << "Match "; + printNode(llvm::outs(), SrcTree, Src); + llvm::outs() << " to "; + printNode(llvm::outs(), DstTree, Dst); + llvm::outs() << "\n"; + } + } + } + for (const auto &Change : getChanges(Diff)) + printChange(llvm::outs(), Diff, Change); return 0; }