Index: include/clang/Tooling/ASTDiff/ASTDiff.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiff.h +++ include/clang/Tooling/ASTDiff/ASTDiff.h @@ -25,37 +25,22 @@ namespace clang { namespace diff { -/// This represents a match between two nodes in the source and destination -/// trees, meaning that they are likely to be related. -struct Match { - NodeId Src, Dst; -}; - enum ChangeKind { - Delete, // (Src): delete node Src. - Update, // (Src, Dst): update the value of node Src to match Dst. - Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. - Move // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. -}; - -struct Change { - ChangeKind Kind; - NodeId Src, Dst; - size_t Position; - - Change(ChangeKind Kind, NodeId Src, NodeId Dst, size_t Position) - : Kind(Kind), Src(Src), Dst(Dst), Position(Position) {} - Change(ChangeKind Kind, NodeId Src) : Kind(Kind), Src(Src) {} - Change(ChangeKind Kind, NodeId Src, NodeId Dst) - : Kind(Kind), Src(Src), Dst(Dst) {} + None, + Delete, // (Src): delete node Src. + Update, // (Src, Dst): update the value of node Src to match Dst. + Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos. + Move, // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos. + UpdateMove // Same as Move plus Update. }; /// Represents a Clang AST node, alongside some additional information. struct Node { NodeId Parent, LeftMostDescendant, RightMostDescendant; - int Depth, Height; + int Depth, Height, Shift; ast_type_traits::DynTypedNode ASTNode; SmallVector Children; + ChangeKind ChangeKind = None; ast_type_traits::ASTNodeKind getType() const; StringRef getTypeLabel() const; @@ -67,15 +52,8 @@ ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options); ~ASTDiff(); - // Returns a list of matches. - std::vector getMatches(); - /// Returns an edit script. - std::vector getChanges(); - - // Prints an edit action. - void printChange(raw_ostream &OS, const Change &Chg) const; - // Prints a match between two nodes. - void printMatch(raw_ostream &OS, const Match &M) const; + // Returns the ID of the node that is mapped to the given node in SourceTree. + NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const; class Impl; @@ -99,15 +77,22 @@ const ASTContext &getASTContext() const; StringRef getFilename() const; - const Node &getNode(NodeId Id) const; + int getSize() const; NodeId getRootId() const; + using PreorderIterator = NodeId; + PreorderIterator begin() const; + PreorderIterator end() const; + + const Node &getNode(NodeId Id) const; + int findPositionInParent(NodeId Id) const; std::pair getFileOffsets(const Node &N) const; /// Serialize the node attributes to a string representation. This should /// uniquely distinguish nodes of the same kind. Note that this function just /// returns a representation of the node value, not considering descendants. - std::string getNodeValue(const DynTypedNode &DTN) const; + std::string getNodeValue(NodeId Id) const; + std::string getNodeValue(const Node &Node) const; class Impl; std::unique_ptr TreeImpl; @@ -132,8 +117,8 @@ bool StopAfterTopDown = false; /// Returns false if the nodes should never be matched. - bool isMatchingAllowed(const DynTypedNode &N1, const DynTypedNode &N2) const { - return N1.getNodeKind().isSame(N2.getNodeKind()); + bool isMatchingAllowed(const Node &N1, const Node &N2) const { + return N1.getType().isSame(N2.getType()); } }; Index: include/clang/Tooling/ASTDiff/ASTDiffInternal.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiffInternal.h +++ include/clang/Tooling/ASTDiff/ASTDiffInternal.h @@ -36,6 +36,8 @@ operator int() const { return Id; } NodeId &operator++() { return ++Id, *this; } NodeId &operator--() { return --Id, *this; } + // Support defining iterators on NodeId. + NodeId &operator*() { return *this; } bool isValid() const { return Id != InvalidNodeId; } bool isInvalid() const { return Id == InvalidNodeId; } Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -82,26 +82,23 @@ class ASTDiff::Impl { public: SyntaxTree::Impl &T1, &T2; - bool IsMappingDone = false; Mapping TheMapping; Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, - const ComparisonOptions &Options) - : T1(T1), T2(T2), Options(Options) {} + const ComparisonOptions &Options); /// Matches nodes one-by-one based on their similarity. void computeMapping(); - std::vector getMatches(Mapping &M); + // Compute ChangeKind for each node based on similarity. + void computeChangeKinds(Mapping &M); - /// Finds an edit script that converts T1 to T2. - std::vector computeChanges(Mapping &M); - - void printChangeImpl(raw_ostream &OS, const Change &Chg) const; - void printMatchImpl(raw_ostream &OS, const Match &M) const; - - // Returns a mapping of identical subtrees. - Mapping matchTopDown() const; + NodeId getMapped(const SyntaxTree::Impl &Tree, NodeId Id) const { + if (&Tree == &T1) + return TheMapping.getDst(Id); + assert(&Tree == &T2 && "Invalid tree."); + return TheMapping.getSrc(Id); + } private: // Returns true if the two subtrees are identical. @@ -112,6 +109,9 @@ // Returns false if the nodes must not be mached. bool isMatchingPossible(NodeId Id1, NodeId Id2) const; + // Returns true if the nodes' parents are matched. + bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const; + // Uses an optimal albeit slow algorithm to compute a mapping between two // subtrees, but only if both have fewer nodes than MaxSize. void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const; @@ -123,6 +123,9 @@ // Returns the node that has the highest degree of similarity. NodeId findCandidate(const Mapping &M, NodeId Id1) const; + // Returns a mapping of identical subtrees. + Mapping matchTopDown() const; + // Tries to match any yet unmapped nodes, in a bottom-up fashion. void matchBottomUp(Mapping &M) const; @@ -155,9 +158,12 @@ std::vector Leaves; // Maps preorder indices to postorder ones. std::vector PostorderIds; + std::vector NodesBfs; int getSize() const { return Nodes.size(); } NodeId getRootId() const { return 0; } + PreorderIterator begin() const { return getRootId(); } + PreorderIterator end() const { return getSize(); } const Node &getNode(NodeId Id) const { return Nodes[Id]; } Node &getMutableNode(NodeId Id) { return Nodes[Id]; } @@ -165,16 +171,20 @@ void addNode(Node &N) { Nodes.push_back(N); } int getNumberOfDescendants(NodeId Id) const; bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const; + int findPositionInParent(NodeId Id, bool Shifted = false) const; std::string getNodeValue(NodeId Id) const; - std::string getNodeValue(const DynTypedNode &DTN) const; - /// Prints the node as "[: ]( getSubtreePostorder(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Postorder; + std::function Traverse = [&](NodeId Id) { + const Node &N = Tree.getNode(Id); + for (NodeId Child : N.Children) + Traverse(Child); + Postorder.push_back(Id); + }; + Traverse(Root); + return Postorder; +} + +static std::vector getSubtreeBfs(const SyntaxTree::Impl &Tree, + NodeId Root) { + std::vector Ids; + size_t Expanded = 0; + Ids.push_back(Root); + while (Expanded < Ids.size()) + for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) + Ids.push_back(Child); + return Ids; +} + void SyntaxTree::Impl::initTree() { setLeftMostDescendants(); int PostorderId = 0; @@ -310,6 +344,7 @@ ++PostorderId; }; PostorderTraverse(getRootId()); + NodesBfs = getSubtreeBfs(*this, getRootId()); } void SyntaxTree::Impl::setLeftMostDescendants() { @@ -324,30 +359,6 @@ } } -static std::vector getSubtreePostorder(const SyntaxTree::Impl &Tree, - NodeId Root) { - std::vector Postorder; - std::function Traverse = [&](NodeId Id) { - const Node &N = Tree.getNode(Id); - for (NodeId Child : N.Children) - Traverse(Child); - Postorder.push_back(Id); - }; - Traverse(Root); - return Postorder; -} - -static std::vector getSubtreeBfs(const SyntaxTree::Impl &Tree, - NodeId Root) { - std::vector Ids; - size_t Expanded = 0; - Ids.push_back(Root); - while (Expanded < Ids.size()) - for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) - Ids.push_back(Child); - return Ids; -} - int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { return getNode(Id).RightMostDescendant - Id + 1; } @@ -356,11 +367,29 @@ return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant; } +int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const { + NodeId Parent = getNode(Id).Parent; + if (Parent.isInvalid()) + return 0; + const auto &Siblings = getNode(Parent).Children; + int Position = 0; + for (size_t I = 0, E = Siblings.size(); I < E; ++I) { + if (Shifted) + Position += getNode(Siblings[I]).Shift; + if (Siblings[I] == Id) { + Position += I; + return Position; + } + } + llvm_unreachable("Node not found in parent's children."); +} + std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { - return getNodeValue(getNode(Id).ASTNode); + return getNodeValue(getNode(Id)); } -std::string SyntaxTree::Impl::getNodeValue(const DynTypedNode &DTN) const { +std::string SyntaxTree::Impl::getNodeValue(const Node &N) const { + const DynTypedNode &DTN = N.ASTNode; if (auto *X = DTN.get()) return X->getOpcodeStr(); if (auto *X = DTN.get()) { @@ -406,32 +435,6 @@ llvm_unreachable("Fatal: unhandled AST node.\n"); } -void SyntaxTree::Impl::printTree() const { printTree(getRootId()); } -void SyntaxTree::Impl::printTree(NodeId Root) const { - printTree(llvm::outs(), Root); -} - -void SyntaxTree::Impl::printTree(raw_ostream &OS, NodeId Root) const { - const Node &N = getNode(Root); - for (int I = 0; I < N.Depth; ++I) - OS << " "; - printNode(OS, Root); - OS << "\n"; - for (NodeId Child : N.Children) - printTree(OS, Child); -} - -void SyntaxTree::Impl::printNode(raw_ostream &OS, NodeId Id) const { - if (Id.isInvalid()) { - OS << "None"; - return; - } - OS << getNode(Id).getTypeLabel(); - if (getNodeValue(Id) != "") - OS << ": " << getNodeValue(Id); - OS << "(" << PostorderIds[Id] << ")"; -} - /// Identifies a node in a subtree by its postorder offset, starting at 1. struct SNodeId { int Id = 0; @@ -733,8 +736,15 @@ } bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { - return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode, - T2.getNode(Id2).ASTNode); + return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2)); +} + +bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, + NodeId Id2) const { + NodeId P1 = T1.getNode(Id1).Parent; + NodeId P2 = T2.getNode(Id2).Parent; + return (P1.isInvalid() && P2.isInvalid()) || + (P1.isValid() && P2.isValid() && M.getDst(P1) == P2); } void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, @@ -767,7 +777,7 @@ NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { NodeId Candidate; double HighestSimilarity = 0.0; - for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) { + for (NodeId Id2 : T2) { if (!isMatchingPossible(Id1, Id2)) continue; if (M.hasDst(Id2)) @@ -852,101 +862,62 @@ return M; } +ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, + const ComparisonOptions &Options) + : T1(T1), T2(T2), Options(Options) { + computeMapping(); + computeChangeKinds(TheMapping); +} + void ASTDiff::Impl::computeMapping() { - if (IsMappingDone) - return; TheMapping = matchTopDown(); if (Options.StopAfterTopDown) return; matchBottomUp(TheMapping); - IsMappingDone = true; } -std::vector ASTDiff::Impl::getMatches(Mapping &M) { - std::vector Matches; - for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1) - if ((Id2 = M.getDst(Id1)).isValid()) - Matches.push_back({Id1, Id2}); - return Matches; -} - -std::vector ASTDiff::Impl::computeChanges(Mapping &M) { - std::vector Changes; - for (NodeId Id2 : getSubtreeBfs(T2, T2.getRootId())) { - const Node &N2 = T2.getNode(Id2); - NodeId Id1 = M.getSrc(Id2); - if (Id1.isValid()) { - assert(isMatchingPossible(Id1, Id2) && "Invalid matching."); - if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { - Changes.emplace_back(Update, Id1, Id2); - } - continue; +void ASTDiff::Impl::computeChangeKinds(Mapping &M) { + for (NodeId Id1 : T1) { + if (!M.hasSrc(Id1)) { + T1.getMutableNode(Id1).ChangeKind = Delete; + T1.getMutableNode(Id1).Shift -= 1; + } + } + for (NodeId Id2 : T2) { + if (!M.hasDst(Id2)) { + T2.getMutableNode(Id2).ChangeKind = Insert; + T2.getMutableNode(Id2).Shift -= 1; } - NodeId P2 = N2.Parent; - NodeId P1 = M.getSrc(P2); - assert(P1.isValid() && - "Parents must be matched for determining the change type."); - Node &Parent1 = T1.getMutableNode(P1); - const Node &Parent2 = T2.getNode(P2); - auto &Siblings1 = Parent1.Children; - const auto &Siblings2 = Parent2.Children; - size_t Position; - for (Position = 0; Position < Siblings2.size(); ++Position) - if (Siblings2[Position] == Id2 || Position >= Siblings1.size()) - break; - Changes.emplace_back(Insert, Id2, P2, Position); - Node PatchNode; - PatchNode.Parent = P1; - PatchNode.LeftMostDescendant = N2.LeftMostDescendant; - PatchNode.RightMostDescendant = N2.RightMostDescendant; - PatchNode.Depth = N2.Depth; - PatchNode.ASTNode = N2.ASTNode; - // TODO update Depth if needed - NodeId PatchNodeId = T1.getSize(); - // TODO maybe choose a different data structure for Children. - Siblings1.insert(Siblings1.begin() + Position, PatchNodeId); - T1.addNode(PatchNode); - M.link(PatchNodeId, Id2); - } - for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) { + } + for (NodeId Id1 : T1.NodesBfs) { NodeId Id2 = M.getDst(Id1); if (Id2.isInvalid()) - Changes.emplace_back(Delete, Id1, Id2); - } - return Changes; -} - -void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const { - switch (Chg.Kind) { - case Delete: - OS << "Delete "; - T1.printNode(OS, Chg.Src); - OS << "\n"; - break; - case Update: - OS << "Update "; - T1.printNode(OS, Chg.Src); - OS << " to " << T2.getNodeValue(Chg.Dst) << "\n"; - break; - case Insert: - OS << "Insert "; - T2.printNode(OS, Chg.Src); - OS << " into "; - T2.printNode(OS, Chg.Dst); - OS << " at " << Chg.Position << "\n"; - break; - case Move: - llvm_unreachable("TODO"); - break; - }; -} - -void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const { - OS << "Match "; - T1.printNode(OS, M.Src); - OS << " to "; - T2.printNode(OS, M.Dst); - OS << "\n"; + continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParent(Id1, true) != + T2.findPositionInParent(Id2, true)) { + T1.getMutableNode(Id1).Shift -= 1; + T2.getMutableNode(Id2).Shift -= 1; + } + } + for (NodeId Id2 : T2.NodesBfs) { + NodeId Id1 = M.getSrc(Id2); + if (Id1.isInvalid()) + continue; + Node &N1 = T1.getMutableNode(Id1); + Node &N2 = T2.getMutableNode(Id2); + if (Id1.isInvalid()) + continue; + if (!haveSameParents(M, Id1, Id2) || + T1.findPositionInParent(Id1, true) != + T2.findPositionInParent(Id2, true)) { + N1.ChangeKind = N2.ChangeKind = Move; + } + if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { + N1.ChangeKind = N2.ChangeKind = + (N1.ChangeKind == Move ? UpdateMove : Update); + } + } } ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, @@ -955,28 +926,14 @@ ASTDiff::~ASTDiff() = default; +NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const { + return DiffImpl->getMapped(*SourceTree.TreeImpl, Id); +} + SyntaxTree::SyntaxTree(const ASTContext &AST) : TreeImpl(llvm::make_unique( this, AST.getTranslationUnitDecl(), AST)) {} -std::vector ASTDiff::getMatches() { - DiffImpl->computeMapping(); - return DiffImpl->getMatches(DiffImpl->TheMapping); -} - -std::vector ASTDiff::getChanges() { - DiffImpl->computeMapping(); - return DiffImpl->computeChanges(DiffImpl->TheMapping); -} - -void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const { - DiffImpl->printChangeImpl(OS, Chg); -} - -void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const { - DiffImpl->printMatchImpl(OS, M); -} - SyntaxTree::~SyntaxTree() = default; const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; } @@ -985,7 +942,16 @@ return TreeImpl->getNode(Id); } +int SyntaxTree::getSize() const { return TreeImpl->getSize(); } NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); } +SyntaxTree::PreorderIterator SyntaxTree::begin() const { + return TreeImpl->begin(); +} +SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); } + +int SyntaxTree::findPositionInParent(NodeId Id) const { + return TreeImpl->findPositionInParent(Id); +} std::pair SyntaxTree::getFileOffsets(const Node &N) const { const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager(); @@ -1002,8 +968,12 @@ return {Begin, End}; } -std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const { - return TreeImpl->getNodeValue(DTN); +std::string SyntaxTree::getNodeValue(NodeId Id) const { + return TreeImpl->getNodeValue(Id); +} + +std::string SyntaxTree::getNodeValue(const Node &N) const { + return TreeImpl->getNodeValue(N); } } // end namespace diff Index: test/Tooling/clang-diff-basic.cpp =================================================================== --- test/Tooling/clang-diff-basic.cpp +++ test/Tooling/clang-diff-basic.cpp @@ -54,8 +54,8 @@ typedef unsigned nat; // CHECK: Match VarDecl: p(int){{.*}} to VarDecl: prod(double) -// CHECK: Match BinaryOperator: *{{.*}} to BinaryOperator: * // CHECK: Update VarDecl: p(int){{.*}} to prod(double) +// CHECK: Match BinaryOperator: *{{.*}} to BinaryOperator: * double prod = 1 * 2 * 10; // CHECK: Update DeclRefExpr int squared = prod * prod; Index: tools/clang-diff/ClangDiff.cpp =================================================================== --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -131,7 +131,7 @@ auto Offsets = Tree.getFileOffsets(N); OS << R"(,"begin":)" << Offsets.first; OS << R"(,"end":)" << Offsets.second; - std::string Value = Tree.getNodeValue(N.ASTNode); + std::string Value = Tree.getNodeValue(N); if (!Value.empty()) { OS << R"(,"value":")"; printJsonString(OS, Value); @@ -155,6 +155,52 @@ OS << "]}"; } +static void printNode(raw_ostream &OS, diff::SyntaxTree &Tree, + diff::NodeId Id) { + if (Id.isInvalid()) { + OS << "None"; + return; + } + OS << Tree.getNode(Id).getTypeLabel(); + std::string Value = Tree.getNodeValue(Id); + if (!Value.empty()) + OS << ": " << Value; + OS << "(" << Id << ")"; +} + +static void printDstChange(raw_ostream &OS, diff::ASTDiff &Diff, + diff::SyntaxTree &SrcTree, diff::SyntaxTree &DstTree, + diff::NodeId Dst) { + const diff::Node &DstNode = DstTree.getNode(Dst); + diff::NodeId Src = Diff.getMapped(DstTree, Dst); + switch (DstNode.ChangeKind) { + case diff::None: + break; + case diff::Delete: + llvm_unreachable("The destination tree can't have deletions."); + case diff::Update: + OS << "Update "; + printNode(OS, SrcTree, Src); + OS << " to " << DstTree.getNodeValue(Dst) << "\n"; + break; + case diff::Insert: + case diff::Move: + case diff::UpdateMove: + if (DstNode.ChangeKind == diff::Insert) + OS << "Insert"; + else if (DstNode.ChangeKind == diff::Move) + OS << "Move"; + else if (DstNode.ChangeKind == diff::UpdateMove) + OS << "Update and Move"; + OS << " "; + printNode(OS, DstTree, Dst); + OS << " into "; + printNode(OS, DstTree, DstNode.Parent); + OS << " at " << DstTree.findPositionInParent(Dst) << "\n"; + break; + } +} + int main(int argc, const char **argv) { cl::HideUnrelatedOptions(ClangDiffCategory); if (!cl::ParseCommandLineOptions(argc, argv)) { @@ -202,11 +248,26 @@ } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); - diff::ASTDiff DiffTool(SrcTree, DstTree, Options); - for (const auto &Match : DiffTool.getMatches()) - DiffTool.printMatch(llvm::outs(), Match); - for (const auto &Change : DiffTool.getChanges()) - DiffTool.printChange(llvm::outs(), Change); + diff::ASTDiff Diff(SrcTree, DstTree, Options); + + for (diff::NodeId Dst : DstTree) { + diff::NodeId Src = Diff.getMapped(DstTree, Dst); + if (Src.isValid()) { + llvm::outs() << "Match "; + printNode(llvm::outs(), SrcTree, Src); + llvm::outs() << " to "; + printNode(llvm::outs(), DstTree, Dst); + llvm::outs() << "\n"; + } + printDstChange(llvm::outs(), Diff, SrcTree, DstTree, Dst); + } + for (diff::NodeId Src : SrcTree) { + if (Diff.getMapped(SrcTree, Src).isInvalid()) { + llvm::outs() << "Delete "; + printNode(llvm::outs(), SrcTree, Src); + llvm::outs() << "\n"; + } + } return 0; }