Index: include/clang/Tooling/ASTDiff/ASTDiff.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiff.h +++ include/clang/Tooling/ASTDiff/ASTDiff.h @@ -105,7 +105,7 @@ /// During bottom-up matching, match only nodes with at least this value as /// the ratio of their common descendants. - double MinSimilarity = 0.2; + double MinSimilarity = 0.5; /// Whenever two subtrees are matched in the bottom-up phase, the optimal /// mapping is computed, unless the size of either subtrees exceeds this. Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -68,7 +68,8 @@ // Compute ChangeKind for each node based on similarity. void computeChangeKinds(Mapping &M); - NodeId getMapped(const std::unique_ptr &Tree, NodeId Id) const { + NodeId getMapped(const std::unique_ptr &Tree, + NodeId Id) const { if (&*Tree == &T1) return TheMapping.getDst(Id); assert(&*Tree == &T2 && "Invalid tree."); @@ -91,7 +92,7 @@ // Computes the ratio of common descendants between the two nodes. // Descendants are only considered to be equal when they are mapped in M. - double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; + double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns the node that has the highest degree of similarity. NodeId findCandidate(const Mapping &M, NodeId Id1) const; @@ -713,8 +714,8 @@ void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const { - if (std::max(T1.getNumberOfDescendants(Id1), - T2.getNumberOfDescendants(Id2)) >= Options.MaxSize) + if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) > + Options.MaxSize) return; ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); std::vector> R = Matcher.getMatchingNodes(); @@ -726,16 +727,19 @@ } } -double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, - NodeId Id2) const { - if (Id1.isInvalid() || Id2.isInvalid()) - return 0.0; +double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, + NodeId Id2) const { int CommonDescendants = 0; const Node &N1 = T1.getNode(Id1); - for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) - CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); - return 2.0 * CommonDescendants / - (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); + for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) { + NodeId Dst = M.getDst(Src); + CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)); + } + double Denominator = T1.getNumberOfDescendants(Id1) - 1 + + T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; + if (Denominator == 0) + return 0; + return CommonDescendants / Denominator; } NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { @@ -746,7 +750,7 @@ continue; if (M.hasDst(Id2)) continue; - double Similarity = getSimilarity(M, Id1, Id2); + double Similarity = getJaccardSimilarity(M, Id1, Id2); if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) { HighestSimilarity = Similarity; Candidate = Id2; @@ -766,12 +770,8 @@ } break; } - const Node &N1 = T1.getNode(Id1); bool Matched = M.hasSrc(Id1); - bool MatchedChildren = - std::any_of(N1.Children.begin(), N1.Children.end(), - [&](NodeId Child) { return M.hasSrc(Child); }); - if (Matched || !MatchedChildren) + if (Matched) continue; NodeId Id2 = findCandidate(M, Id1); if (Id2.isValid()) { Index: test/Tooling/clang-diff-bottomup.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;;;;;}} } + +void f3() { {;} {{;;;;;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;;;;;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;;;;;}} +} +#endif Index: test/Tooling/clang-diff-topdown.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif