Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -91,7 +91,7 @@ // Computes the ratio of common descendants between the two nodes. // Descendants are only considered to be equal when they are mapped in M. - double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; + double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns the node that has the highest degree of similarity. NodeId findCandidate(const Mapping &M, NodeId Id1) const; @@ -733,16 +733,19 @@ } } -double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, - NodeId Id2) const { - if (Id1.isInvalid() || Id2.isInvalid()) - return 0.0; +double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, + NodeId Id2) const { int CommonDescendants = 0; const Node &N1 = T1.getNode(Id1); - for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) - CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); - return 2.0 * CommonDescendants / - (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); + for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) { + NodeId Dst = M.getDst(Src); + CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)); + } + double Denominator = T1.getNumberOfDescendants(Id1) - 1 + + T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; + if (Denominator == 0) + return 0; + return CommonDescendants / Denominator; } NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { @@ -753,7 +756,7 @@ continue; if (M.hasDst(Id2)) continue; - double Similarity = getSimilarity(M, Id1, Id2); + double Similarity = getJaccardSimilarity(M, Id1, Id2); if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) { HighestSimilarity = Similarity; Candidate = Id2; @@ -773,12 +776,8 @@ } break; } - const Node &N1 = T1.getNode(Id1); bool Matched = M.hasSrc(Id1); - bool MatchedChildren = - std::any_of(N1.Children.begin(), N1.Children.end(), - [&](NodeId Child) { return M.hasSrc(Child); }); - if (Matched || !MatchedChildren) + if (Matched) continue; NodeId Id2 = findCandidate(M, Id1); if (Id2.isValid()) { Index: test/Tooling/clang-diff-basic.cpp =================================================================== --- test/Tooling/clang-diff-basic.cpp +++ test/Tooling/clang-diff-basic.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -E %s > %T/src.cpp -// RUN: %clang_cc1 -E %s > %T/dst.cpp -DDEST -// RUN: clang-diff -m -no-compilation-database %T/src.cpp %T/dst.cpp | FileCheck %s +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database %t.src.cpp %t.dst.cpp | FileCheck %s #ifndef DEST namespace src { Index: test/Tooling/clang-diff-bottomup.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database -s=0 %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void ())(1) to FunctionDecl: f1(void ())(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +#endif Index: test/Tooling/clang-diff-topdown.cpp =================================================================== --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database -stop-after=topdown %t.src.cpp %t.dst.cpp | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif