Index: include/clang/Tooling/ASTDiff/ASTDiff.h =================================================================== --- include/clang/Tooling/ASTDiff/ASTDiff.h +++ include/clang/Tooling/ASTDiff/ASTDiff.h @@ -153,10 +153,22 @@ /// to the parent. CharSourceRange getSourceRange() const; + /// Returns the sub-ranges of getSourceRange() that are exclusively owned by + /// this node, that is, none of its descendants includes them. + SmallVector getOwnedSourceRanges() const; + /// Returns the offsets for the range returned by getSourceRange(). std::pair getSourceRangeOffsets() const; + + /// Returns the starting locations of all tokens in getOwnedSourceRanges(). + SmallVector getOwnedTokens() const; }; +void forEachTokenInRange(CharSourceRange Range, SyntaxTree &Tree, + std::function Body); + +inline bool isListSeparator(Token &Tok) { return Tok.is(tok::comma); } + struct NodeRefIterator { SyntaxTree::Impl *Tree; const NodeId *IdPointer; Index: lib/Tooling/ASTDiff/ASTDiff.cpp =================================================================== --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -16,6 +16,7 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/PriorityQueue.h" +#include "llvm/Support/MD5.h" #include #include @@ -111,6 +112,8 @@ }; } // end anonymous namespace +using HashType = std::array; + /// Represents the AST of a TranslationUnit. class SyntaxTree::Impl { public: @@ -463,6 +466,30 @@ return ""; } +static HashType hashNode(NodeRef N) { + llvm::MD5 Hash; + SourceManager &SM = N.getTree().getSourceManager(); + const LangOptions &LangOpts = N.getTree().getLangOpts(); + Token Tok; + for (auto TokenLocation : N.getOwnedTokens()) { + bool Failure = Lexer::getRawToken(TokenLocation, Tok, SM, LangOpts, + /*IgnoreWhiteSpace=*/true); + assert(!Failure); + auto Range = CharSourceRange::getCharRange(TokenLocation, Tok.getEndLoc()); + // This is here to make CompoundStmt nodes compare equal, to make the tests + // pass. It should be changed to include changes to comments. + if (!Tok.isOneOf(tok::comment, tok::semi)) + Hash.update(Lexer::getSourceText(Range, SM, LangOpts)); + } + llvm::MD5::MD5Result HashResult; + Hash.final(HashResult); + return HashResult; +} + +static bool areNodesDifferent(NodeRef N1, NodeRef N2) { + return hashNode(N1) != hashNode(N2); +} + /// Identifies a node in a subtree by its postorder offset, starting at 1. struct SNodeId { int Id = 0; @@ -637,7 +664,7 @@ NodeRef N1 = S1.getNode(Id1), N2 = S2.getNode(Id2); if (!DiffImpl.isMatchingPossible(N1, N2)) return std::numeric_limits::max(); - return S1.getNodeValue(Id1) != S2.getNodeValue(Id2); + return areNodesDifferent(S1.getNode(Id1), S2.getNode(Id2)); } void computeTreeDist() { @@ -795,6 +822,91 @@ return {Begin, End}; } +static bool onlyWhitespace(StringRef Str) { + return std::all_of(Str.begin(), Str.end(), + [](char C) { return std::isspace(C); }); +} + +SmallVector Node::getOwnedSourceRanges() const { + SmallVector SourceRanges; + SourceManager &SM = getTree().getSourceManager(); + const LangOptions &LangOpts = getTree().getLangOpts(); + CharSourceRange Range = getSourceRange(); + SourceLocation Offset = Range.getBegin(); + BeforeThanCompare Less(SM); + auto AddSegment = [&](SourceLocation Until) { + if (Offset.isValid() && Until.isValid() && Less(Offset, Until)) { + CharSourceRange R = CharSourceRange::getCharRange({Offset, Until}); + StringRef Text = Lexer::getSourceText(R, SM, LangOpts); + if (onlyWhitespace(Text)) + return; + SourceRanges.emplace_back(CharSourceRange::getCharRange({Offset, Until})); + } + }; + int ChildIndex = 0; + for (NodeRef Descendant : *this) { + CharSourceRange DescendantRange = Descendant.getSourceRange(); + CharSourceRange LMDRange = + getTree().getNode(Descendant.LeftMostDescendant).getSourceRange(); + CharSourceRange RMDRange = + getTree().getNode(Descendant.RightMostDescendant).getSourceRange(); + auto MinValidBegin = [&Less](CharSourceRange &Range1, + CharSourceRange &Range2) { + SourceLocation Begin1 = Range1.getBegin(), Begin2 = Range2.getBegin(); + if (Begin1.isInvalid()) + return Begin2; + if (Begin2.isInvalid()) + return Begin1; + return std::min(Begin1, Begin2, Less); + }; + auto MaxValidEnd = [&Less](CharSourceRange &Range1, + CharSourceRange &Range2) { + SourceLocation End1 = Range1.getEnd(), End2 = Range2.getEnd(); + if (End1.isInvalid()) + return End2; + if (End2.isInvalid()) + return End1; + return std::max(End1, End2, Less); + }; + auto Min = MinValidBegin(DescendantRange, LMDRange); + auto Max = MaxValidEnd(DescendantRange, RMDRange); + AddSegment(Min); + if (Max.isValid()) + Offset = Max; + ++ChildIndex; + } + AddSegment(Range.getEnd()); + return SourceRanges; +} + +void forEachTokenInRange(CharSourceRange Range, SyntaxTree &Tree, + std::function Body) { + SourceLocation Begin = Range.getBegin(), End = Range.getEnd(); + SourceManager &SM = Tree.getSourceManager(); + BeforeThanCompare Less(SM); + Token Tok; + while (Begin.isValid() && Less(Begin, End) && + !Lexer::getRawToken(Begin, Tok, SM, Tree.getLangOpts(), + /*IgnoreWhiteSpace=*/true) && + Less(Tok.getLocation(), End)) { + Body(Tok); + Begin = Tok.getEndLoc(); + } +} + +SmallVector Node::getOwnedTokens() const { + SmallVector TokenLocations; + BeforeThanCompare Less(getTree().getSourceManager()); + const auto &SourceRanges = getOwnedSourceRanges(); + for (auto &Range : SourceRanges) { + forEachTokenInRange(Range, getTree(), [&TokenLocations](Token &Tok) { + if (!isListSeparator(Tok)) + TokenLocations.push_back(Tok.getLocation()); + }); + } + return TokenLocations; +} + namespace { // Compares nodes by their depth. struct HeightLess { @@ -847,7 +959,7 @@ bool ASTDiff::Impl::identical(NodeRef N1, NodeRef N2) const { if (N1.getNumChildren() != N2.getNumChildren() || - !isMatchingPossible(N1, N2) || T1.getNodeValue(N1) != T2.getNodeValue(N2)) + !isMatchingPossible(N1, N2) || areNodesDifferent(N1, N2)) return false; for (size_t Id = 0, E = N1.getNumChildren(); Id < E; ++Id) if (!identical(N1.getChild(Id), N2.getChild(Id))) @@ -901,7 +1013,7 @@ double ASTDiff::Impl::getNodeSimilarity(NodeRef N1, NodeRef N2) const { auto Ident1 = N1.getIdentifier(), Ident2 = N2.getIdentifier(); - bool SameValue = T1.getNodeValue(N1) == T2.getNodeValue(N2); + bool SameValue = !areNodesDifferent(N1, N2); auto SameIdent = Ident1 && Ident2 && *Ident1 == *Ident2; double NodeSimilarity = 0; @@ -1091,7 +1203,7 @@ findNewPosition(N1) != findNewPosition(N2)) { MutableN1.Change = MutableN2.Change = Move; } - if (T1.getNodeValue(N1) != T2.getNodeValue(N2)) { + if (areNodesDifferent(N1, N2)) { MutableN1.Change = MutableN2.Change = (N1.Change == Move ? UpdateMove : Update); } Index: test/Tooling/clang-diff-basic.cpp =================================================================== --- test/Tooling/clang-diff-basic.cpp +++ test/Tooling/clang-diff-basic.cpp @@ -30,10 +30,10 @@ class X { const char *foo(int i) { + // CHECK: Insert IfStmt(29) into CompoundStmt(28) if (i == 0) return "Bar"; - // CHECK: Insert IfStmt{{.*}} into IfStmt - // CHECK: Insert BinaryOperator: =={{.*}} into IfStmt + // CHECK: Move IfStmt(35) into IfStmt else if (i == -1) return "foo"; return 0; @@ -64,7 +64,7 @@ M1; // CHECK: Update Macro: M1{{.*}} to M2 M2; - // CHECK: Update Macro: F(1, 1){{.*}} to F(1, /*b=*/1) + // CHECK: Match Macro: F(1, 1)(74) F(1, /*b=*/1); } Index: test/Tooling/clang-diff-html.test =================================================================== --- test/Tooling/clang-diff-html.test +++ test/Tooling/clang-diff-html.test @@ -1,7 +1,7 @@ RUN: clang-diff -html %S/Inputs/clang-diff-basic-src.cpp %S/clang-diff-basic.cpp -- | FileCheck %s CHECK:
+CHECK-NEXT: 0 -> 0' class='u'> match, update CHECK: "Bar" comments -CHECK: // CHECK: Insert IfStmt{{.*}} into IfStmt CHECK: // CHECK: Delete AccessSpecDecl: public Index: test/Tooling/clang-diff-topdown.cpp =================================================================== --- test/Tooling/clang-diff-topdown.cpp +++ test/Tooling/clang-diff-topdown.cpp @@ -35,8 +35,6 @@ int x2 = ::x + 1; } -class A { int x = 1 + 1; void f() { int x1 = x; } }; - #else @@ -66,18 +64,4 @@ int x2 = ::x + 1; } -class B { - // Only the class name changed; it is not included in the field value, - // therefore there is no update. - // CHECK: Match FieldDecl: :x(int)(24) to FieldDecl: :x(int)(29) - // CHECK-NOT: Update FieldDecl: :x(int)(24) - int x = 1+1; - void f() { - // CHECK: Match MemberExpr: :x(32) to MemberExpr: :x(37) - // CHECK-NOT: Update MemberExpr: :x(32) - int x1 = B::x; - } - -}; - #endif