diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -63,6 +63,7 @@ Dump = [&](const ForestNode *P, Token::Index End, llvm::Optional ElidedParent, LineDecoration LineDec) { + bool SharedNode = VisitCounts.find(P)->getSecond() > 1; llvm::ArrayRef Children; auto EndOfElement = [&](size_t ChildIndex) { return ChildIndex + 1 == Children.size() @@ -74,7 +75,14 @@ } else if (P->kind() == Sequence) { Children = P->elements(); if (Abbreviated) { - if (Children.size() == 1) { + // Abbreviate chains of trivial sequence nodes. + // A := B, B := C, C := D, D := X Y Z + // becomes + // A~D := X Y Z + // + // We can't hide nodes that appear multiple times in the tree, + // because we need to call out their identity with IDs. + if (Children.size() == 1 && !SharedNode) { assert(Children[0]->startTokenIndex() == P->startTokenIndex() && EndOfElement(0) == End); return Dump(Children[0], End, @@ -94,14 +102,21 @@ Result += G.symbolName(*ElidedParent); Result += "~"; } - Result.append(P->dump(G)); - if (VisitCounts.find(P)->getSecond() > 1 && - P->kind() != ForestNode::Terminal) { - // The first time, print as #1. Later, =#1. + if (SharedNode && P->kind() != ForestNode::Terminal) { auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1); - Result += - llvm::formatv(" {0}#{1}", It.second ? "" : "=", It.first->second); + bool First = It.second; + unsigned ID = It.first->second; + + // The first time, print as #1. Later, =#1. + if (First) { + Result += llvm::formatv("{0} #{1}", P->dump(G), ID); + } else { + Result += llvm::formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); + Children = {}; // Don't walk the children again. + } + } else { + Result.append(P->dump(G)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp --- a/clang-tools-extra/pseudo/test/glr.cpp +++ b/clang-tools-extra/pseudo/test/glr.cpp @@ -7,7 +7,8 @@ // CHECK-NEXT: │ ├─expression~multiplicative-expression := multiplicative-expression * pm-expression // CHECK-NEXT: │ │ ├─multiplicative-expression~IDENTIFIER := tok[5] // CHECK-NEXT: │ │ ├─* := tok[6] -// CHECK-NEXT: │ │ └─pm-expression~IDENTIFIER := tok[7] +// CHECK-NEXT: │ │ └─pm-expression~id-expression := unqualified-id #1 +// CHECK-NEXT: │ │ └─unqualified-id~IDENTIFIER := tok[7] // CHECK-NEXT: │ └─; := tok[8] // CHECK-NEXT: └─statement~simple-declaration := decl-specifier-seq init-declarator-list ; // CHECK-NEXT: ├─decl-specifier-seq~simple-type-specifier := @@ -18,6 +19,6 @@ // CHECK-NEXT: │ └─simple-type-specifier~IDENTIFIER := tok[5] // CHECK-NEXT: ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator // CHECK-NEXT: │ ├─ptr-operator~* := tok[6] -// CHECK-NEXT: │ └─ptr-declarator~IDENTIFIER := tok[7] +// CHECK-NEXT: │ └─ptr-declarator~id-expression =#1 // CHECK-NEXT: └─; := tok[8] } diff --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp --- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp +++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp @@ -122,8 +122,33 @@ "[ 0, end) │ └─IDENTIFIER := tok[0]\n" "[ 0, end) └─type := enum-type\n" "[ 0, end) └─enum-type := shared-type\n" - "[ 0, end) └─shared-type := IDENTIFIER =#1\n" - "[ 0, end) └─IDENTIFIER := tok[0]\n"); + "[ 0, end) └─shared-type =#1\n"); +} + +TEST_F(ForestTest, DumpAbbreviatedShared) { + build(R"cpp( + _ := A + A := B + B := * + )cpp"); + + ForestArena Arena; + const auto *Star = &Arena.createTerminal(tok::star, 0); + + const auto *B = &Arena.createSequence(symbol("B"), ruleFor("B"), {Star}); + // We have two identical (but distinct) A nodes. + // The GLR parser would never produce this, but it makes the example simpler. + const auto *A1 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B}); + const auto *A2 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B}); + const auto *A = &Arena.createAmbiguous(symbol("A"), {A1, A2}); + + // We must not abbreviate away shared nodes: if we show A~* there's no way to + // show that the intermediate B node is shared between A1 and A2. + EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true), + "[ 0, end) A := \n" + "[ 0, end) ├─A~B := * #1\n" + "[ 0, end) │ └─* := tok[0]\n" + "[ 0, end) └─A~B =#1\n"); } } // namespace diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -399,8 +399,7 @@ "[ 0, end) └─test := left-paren expr\n" "[ 0, 1) ├─left-paren := {\n" "[ 0, 1) │ └─{ := tok[0]\n" - "[ 1, end) └─expr := IDENTIFIER =#1\n" - "[ 1, end) └─IDENTIFIER := tok[1]\n"); + "[ 1, end) └─expr =#1\n"); } TEST_F(GLRTest, GLRReduceOrder) {