diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -43,6 +43,7 @@ // doesn't have parent pointers. class alignas(class ForestNode *) ForestNode { public: + class RecursiveIterator; enum Kind { // A Terminal node is a single terminal symbol bound to a token. Terminal, @@ -87,6 +88,22 @@ return children(Data); } + llvm::ArrayRef children() const { + switch (kind()) { + case Sequence: + return elements(); + case Ambiguous: + return alternatives(); + case Terminal: + case Opaque: + return {}; + } + llvm_unreachable("Bad kind"); + } + + // Iteration over all nodes in the forest, including this. + llvm::iterator_range descendants() const; + std::string dump(const Grammar &) const; std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; @@ -181,6 +198,25 @@ uint32_t NodeCount = 0; }; +class ForestNode::RecursiveIterator + : public std::iterator { + llvm::DenseSet Seen; + struct StackFrame { + const ForestNode *Parent; + unsigned ChildIndex; + }; + std::vector Stack; + const ForestNode *Cur; + +public: + RecursiveIterator(const ForestNode *N = nullptr) : Cur(N) {} + + const ForestNode &operator*() const { return *Cur; }; + void operator++(); + bool operator==(const RecursiveIterator &I) const { return Cur == I.Cur; } + bool operator!=(const RecursiveIterator &I) const { return !(*this == I); } +}; + } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -16,6 +16,35 @@ namespace clang { namespace pseudo { +void ForestNode::RecursiveIterator::operator++() { + auto C = Cur->children(); + // Try to find a child of the current node to descend into. + for (unsigned I = 0; I < C.size(); ++I) { + if (Seen.insert(C[I]).second) { + Stack.push_back({Cur, I}); + Cur = C[I]; + return; + } + } + // Try to find a sibling af an ancestor to advance to. + for (; !Stack.empty(); Stack.pop_back()) { + C = Stack.back().Parent->children(); + unsigned &Index = Stack.back().ChildIndex; + while (++Index < C.size()) { + if (Seen.insert(C[Index]).second) { + Cur = C[Index]; + return; + } + } + } + Cur = nullptr; +} + +llvm::iterator_range +ForestNode::descendants() const { + return {RecursiveIterator(this), RecursiveIterator()}; +} + std::string ForestNode::dump(const Grammar &G) const { switch (kind()) { case Ambiguous: diff --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp --- a/clang-tools-extra/pseudo/test/glr.cpp +++ b/clang-tools-extra/pseudo/test/glr.cpp @@ -1,4 +1,4 @@ -// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s +// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest -print-statistics | FileCheck %s void foo() { T* a; // a multiply expression or a pointer declaration? @@ -22,3 +22,10 @@ // CHECK-NEXT: │ └─ptr-declarator~id-expression =#1 // CHECK-NEXT: └─; := tok[8] } + +// CHECK: 3 Ambiguous nodes: +// CHECK-NEXT: 1 simple-type-specifier +// CHECK-NEXT: 1 statement +// CHECK-NEXT: 1 type-name +// CHECK-EMPTY: +// CHECK-NEXT: 0 Opaque nodes: diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -14,6 +14,8 @@ #include "clang-pseudo/grammar/LRGraph.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/LangOptions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" @@ -59,6 +61,34 @@ return Text.get()->getBuffer().str(); } +namespace clang { +namespace pseudo { +namespace { + +struct NodeStats { + unsigned Total = 0; + std::vector> BySymbol; + + NodeStats(const ForestNode &Root, + llvm::function_ref Filter) { + llvm::DenseMap Map; + for (const ForestNode &N : Root.descendants()) + if (Filter(N)) { + ++Total; + ++Map[N.symbol()]; + } + BySymbol = {Map.begin(), Map.end()}; + // Sort by count descending, then symbol ascending. + llvm::sort(BySymbol, [](const auto &L, const auto &R) { + return std::tie(R.second, L.first) < std::tie(L.second, R.first); + }); + } +}; + +} // namespace +} // namespace pseudo +} // namespace clang + int main(int argc, char *argv[]) { llvm::cl::ParseCommandLineOptions(argc, argv, ""); llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); @@ -135,6 +165,17 @@ << " nodes: " << Arena.nodeCount() << "\n"; llvm::outs() << "GSS bytes: " << GSS.bytes() << " nodes: " << GSS.nodesCreated() << "\n"; + + for (auto &P : + {std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous), + std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) { + clang::pseudo::NodeStats Stats( + Root, [&](const auto &N) { return N.kind() == P.second; }); + llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n"; + for (const auto &S : Stats.BySymbol) + llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second, + G.symbolName(S.first)); + } } } } diff --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp --- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp +++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp @@ -151,6 +151,30 @@ "[ 0, end) └─A~B =#1\n"); } +TEST_F(ForestTest, Iteration) { + // Z + // / \ + // X Y + // |\| + // A B + ForestArena Arena; + const auto *A = &Arena.createTerminal(tok::identifier, 0); + const auto *B = &Arena.createOpaque(1, 0); + const auto *X = &Arena.createSequence(2, 1, {A, B}); + const auto *Y = &Arena.createSequence(2, 2, {B}); + const auto *Z = &Arena.createAmbiguous(2, {X, Y}); + + std::vector Nodes; + for (const ForestNode &N : Z->descendants()) + Nodes.push_back(&N); + EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z)); + + Nodes.clear(); + for (const ForestNode &N : X->descendants()) + Nodes.push_back(&N); + EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B)); +} + } // namespace } // namespace pseudo } // namespace clang