diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -68,6 +68,8 @@ struct alignas(struct Node *) Node { // LR state describing how parsing should continue from this head. LRTable::StateID State; + // Used internally to track reachability during garbage collection. + bool GCParity; // Number of the parents of this node. // The parents hold previous parsed symbols, and may resume control after // this node is reduced. @@ -77,10 +79,6 @@ // (In the literature, the node is attached to the *edge* to the parent). const ForestNode *Payload = nullptr; - // FIXME: Most nodes live a fairly short time, and are simply discarded. - // Is it worth refcounting them (we have empty padding) and returning to a - // freelist, to keep the working set small? - llvm::ArrayRef parents() const { return llvm::makeArrayRef(reinterpret_cast(this + 1), ParentCount); @@ -90,23 +88,26 @@ // Allocates a new node in the graph. const Node *addNode(LRTable::StateID State, const ForestNode *Symbol, - llvm::ArrayRef Parents) { - ++NodeCount; - Node *Result = new (Arena.Allocate( - sizeof(Node) + Parents.size() * sizeof(Node *), alignof(Node))) - Node({State, static_cast(Parents.size())}); - Result->Payload = Symbol; - if (!Parents.empty()) - llvm::copy(Parents, reinterpret_cast(Result + 1)); - return Result; - } + llvm::ArrayRef Parents); + // Frees all nodes not reachable as ancestors of Roots, and returns the count. + // Calling this periodically prevents steady memory growth of the GSS. + unsigned gc(std::vector &&Roots); size_t bytes() const { return Arena.getTotalMemory() + sizeof(*this); } - size_t nodeCount() const { return NodeCount; } + size_t nodesCreated() const { return NodesCreated; } private: + // Nodes are recycled using freelists. + // They are variable size, so use one free-list per distinct #parents. + std::vector> FreeList; + Node *allocate(unsigned Parents); + void destroy(Node *N); + // The list of nodes created and not destroyed - our candidates for gc(). + std::vector Alive; + bool GCParity = false; // All nodes should match this, except during GC. + llvm::BumpPtrAllocator Arena; - unsigned NodeCount = 0; + unsigned NodesCreated = 0; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &); diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -65,6 +66,18 @@ std::vector NewHeads = { GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol), /*ForestNode=*/nullptr, {})}; + auto MaybeGC = [&, Roots(std::vector{}), I(0u)]() mutable { + assert(PendingShift.empty() && PendingReduce.empty() && + PendingAccept.empty() && "Running GC at the wrong time!"); + + if (++I != 20) // Run periodically to balance CPU and memory usage. + return; + I = 0; + + // We need to copy the list: Roots is consumed by the GC. + Roots = NewHeads; + GSS.gc(std::move(Roots)); + }; for (const ForestNode &Terminal : Terminals) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n", G.symbolName(Terminal.symbol()), @@ -80,6 +93,7 @@ glrShift(PendingShift, Terminal, Params, [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); }); + MaybeGC(); } LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n")); for (const auto *Heads : NewHeads) @@ -373,5 +387,72 @@ assert(Sequences.empty()); } +const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, + llvm::ArrayRef Parents) { + Node *Result = new (allocate(Parents.size())) + Node({State, GCParity, static_cast(Parents.size())}); + Alive.push_back(Result); + ++NodesCreated; + Result->Payload = Symbol; + if (!Parents.empty()) + llvm::copy(Parents, reinterpret_cast(Result + 1)); + return Result; +} + +GSS::Node *GSS::allocate(unsigned Parents) { + if (FreeList.size() <= Parents) + FreeList.resize(Parents + 1); + auto &SizedList = FreeList[Parents]; + if (!SizedList.empty()) { + auto *Result = SizedList.back(); + SizedList.pop_back(); + return Result; + } + return static_cast( + Arena.Allocate(sizeof(Node) + Parents * sizeof(Node *), alignof(Node))); +} + +void GSS::destroy(Node *N) { + unsigned ParentCount = N->ParentCount; + N->~Node(); + assert(FreeList.size() > ParentCount && "established on construction!"); + FreeList[ParentCount].push_back(N); +} + +unsigned GSS::gc(std::vector &&Queue) { +#ifndef NDEBUG + auto ParityMatches = [&](const Node *N) { return N->GCParity == GCParity; }; + assert("Before GC" && llvm::all_of(Alive, ParityMatches)); + auto Deferred = llvm::make_scope_exit( + [&] { assert("After GC" && llvm::all_of(Alive, ParityMatches)); }); + assert(llvm::all_of( + Queue, [&](const Node *R) { return llvm::is_contained(Alive, R); })); +#endif + unsigned InitialCount = Alive.size(); + + // Mark + GCParity = !GCParity; + while (!Queue.empty()) { + Node *N = const_cast(Queue.back()); // Safe: we created these nodes. + Queue.pop_back(); + if (N->GCParity != GCParity) { // Not seen yet + N->GCParity = GCParity; // Mark as seen + for (const Node *P : N->parents()) // And walk parents + Queue.push_back(P); + } + } + // Sweep + llvm::erase_if(Alive, [&](Node *N) { + if (N->GCParity == GCParity) // Walk reached this node. + return false; + destroy(N); + return true; + }); + + LLVM_DEBUG(llvm::dbgs() << "GC pruned " << (InitialCount - Alive.size()) + << "/" << InitialCount << " GSS nodes\n"); + return InitialCount - Alive.size(); +} + } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -134,7 +134,7 @@ llvm::outs() << "Forest bytes: " << Arena.bytes() << " nodes: " << Arena.nodeCount() << "\n"; llvm::outs() << "GSS bytes: " << GSS.bytes() - << " nodes: " << GSS.nodeCount() << "\n"; + << " nodes: " << GSS.nodesCreated() << "\n"; } } } diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -393,6 +393,28 @@ "[ 0, end) └─IDENTIFIER := tok[0]\n"); } +TEST(GSSTest, GC) { + // ┌-A-┬-AB + // ├-B-┘ + // Root-+-C + // ├-D + // └-E + GSS GSStack; + auto *Root = GSStack.addNode(0, nullptr, {}); + auto *A = GSStack.addNode(0, nullptr, {Root}); + auto *B = GSStack.addNode(0, nullptr, {Root}); + auto *C = GSStack.addNode(0, nullptr, {Root}); + auto *D = GSStack.addNode(0, nullptr, {Root}); + auto *AB = GSStack.addNode(0, nullptr, {A, B}); + + EXPECT_EQ(1u, GSStack.gc({AB, C})) << "D is destroyed"; + EXPECT_EQ(0u, GSStack.gc({AB, C})) << "D is already gone"; + auto *E = GSStack.addNode(0, nullptr, {Root}); + EXPECT_EQ(D, E) << "Storage of GCed node D is reused for E"; + EXPECT_EQ(3u, GSStack.gc({A, E})) << "Destroys B, AB, C"; + EXPECT_EQ(1u, GSStack.gc({E})) << "Destroys A"; +} + } // namespace } // namespace pseudo } // namespace clang