diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -68,6 +68,8 @@ struct alignas(struct Node *) Node { // LR state describing how parsing should continue from this head. LRTable::StateID State; + // Used internally to track reachability during garbage collection. + bool GCParity; // Number of the parents of this node. // The parents hold previous parsed symbols, and may resume control after // this node is reduced. @@ -90,21 +92,24 @@ // Allocates a new node in the graph. const Node *addNode(LRTable::StateID State, const ForestNode *Symbol, - llvm::ArrayRef Parents) { - ++NodeCount; - Node *Result = new (Arena.Allocate( - sizeof(Node) + Parents.size() * sizeof(Node *), alignof(Node))) - Node({State, static_cast(Parents.size())}); - Result->Payload = Symbol; - if (!Parents.empty()) - llvm::copy(Parents, reinterpret_cast(Result + 1)); - return Result; - } + llvm::ArrayRef Parents); + // Frees all nodes not reachable as ancestors of Roots, and returns the count. + // Calling this periodically prevents steady memory growth of the GSS. + unsigned gc(std::vector &&Roots); size_t bytes() const { return Arena.getTotalMemory() + sizeof(*this); } size_t nodeCount() const { return NodeCount; } private: + // Nodes are recycled using freelists. + // They are variable size, so use one free-list per distinct #parents. + std::vector> FreeList; + Node *allocate(unsigned Parents); + void destroy(Node *N); + // The list of all allocated nodes - these are our candidates for gc(). + std::vector Allocated; + bool GCParity = false; // All nodes should match this, except during GC. + llvm::BumpPtrAllocator Arena; unsigned NodeCount = 0; }; diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -65,6 +66,18 @@ std::vector NewHeads = { GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol), /*ForestNode=*/nullptr, {})}; + auto MaybeGC = [&, Roots(std::vector{}), I(0u)]() mutable { + assert(NewHeads.empty() && "Running GC at the wrong time!"); + if (++I != 20) // Run periodically to balance CPU and memory usage. + return; + I = 0; + Roots.clear(); + for (const auto *Pending : {&PendingShift, &PendingReduce, &PendingAccept}) + for (const auto &E : *Pending) + Roots.push_back(E.Head); + GSS.gc(std::move(Roots)); + }; + std::vector GCRoots; for (const ForestNode &Terminal : Terminals) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n", G.symbolName(Terminal.symbol()), @@ -72,6 +85,7 @@ for (const auto *Head : NewHeads) AddSteps(Head, Terminal.symbol()); NewHeads.clear(); + MaybeGC(); glrReduce(PendingReduce, Params, [&](const GSS::Node * NewHead) { // A reduce will enable more steps. @@ -373,5 +387,73 @@ assert(Sequences.empty()); } +const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, + llvm::ArrayRef Parents) { + ++NodeCount; + Node *Result = new (allocate(Parents.size())) + Node({State, GCParity, static_cast(Parents.size())}); + Allocated.push_back(Result); + Result->Payload = Symbol; + if (!Parents.empty()) + llvm::copy(Parents, reinterpret_cast(Result + 1)); + return Result; +} + +GSS::Node *GSS::allocate(unsigned Parents) { + ++NodeCount; + if (FreeList.size() <= Parents) + FreeList.resize(Parents + 1); + auto &SizedList = FreeList[Parents]; + if (!SizedList.empty()) { + auto *Result = SizedList.back(); + SizedList.pop_back(); + return Result; + } + return static_cast( + Arena.Allocate(sizeof(Node) + Parents * sizeof(Node *), alignof(Node))); +} + +void GSS::destroy(Node *N) { + unsigned ParentCount = N->ParentCount; + N->~Node(); + assert(FreeList.size() > ParentCount && "established on construction!"); + FreeList[ParentCount].push_back(N); +} + +unsigned GSS::gc(std::vector &&Queue) { +#ifndef NDEBUG + auto ParityMatches = [&](const Node *N) { return N->GCParity == GCParity; }; + assert("Before GC" && llvm::all_of(Allocated, ParityMatches)); + auto Deferred = llvm::make_scope_exit( + [&] { assert("After GC" && llvm::all_of(Allocated, ParityMatches)); }); + assert(llvm::all_of( + Queue, [&](const Node *R) { return llvm::is_contained(Allocated, R); })); +#endif + unsigned InitialCount = Allocated.size(); + + // Mark + GCParity = !GCParity; + while (!Queue.empty()) { + Node *N = const_cast(Queue.back()); // Safe: we created these nodes. + Queue.pop_back(); + if (N->GCParity != GCParity) { // Not seen yet + N->GCParity = GCParity; // Mark as seen + for (const Node *P : N->parents()) // And walk parents + Queue.push_back(P); + } + } + // Sweep + llvm::erase_if(Allocated, [&](Node *N) { + if (N->GCParity == GCParity) // Walk reached this node. + return false; + destroy(N); + return true; + }); + + LLVM_DEBUG(llvm::dbgs() << "GC pruned " << (InitialCount - Allocated.size()) + << "/" << InitialCount << " GSS nodes\n"); + return InitialCount - Allocated.size(); +} + } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -393,6 +393,29 @@ "[ 0, end) └─IDENTIFIER := tok[0]\n"); } +TEST(GSSTest, GC) { + // ┌-A-┬-AB + // ├-B-┘ + // Root-+-C + // ├-D + // └-E + GSS GSStack; + auto *Root = GSStack.addNode(0, nullptr, {}); + auto *A = GSStack.addNode(0, nullptr, {Root}); + auto *B = GSStack.addNode(0, nullptr, {Root}); + auto *C = GSStack.addNode(0, nullptr, {Root}); + auto *D = GSStack.addNode(0, nullptr, {Root}); + auto *AB = GSStack.addNode(0, nullptr, {A, B}); + + EXPECT_EQ(1u, GSStack.gc({AB, C})); // destroys D + EXPECT_EQ(0u, GSStack.gc({AB, C})); // D is already gone. + auto *E = GSStack.addNode(0, nullptr, {Root}); + EXPECT_EQ(3u, GSStack.gc({A, E})); // destroys B, AB, C + EXPECT_EQ(1u, GSStack.gc({E})); // destroys A + + (void)D; +} + } // namespace } // namespace pseudo } // namespace clang