diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -55,6 +55,7 @@ // The parser is responsible for creating nodes and keeping track of the set of // heads. The GSS class is mostly an arena for them. struct GSS { + ~GSS(); // A node represents a partial parse of the input up to some point. // // It is the equivalent of a frame in an LR parse stack. @@ -71,16 +72,15 @@ // Number of the parents of this node. // The parents hold previous parsed symbols, and may resume control after // this node is reduced. - unsigned ParentCount; + uint16_t ParentCount; + // Number of references to this node. + // Typically this is 1 if the node is a head, plus the number of children. + uint16_t RefCount; // The parse node for the last parsed symbol. // This symbol appears on the left of the dot in the parse state's items. // (In the literature, the node is attached to the *edge* to the parent). const ForestNode *Payload = nullptr; - // FIXME: Most nodes live a fairly short time, and are simply discarded. - // Is it worth refcounting them (we have empty padding) and returning to a - // freelist, to keep the working set small? - llvm::ArrayRef parents() const { return llvm::makeArrayRef(reinterpret_cast(this + 1), ParentCount); @@ -89,24 +89,27 @@ }; // Allocates a new node in the graph. + // It has a refcount of 1, and should be pased to deref() when done. const Node *addNode(LRTable::StateID State, const ForestNode *Symbol, - llvm::ArrayRef Parents) { - ++NodeCount; - Node *Result = new (Arena.Allocate( - sizeof(Node) + Parents.size() * sizeof(Node *), alignof(Node))) - Node({State, static_cast(Parents.size())}); - Result->Payload = Symbol; - if (!Parents.empty()) - llvm::copy(Parents, reinterpret_cast(Result + 1)); - return Result; - } + llvm::ArrayRef Parents); + void ref(const Node *N); + void unref(const Node *N); size_t bytes() const { return Arena.getTotalMemory() + sizeof(*this); } - size_t nodeCount() const { return NodeCount; } + size_t nodeCount() const { return NodesCreated; } private: + // Nodes are recycled using freelists. + // They are variable size, so use one free-list per distinct #parents. + std::vector> FreeList; + + Node *allocate(unsigned Parents); + void destroy(Node *N); + llvm::BumpPtrAllocator Arena; - unsigned NodeCount = 0; + unsigned NodesCreated = 0; + unsigned NodesDestroyed = 0; + unsigned NodesAllocated = 0; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &); diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -31,11 +31,43 @@ std::vector ParentStates; for (const auto *Parent : N.parents()) ParentStates.push_back(llvm::formatv("{0}", Parent->State)); - OS << llvm::formatv("state {0}, parsed symbol {1}, parents {2}", N.State, - N.Payload->symbol(), llvm::join(ParentStates, ", ")); + OS << llvm::formatv("state {0}, parsed symbol {1}, parents {{{2}}, refcount={3}", + N.State, N.Payload ? N.Payload->symbol() : 0, + llvm::join(ParentStates, ", "), N.RefCount); return OS; } +#ifndef NDEBUG +static void checkRefcounts(std::vector Queue) { + llvm::DenseSet Seen; + llvm::DenseMap Expected; + for (const auto *N : Queue) + ++Expected[N]; + while (!Queue.empty()) { + const GSS::Node *N = Queue.back(); + Queue.pop_back(); + if (!Seen.insert(N).second) + continue; + for (const auto *P : N->parents()) { + ++Expected[P]; + Queue.push_back(P); + } + } + bool AnyWrong = false; + llvm::dbgs() << "Checking refcounts:\n"; + for (const auto& E : Expected) { + llvm::dbgs() << E.first << " " << *E.first << "\n"; + if (E.second != E.first->RefCount) { + llvm::dbgs() << "Wrong refcount: actual=" << E.second << " for " + << E.first << " " << *E.first << "\n"; + AnyWrong = true; + } + } + if (AnyWrong) + abort(); +} +#endif + const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, SymbolID StartSymbol) { llvm::ArrayRef Terminals = Params.Forest.createTerminals(Tokens); @@ -49,12 +81,15 @@ for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) { switch (Action.kind()) { case LRTable::Action::Shift: + GSS.ref(Head); PendingShift.push_back({Head, Action}); break; case LRTable::Action::Reduce: + GSS.ref(Head); PendingReduce.push_back({Head, Action}); break; case LRTable::Action::Accept: + GSS.ref(Head); PendingAccept.push_back({Head, Action}); break; default: @@ -65,31 +100,57 @@ std::vector NewHeads = { GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol), /*ForestNode=*/nullptr, {})}; + auto CheckRefcounts = [&] { + DEBUG_WITH_TYPE("GSSNode", { + std::vector Roots = NewHeads; + for (const auto *Pending : + {&PendingShift, &PendingReduce, &PendingAccept}) + for (const ParseStep &Step : *Pending) + Roots.push_back(Step.Head); + checkRefcounts(Roots); + }); + }; for (const ForestNode &Terminal : Terminals) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n", G.symbolName(Terminal.symbol()), Terminal.symbol())); - for (const auto *Head : NewHeads) + CheckRefcounts(); + for (const auto *Head : NewHeads) { AddSteps(Head, Terminal.symbol()); + GSS.unref(Head); + } NewHeads.clear(); + CheckRefcounts(); + LLVM_DEBUG(llvm::dbgs() << " Reduce\n"); glrReduce(PendingReduce, Params, [&](const GSS::Node * NewHead) { // A reduce will enable more steps. AddSteps(NewHead, Terminal.symbol()); }); + CheckRefcounts(); - glrShift(PendingShift, Terminal, Params, - [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); }); + glrShift(PendingShift, Terminal, Params, [&](const GSS::Node *NewHead) { + GSS.ref(NewHead); + NewHeads.push_back(NewHead); + }); } LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n")); - for (const auto *Heads : NewHeads) - AddSteps(Heads, tokenSymbol(tok::eof)); + CheckRefcounts(); + for (const auto *Head : NewHeads) { + AddSteps(Head, tokenSymbol(tok::eof)); + GSS.unref(Head); + } + NewHeads.clear(); + CheckRefcounts(); glrReduce(PendingReduce, Params, [&](const GSS::Node * NewHead) { // A reduce will enable more steps. AddSteps(NewHead, tokenSymbol(tok::eof)); }); + CheckRefcounts(); + assert(PendingShift.empty()); + assert(PendingReduce.empty()); if (!PendingAccept.empty()) { LLVM_DEBUG({ llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n", @@ -99,7 +160,9 @@ << "\n"; }); assert(PendingAccept.size() == 1); - return *PendingAccept.front().Head->Payload; + auto *Result = PendingAccept.front().Head->Payload; + GSS.unref(PendingAccept.front().Head); + return *Result; } // We failed to parse the input, returning an opaque forest node for recovery. return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); @@ -120,6 +183,7 @@ // └---3---┘ void glrShift(std::vector &PendingShift, const ForestNode &NewTok, const ParseParams &Params, NewHeadCallback NewHeadCB) { + auto &GSS = Params.GSStack; assert(NewTok.kind() == ForestNode::Terminal); assert(llvm::all_of(PendingShift, [](const ParseStep &Step) { @@ -145,14 +209,18 @@ for (const auto &Base : Rest) { if (Base.Action.getShiftState() != NextState) break; - Parents.push_back(Base.Head); + Parents.push_back(Base.Head); // safe borrow } Rest = Rest.drop_front(Parents.size()); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n", NextState, Parents.size())); - NewHeadCB(Params.GSStack.addNode(NextState, &NewTok, Parents)); + const auto *NewHead = Params.GSStack.addNode(NextState, &NewTok, Parents); + NewHeadCB(NewHead); + GSS.unref(NewHead); } + for (const auto & PS : PendingShift) + GSS.unref(PS.Head); PendingShift.clear(); } @@ -213,6 +281,7 @@ // 0--5(pointer) // 5 is goto(0, pointer) void glrReduce(std::vector &PendingReduce, const ParseParams &Params, NewHeadCallback NewHeadCB) { + auto &GSS = Params.GSStack; // There are two interacting complications: // 1. Performing one reduce can unlock new reduces on the newly-created head. // 2a. The ambiguous ForestNodes must be complete (have all sequence nodes). @@ -283,6 +352,7 @@ auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) { if (I == Rule.Size) { F.Start = TempSequence.front()->startTokenIndex(); + GSS.ref(N); Bases.emplace(F, N); LLVM_DEBUG(llvm::dbgs() << " --> base at S" << N->State << "\n"); Sequences.emplace(F, TempSequence); @@ -295,8 +365,10 @@ DFS(Head, 0, DFS); }; auto PopPending = [&] { - for (const ParseStep &Pending : PendingReduce) + for (const ParseStep &Pending : PendingReduce) { Pop(Pending.Head, Pending.Action.getReduceRule()); + GSS.unref(Pending.Head); + } PendingReduce.clear(); }; @@ -343,16 +415,19 @@ // Grab the bases for this family. // As well as deduplicating them, we'll group by the goto state. - FamilyBases.clear(); do { FamilyBases.emplace_back( Params.Table.getGoToState(Bases.top().second->State, F.Symbol), - Bases.top().second); + Bases.top().second); // transfer ownership Bases.pop(); } while (!Bases.empty() && Bases.top().first == F); - sortAndUnique(FamilyBases); + llvm::sort(FamilyBases); + // XXX sortAndUnique is not aware of ref/unref, copy for simplicity. + std::vector UniqueBases; + std::unique_copy(FamilyBases.begin(), FamilyBases.end(), + std::back_inserter(UniqueBases)); // Create a GSS node for each unique goto state. - llvm::ArrayRef BasesLeft = FamilyBases; + llvm::ArrayRef BasesLeft = UniqueBases; while (!BasesLeft.empty()) { StateID NextState = BasesLeft.front().first; auto &Parents = TempGSSNodes; @@ -360,18 +435,86 @@ for (const auto &Base : BasesLeft) { if (Base.first != NextState) break; - Parents.push_back(Base.second); + Parents.push_back(Base.second); // safe borrow } BasesLeft = BasesLeft.drop_front(Parents.size()); // Invoking the callback for new heads, a real GLR parser may add new // reduces to the PendingReduce queue! - NewHeadCB(Params.GSStack.addNode(NextState, Parsed, Parents)); + const auto *NewNode = Params.GSStack.addNode(NextState, Parsed, Parents); + NewHeadCB(NewNode); + GSS.unref(NewNode); } + for (auto &FB : FamilyBases) + GSS.unref(FB.second); + FamilyBases.clear(); PopPending(); } assert(Sequences.empty()); } +static void logLifetime(llvm::StringRef Sigil, const GSS::Node* N) { + DEBUG_WITH_TYPE("GSSNode", llvm::dbgs() + << Sigil << " " << N << " " << *N << "\n"); +} + +const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, + llvm::ArrayRef Parents) { + ++NodesCreated; + Node *Result = new (allocate(Parents.size())) + Node({State, static_cast(Parents.size()), /*RefCount=*/0}); + Result->Payload = Symbol; + auto *ParentArray = reinterpret_cast(Result + 1); + for (const auto *Parent : Parents) { + ref(Parent); + *ParentArray++ = Parent; + } + logLifetime("*", Result); + ref(Result); + return Result; +} + +void GSS::ref(const Node *N) { + const_cast(N)->RefCount++; + logLifetime("+", N); +} + +void GSS::unref(const Node *N) { + assert(N->RefCount != 0); + Node *NN = const_cast(N); // I created you, and I can destroy you! + --NN->RefCount; + logLifetime("-", N); + // Node is const from the caller's perspective, but we created it + if (0 == NN->RefCount) + destroy(NN); +} + +GSS::Node *GSS::allocate(unsigned Parents) { + if (FreeList.size() <= Parents) + FreeList.resize(Parents + 1); + auto &SizedList = FreeList[Parents]; + if (!SizedList.empty()) { + auto *Result = SizedList.back(); + SizedList.pop_back(); + return Result; + } + ++NodesAllocated; + return static_cast( + Arena.Allocate(sizeof(Node) + Parents * sizeof(Node *), alignof(Node))); +} + +void GSS::destroy(Node *N) { + logLifetime("~", N); + ++NodesDestroyed; + for (auto *Parent : N->parents()) + unref(Parent); + unsigned ParentCount = N->ParentCount; + N->~Node(); + assert(FreeList.size() > ParentCount && "established on construction!"); + FreeList[ParentCount].push_back(N); +} + +GSS::~GSS() { assert(NodesCreated == NodesDestroyed); } + } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -85,11 +85,17 @@ NewHeadCallback captureNewHeads() { return [this](const GSS::Node *NewHead) { + GSStack.ref(NewHead); NewHeadResults.push_back(NewHead); }; }; protected: + ~GLRTest() { + for (auto *N : NewHeadResults) + GSStack.unref(N); + } + std::unique_ptr G; ForestArena Arena; GSS GSStack; @@ -115,6 +121,7 @@ /*Parents=*/{GSSNode0}); auto *GSSNode3 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{GSSNode0}); + GSStack.unref(GSSNode0); buildGrammar({}, {}); // Create a fake empty grammar. LRTable T = LRTable::buildForTests(G->table(), /*Entries=*/{}); @@ -154,6 +161,8 @@ GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{}); const auto *GSSNode1 = GSStack.addNode(3, &Arena.createTerminal(tok::identifier, 0), {GSSNode0}); + GSStack.unref(GSSNode0); + GSStack.ref(GSSNode1); std::vector PendingReduce = { {GSSNode1, Action::reduce(ruleFor("class-name"))}, @@ -188,6 +197,8 @@ const auto *GSSNode4 = GSStack.addNode( /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1), /*Parents=*/{GSSNode2, GSSNode3}); + GSStack.unref(GSSNode2); + GSStack.unref(GSSNode3); LRTable Table = LRTable::buildForTests( G->table(), @@ -237,6 +248,9 @@ const auto *GSSNode4 = GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode, /*Parents=*/{GSSNode2}); + GSStack.unref(GSSNode0); + GSStack.unref(GSSNode1); + GSStack.unref(GSSNode2); LRTable Table = LRTable::buildForTests( G->table(), @@ -295,6 +309,9 @@ const auto *GSSNode4 = GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal, /*Parents=*/{GSSNode2}); + GSStack.unref(GSSNode0); + GSStack.unref(GSSNode1); + GSStack.unref(GSSNode2); LRTable Table = LRTable::buildForTests( G->table(), {{/*State=*/0, id("pointer"), Action::goTo(5)}});