diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -73,7 +73,10 @@
     // Number of the parents of this node.
     // The parents hold previous parsed symbols, and may resume control after
     // this node is reduced.
-    unsigned ParentCount;
+    uint16_t ParentCount;
+    // The end of the token range covered by this node.
+    // The start of the range is Payload->startTokenIndex().
+    Token::Index NextToken = 0;
     // The parse node for the last parsed symbol.
     // This symbol appears on the left of the dot in the parse state's items.
     // (In the literature, the node is attached to the *edge* to the parent).
@@ -88,6 +91,7 @@
   // Allocates a new node in the graph.
   const Node *addNode(LRTable::StateID State, const ForestNode *Symbol,
+                      Token::Index NextToken,
                       llvm::ArrayRef<const Node *> Parents);
   // Frees all nodes not reachable as ancestors of Roots, and returns the count.
   // Calling this periodically prevents steady memory growth of the GSS.
@@ -141,8 +145,8 @@
 // Applies available reductions on Heads, appending resulting heads to the list.
 //
 // Exposed for testing only.
-void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
-               const ParseParams &Params);
+void glrReduce(std::vector<const GSS::Node *> &Heads, Token::Index NextToken,
+               SymbolID Lookahead, const ParseParams &Params);
 
 } // namespace pseudo
 } // namespace clang
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -81,9 +81,15 @@
   assert(SID < NumTerminals);
   return static_cast<tok::TokenKind>(SID);
 }
-inline SymbolID tokenSymbol(tok::TokenKind TK) {
+inline constexpr SymbolID tokenSymbol(tok::TokenKind TK) {
   return TokenFlag | static_cast<SymbolID>(TK);
 }
+// A sentinel terminal symbol representing "parsing failed at this point".
+// Used in error recovery. (We don't otherwise use eod for anything).
+static constexpr SymbolID ErrorSymbol = tokenSymbol(tok::eod);
+// Error recovery strategies.
+// FIXME: these should be provided as extensions instead.
+enum class RecoveryStrategy : uint8_t { None, Braces };
 
 // An extension is a piece of native code specific to a grammar that modifies
 // the behavior of annotated rules. One ExtensionID is assigned for each unique
@@ -107,7 +113,7 @@
   // length to 9 (this is the longest sequence in cxx grammar).
   static constexpr unsigned SizeBits = 4;
   static constexpr unsigned MaxElements = 9;
-  static_assert(MaxElements <= (1 << SizeBits), "Exceeds the maximum limit");
+  static_assert(MaxElements < (1 << SizeBits), "Exceeds the maximum limit");
   static_assert(SizeBits + SymbolBits <= 16,
                 "Must be able to store symbol ID + size efficiently");
 
@@ -123,6 +129,13 @@
   // being set for this rule.
   ExtensionID Guard = 0;
 
+  // Specifies the index within Sequence that is eligible for error recovery.
+  // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq, then we
+  // should recover by finding the matching brace and forcing stmt-seq to match
+  // everything between braces.
+  uint8_t RecoveryIndex = -1;
+  RecoveryStrategy Recovery = RecoveryStrategy::None;
+
   llvm::ArrayRef<SymbolID> seq() const {
     return llvm::ArrayRef<SymbolID>(Sequence, Size);
   }
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
@@ -137,8 +137,20 @@
     SymbolID Label;
   };
 
+  // A possible error recovery: choose to match some tokens against a symbol.
+  //
+  // e.g. a state that contains
+  //   stmt := { . stmt-seq [recover=braces] }
+  // has a Recovery { Src = S, Strategy = braces, Result = stmt-seq }.
+  struct Recovery {
+    StateID Src;               // The state we are in when encountering the error.
+    RecoveryStrategy Strategy; // Heuristic choosing the tokens to match.
+    SymbolID Result;           // The symbol that is produced.
+  };
+
   llvm::ArrayRef<State> states() const { return States; }
   llvm::ArrayRef<Edge> edges() const { return Edges; }
+  llvm::ArrayRef<Recovery> recoveries() const { return Recoveries; }
   llvm::ArrayRef<std::pair<SymbolID, StateID>> startStates() const {
     return StartStates;
   }
@@ -147,12 +159,15 @@
 
 private:
   LRGraph(std::vector<State> States, std::vector<Edge> Edges,
+          std::vector<Recovery> Recoveries,
          std::vector<std::pair<SymbolID, StateID>> StartStates)
       : States(std::move(States)), Edges(std::move(Edges)),
-        StartStates(std::move(StartStates)) {}
+        Recoveries(std::move(Recoveries)), StartStates(std::move(StartStates)) {
+  }
 
   std::vector<State> States;
   std::vector<Edge> Edges;
+  std::vector<Recovery> Recoveries;
   std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -123,6 +123,11 @@
     uint16_t Value : ValueBits;
   };
 
+  struct Recovery {
+    RecoveryStrategy Strategy;
+    SymbolID Result;
+  };
+
   // Returns all available actions for the given state on a terminal.
   // Expected to be called by LR parsers.
   llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
@@ -139,6 +144,12 @@
   // Returns empty if no available actions in the table.
   llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
 
+  // Looks up available recovery actions if we stopped parsing in this state.
+  llvm::ArrayRef<Recovery> getRecovery(StateID State) const {
+    return llvm::makeArrayRef(Recoveries.data() + RecoveryOffset[State],
+                              Recoveries.data() + RecoveryOffset[State + 1]);
+  }
+
   // Returns the state from which the LR parser should start to parse the input
   // tokens as the given StartSymbol.
   //
@@ -188,6 +199,11 @@
   std::vector<Action> Actions;
   // A sorted table, storing the start state for each target parsing symbol.
   std::vector<std::pair<SymbolID, StateID>> StartStates;
+
+  // Recoveries stores all recovery actions from all states, grouped by state.
+  // A given state S owns the slice [RecoveryOffset[S], RecoveryOffset[S+1]).
+  std::vector<uint32_t> RecoveryOffset;
+  std::vector<Recovery> Recoveries;
 };
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -25,14 +25,107 @@
 namespace clang {
 namespace pseudo {
 
+void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
+                std::vector<const GSS::Node *> &NewHeads,
+                const TokenStream &Tokens, unsigned &TokenIndex,
+                const ParseParams &Params) {
+  LLVM_DEBUG(llvm::dbgs() << "Recovery at token " << TokenIndex << "...\n");
+  // Describes a possibility to recover by forcibly interpreting a range of
+  // tokens around the cursor as a nonterminal that we expected to see.
+  struct PlaceholderRecovery {
+    // The token prior to the nonterminal which is being recovered.
+    // This is the start of the region we're skipping, so a higher Position is
+    // better.
+    Token::Index Position;
+    // The nonterminal which will be created in order to recover.
+    SymbolID Symbol;
+    // The heuristic used to choose the bounds of the nonterminal to recover.
+    RecoveryStrategy Strategy;
+
+    // The GSS head where we are expecting the recovered nonterminal.
+    const GSS::Node *RecoveryNode;
+    // Payload of nodes on the way back from the OldHead to the recovery node.
+    // These represent the partial parse that is being discarded.
+    // They should become the children of the opaque recovery node.
+    //
+    // There may be multiple paths leading to the same recovery node; we choose
+    // one arbitrarily.
+    std::vector<const ForestNode *> Path;
+  };
+  std::vector<PlaceholderRecovery> Options;
+
+  std::vector<const ForestNode *> Path;
+  llvm::DenseSet<const GSS::Node *> Seen;
+  auto DFS = [&](const GSS::Node *N, Token::Index NextTok, auto &DFS) {
+    if (!Seen.insert(N).second)
+      return;
+    for (auto Strategy : Params.Table.getRecovery(N->State)) {
+      Options.push_back(PlaceholderRecovery{
+          NextTok,
+          Strategy.Result,
+          Strategy.Strategy,
+          N,
+          Path,
+      });
+      LLVM_DEBUG(llvm::dbgs()
+                 << "Option: recover " << Params.G.symbolName(Strategy.Result)
+                 << " at token " << NextTok << "\n");
+    }
+    Path.push_back(N->Payload);
+    for (const GSS::Node *Parent : N->parents())
+      DFS(Parent, N->NextToken, DFS);
+    Path.pop_back();
+  };
+  for (auto *N : llvm::reverse(OldHeads))
+    DFS(N, TokenIndex, DFS);
+
+  llvm::stable_sort(Options, [&](const auto &L, const auto &R) {
+    return L.Position > R.Position;
+  });
+
+  for (const PlaceholderRecovery &Option : Options) {
+    auto End = [&]() -> llvm::Optional<Token::Index> {
+      assert(Option.Strategy == RecoveryStrategy::Braces);
+      const ForestNode *LBrace = Option.RecoveryNode->Payload;
+      assert(LBrace->kind() == ForestNode::Terminal &&
+             LBrace->symbol() == tokenSymbol(tok::l_brace));
+      if (const Token *RBrace =
+              Tokens.tokens()[LBrace->startTokenIndex()].pair()) {
+        assert(Tokens.index(*RBrace) >= TokenIndex &&
+               "Saw matching closing bracket, but recovery still possible?");
+        return Tokens.index(*RBrace);
+      }
+      return llvm::None;
+    }();
+    if (End) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "Chose to recover " << Params.G.symbolName(Option.Symbol)
+                 << " on " << (Token::Range{Option.Position, *End}) << "\n");
+      const ForestNode &Placeholder =
+          Params.Forest.createOpaque(Option.Symbol, Option.Position);
+      const GSS::Node *NewHead = Params.GSStack.addNode(
+          Params.Table.getGoToState(Option.RecoveryNode->State, Option.Symbol),
+          &Placeholder, *End, {Option.RecoveryNode});
+      NewHeads.push_back(NewHead);
+      TokenIndex = *End - 1;
+      return;
+    }
+  }
+}
+
 using StateID = LRTable::StateID;
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
   std::vector<std::string> ParentStates;
   for (const auto *Parent : N.parents())
     ParentStates.push_back(llvm::formatv("{0}", Parent->State));
-  OS << llvm::formatv("state {0}, parsed symbol {1}, parents {2}", N.State,
-                      N.Payload->symbol(), llvm::join(ParentStates, ", "));
+  OS << llvm::formatv(
+      "state {0}, parsed symbol {1}, nextToken {2}, parents {3}", N.State,
+      N.Payload ? N.Payload->symbol() : 0, N.NextToken,
+      llvm::join(ParentStates, ", "));
   return OS;
 }
@@ -81,7 +174,8 @@
     LLVM_DEBUG(llvm::dbgs() << llvm::formatv("  --> S{0} ({1} heads)\n",
                                              NextState, Parents.size()));
-    NewHeads.push_back(Params.GSStack.addNode(NextState, &NewTok, Parents));
+    NewHeads.push_back(Params.GSStack.addNode(
+        NextState, &NewTok, NewTok.startTokenIndex() + 1, Parents));
   }
 }
@@ -227,12 +321,14 @@
   std::vector<const GSS::Node *> *Heads;
   unsigned NextPopHead;
   SymbolID Lookahead;
+  Token::Index NextToken;
 
   Sequence TempSequence;
 
 public:
   GLRReduce(const ParseParams &Params) : Params(Params) {}
 
-  void operator()(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead) {
+  void operator()(std::vector<const GSS::Node *> &Heads, Token::Index NextToken,
+                  SymbolID Lookahead) {
     assert(isToken(Lookahead));
 
     NextPopHead = 0;
@@ -351,7 +447,8 @@
       Parents.push_back(Base.second);
     }
     BasesLeft = BasesLeft.drop_front(Parents.size());
-    Heads->push_back(Params.GSStack.addNode(NextState, Parsed, Parents));
+    Heads->push_back(
+        Params.GSStack.addNode(NextState, Parsed, NextToken, Parents));
   }
 }
@@ -393,7 +490,8 @@
     const ForestNode *Parsed =
         &Params.Forest.createSequence(Rule.Target, *RID, TempSequence);
     StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target);
-    Heads->push_back(Params.GSStack.addNode(NextState, Parsed, {Base}));
+    Heads->push_back(
+        Params.GSStack.addNode(NextState, Parsed, NextToken, {Base}));
     return true;
   }
 };
@@ -413,7 +511,7 @@
   // Heads correspond to the parse of tokens [0, I), NextHeads to [0, I+1).
   std::vector<const GSS::Node *> Heads = {GSS.addNode(/*State=*/StartState,
                                                       /*ForestNode=*/nullptr,
-                                                      {})};
+                                                      /*NextToken=*/0, {})};
   std::vector<const GSS::Node *> NextHeads;
   auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
     assert(NextHeads.empty() && "Running GC at the wrong time!");
@@ -432,10 +530,14 @@
                G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
     // Consume the token.
     glrShift(Heads, Terminals[I], Params, NextHeads);
+
+    if (NextHeads.empty())
+      glrRecover(Heads, NextHeads, Tokens, I, Params);
+
     // Form nonterminals containing the token we just consumed.
     SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof)
                                                    : Terminals[I + 1].symbol();
-    Reduce(NextHeads, Lookahead);
+    Reduce(NextHeads, I + 1, Lookahead);
     // Prepare for the next token.
     std::swap(Heads, NextHeads);
     NextHeads.clear();
@@ -462,16 +564,17 @@
   return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
 }
 
-void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
-               const ParseParams &Params) {
+void glrReduce(std::vector<const GSS::Node *> &Heads, Token::Index NextToken,
+               SymbolID Lookahead, const ParseParams &Params) {
   // Create a new GLRReduce each time for tests, performance doesn't matter.
-  GLRReduce{Params}(Heads, Lookahead);
+  GLRReduce{Params}(Heads, NextToken, Lookahead);
 }
 
 const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
+                              Token::Index NextToken,
                               llvm::ArrayRef<const Node *> Parents) {
   Node *Result = new (allocate(Parents.size()))
-      Node({State, GCParity, static_cast<unsigned>(Parents.size())});
+      Node({State, GCParity, static_cast<uint16_t>(Parents.size()), NextToken});
   Alive.push_back(Result);
   ++NodesCreated;
   Result->Payload = Symbol;
diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -59,8 +59,11 @@
   llvm::raw_string_ostream OS(Result);
   const Rule &R = T->Rules[RID];
   OS << symbolName(R.Target) << " :=";
-  for (SymbolID SID : R.seq())
-    OS << " " << symbolName(SID);
+  for (unsigned I = 0; I < R.Size; ++I) {
+    OS << " " << symbolName(R.Sequence[I]);
+    if (R.RecoveryIndex == I)
+      OS << " [recover=" << static_cast<unsigned>(R.Recovery) << "]";
+  }
   if (R.Guard)
     OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
   return Result;
diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
--- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -106,6 +106,16 @@
     assert(T->Rules.size() < (1 << RuleBits) &&
            "Too many rules to fit in RuleID bits!");
+    // Wherever RHS contains { foo }, mark foo for brace-recovery.
+    // FIXME: this should be grammar annotations instead.
+    for (auto &Rule : T->Rules) {
+      for (unsigned I = 2; I < Rule.Size; ++I)
+        if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) &&
+            Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace)) {
+          Rule.Recovery = RecoveryStrategy::Braces;
+          Rule.RecoveryIndex = I - 1;
+        }
+    }
     const auto &SymbolOrder = getTopologicalOrder(T.get());
     llvm::stable_sort(
         T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) {
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
--- a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
@@ -120,6 +120,20 @@
   return Results;
 }
 
+std::vector<std::pair<RecoveryStrategy, SymbolID>>
+availableRecovery(const State &S, const Grammar &G) {
+  std::vector<std::pair<RecoveryStrategy, SymbolID>> Result;
+  for (const Item &I : S.Items) {
+    const auto &Rule = G.lookupRule(I.rule());
+    if (I.dot() != Rule.RecoveryIndex)
+      continue;
+    Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
+  }
+  llvm::sort(Result);
+  Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
+  return Result;
+}
+
 } // namespace
 
 std::string Item::dump(const Grammar &G) const {
@@ -130,9 +144,10 @@
     Results.push_back(G.symbolName(SID));
     return Results;
   };
-  return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target),
+  return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target),
                        llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "),
-                       llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "))
+                       llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "),
+                       Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
       .str();
 }
@@ -181,6 +196,11 @@
       Edges.push_back({Src, Dst, Label});
     }
 
+    void insertRecovery(StateID Src, RecoveryStrategy Strategy,
+                        SymbolID Result) {
+      Recoveries.push_back({Src, Strategy, Result});
+    }
+
     // Returns a state with the given id.
     const State &find(StateID ID) const {
       assert(ID < States.size());
@@ -194,9 +214,10 @@
     LRGraph build() && {
       States.shrink_to_fit();
       Edges.shrink_to_fit();
+      Recoveries.shrink_to_fit();
       llvm::sort(StartStates);
       StartStates.shrink_to_fit();
-      return LRGraph(std::move(States), std::move(Edges),
+      return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries),
                      std::move(StartStates));
     }
@@ -205,6 +226,7 @@
     llvm::DenseMap<ItemSet, StateID> StatesIndex;
     std::vector<State> States;
     std::vector<Edge> Edges;
+    std::vector<Recovery> Recoveries;
     const Grammar &G;
     std::vector<std::pair<SymbolID, StateID>> StartStates;
   } Builder(G);
@@ -225,15 +247,16 @@
   }
 
   while (!PendingStates.empty()) {
-    auto CurrentStateID = PendingStates.back();
+    auto StateID = PendingStates.back();
     PendingStates.pop_back();
-    for (auto Next :
-         nextAvailableKernelItems(Builder.find(CurrentStateID), G)) {
+    for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) {
       auto Insert = Builder.insert(Next.second);
       if (Insert.second)
         // new state, insert to the pending queue.
        PendingStates.push_back(Insert.first);
-      Builder.insertEdge(CurrentStateID, Insert.first, Next.first);
+      Builder.insertEdge(StateID, Insert.first, Next.first);
     }
+    for (auto Recovery : availableRecovery(Builder.find(StateID), G))
+      Builder.insertRecovery(StateID, Recovery.first, Recovery.second);
   }
   return std::move(Builder).build();
 }
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -10,6 +10,7 @@
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/TokenKinds.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 
 namespace llvm {
@@ -40,8 +41,9 @@
 
 class LRTable::Builder {
 public:
-  Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)
-      : StartStates(StartStates) {}
+  Builder(llvm::ArrayRef<LRGraph::Recovery> Recoveries,
+          llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)
+      : Recoveries(Recoveries), StartStates(StartStates) {}
 
   bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
   LRTable build(const GrammarTable &GT, unsigned NumStates) && {
@@ -86,11 +88,39 @@
       ++SortedIndex;
     }
     Table.StartStates = std::move(StartStates);
+
+    // Error recovery entries: sort (no dups already), and build offset lookup.
+    llvm::sort(Recoveries,
+               [&](const LRGraph::Recovery &L, const LRGraph::Recovery &R) {
+                 return std::tie(L.Src, L.Result, L.Strategy) <
+                        std::tie(R.Src, R.Result, R.Strategy);
+               });
+    // XXX get #states here in some more principled way. This isn't correct!
+    Table.Recoveries.reserve(Recoveries.size());
+    SortedIndex = 0;
+    for (StateID State = 0; State < NumStates; ++State) {
+      for (Table.RecoveryOffset.push_back(Table.Recoveries.size());
+           SortedIndex < Recoveries.size() &&
+           Recoveries[SortedIndex].Src == State;
+           ++SortedIndex) {
+        Table.Recoveries.push_back(
+            {Recoveries[SortedIndex].Strategy, Recoveries[SortedIndex].Result});
+      }
+    }
+    assert(SortedIndex == Recoveries.size());
+    Table.RecoveryOffset.push_back(Table.Recoveries.size());
+
     return Table;
   }
 
 private:
   llvm::DenseSet<Entry> Entries;
+  std::vector<LRGraph::Recovery> Recoveries;
   std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
@@ -104,7 +134,7 @@
     if (Entry.Act.kind() == LRTable::Action::GoTo)
       MaxState = std::max(MaxState, Entry.Act.getGoToState());
   }
-  Builder Build({});
+  Builder Build({}, {});
   for (const Entry &E : Entries)
     Build.insert(E);
   return std::move(Build).build(GT, /*NumStates=*/MaxState + 1);
 }
@@ -112,7 +142,7 @@
 LRTable LRTable::buildSLR(const Grammar &G) {
   auto Graph = LRGraph::buildLR0(G);
-  Builder Build(Graph.startStates());
+  Builder Build(Graph.recoveries(), Graph.startStates());
   for (const auto &T : Graph.edges()) {
     Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
     Build.insert({T.Src, T.Label, Act});
diff --git a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
--- a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
+++ b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
@@ -2,7 +2,7 @@
 class Foo {
 public:
 };
-// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification }
+// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification [recover=1] }
 // CHECK-NEXT: ├─class-head := class-key class-head-name
 // CHECK-NEXT: │ ├─class-key~CLASS := tok[0]
 // CHECK-NEXT: │ └─class-head-name~IDENTIFIER := tok[1]
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -36,6 +36,7 @@
 MATCHER_P(state, StateID, "") { return arg->State == StateID; }
 MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }
 MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; }
+MATCHER_P(nextToken, Index, "") { return arg->NextToken == unsigned(Index); }
 
 testing::Matcher<const GSS::Node *>
 parents(llvm::ArrayRef<const GSS::Node *> Parents) {
@@ -101,13 +102,13 @@
   //   0---1---4
   //   └---2---┘
   //   └---3---5
-  auto *GSSNode0 =
-      GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
-  auto *GSSNode1 = GSStack.addNode(/*State=*/1, /*ForestNode=*/nullptr,
+  auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+                                   /*NextToken=*/0, /*Parents=*/{});
+  auto *GSSNode1 = GSStack.addNode(/*State=*/1, /*ForestNode=*/nullptr, /*NextToken=*/0,
                                    /*Parents=*/{GSSNode0});
-  auto *GSSNode2 = GSStack.addNode(/*State=*/2, /*ForestNode=*/nullptr,
+  auto *GSSNode2 = GSStack.addNode(/*State=*/2, /*ForestNode=*/nullptr, /*NextToken=*/0,
                                    /*Parents=*/{GSSNode0});
-  auto *GSSNode3 = GSStack.addNode(/*State=*/3, /*ForestNode=*/nullptr,
+  auto *GSSNode3 = GSStack.addNode(/*State=*/3, /*ForestNode=*/nullptr, /*NextToken=*/0,
                                    /*Parents=*/{GSSNode0});
 
   buildGrammar({}, {}); // Create a fake empty grammar.
@@ -123,11 +124,11 @@
   glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
            {*G, T, Arena, GSStack}, NewHeads);
 
-  EXPECT_THAT(NewHeads,
-              UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
-                                         parents({GSSNode1, GSSNode2})),
-                                   AllOf(state(5), parsedSymbol(&SemiTerminal),
-                                         parents({GSSNode3}))))
+  EXPECT_THAT(NewHeads, UnorderedElementsAre(
+                            AllOf(state(4), parsedSymbol(&SemiTerminal),
+                                  nextToken(1), parents({GSSNode1, GSSNode2})),
+                            AllOf(state(5), parsedSymbol(&SemiTerminal),
+                                  nextToken(1), parents({GSSNode3}))))
       << NewHeads;
 }
@@ -151,18 +152,19 @@
           Action::reduce(ruleFor("enum-name"))},
   });
 
-  const auto *GSSNode0 =
-      GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
+  const auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+                                         /*NextToken=*/0, /*Parents=*/{});
   const auto *GSSNode1 =
-      GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
+      GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), /*NextToken=*/1, {GSSNode0});
 
   std::vector<const GSS::Node *> Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::l_brace), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, /*NextToken=*/1, tokenSymbol(tok::l_brace),
+            {*G, Table, Arena, GSStack});
 
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode1,
-                         AllOf(state(2), parsedSymbolID(id("class-name")),
+                         AllOf(state(2), parsedSymbolID(id("class-name")), nextToken(1),
                                parents({GSSNode0})),
-                         AllOf(state(3), parsedSymbolID(id("enum-name")),
+                         AllOf(state(3), parsedSymbolID(id("enum-name")), nextToken(1),
                                parents({GSSNode0}))))
       << Heads;
 }
@@ -181,11 +183,12 @@
   auto *EnumNameNode = &Arena.createOpaque(id("enum-name"), /*TokenIndex=*/0);
 
   const auto *GSSNode2 =
-      GSStack.addNode(/*State=*/2, /*ForestNode=*/ClassNameNode, /*Parents=*/{});
+      GSStack.addNode(/*State=*/2, /*ForestNode=*/ClassNameNode, /*NextToken=*/1, /*Parents=*/{});
   const auto *GSSNode3 =
-      GSStack.addNode(/*State=*/3, /*ForestNode=*/EnumNameNode, /*Parents=*/{});
+      GSStack.addNode(/*State=*/3, /*ForestNode=*/EnumNameNode, /*NextToken=*/1, /*Parents=*/{});
   const auto *GSSNode4 = GSStack.addNode(
       /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
+      /*NextToken=*/2,
       /*Parents=*/{GSSNode2, GSSNode3});
 
   LRTable Table = LRTable::buildForTests(
@@ -197,13 +200,13 @@
           Action::reduce(ruleFor("ptr-operator"))},
   });
   std::vector<const GSS::Node *> Heads = {GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::identifier), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, /*NextToken=*/2, tokenSymbol(tok::identifier), {*G, Table, Arena, GSStack});
 
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode4,
-                         AllOf(state(5), parsedSymbolID(id("ptr-operator")),
+                         AllOf(state(5), parsedSymbolID(id("ptr-operator")), nextToken(2),
                                parents({GSSNode2})),
-                         AllOf(state(6), parsedSymbolID(id("ptr-operator")),
+                         AllOf(state(6), parsedSymbolID(id("ptr-operator")), nextToken(2),
                                parents({GSSNode3}))))
       << Heads;
   // Verify that the payload of the two new heads is shared, only a single
@@ -227,17 +230,19 @@
   auto *ClassNameNode = &Arena.createOpaque(id("class-name"), /*TokenIndex=*/1);
   auto *EnumNameNode = &Arena.createOpaque(id("enum-name"), /*TokenIndex=*/1);
 
-  const auto *GSSNode0 =
-      GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
+  const auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+                                         /*NextToken=*/0, /*Parents=*/{});
   const auto *GSSNode1 = GSStack.addNode(
-      /*State=*/1, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
+      /*State=*/1, /*ForestNode=*/CVQualifierNode, /*NextToken=*/1,
+      /*Parents=*/{GSSNode0});
   const auto *GSSNode2 = GSStack.addNode(
-      /*State=*/2, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
-  const auto *GSSNode3 =
-      GSStack.addNode(/*State=*/3, /*ForestNode=*/ClassNameNode,
-                      /*Parents=*/{GSSNode1});
+      /*State=*/2, /*ForestNode=*/CVQualifierNode, /*NextToken=*/1,
+      /*Parents=*/{GSSNode0});
+  const auto *GSSNode3 = GSStack.addNode(
+      /*State=*/3, /*ForestNode=*/ClassNameNode, /*NextToken=*/2,
+      /*Parents=*/{GSSNode1});
   const auto *GSSNode4 =
-      GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode,
+      GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode, /*NextToken=*/2,
                       /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
   LRTable Table = LRTable::buildForTests(
@@ -252,13 +257,14 @@
           Action::reduce(/* type-name := enum-name */ 1)},
   });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, /*NextToken=*/2, tokenSymbol(tok::l_paren),
+            {*G, Table, Arena, GSStack});
 
   // Verify that the stack heads are joint at state 5 after reduces.
-  EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
-                                          AllOf(state(5),
-                                                parsedSymbolID(id("type-name")),
-                                                parents({GSSNode1, GSSNode2}))))
+  EXPECT_THAT(Heads, UnorderedElementsAre(
+                         GSSNode3, GSSNode4,
+                         AllOf(state(5), parsedSymbolID(id("type-name")),
+                               nextToken(2), parents({GSSNode1, GSSNode2}))))
       << Heads;
   // Verify that we create an ambiguous ForestNode of two parses of `type-name`.
   EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
@@ -283,20 +289,20 @@
   auto *EnumNameNode = &Arena.createOpaque(id("enum-name"), /*TokenIndex=*/0);
   auto *StartTerminal = &Arena.createTerminal(tok::star, /*TokenIndex=*/1);
 
-  const auto *GSSNode0 =
-      GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
-  const auto *GSSNode1 =
-      GSStack.addNode(/*State=*/1, /*ForestNode=*/ClassNameNode,
-                      /*Parents=*/{GSSNode0});
+  const auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+                                         /*NextToken=*/0, /*Parents=*/{});
+  const auto *GSSNode1 = GSStack.addNode(
+      /*State=*/1, /*ForestNode=*/ClassNameNode, /*NextToken=*/1,
+      /*Parents=*/{GSSNode0});
   const auto *GSSNode2 =
-      GSStack.addNode(/*State=*/2, /*ForestNode=*/EnumNameNode,
+      GSStack.addNode(/*State=*/2, /*ForestNode=*/EnumNameNode, /*NextToken=*/1,
                       /*Parents=*/{GSSNode0});
-  const auto *GSSNode3 =
-      GSStack.addNode(/*State=*/3, /*ForestNode=*/StartTerminal,
-                      /*Parents=*/{GSSNode1});
-  const auto *GSSNode4 =
-      GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal,
-                      /*Parents=*/{GSSNode2});
+  const auto *GSSNode3 = GSStack.addNode(
+      /*State=*/3, /*ForestNode=*/StartTerminal, /*NextToken=*/2,
+      /*Parents=*/{GSSNode1});
+  const auto *GSSNode4 = GSStack.addNode(
+      /*State=*/4, /*ForestNode=*/StartTerminal, /*NextToken=*/2,
+      /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
   LRTable Table = LRTable::buildForTests(
@@ -308,12 +314,13 @@
           Action::reduce(/* pointer := enum-name */ 1)},
   });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, /*NextToken=*/2, tokenSymbol(tok::l_paren),
+            {*G, Table, Arena, GSStack});
 
   EXPECT_THAT(
       Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
                                   AllOf(state(5), parsedSymbolID(id("pointer")),
-                                        parents({GSSNode0}))))
+                                        nextToken(2), parents({GSSNode0}))))
       << Heads;
   EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
             "[  0, end) pointer := <ambiguous>\n"
@@ -425,16 +432,16 @@
   //   ├-D
   //   └-E
   GSS GSStack;
-  auto *Root = GSStack.addNode(0, nullptr, {});
-  auto *A = GSStack.addNode(0, nullptr, {Root});
-  auto *B = GSStack.addNode(0, nullptr, {Root});
-  auto *C = GSStack.addNode(0, nullptr, {Root});
-  auto *D = GSStack.addNode(0, nullptr, {Root});
-  auto *AB = GSStack.addNode(0, nullptr, {A, B});
+  auto *Root = GSStack.addNode(0, nullptr, /*NextToken=*/0, {});
+  auto *A = GSStack.addNode(0, nullptr, /*NextToken=*/1, {Root});
+  auto *B = GSStack.addNode(0, nullptr, /*NextToken=*/1, {Root});
+  auto *C = GSStack.addNode(0, nullptr, /*NextToken=*/1, {Root});
+  auto *D = GSStack.addNode(0, nullptr, /*NextToken=*/1, {Root});
+  auto *AB = GSStack.addNode(0, nullptr, /*NextToken=*/2, {A, B});
 
   EXPECT_EQ(1u, GSStack.gc({AB, C})) << "D is destroyed";
   EXPECT_EQ(0u, GSStack.gc({AB, C})) << "D is already gone";
 
-  auto *E = GSStack.addNode(0, nullptr, {Root});
+  auto *E = GSStack.addNode(0, nullptr, /*NextToken=*/1, {Root});
   EXPECT_EQ(D, E) << "Storage of GCed node D is reused for E";
   EXPECT_EQ(3u, GSStack.gc({A, E})) << "Destroys B, AB, C";
   EXPECT_EQ(1u, GSStack.gc({E})) << "Destroys A";
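
For reference, the recovery lookup added to LRTable above uses a flattened offset/payload layout: state S owns the slice [RecoveryOffset[S], RecoveryOffset[S+1]) of Recoveries. The following is a minimal standalone sketch of that scheme, illustrative only and not code from this patch; the names (RecoveryTable, build, get) are hypothetical and it has no clang-pseudo dependencies.

// Illustrative sketch: a CSR-style per-state table mirroring how
// LRTable::RecoveryOffset / Recoveries are laid out. Hypothetical names.
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

enum class RecoveryStrategy : uint8_t { None, Braces };

struct Recovery {
  RecoveryStrategy Strategy;
  uint16_t Result; // Stands in for SymbolID.
};

struct RecoveryTable {
  std::vector<uint32_t> Offset;  // NumStates + 1 entries; last is a sentinel.
  std::vector<Recovery> Entries; // All recoveries, concatenated state by state.

  // Build from (State, Recovery) pairs that are already sorted by State,
  // analogous to the offset-building loop in LRTable::Builder::build().
  static RecoveryTable
  build(unsigned NumStates,
        const std::vector<std::pair<unsigned, Recovery>> &Sorted) {
    RecoveryTable T;
    size_t Next = 0;
    for (unsigned State = 0; State < NumStates; ++State) {
      T.Offset.push_back(static_cast<uint32_t>(T.Entries.size()));
      for (; Next < Sorted.size() && Sorted[Next].first == State; ++Next)
        T.Entries.push_back(Sorted[Next].second);
    }
    assert(Next == Sorted.size() && "recovery for an out-of-range state?");
    T.Offset.push_back(static_cast<uint32_t>(T.Entries.size()));
    return T;
  }

  // All recoveries available from State, as a [begin, end) pointer pair.
  std::pair<const Recovery *, const Recovery *> get(unsigned State) const {
    return {Entries.data() + Offset[State], Entries.data() + Offset[State + 1]};
  }
};

int main() {
  // State 2 can recover a hypothetical nonterminal (symbol 7) via Braces.
  RecoveryTable T = RecoveryTable::build(
      /*NumStates=*/4, {{2, {RecoveryStrategy::Braces, 7}}});
  auto [B, E] = T.get(2);
  assert(E - B == 1 && B->Result == 7);
  assert(T.get(1).first == T.get(1).second); // State 1 has no recoveries.
  (void)B;
  (void)E;
  return 0;
}

The sentinel entry at the end of Offset is what lets getRecovery read RecoveryOffset[State + 1] without bounds checks, at the cost of one extra element.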