diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp --- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -77,11 +77,11 @@ } BENCHMARK(parseBNF); -static void buildSLR(benchmark::State &State) { +static void buildLR0(benchmark::State &State) { for (auto _ : State) - LRTable::buildSLR(*G); + LRTable::buildLR0(*G); } -BENCHMARK(buildSLR); +BENCHMARK(buildLR0); TokenStream lexAndPreprocess() { clang::LangOptions LangOpts = genericLangOpts(); @@ -129,7 +129,7 @@ BENCHMARK(preprocess); static void glrParse(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); + LRTable Table = clang::pseudo::LRTable::buildLR0(*G); SymbolID StartSymbol = *G->findNonterminal("translation-unit"); TokenStream Stream = lexAndPreprocess(); for (auto _ : State) { @@ -143,7 +143,7 @@ BENCHMARK(glrParse); static void full(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); + LRTable Table = clang::pseudo::LRTable::buildLR0(*G); SymbolID StartSymbol = *G->findNonterminal("translation-unit"); for (auto _ : State) { TokenStream Stream = lexAndPreprocess(); diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -25,7 +25,7 @@ class Fuzzer { clang::LangOptions LangOpts = clang::pseudo::genericLangOpts(); std::unique_ptr G; - LRTable T; + llvm::Optional T; bool Print; public: @@ -44,7 +44,7 @@ llvm::errs() << Diag << "\n"; std::exit(1); } - T = LRTable::buildSLR(*G); + T = LRTable::buildLR0(*G); } void operator()(llvm::StringRef Code) { @@ -58,9 +58,9 @@ clang::pseudo::ForestArena Arena; clang::pseudo::GSS GSS; - auto &Root = - glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS}, - *G->findNonterminal("translation-unit")); + auto &Root = glrParse(ParseableStream, + clang::pseudo::ParseParams{*G, *T, Arena, GSS}, + *G->findNonterminal("translation-unit")); if (Print) llvm::outs() << Root.dumpRecursive(*G); } diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -132,34 +132,23 @@ const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params, SymbolID StartSymbol); -// An active stack head can have multiple available actions (reduce/reduce -// actions, reduce/shift actions). -// A step is any one action applied to any one stack head. -struct ParseStep { - // A specific stack head. - const GSS::Node *Head = nullptr; - // An action associated with the head. - LRTable::Action Action = LRTable::Action::sentinel(); -}; -// A callback is invoked whenever a new GSS head is created during the GLR -// parsing process (glrShift, or glrReduce). -using NewHeadCallback = std::function; // Apply all PendingShift actions on a given GSS state, newly-created heads are // passed to the callback. // // When this function returns, PendingShift is empty. // // Exposed for testing only. -void glrShift(std::vector &PendingShift, const ForestNode &NextTok, - const ParseParams &Params, NewHeadCallback NewHeadCB); +void glrShift(llvm::ArrayRef OldHeads, + const ForestNode &NextTok, const ParseParams &Params, + std::vector &NewHeads); // Applies PendingReduce actions, until no more reduce actions are available. // // When this function returns, PendingReduce is empty. Calls to NewHeadCB may // add elements to PendingReduce // // Exposed for testing only. -void glrReduce(std::vector &PendingReduce, const ParseParams &Params, - NewHeadCallback NewHeadCB); +void glrReduce(std::vector &Heads, + const ParseParams &Params); } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -174,12 +174,6 @@ // the augmented grammar.) SymbolID Underscore; }; -// For each nonterminal X, computes the set of terminals that begin strings -// derived from X. (Known as FIRST sets in grammar-based parsers). -std::vector> firstSets(const Grammar &); -// For each nonterminal X, computes the set of terminals that could immediately -// follow X. (Known as FOLLOW sets in grammar-based parsers). -std::vector> followSets(const Grammar &); // Storage for the underlying data of the Grammar. // It can be constructed dynamically (from compiling BNF file) or statically diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h @@ -38,9 +38,39 @@ #include "clang-pseudo/grammar/Grammar.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Capacity.h" +#include "llvm/Support/raw_ostream.h" #include #include +namespace clang { +namespace pseudo { +struct StateSymbol { + using StateID = uint16_t; // XXX + StateID State; + SymbolID Symbol; +}; +} // namespace pseudo +} // namespace clang +namespace llvm { +template <> struct llvm::DenseMapInfo { + using StateSymbol = clang::pseudo::StateSymbol; + static inline StateSymbol getEmptyKey() { + return StateSymbol{StateSymbol::StateID(-1), 0}; + } + static inline StateSymbol getTombstoneKey() { + return StateSymbol{StateSymbol::StateID(-1), 1}; + } + static unsigned getHashValue(const StateSymbol &Val) { + return (Val.State * 2754435761U) ^ Val.Symbol; // Knuth hash + } + static bool isEqual(const StateSymbol &LHS, const StateSymbol &RHS) { + return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol; + } +}; +} // namespace llvm namespace clang { namespace pseudo { @@ -60,79 +90,26 @@ using StateID = uint16_t; static constexpr unsigned StateBits = 13; - // Action represents the terminal and nonterminal actions, it combines the - // entry of the ACTION and GOTO tables from the LR literature. - class Action { - public: - enum Kind : uint8_t { - Sentinel = 0, - // Terminal actions, corresponding to entries of ACTION table. - - // Shift to state n: move forward with the lookahead, and push state n - // onto the state stack. - // A shift is a forward transition, and the value n is the next state that - // the parser is to enter. - Shift, - // Reduce by a rule: pop the state stack. - Reduce, - - // NOTE: there are no typical accept actions in the LRtable, accept - // actions are handled specifically in the parser -- if the parser - // reaches to a target state (which is goto(StartState, StartSymbol)) at - // the EOF token after a reduce, this indicates the input has been parsed - // as the StartSymbol successfully. - - // Nonterminal actions, corresponding to entry of GOTO table. - - // Go to state n: push state n onto the state stack. - // Similar to Shift, but it is a nonterminal forward transition. - GoTo, - }; - - static Action goTo(StateID S) { return Action(GoTo, S); } - static Action shift(StateID S) { return Action(Shift, S); } - static Action reduce(RuleID RID) { return Action(Reduce, RID); } - static Action sentinel() { return Action(Sentinel, 0); } - - StateID getShiftState() const { - assert(kind() == Shift); - return Value; - } - StateID getGoToState() const { - assert(kind() == GoTo); - return Value; - } - RuleID getReduceRule() const { - assert(kind() == Reduce); - return Value; - } - Kind kind() const { return static_cast(K); } - - bool operator==(const Action &L) const { return opaque() == L.opaque(); } - uint16_t opaque() const { return K << ValueBits | Value; }; - - private: - Action(Kind K1, unsigned Value) : K(K1), Value(Value) {} - static constexpr unsigned ValueBits = StateBits; - static constexpr unsigned KindBits = 3; - static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID"); - static_assert(KindBits + ValueBits <= 16, - "Must be able to store kind and value efficiently"); - uint16_t K : KindBits; - // Either StateID or RuleID, depending on the Kind. - uint16_t Value : ValueBits; - }; - - // Returns all available actions for the given state on a terminal. - // Expected to be called by LR parsers. - llvm::ArrayRef getActions(StateID State, SymbolID Terminal) const; - // Returns the state after we reduce a nonterminal. - // Expected to be called by LR parsers. - StateID getGoToState(StateID State, SymbolID Nonterminal) const; - - // Looks up available actions. - // Returns empty if no available actions in the table. - llvm::ArrayRef find(StateID State, SymbolID Symbol) const; + // Return the list of reductions applicable from a given state. + // These correspond to items with the dot at the end. + llvm::ArrayRef getReduceRules(StateID State) const { + return Reduce.find(State); + } + // Return the state to transition to after shifting a token Terminal. + // Returns None if this terminal is not legal here. + llvm::Optional getShiftState(StateID From, SymbolID Terminal) const { + auto It = Shift.find(StateSymbol{From, Terminal}); + if (It == Shift.end()) + return llvm::None; + return It->second; + } + // Return the state to transition to after reducing a symbol Nonterminal. + // REQUIRES: this nonterminal is legal here. + StateID getGoToState(StateID From, SymbolID Nonterminal) const { + auto It = Goto.find(StateSymbol{From, Nonterminal}); + assert(It != Goto.end()); + return It->second; + } // Returns the state from which the LR parser should start to parse the input // tokens as the given StartSymbol. @@ -146,45 +123,71 @@ StateID getStartState(SymbolID StartSymbol) const; size_t bytes() const { - return sizeof(*this) + Actions.capacity() * sizeof(Action) + - Symbols.capacity() * sizeof(SymbolID) + - StateOffset.capacity() * sizeof(uint32_t); + return sizeof(*this) + llvm::capacity_in_bytes(Shift) + + llvm::capacity_in_bytes(Goto) + Reduce.bytes(); } std::string dumpStatistics() const; std::string dumpForTests(const Grammar &G) const; - // Build a SLR(1) parsing table. - static LRTable buildSLR(const Grammar &G); + static LRTable buildLR0(const Grammar &G); + struct Builder { + unsigned StateCount = 0; + std::vector> Reduce; + llvm::DenseMap Shift; + llvm::DenseMap GoTo; + std::vector> StartStates; - class Builder; - // Represents an entry in the table, used for building the LRTable. - struct Entry { - StateID State; - SymbolID Symbol; - Action Act; + LRTable build() && { return LRTable(std::move(*this)); } }; - // Build a specifid table for testing purposes. - static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef); private: - // Conceptually the LR table is a multimap from (State, SymbolID) => Action. - // Our physical representation is quite different for compactness. - - // Index is StateID, value is the offset into Symbols/Actions - // where the entries for this state begin. - // Give a state id, the corresponding half-open range of Symbols/Actions is - // [StateOffset[id], StateOffset[id+1]). - std::vector StateOffset; - // Parallel to Actions, the value is SymbolID (columns of the matrix). - // Grouped by the StateID, and only subranges are sorted. - std::vector Symbols; - // A flat list of available actions, sorted by (State, SymbolID). - std::vector Actions; + // A multimap from K => V, where Ks form a dense range [0, n). + // A flat array stores the values: + // Values = [ values for k=0 | values for k=1 | values for k=2 | ... ] + // And another stores the index for each K: + // Keys = [ index for k=0, index for k=1, ... , Values.size()] + // Lookup[k] is Values[ Keys[k]...Keys[k+1] ] + template struct OffsetTable { + std::vector Keys; + std::vector Values; + + OffsetTable(std::vector> &&Pairs, K Limit) { + assert(llvm::all_of(Pairs, [&](auto &P) { return P.first < Limit; })); + llvm::stable_sort(Pairs, llvm::less_first{}); + Keys.reserve(Limit + 1); + Values.reserve(Pairs.size()); + unsigned I = 0; + for (K Key = 0; Key < Limit; ++Key) { + Keys.push_back(Values.size()); + while (I < Pairs.size() && Pairs[I].first == Key) + Values.push_back(Pairs[I++].second); + } + Keys.push_back(Values.size()); + assert(Values.size() == Pairs.size()); + assert(Keys.size() == Limit + 1); + } + + size_t bytes() const { + return sizeof(*this) + llvm::capacity_in_bytes(Keys) + + llvm::capacity_in_bytes(Values); + } + size_t size() const { return Values.size(); } + size_t keys() const { return Keys.size() - 1; } + + llvm::ArrayRef find(K Key) const { + return llvm::makeArrayRef(&Values[Keys[Key]], &Values[Keys[Key + 1]]); + } + }; + + llvm::DenseMap Shift; + OffsetTable Reduce; + llvm::DenseMap Goto; // A sorted table, storing the start state for each target parsing symbol. std::vector> StartStates; + + LRTable(Builder B); }; -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &); } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -46,67 +46,35 @@ auto &GSS = Params.GSStack; // Lists of active shift, reduce actions. - std::vector PendingShift, PendingReduce; - auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) { - for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) { - switch (Action.kind()) { - case LRTable::Action::Shift: - PendingShift.push_back({Head, Action}); - break; - case LRTable::Action::Reduce: - PendingReduce.push_back({Head, Action}); - break; - default: - llvm_unreachable("unexpected action kind!"); - } - } - }; StateID StartState = Params.Table.getStartState(StartSymbol); - std::vector NewHeads = { + std::vector Heads = { GSS.addNode(/*State=*/StartState, /*ForestNode=*/nullptr, {})}; + std::vector NextHeads; auto MaybeGC = [&, Roots(std::vector{}), I(0u)]() mutable { - assert(PendingShift.empty() && PendingReduce.empty() && - "Running GC at the wrong time!"); - + assert(NextHeads.empty() && "Running GC at the wrong time!"); if (++I != 20) // Run periodically to balance CPU and memory usage. return; I = 0; // We need to copy the list: Roots is consumed by the GC. - Roots = NewHeads; + Roots = Heads; GSS.gc(std::move(Roots)); }; for (const ForestNode &Terminal : Terminals) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n", G.symbolName(Terminal.symbol()), Terminal.symbol())); - for (const auto *Head : NewHeads) - AddSteps(Head, Terminal.symbol()); - NewHeads.clear(); - glrReduce(PendingReduce, Params, - [&](const GSS::Node * NewHead) { - // A reduce will enable more steps. - AddSteps(NewHead, Terminal.symbol()); - }); - - glrShift(PendingShift, Terminal, Params, - [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); }); + glrShift(Heads, Terminal, Params, NextHeads); + std::swap(Heads, NextHeads); + glrReduce(Heads, Params); + NextHeads.clear(); MaybeGC(); } - LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n")); - for (const auto *Heads : NewHeads) - AddSteps(Heads, tokenSymbol(tok::eof)); + LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n")); StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol); // Collect new heads created from the final reduce. - std::vector Heads; - glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) { - Heads.push_back(NewHead); - // A reduce will enable more steps. - AddSteps(NewHead, tokenSymbol(tok::eof)); - }); - const ForestNode *Result = nullptr; for (const auto *Head : Heads) { if (Head->State == AcceptState) { @@ -138,42 +106,40 @@ // After the shift action, the GSS is: // 0---1---2---4 // └---3---┘ -void glrShift(std::vector &PendingShift, const ForestNode &NewTok, - const ParseParams &Params, NewHeadCallback NewHeadCB) { +void glrShift(llvm::ArrayRef OldHeads, + const ForestNode &NewTok, const ParseParams &Params, + std::vector &NewHeads) { assert(NewTok.kind() == ForestNode::Terminal); - assert(llvm::all_of(PendingShift, - [](const ParseStep &Step) { - return Step.Action.kind() == LRTable::Action::Shift; - }) && - "Pending shift actions must be shift actions"); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n", Params.G.symbolName(NewTok.symbol()), - PendingShift.size())); + OldHeads.size())); // We group pending shifts by their target state so we can merge them. - llvm::stable_sort(PendingShift, [](const ParseStep &L, const ParseStep &R) { - return L.Action.getShiftState() < R.Action.getShiftState(); - }); - auto Rest = llvm::makeArrayRef(PendingShift); + llvm::SmallVector, 8> Shifts; + for (const auto* H : OldHeads) + if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol())) + Shifts.push_back({*S, H}); + llvm::stable_sort(Shifts, llvm::less_first{}); + + auto Rest = llvm::makeArrayRef(Shifts); llvm::SmallVector Parents; while (!Rest.empty()) { // Collect the batch of PendingShift that have compatible shift states. // Their heads become TempParents, the parents of the new GSS node. - StateID NextState = Rest.front().Action.getShiftState(); + StateID NextState = Rest.front().first; Parents.clear(); for (const auto &Base : Rest) { - if (Base.Action.getShiftState() != NextState) + if (Base.first != NextState) break; - Parents.push_back(Base.Head); + Parents.push_back(Base.second); } Rest = Rest.drop_front(Parents.size()); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n", NextState, Parents.size())); - NewHeadCB(Params.GSStack.addNode(NextState, &NewTok, Parents)); + NewHeads.push_back(Params.GSStack.addNode(NextState, &NewTok, Parents)); } - PendingShift.clear(); } namespace { @@ -231,8 +197,8 @@ // After reducing 3 by `pointer := class-name STAR` and // 2 by`enum-name := class-name STAR`: // 0--5(pointer) // 5 is goto(0, pointer) -void glrReduce(std::vector &PendingReduce, const ParseParams &Params, - NewHeadCallback NewHeadCB) { +void glrReduce(std::vector &Heads, + const ParseParams &Params) { // There are two interacting complications: // 1. Performing one reduce can unlock new reduces on the newly-created head. // 2a. The ambiguous ForestNodes must be complete (have all sequence nodes). @@ -291,6 +257,36 @@ KeyedQueue Sequences; Sequence TempSequence; + + // We treat Heads as a queue of Pop operations still to be performed. + // PoppedHeads is our position within it. + unsigned PoppedHeads = 0; + // In general the sequencing is complicated: each pop can yield multiple + // pending pushes that might run in a different order than we found them. + // However in trivial cases (only pop that yields only one push) we can + // bypass all these fancy queues and pop+push directly. This is very common. + auto PopAndPushTrivial = [&]() -> bool { + if (!Sequences.empty() || Heads.size() != PoppedHeads + 1) + return false; + const GSS::Node *Head = Heads.back(); + auto Rules = Params.Table.getReduceRules(Head->State); + if (Rules.size() != 1) + return false; + const auto &Rule = Params.G.lookupRule(Rules.front()); + const GSS::Node *Base = Head; + TempSequence.resize_for_overwrite(Rule.Size); + for (unsigned I = 0; I < Rule.Size; ++I) { + if (Base->parents().size() != 1) + return false; + TempSequence[Rule.Size - 1 - I] = Base->Payload; + Base = Base->parents().front(); + } + const ForestNode *Parsed = + &Params.Forest.createSequence(Rule.Target, Rules.front(), TempSequence); + StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target); + Heads.push_back(Params.GSStack.addNode(NextState, Parsed, {Base})); + return true; + }; // Pop walks up the parent chain(s) for a reduction from Head by to Rule. // Once we reach the end, record the bases and sequences. auto Pop = [&](const GSS::Node *Head, RuleID RID) { @@ -312,9 +308,12 @@ DFS(Head, 0, DFS); }; auto PopPending = [&] { - for (const ParseStep &Pending : PendingReduce) - Pop(Pending.Head, Pending.Action.getReduceRule()); - PendingReduce.clear(); + for (; PoppedHeads < Heads.size(); ++PoppedHeads) { + if (PopAndPushTrivial()) + continue; + for (RuleID R : Params.Table.getReduceRules(Heads[PoppedHeads]->State)) + Pop(Heads[PoppedHeads], R); + } }; std::vector> FamilyBases; @@ -379,9 +378,7 @@ } BasesLeft = BasesLeft.drop_front(Parents.size()); - // Invoking the callback for new heads, a real GLR parser may add new - // reduces to the PendingReduce queue! - NewHeadCB(Params.GSStack.addNode(NextState, Parsed, Parents)); + Heads.push_back(Params.GSStack.addNode(NextState, Parsed, Parents)); } PopPending(); } diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -25,7 +25,7 @@ } const LRTable &getLRTable() { - static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar())); + static LRTable *Table = new LRTable(LRTable::buildLR0(getGrammar())); return *Table; } diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp --- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp @@ -87,87 +87,6 @@ return OS.str(); } -std::vector> firstSets(const Grammar &G) { - std::vector> FirstSets( - G.table().Nonterminals.size()); - auto ExpandFirstSet = [&FirstSets](SymbolID Target, SymbolID First) { - assert(isNonterminal(Target)); - if (isToken(First)) - return FirstSets[Target].insert(First).second; - bool Changed = false; - for (SymbolID SID : FirstSets[First]) - Changed |= FirstSets[Target].insert(SID).second; - return Changed; - }; - - // A rule S := T ... implies elements in FIRST(S): - // - if T is a terminal, FIRST(S) contains T - // - if T is a nonterminal, FIRST(S) contains FIRST(T) - // Since FIRST(T) may not have been fully computed yet, FIRST(S) itself may - // end up being incomplete. - // We iterate until we hit a fixed point. - // (This isn't particularly efficient, but table building isn't on the - // critical path). - bool Changed = true; - while (Changed) { - Changed = false; - for (const auto &R : G.table().Rules) - // We only need to consider the first element because symbols are - // non-nullable. - Changed |= ExpandFirstSet(R.Target, R.seq().front()); - } - return FirstSets; -} - -std::vector> followSets(const Grammar &G) { - auto FirstSets = firstSets(G); - std::vector> FollowSets( - G.table().Nonterminals.size()); - // Expand the follow set of a nonterminal symbol Y by adding all from the - // given symbol set. - auto ExpandFollowSet = [&FollowSets](SymbolID Y, - const llvm::DenseSet &ToAdd) { - assert(isNonterminal(Y)); - bool Changed = false; - for (SymbolID F : ToAdd) - Changed |= FollowSets[Y].insert(F).second; - return Changed; - }; - // Follow sets is computed based on the following 3 rules, the computation - // is completed at a fixed point where there is no more new symbols can be - // added to any of the follow sets. - // - // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the - // augmented grammar, in our case it is '_'. - FollowSets[G.underscore()].insert(tokenSymbol(tok::eof)); - bool Changed = true; - while (Changed) { - Changed = false; - for (const auto &R : G.table().Rules) { - // Rule 2: for a rule X := ... Y Z, we add all symbols from FIRST(Z) to - // FOLLOW(Y). - for (size_t I = 0; I + 1 < R.seq().size(); ++I) { - if (isToken(R.seq()[I])) - continue; - // We only need to consider the next symbol because symbols are - // non-nullable. - SymbolID Next = R.seq()[I + 1]; - if (isToken(Next)) - // First set for a terminal is itself. - Changed |= ExpandFollowSet(R.seq()[I], {Next}); - else - Changed |= ExpandFollowSet(R.seq()[I], FirstSets[Next]); - } - // Rule 3: for a rule X := ... Z, we add all symbols from FOLLOW(X) to - // FOLLOW(Z). - SymbolID Z = R.seq().back(); - if (isNonterminal(Z)) - Changed |= ExpandFollowSet(Z, FollowSets[R.Target]); - } - } - return FollowSets; -} - static llvm::ArrayRef getTerminalNames() { static const auto &TerminalNames = []() { auto TerminalNames = new std::string[NumTerminals]; diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp --- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp @@ -10,35 +10,19 @@ #include "clang-pseudo/grammar/Grammar.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace pseudo { -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { - switch (A.kind()) { - case LRTable::Action::Shift: - return OS << llvm::formatv("shift state {0}", A.getShiftState()); - case LRTable::Action::Reduce: - return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule()); - case LRTable::Action::GoTo: - return OS << llvm::formatv("go to state {0}", A.getGoToState()); - case LRTable::Action::Sentinel: - llvm_unreachable("unexpected Sentinel action kind!"); - } - llvm_unreachable("unexpected action kind!"); -} - std::string LRTable::dumpStatistics() const { return llvm::formatv(R"( Statistics of the LR parsing table: - number of states: {0} - number of actions: {1} - size of the table (bytes): {2} + number of actions: shift={0} reduce={1} goto={2} + size of the table (bytes): {3} )", - StateOffset.size() - 1, Actions.size(), bytes()) + Shift.size(), Reduce.size(), Goto.size(), bytes()) .str(); } @@ -46,66 +30,28 @@ std::string Result; llvm::raw_string_ostream OS(Result); OS << "LRTable:\n"; - for (StateID S = 0; S < StateOffset.size() - 1; ++S) { + for (StateID S = 0; S < Reduce.keys(); ++S) { OS << llvm::formatv("State {0}\n", S); + for (RuleID R : getReduceRules(S)) + OS.indent(4) << llvm::formatv("reduce by rule {0} '{1}'\n", R, + G.dumpRule(R)); for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { SymbolID TokID = tokenSymbol(static_cast(Terminal)); - for (auto A : find(S, TokID)) { - if (A.kind() == LRTable::Action::Shift) - OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", - G.symbolName(TokID), A.getShiftState()); - else if (A.kind() == LRTable::Action::Reduce) - OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n", - G.symbolName(TokID), A.getReduceRule(), - G.dumpRule(A.getReduceRule())); - } + if (auto Next = getShiftState(S, TokID)) + OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", + G.symbolName(TokID), *Next); } for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size(); ++NontermID) { - if (find(S, NontermID).empty()) - continue; - OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", - G.symbolName(NontermID), - getGoToState(S, NontermID)); + auto It = Goto.find(StateSymbol{S, NontermID}); + if (It != Goto.end()) + OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", + G.symbolName(NontermID), It->second); } } return OS.str(); } -llvm::ArrayRef LRTable::getActions(StateID State, - SymbolID Terminal) const { - assert(pseudo::isToken(Terminal) && "expect terminal symbol!"); - return find(State, Terminal); -} - -LRTable::StateID LRTable::getGoToState(StateID State, - SymbolID Nonterminal) const { - assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!"); - auto Result = find(State, Nonterminal); - assert(Result.size() == 1 && Result.front().kind() == Action::GoTo); - return Result.front().getGoToState(); -} - -llvm::ArrayRef LRTable::find(StateID Src, SymbolID ID) const { - assert(Src + 1u < StateOffset.size()); - std::pair Range = - std::make_pair(StateOffset[Src], StateOffset[Src + 1]); - auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first, - Symbols.data() + Range.second); - - assert(llvm::is_sorted(SymbolRange) && - "subrange of the Symbols should be sorted!"); - const LRTable::StateID *Start = - llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; }); - if (Start == SymbolRange.end()) - return {}; - const LRTable::StateID *End = Start; - while (End != SymbolRange.end() && *End == ID) - ++End; - return llvm::makeArrayRef(&Actions[Start - Symbols.data()], - /*length=*/End - Start); -} - LRTable::StateID LRTable::getStartState(SymbolID Target) const { assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!"); auto It = llvm::partition_point( @@ -117,5 +63,14 @@ return It->second; } +LRTable::LRTable(Builder B) + : Shift(std::move(B.Shift)), Reduce(std::move(B.Reduce), B.StateCount), + Goto(std::move(B.GoTo)), StartStates(std::move(B.StartStates)) { + assert(llvm::all_of(Shift, + [&](auto &E) { return E.first.State < B.StateCount; })); + assert(llvm::all_of(Goto, + [&](auto &E) { return E.first.State < B.StateCount; })); +} + } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp --- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -9,130 +9,40 @@ #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRGraph.h" #include "clang-pseudo/grammar/LRTable.h" -#include "clang/Basic/TokenKinds.h" #include -namespace llvm { -template <> struct DenseMapInfo { - using Entry = clang::pseudo::LRTable::Entry; - static inline Entry getEmptyKey() { - static Entry E{static_cast(-1), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static inline Entry getTombstoneKey() { - static Entry E{static_cast(-2), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static unsigned getHashValue(const Entry &I) { - return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque()); - } - static bool isEqual(const Entry &LHS, const Entry &RHS) { - return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol && - LHS.Act == RHS.Act; - } -}; -} // namespace llvm - namespace clang { namespace pseudo { -class LRTable::Builder { -public: - Builder(llvm::ArrayRef> StartStates) - : StartStates(StartStates) {} - - bool insert(Entry E) { return Entries.insert(std::move(E)).second; } - LRTable build(const GrammarTable >, unsigned NumStates) && { - // E.g. given the following parsing table with 3 states and 3 terminals: - // - // a b c - // +-------+----+-------+-+ - // |state0 | | s0,r0 | | - // |state1 | acc| | | - // |state2 | | r1 | | - // +-------+----+-------+-+ - // - // The final LRTable: - // - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4 - // - Symbols: [ b, b, a, b] - // Actions: [ s0, r0, acc, r1] - // ~~~~~~ range for state 0 - // ~~~~ range for state 1 - // ~~ range for state 2 - // First step, we sort all entries by (State, Symbol, Action). - std::vector Sorted(Entries.begin(), Entries.end()); - llvm::sort(Sorted, [](const Entry &L, const Entry &R) { - return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) < - std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque()); - }); - - LRTable Table; - Table.Actions.reserve(Sorted.size()); - Table.Symbols.reserve(Sorted.size()); - // We are good to finalize the States and Actions. - for (const auto &E : Sorted) { - Table.Actions.push_back(E.Act); - Table.Symbols.push_back(E.Symbol); - } - // Initialize the terminal and nonterminal offset, all ranges are empty by - // default. - Table.StateOffset = std::vector(NumStates + 1, 0); - size_t SortedIndex = 0; - for (StateID State = 0; State < Table.StateOffset.size(); ++State) { - Table.StateOffset[State] = SortedIndex; - while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State) - ++SortedIndex; - } - Table.StartStates = std::move(StartStates); - return Table; - } - -private: - llvm::DenseSet Entries; - std::vector> StartStates; -}; +LRTable LRTable::buildLR0(const Grammar &G) { + auto Graph = LRGraph::buildLR0(G); + assert(Graph.states().size() <= (1 << StateBits) && + "Graph states execceds the maximum limit!"); -LRTable LRTable::buildForTests(const GrammarTable >, - llvm::ArrayRef Entries) { - StateID MaxState = 0; - for (const auto &Entry : Entries) - MaxState = std::max(MaxState, Entry.State); - Builder Build({}); - for (const Entry &E : Entries) - Build.insert(E); - return std::move(Build).build(GT, /*NumStates=*/MaxState + 1); -} + Builder B; + B.StartStates = Graph.startStates(); + B.StateCount = Graph.states().size(); -LRTable LRTable::buildSLR(const Grammar &G) { - auto Graph = LRGraph::buildLR0(G); - Builder Build(Graph.startStates()); for (const auto &T : Graph.edges()) { - Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst); - Build.insert({T.Src, T.Label, Act}); + (isToken(T.Label) ? B.Shift : B.GoTo) + .try_emplace(StateSymbol{T.Src, T.Label}, T.Dst); } - assert(Graph.states().size() <= (1 << StateBits) && - "Graph states execceds the maximum limit!"); - auto FollowSets = followSets(G); + for (StateID SID = 0; SID < Graph.states().size(); ++SID) { for (const Item &I : Graph.states()[SID].Items) { - // If we've just parsed the start symbol, this means we successfully parse - // the input. We don't add the reduce action of `_ := start_symbol` in the - // LRTable (the GLR parser handles it specifically). - if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) - continue; if (!I.hasNext()) { - // If we've reached the end of a rule A := ..., then we can reduce if - // the next token is in the follow set of A. - for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) { - assert(isToken(Follow)); - Build.insert({SID, Follow, Action::reduce(I.rule())}); - } + // If we've just parsed the start symbol, this means we successfully + // parse the input. We don't add the reduce action of `_ := + // start_symbol` in the LRTable (the GLR parser handles it + // specifically). + if (G.lookupRule(I.rule()).Target == G.underscore()) + continue; + // If we've reached the end of a rule A := ..., then we can reduce. + B.Reduce.push_back({SID, I.rule()}); } } } - return std::move(Build).build(G.table(), Graph.states().size()); + return LRTable(std::move(B)); } } // namespace pseudo diff --git a/clang-tools-extra/pseudo/test/lr-build-basic.test b/clang-tools-extra/pseudo/test/lr-build-basic.test --- a/clang-tools-extra/pseudo/test/lr-build-basic.test +++ b/clang-tools-extra/pseudo/test/lr-build-basic.test @@ -23,6 +23,6 @@ # TABLE-NEXT: 'id': go to state 2 # TABLE-NEXT: State 1 # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id' +# TABLE-NEXT: reduce by rule 1 'expr := id' # TABLE-NEXT: State 3 -# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER' +# TABLE-NEXT: reduce by rule 0 'id := IDENTIFIER' diff --git a/clang-tools-extra/pseudo/test/lr-build-conflicts.test b/clang-tools-extra/pseudo/test/lr-build-conflicts.test --- a/clang-tools-extra/pseudo/test/lr-build-conflicts.test +++ b/clang-tools-extra/pseudo/test/lr-build-conflicts.test @@ -35,12 +35,10 @@ # TABLE-NEXT: State 1 # TABLE-NEXT: '-': shift state 3 # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER' -# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER' +# TABLE-NEXT: reduce by rule 1 'expr := IDENTIFIER' # TABLE-NEXT: State 3 # TABLE-NEXT: 'IDENTIFIER': shift state 2 # TABLE-NEXT: 'expr': go to state 4 # TABLE-NEXT: State 4 -# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr' +# TABLE-NEXT: reduce by rule 0 'expr := expr - expr' # TABLE-NEXT: '-': shift state 3 -# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr' diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -108,7 +108,7 @@ llvm::outs() << G->dump(); if (PrintGraph) llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G); - auto LRTable = clang::pseudo::LRTable::buildSLR(*G); + auto LRTable = clang::pseudo::LRTable::buildLR0(*G); if (PrintTable) llvm::outs() << LRTable.dumpForTests(*G); if (PrintStatistics) diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -29,8 +29,9 @@ namespace { -using Action = LRTable::Action; using testing::AllOf; +using testing::ElementsAre; +using testing::UnorderedElementsAre; MATCHER_P(state, StateID, "") { return arg->State == StateID; } MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; } @@ -83,17 +84,10 @@ return 0; } - NewHeadCallback captureNewHeads() { - return [this](const GSS::Node *NewHead) { - NewHeadResults.push_back(NewHead); - }; - }; - protected: std::unique_ptr G; ForestArena Arena; GSS GSStack; - std::vector NewHeadResults; }; TEST_F(GLRTest, ShiftMergingHeads) { @@ -107,33 +101,35 @@ // 0---1---4 // └---2---┘ // └---3---5 + LRTable::Builder LR; + LR.StateCount = 6; + LR.Shift[{1, tokenSymbol(tok::semi)}] = 4; + LR.Shift[{2, tokenSymbol(tok::semi)}] = 4; + LR.Shift[{3, tokenSymbol(tok::semi)}] = 5; + auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{}); - auto *GSSNode1 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, + auto *GSSNode1 = GSStack.addNode(/*State=*/1, /*ForestNode=*/nullptr, /*Parents=*/{GSSNode0}); - auto *GSSNode2 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, + auto *GSSNode2 = GSStack.addNode(/*State=*/2, /*ForestNode=*/nullptr, /*Parents=*/{GSSNode0}); - auto *GSSNode3 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, + auto *GSSNode3 = GSStack.addNode(/*State=*/3, /*ForestNode=*/nullptr, /*Parents=*/{GSSNode0}); buildGrammar({}, {}); // Create a fake empty grammar. - LRTable T = LRTable::buildForTests(G->table(), /*Entries=*/{}); + LRTable T = std::move(LR).build(); ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0); - std::vector PendingShift = { - {GSSNode1, Action::shift(4)}, - {GSSNode3, Action::shift(5)}, - {GSSNode2, Action::shift(4)}, - }; - glrShift(PendingShift, SemiTerminal, {*G, T, Arena, GSStack}, - captureNewHeads()); - - EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre( - AllOf(state(4), parsedSymbol(&SemiTerminal), - parents({GSSNode1, GSSNode2})), - AllOf(state(5), parsedSymbol(&SemiTerminal), - parents({GSSNode3})))) - << NewHeadResults; + std::vector NewHeads; + glrShift({GSSNode1, GSSNode3, GSSNode2}, SemiTerminal, + {*G, T, Arena, GSStack}, NewHeads); + + EXPECT_THAT(NewHeads, UnorderedElementsAre( + AllOf(state(4), parsedSymbol(&SemiTerminal), + parents({GSSNode1, GSSNode2})), + AllOf(state(5), parsedSymbol(&SemiTerminal), + parents({GSSNode3})))) + << NewHeads; } TEST_F(GLRTest, ReduceConflictsSplitting) { @@ -146,26 +142,28 @@ buildGrammar({"class-name", "enum-name"}, {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"}); - LRTable Table = LRTable::buildForTests( - G->table(), {{/*State=*/0, id("class-name"), Action::goTo(2)}, - {/*State=*/0, id("enum-name"), Action::goTo(3)}}); + LRTable::Builder LR; + LR.StateCount = 4; + LR.GoTo[{0, id("class-name")}] = 2; + LR.GoTo[{0, id("enum-name")}] = 3; + LR.Reduce.push_back({1, ruleFor("class-name")}); + LR.Reduce.push_back({1, ruleFor("enum-name")}); + LRTable Table = std::move(LR).build(); const auto *GSSNode0 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{}); const auto *GSSNode1 = - GSStack.addNode(3, &Arena.createTerminal(tok::identifier, 0), {GSSNode0}); - - std::vector PendingReduce = { - {GSSNode1, Action::reduce(ruleFor("class-name"))}, - {GSSNode1, Action::reduce(ruleFor("enum-name"))}}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, - captureNewHeads()); - EXPECT_THAT(NewHeadResults, - testing::UnorderedElementsAre( - AllOf(state(2), parsedSymbolID(id("class-name")), - parents({GSSNode0})), - AllOf(state(3), parsedSymbolID(id("enum-name")), - parents({GSSNode0})))) << NewHeadResults; + GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0}); + + std::vector Heads = {GSSNode1}; + glrReduce(Heads, {*G, Table, Arena, GSStack}); + EXPECT_THAT(Heads, UnorderedElementsAre( + GSSNode1, + AllOf(state(2), parsedSymbolID(id("class-name")), + parents({GSSNode0})), + AllOf(state(3), parsedSymbolID(id("enum-name")), + parents({GSSNode0})))) + << Heads; } TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) { @@ -189,24 +187,26 @@ /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1), /*Parents=*/{GSSNode2, GSSNode3}); - LRTable Table = LRTable::buildForTests( - G->table(), - {{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)}, - {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}}); - std::vector PendingReduce = { - {GSSNode4, Action::reduce(ruleFor("ptr-operator"))}}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, - captureNewHeads()); - - EXPECT_THAT(NewHeadResults, - testing::UnorderedElementsAre( - AllOf(state(5), parsedSymbolID(id("ptr-operator")), - parents({GSSNode2})), - AllOf(state(6), parsedSymbolID(id("ptr-operator")), - parents({GSSNode3})))) << NewHeadResults; + LRTable::Builder LR; + LR.StateCount = 7; + LR.GoTo[{2, id("ptr-operator")}] = 5; + LR.GoTo[{3, id("ptr-operator")}] = 6; + LR.Reduce.push_back({4, ruleFor("ptr-operator")}); + LRTable Table = std::move(LR).build(); + + std::vector Heads = {GSSNode4}; + glrReduce(Heads, {*G, Table, Arena, GSStack}); + + EXPECT_THAT(Heads, UnorderedElementsAre( + GSSNode4, + AllOf(state(5), parsedSymbolID(id("ptr-operator")), + parents({GSSNode2})), + AllOf(state(6), parsedSymbolID(id("ptr-operator")), + parents({GSSNode3})))) + << Heads; // Verify that the payload of the two new heads is shared, only a single // ptr-operator node is created in the forest. - EXPECT_EQ(NewHeadResults[0]->Payload, NewHeadResults[1]->Payload); + EXPECT_EQ(Heads[1]->Payload, Heads[2]->Payload); } TEST_F(GLRTest, ReduceJoiningWithMultipleBases) { @@ -238,28 +238,26 @@ GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode, /*Parents=*/{GSSNode2}); - LRTable Table = LRTable::buildForTests( - G->table(), - {{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)}, - {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)}}); + LRTable::Builder LR; + LR.StateCount = 6; + LR.GoTo[{1, id("type-name")}] = 5; + LR.GoTo[{2, id("type-name")}] = 5; // FIXME: figure out a way to get rid of the hard-coded reduce RuleID! - std::vector PendingReduce = { - { - GSSNode3, Action::reduce(/*RuleID=*/0) // type-name := class-name - }, - { - GSSNode4, Action::reduce(/*RuleID=*/1) // type-name := enum-name - }}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, - captureNewHeads()); + LR.Reduce.push_back({3, /* type-name := class-name */0}); + LR.Reduce.push_back({4, /* type-name := enum-name */1}); + LRTable Table = std::move(LR).build(); + + std::vector Heads = {GSSNode3, GSSNode4}; + glrReduce(Heads, {*G, Table, Arena, GSStack}); // Verify that the stack heads are joint at state 5 after reduces. - EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(AllOf( - state(5), parsedSymbolID(id("type-name")), - parents({GSSNode1, GSSNode2})))) - << NewHeadResults; + EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4, + AllOf(state(5), + parsedSymbolID(id("type-name")), + parents({GSSNode1, GSSNode2})))) + << Heads; // Verify that we create an ambiguous ForestNode of two parses of `type-name`. - EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G), + EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G), "[ 1, end) type-name := \n" "[ 1, end) ├─type-name := class-name\n" "[ 1, end) │ └─class-name := \n" @@ -295,25 +293,22 @@ const auto *GSSNode4 = GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal, /*Parents=*/{GSSNode2}); - - LRTable Table = LRTable::buildForTests( - G->table(), {{/*State=*/0, id("pointer"), Action::goTo(5)}}); + LRTable::Builder LR; + LR.StateCount = 6; + LR.GoTo[{0, id("pointer")}] = 5; // FIXME: figure out a way to get rid of the hard-coded reduce RuleID! - std::vector PendingReduce = { - { - GSSNode3, Action::reduce(/*RuleID=*/0) // pointer := class-name * - }, - { - GSSNode4, Action::reduce(/*RuleID=*/1) // pointer := enum-name * - }}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, - captureNewHeads()); - - EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre( - AllOf(state(5), parsedSymbolID(id("pointer")), - parents({GSSNode0})))) - << NewHeadResults; - EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G), + LR.Reduce.push_back({3, /* pointer := class-name */0}); + LR.Reduce.push_back({4, /* pointer := enum-name */1}); + LRTable Table = std::move(LR).build(); + + std::vector Heads = { GSSNode3, GSSNode4 }; + glrReduce(Heads, {*G, Table, Arena, GSStack}); + + EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4, + AllOf(state(5), parsedSymbolID(id("pointer")), + parents({GSSNode0})))) + << Heads; + EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G), "[ 0, end) pointer := \n" "[ 0, end) ├─pointer := class-name *\n" "[ 0, 1) │ ├─class-name := \n" @@ -342,7 +337,7 @@ )bnf"); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); + auto LRTable = LRTable::buildLR0(*G); const ForestNode &Parsed = glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); @@ -380,7 +375,7 @@ )bnf"); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); + auto LRTable = LRTable::buildLR0(*G); const ForestNode &Parsed = glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); @@ -405,7 +400,7 @@ // of the nonterminal `test` when the next token is `eof`, verify that the // parser stops at the right state. const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); + auto LRTable = LRTable::buildLR0(*G); const ForestNode &Parsed = glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); diff --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp --- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp @@ -142,66 +142,6 @@ "Unknown attribute 'unknown'")); } -TEST_F(GrammarTest, FirstAndFollowSets) { - build( - R"bnf( -_ := expr -expr := expr - term -expr := term -term := IDENTIFIER -term := ( expr ) -)bnf"); - ASSERT_TRUE(Diags.empty()); - auto ToPairs = [](std::vector> Input) { - std::vector>> Sets; - for (SymbolID ID = 0; ID < Input.size(); ++ID) - Sets.emplace_back(ID, std::move(Input[ID])); - return Sets; - }; - - EXPECT_THAT( - ToPairs(firstSets(*G)), - UnorderedElementsAre( - Pair(id("_"), UnorderedElementsAre(id("IDENTIFIER"), id("("))), - Pair(id("expr"), UnorderedElementsAre(id("IDENTIFIER"), id("("))), - Pair(id("term"), UnorderedElementsAre(id("IDENTIFIER"), id("("))))); - EXPECT_THAT( - ToPairs(followSets(*G)), - UnorderedElementsAre( - Pair(id("_"), UnorderedElementsAre(id("EOF"))), - Pair(id("expr"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))), - Pair(id("term"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))))); - - build(R"bnf( -# A simplfied C++ decl-specifier-seq. -_ := decl-specifier-seq -decl-specifier-seq := decl-specifier decl-specifier-seq -decl-specifier-seq := decl-specifier -decl-specifier := simple-type-specifier -decl-specifier := INLINE -simple-type-specifier := INT - )bnf"); - ASSERT_TRUE(Diags.empty()); - EXPECT_THAT( - ToPairs(firstSets(*G)), - UnorderedElementsAre( - Pair(id("_"), UnorderedElementsAre(id("INLINE"), id("INT"))), - Pair(id("decl-specifier-seq"), - UnorderedElementsAre(id("INLINE"), id("INT"))), - Pair(id("simple-type-specifier"), UnorderedElementsAre(id("INT"))), - Pair(id("decl-specifier"), - UnorderedElementsAre(id("INLINE"), id("INT"))))); - EXPECT_THAT( - ToPairs(followSets(*G)), - UnorderedElementsAre( - Pair(id("_"), UnorderedElementsAre(id("EOF"))), - Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("EOF"))), - Pair(id("decl-specifier"), - UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))), - Pair(id("simple-type-specifier"), - UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))))); -} - } // namespace } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp --- a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp +++ b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp @@ -17,36 +17,33 @@ namespace pseudo { namespace { +using testing::ElementsAre; using testing::IsEmpty; using testing::UnorderedElementsAre; -using Action = LRTable::Action; TEST(LRTable, Builder) { - GrammarTable GTable; - + GrammarTable GT; // eof semi ... // +-------+----+-------+--- // |state0 | | s0,r0 |... // |state1 | acc| |... // |state2 | | r1 |... // +-------+----+-------+--- - std::vector Entries = { - {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)}, - {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)}, - {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)}, - {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}}; - GrammarTable GT; - LRTable T = LRTable::buildForTests(GT, Entries); - EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty()); - EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)), - UnorderedElementsAre(Action::shift(0), Action::reduce(0))); - EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)), - UnorderedElementsAre(Action::reduce(2))); - EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty()); - EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)), - UnorderedElementsAre(Action::reduce(1))); + LRTable::Builder Builder; + Builder.StateCount = 3; + Builder.Shift[{/*State=*/0, tokenSymbol(tok::semi)}] = 0; + Builder.Reduce.push_back({/*State=*/0, /*Rule=*/0}); + Builder.Reduce.push_back({/*State=*/1, /*Rule=*/2}); + Builder.Reduce.push_back({/*State=*/2, /*Rule=*/1}); + LRTable T = std::move(Builder).build(); + EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::eof)), llvm::None); + EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::semi)), LRTable::StateID{0}); + EXPECT_THAT(T.getReduceRules(0), ElementsAre(0)); + EXPECT_EQ(T.getShiftState(1, tokenSymbol(tok::semi)), llvm::None); + EXPECT_THAT(T.getReduceRules(1), ElementsAre(2)); + EXPECT_THAT(T.getReduceRules(2), ElementsAre(1)); // Verify the behaivor for other non-available-actions terminals. - EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty()); + EXPECT_EQ(T.getShiftState(2, tokenSymbol(tok::kw_int)), llvm::None); } } // namespace