diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h @@ -38,6 +38,8 @@ #include "clang-pseudo/grammar/Grammar.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Capacity.h" +#include #include #include @@ -123,18 +125,24 @@ uint16_t Value : ValueBits; }; - // Returns all available actions for the given state on a terminal. - // Expected to be called by LR parsers. - llvm::ArrayRef getActions(StateID State, SymbolID Terminal) const; + struct Reduce { + StateID State; + RuleID Rule; + std::bitset Filter; + }; + + llvm::ArrayRef getReduces(StateID State) const { + unsigned Begin = ReducesLookup[State], End = Begin; + while (Reduces[End].State == State) + ++End; + return llvm::makeArrayRef(&Reduces[Begin], &Reduces[End]); + } + // Returns the state after we reduce a nonterminal. // Expected to be called by LR parsers. StateID getGoToState(StateID State, SymbolID Nonterminal) const; llvm::Optional getShiftState(StateID State, SymbolID Terminal) const; - // Looks up available actions. - // Returns empty if no available actions in the table. - llvm::ArrayRef find(StateID State, SymbolID Symbol) const; - // Returns the state from which the LR parser should start to parse the input // tokens as the given StartSymbol. // @@ -147,9 +155,9 @@ StateID getStartState(SymbolID StartSymbol) const; size_t bytes() const { - return sizeof(*this) + Actions.capacity() * sizeof(Action) + - Symbols.capacity() * sizeof(SymbolID) + - StateOffset.capacity() * sizeof(uint32_t); + return sizeof(*this) + llvm::capacity_in_bytes(Shift) + + llvm::capacity_in_bytes(Goto) + llvm::capacity_in_bytes(Reduces) + + llvm::capacity_in_bytes(ReducesLookup); } std::string dumpStatistics() const; @@ -169,19 +177,15 @@ static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef); private: - // Conceptually the LR table is a multimap from (State, SymbolID) => Action. - // Our physical representation is quite different for compactness. - - // Index is StateID, value is the offset into Symbols/Actions - // where the entries for this state begin. - // Give a state id, the corresponding half-open range of Symbols/Actions is - // [StateOffset[id], StateOffset[id+1]). - std::vector StateOffset; - // Parallel to Actions, the value is SymbolID (columns of the matrix). - // Grouped by the StateID, and only subranges are sorted. - std::vector Symbols; - // A flat list of available actions, sorted by (State, SymbolID). - std::vector Actions; + unsigned NumShift = 0; + unsigned NumGoto = 0; + unsigned NumReduce = 0; + + llvm::DenseMap, StateID> Shift; + llvm::DenseMap, StateID> Goto; + std::vector Reduces; + std::vector ReducesLookup; + // A sorted table, storing the start state for each target parsing symbol. std::vector> StartStates; }; diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -283,10 +283,10 @@ if (popAndPushTrivial()) continue; for (const auto &A : - Params.Table.getActions((*Heads)[PoppedHeads]->State, Lookahead)) { - if (A.kind() != LRTable::Action::Reduce) + Params.Table.getReduces((*Heads)[PoppedHeads]->State)) { + if (!A.Filter.test(symbolToToken(Lookahead))) continue; - pop((*Heads)[PoppedHeads], A.getReduceRule()); + pop((*Heads)[PoppedHeads], A.Rule); } } } @@ -364,12 +364,12 @@ return false; const GSS::Node *Head = Heads->back(); llvm::Optional RID; - for (auto &A : Params.Table.getActions(Head->State, Lookahead)) { - if (A.kind() != LRTable::Action::Reduce) + for (auto &A : Params.Table.getReduces(Head->State)) { + if (!A.Filter.test(symbolToToken(Lookahead))) continue; if (RID.hasValue()) return false; - RID = A.getReduceRule(); + RID = A.Rule; } if (!RID.hasValue()) return false; diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp --- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp @@ -8,8 +8,8 @@ #include "clang-pseudo/grammar/LRTable.h" #include "clang-pseudo/grammar/Grammar.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" @@ -35,10 +35,14 @@ return llvm::formatv(R"( Statistics of the LR parsing table: number of states: {0} - number of actions: {1} - size of the table (bytes): {2} + number of actions: shift={1} reduce={2} goto={3} + size of the table (bytes): {4} (Shift={5} Reduce={6} Goto={7}) )", - StateOffset.size() - 1, Actions.size(), bytes()) + ReducesLookup.size() - 1, NumShift, NumReduce, NumGoto, + bytes(), capacity_in_bytes(Shift), + llvm::capacity_in_bytes(Reduces) + + llvm::capacity_in_bytes(ReducesLookup), + capacity_in_bytes(Goto)) .str(); } @@ -46,27 +50,27 @@ std::string Result; llvm::raw_string_ostream OS(Result); OS << "LRTable:\n"; - for (StateID S = 0; S < StateOffset.size() - 1; ++S) { + for (StateID S = 0; S < ReducesLookup.size() - 1; ++S) { OS << llvm::formatv("State {0}\n", S); for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { SymbolID TokID = tokenSymbol(static_cast(Terminal)); - for (auto A : find(S, TokID)) { - if (A.kind() == LRTable::Action::Shift) - OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", - G.symbolName(TokID), A.getShiftState()); - else if (A.kind() == LRTable::Action::Reduce) + if (auto NewState = getShiftState(S, TokID)) { + OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", + G.symbolName(TokID), *NewState); + } + for (auto &R : getReduces(S)) { + if (R.Filter.test(TokID)) OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n", - G.symbolName(TokID), A.getReduceRule(), - G.dumpRule(A.getReduceRule())); + G.symbolName(TokID), R.Rule, + G.dumpRule(R.Rule)); } } for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size(); ++NontermID) { - if (find(S, NontermID).empty()) - continue; - OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", - G.symbolName(NontermID), - getGoToState(S, NontermID)); + auto It = Goto.find({NontermID, S}); + if (It != Goto.end()) + OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", + G.symbolName(NontermID), It->second); } } return OS.str(); @@ -75,44 +79,18 @@ llvm::Optional LRTable::getShiftState(StateID State, SymbolID Terminal) const { assert(pseudo::isToken(Terminal) && "expected terminal symbol!"); - for (const auto &Result : find(State, Terminal)) - if (Result.kind() == Action::Shift) - return Result.getShiftState(); - return llvm::None; -} - -llvm::ArrayRef LRTable::getActions(StateID State, - SymbolID Terminal) const { - assert(pseudo::isToken(Terminal) && "expect terminal symbol!"); - return find(State, Terminal); + auto It = Shift.find({Terminal, State}); + if (It == Shift.end()) + return llvm::None; + return It->second; } LRTable::StateID LRTable::getGoToState(StateID State, SymbolID Nonterminal) const { assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!"); - auto Result = find(State, Nonterminal); - assert(Result.size() == 1 && Result.front().kind() == Action::GoTo); - return Result.front().getGoToState(); -} - -llvm::ArrayRef LRTable::find(StateID Src, SymbolID ID) const { - assert(Src + 1u < StateOffset.size()); - std::pair Range = - std::make_pair(StateOffset[Src], StateOffset[Src + 1]); - auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first, - Symbols.data() + Range.second); - - assert(llvm::is_sorted(SymbolRange) && - "subrange of the Symbols should be sorted!"); - const LRTable::StateID *Start = - llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; }); - if (Start == SymbolRange.end()) - return {}; - const LRTable::StateID *End = Start; - while (End != SymbolRange.end() && *End == ID) - ++End; - return llvm::makeArrayRef(&Actions[Start - Symbols.data()], - /*length=*/End - Start); + auto It = Goto.find({Nonterminal, State}); + assert(It != Goto.end()); + return It->second; } LRTable::StateID LRTable::getStartState(SymbolID Target) const { diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp --- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -9,7 +9,6 @@ #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRGraph.h" #include "clang-pseudo/grammar/LRTable.h" -#include "clang/Basic/TokenKinds.h" #include namespace llvm { @@ -45,46 +44,47 @@ bool insert(Entry E) { return Entries.insert(std::move(E)).second; } LRTable build(const GrammarTable >, unsigned NumStates) && { - // E.g. given the following parsing table with 3 states and 3 terminals: - // - // a b c - // +-------+----+-------+-+ - // |state0 | | s0,r0 | | - // |state1 | acc| | | - // |state2 | | r1 | | - // +-------+----+-------+-+ - // - // The final LRTable: - // - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4 - // - Symbols: [ b, b, a, b] - // Actions: [ s0, r0, acc, r1] - // ~~~~~~ range for state 0 - // ~~~~ range for state 1 - // ~~ range for state 2 - // First step, we sort all entries by (State, Symbol, Action). - std::vector Sorted(Entries.begin(), Entries.end()); - llvm::sort(Sorted, [](const Entry &L, const Entry &R) { - return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) < - std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque()); - }); - + llvm::DenseMap, Reduce> Reduces; LRTable Table; - Table.Actions.reserve(Sorted.size()); - Table.Symbols.reserve(Sorted.size()); - // We are good to finalize the States and Actions. - for (const auto &E : Sorted) { - Table.Actions.push_back(E.Act); - Table.Symbols.push_back(E.Symbol); + for (const auto &E : Entries) { + switch (E.Act.kind()) { + case Action::Sentinel: + break; + case Action::Shift: + ++Table.NumShift; + Table.Shift.try_emplace(std::make_pair(E.Symbol, E.State), + E.Act.getShiftState()); + break; + case Action::GoTo: + ++Table.NumGoto; + Table.Goto.try_emplace(std::make_pair(E.Symbol, E.State), + E.Act.getGoToState()); + break; + case Action::Reduce: + ++Table.NumReduce; + auto &R = Reduces[{E.State, E.Act.getReduceRule()}]; + R.State = E.State; + R.Rule = E.Act.getReduceRule(); + R.Filter.set(symbolToToken(E.Symbol)); + break; + } } - // Initialize the terminal and nonterminal offset, all ranges are empty by - // default. - Table.StateOffset = std::vector(NumStates + 1, 0); - size_t SortedIndex = 0; - for (StateID State = 0; State < Table.StateOffset.size(); ++State) { - Table.StateOffset[State] = SortedIndex; - while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State) - ++SortedIndex; + + Table.Reduces.reserve(Reduces.size()); + for (const auto &R : Reduces) + Table.Reduces.push_back(R.second); + llvm::sort(Table.Reduces, [](auto &L, auto &R) { + return std::tie(L.State, L.Rule) < std::tie(R.State, R.Rule); + }); + Table.ReducesLookup.resize(NumStates + 1); + unsigned ReducesIndex = 0; + for (StateID S = 0; S < NumStates; ++S) { + Table.ReducesLookup[S] = ReducesIndex; + while (ReducesIndex < Reduces.size() && + Table.Reduces[ReducesIndex].State == S) + ReducesIndex++; } + Table.ReducesLookup[NumStates] = ReducesIndex; Table.StartStates = std::move(StartStates); return Table; }