diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h b/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h --- a/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h +++ b/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h @@ -174,6 +174,7 @@ // SymbolID is the index of the table. std::vector Nonterminals; }; +void initTerminals(std::vector &Terminals); } // namespace pseudo } // namespace syntax diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h b/clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h @@ -0,0 +1,199 @@ +//===--- LRTable.h - Define LR Parsing Table ---------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The LRTable (referred as LR parsing table in the LR literature) is the core +// component in LR parsers, it drives the LR parsers by specifying an action to +// take given the current state on the top of the stack and the current +// lookahead token. +// +// The LRTable can be described as a matrix where the rows represent +// the states of the LR graph, the columns represent the symbols of the +// grammar, and each entry of the matrix (called action) represents a +// state transition in the graph. +// +// Typically, based on the category of the grammar symbol, the LRTable is +// broken into two logically separate tables: +// - ACTION table with terminals as columns -- e.g ACTION[S, a] specifies +// next action (shift/reduce/accept/error) on state S under a lookahead +// terminal a +// - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specify +// the state which we transist to from the state S with the nonterminal X +// +// LRTable is *performance-critial* as it is consulted frequently during a +// parse. In general, LRTable is very sprase (most of the entries are empty). +// For example, for the C++ language, the SLR table has ~1500 states and 650 +// symbols which results in a matrix having 975K entries, ~90% of entries are +// empty. +// +// This file implements a speed-and-space-efficient LRTable. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H +#define LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H + +#include "clang/Tooling/Syntax/Pseudo/Grammar.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include +#include + +namespace clang { +namespace syntax { +namespace pseudo { + +// Represents the LR parsing table, which can efficiently the question "what is +// the next step given the lookahead token and current state on top of the +// stack?". +// +// This is a dense implementation, which only takes an amount of space that is +// proportional to the number of non-empty entries in the table. +// +// Unlike A typical LR parsing table allows at most one available action per +// entry, conflicted actions are allowed in LRTable. +class LRTable { +public: + using StateID = uint16_t; + // Action represents the terminal and nonterminal actions, it combines the + // entry of the ACTION and GOTO tables from the LR literature. + class Action { + public: + enum Kind : uint8_t { + // Terminal actions, corresponding to entries of ACTION table. + Error = 0, + // Shift to state n: move forward with the lookahead, and push state n + // onto the state stack. + // A shift is a forward transition, and the value n is the next state that + // the parser is to enter. + Shift, + // Reduce by a rule, the value is a ruleID. + Reduce, + // Signals that we have parsd the input successfully. + Accept, + + // Nonterminal actions, corresponding to entry of GOTO table. + // Go to state n: pust state n onto the state stack. + // Similar to Shift, but it is a nonterminal forward transition. + GoTo, + }; + + static Action accept() { return Action(Accept, 0); } + static Action goTo(StateID S) { return Action(GoTo, S); } + static Action shift(StateID S) { return Action(Shift, S); } + static Action reduce(RuleID RID) { return Action(Reduce, RID); } + static Action sentinel() { return Action(Error, 0); } + + StateID getShiftState() const { + assert(kind() == Shift); + return Value; + } + StateID getGoToState() const { + assert(kind() == GoTo); + return Value; + } + RuleID getReduceRule() const { + assert(kind() == Reduce); + return Value; + } + Kind kind() const { return K; } + + bool operator==(const Action &L) const { return value() == L.value(); } + uint16_t value() const { return K << ValueBits | Value; }; + + private: + Action(Kind K1, uint16_t Value) : K(K1), Value(Value) {} + static constexpr int ValueBits = 13; + static constexpr int KindBits = 3; + Kind K : KindBits; + // Either StateID or RuleID, depending on the Kind. + uint16_t Value : ValueBits; + }; + + // Returns all available actions for the given state on a terminal. + llvm::ArrayRef getActions(StateID State, SymbolID Terminal) const; + // Returns the state after we reduce a nonterminal. + StateID getGoToState(StateID State, SymbolID Nonterminal) const; + + // Looks up available actions. + // Returns empty if no available actions in the table. + llvm::ArrayRef find(StateID State, SymbolID Symbol) const; + + size_t bytes() const { + return sizeof(*this) + Actions.capacity() * sizeof(Action) + + StateIdx.capacity() * sizeof(StateID) + + NontermIdx.capacity() * sizeof(uint32_t) + + TerminalIdx.capacity() * sizeof(uint32_t); + } + + std::string dumpStatistics() const; + std::string dumpForTests(const Grammar &G) const; + + class Builder { + public: + bool insert(StateID State, SymbolID Symbol, LRTable::Action A); + LRTable build(const GrammarTable &) &&; + + private: + struct Entry { + StateID State; + SymbolID Symbol; + Action Act; + }; + friend struct llvm::DenseMapInfo; + llvm::DenseSet Entries; + }; + // Build a SLR(1) parsing table. + static LRTable buildSLR(const Grammar &G); + +private: + // Index is nonterminal SymbolID, values are indices of StateIdx. + // Give a non-terminal id, the corresponding half-open range of StateIdx is + // [NontermIdx[id], NontermIdx[id+1]). + std::vector NontermIdx; + // Similar to NontermIdx, but for terminals. + // Index is tok::TokenKind. + std::vector TerminalIdx; + // Parallel to Actions, the value is Src state (rows of the matrix). + // Grouped by the SymbolID, and only subranges are sorted. + std::vector StateIdx; + // A flat list of available (non-error) actions. + // Sorted by (SymbolID, State), concepetually, an action is identified by a + // row (State) and a column (SymbolID) in a matrix. + std::vector Actions; +}; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &); + +} // namespace pseudo +} // namespace syntax +} // namespace clang + +namespace llvm { +template <> +struct DenseMapInfo { + using Entry = clang::syntax::pseudo::LRTable::Builder::Entry; + static inline Entry getEmptyKey() { + static Entry E{static_cast(-1), 0, + clang::syntax::pseudo::LRTable::Action::sentinel()}; + return E; + } + static inline Entry getTombstoneKey() { + static Entry E{static_cast(-2), 0, + clang::syntax::pseudo::LRTable::Action::sentinel()}; + return E; + } + static unsigned getHashValue(const Entry &I) { + return llvm::hash_combine(I.State, I.Symbol, I.Act.value()); + } + static bool isEqual(const Entry &LHS, const Entry &RHS) { + return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol && + LHS.Act == RHS.Act; + } +}; +} // namespace llvm + +#endif // LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H diff --git a/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt b/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt @@ -4,7 +4,9 @@ Grammar.cpp GrammarBNF.cpp LRGraph.cpp - + LRTable.cpp + LRTableBuild.cpp + LINK_LIBS clangBasic clangLex diff --git a/clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp b/clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp --- a/clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp +++ b/clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp @@ -163,6 +163,17 @@ return FollowSets; } +void initTerminals(std::vector &Terminals) { + Terminals.clear(); + Terminals.reserve(NumTerminals); + for (unsigned I = 0; I < NumTerminals; ++I) { + tok::TokenKind K = static_cast(I); + if (const auto *Punc = tok::getPunctuatorSpelling(K)) + Terminals.push_back(Punc); + else + Terminals.push_back(llvm::StringRef(tok::getTokenName(K)).upper()); + } +} } // namespace pseudo } // namespace syntax } // namespace clang diff --git a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp --- a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp +++ b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp @@ -21,17 +21,6 @@ static const llvm::StringRef OptSuffix = "_opt"; static const llvm::StringRef StartSymbol = "_"; -void initTerminals(std::vector &Out) { - Out.clear(); - Out.reserve(NumTerminals); - for (unsigned I = 0; I < NumTerminals; ++I) { - tok::TokenKind K = static_cast(I); - if (const auto *Punc = tok::getPunctuatorSpelling(K)) - Out.push_back(Punc); - else - Out.push_back(llvm::StringRef(tok::getTokenName(K)).upper()); - } -} // Builds grammar from BNF files. class GrammarBuilder { public: diff --git a/clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp b/clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp @@ -0,0 +1,197 @@ +//===--- LRTable.cpp - Parsing table for LR parsers --------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/Pseudo/LRTable.h" +#include "clang/Tooling/Syntax/Pseudo/Grammar.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace clang { +namespace syntax { +namespace pseudo { + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { + switch (A.kind()) { + case LRTable::Action::Error: + return OS << "err"; + case LRTable::Action::Shift: + return OS << llvm::formatv("shift state {0}", A.getShiftState()); + case LRTable::Action::Reduce: + return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule()); + case LRTable::Action::GoTo: + return OS << llvm::formatv("goTo state {0}", A.getGoToState()); + case LRTable::Action::Accept: + return OS << "acc"; + } + assert(false); +} + +std::string LRTable::dumpStatistics() const { + StateID NumOfStates = 0; + for (auto It : StateIdx) + NumOfStates = std::max(It, NumOfStates); + return llvm::formatv(R"( +Statistics of the LR parsing table: + number of states: {0} + number of actions: {1} + size of the table (bytes): {2} +)", + NumOfStates, Actions.size(), bytes()) + .str(); +} + +std::string LRTable::dumpForTests(const Grammar &G) const { + std::string Result; + llvm::raw_string_ostream OS(Result); + StateID MaxState = 0; + for (const auto &It : StateIdx) + MaxState = std::max(MaxState, It); + OS << "LRTable:\n"; + for (StateID S = 0; S <= MaxState; ++S) { + OS << llvm::formatv("State {0}\n", S); + for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { + SymbolID TokID = tokenSymbol(static_cast(Terminal)); + for (auto A : find(S, TokID)) { + if (A.kind() == LRTable::Action::Shift) + OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", + G.symbolName(TokID), A.getShiftState()); + else if (A.kind() == LRTable::Action::Reduce) + OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n", + G.symbolName(TokID), A.getReduceRule(), + G.dumpRule(A.getReduceRule())); + else if (A.kind() == LRTable::Action::Accept) + OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID)); + } + } + for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size(); + ++NontermID) { + if (find(S, NontermID).empty()) + continue; + OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", + G.symbolName(NontermID), + getGoToState(S, NontermID)); + } + } + return OS.str(); +} + +llvm::ArrayRef LRTable::getActions(StateID State, + SymbolID Terminal) const { + assert(pseudo::isToken(Terminal)); + return find(State, Terminal); +} + +LRTable::StateID LRTable::getGoToState(StateID State, + SymbolID Nonterminal) const { + assert(pseudo::isNonterminal(Nonterminal)); + auto Result = find(State, Nonterminal); + assert(Result.size() == 1 && Result.front().kind() == Action::GoTo); + return Result.front().getGoToState(); +} + +llvm::ArrayRef LRTable::find(StateID Src, SymbolID ID) const { + size_t Idx = isToken(ID) ? symbolToToken(ID) : ID; + assert(isToken(ID) ? Idx + 1 < TerminalIdx.size() + : Idx + 1 < NontermIdx.size()); + std::pair TargetStateRange = + isToken(ID) ? std::make_pair(TerminalIdx[Idx], TerminalIdx[Idx + 1]) + : std::make_pair(NontermIdx[Idx], NontermIdx[Idx + 1]); + auto TargetedStates = + llvm::makeArrayRef(StateIdx.data() + TargetStateRange.first, + StateIdx.data() + TargetStateRange.second); + + assert(llvm::is_sorted(TargetedStates) && + "subrange of the StateIdx should be sorted!"); + const LRTable::StateID *It = llvm::partition_point( + TargetedStates, [&Src](LRTable::StateID S) { return S < Src; }); + if (It == TargetedStates.end()) + return {}; + size_t Start = It - StateIdx.data(), End = Start; + while (End < StateIdx.size() && StateIdx[End] == Src) + ++End; + return llvm::makeArrayRef(&Actions[Start], &Actions[End]); +} + +bool LRTable::Builder::insert(StateID Src, SymbolID Label, + LRTable::Action New) { + return Entries.insert({Src, Label, New}).second; +} + +LRTable LRTable::Builder::build(const GrammarTable >) && { + // E.g. given the following parsing table with 3 states and 3 terminals: + // + // a b c + // +-------+----+-------+-+ + // |state0 | | s0,r0 | | + // |state1 | acc| | | + // |state2 | | r1 | | + // +-------+----+-------+-+ + // + // The final LRTable: + // - TerminalIdx: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel) + // - StateIdx: [ 1, 0, 0, 2] + // Actions: [ acc, s0, r0, r1] + // ~~~ corresponding range for terminal a + // ~~~~~~~~~~ corresponding range for terminal b + // First step, we sort all entries by (Symbol, State, Action). + std::vector Sorted(Entries.begin(), Entries.end()); + llvm::sort(Sorted, [](const Entry &L, const Entry &R) { + return std::forward_as_tuple(L.Symbol, L.State, L.Act.value()) < + std::forward_as_tuple(R.Symbol, R.State, R.Act.value()); + }); + + LRTable Table; + Table.Actions.reserve(Sorted.size()); + Table.StateIdx.reserve(Sorted.size()); + // We are good to finalize the StateIdx and Actions. + for (const auto &E : Sorted) { + Table.Actions.push_back(E.Act); + Table.StateIdx.push_back(E.State); + } + // Initialize the terminal and nonterminal idx, all ranges are empty by + // default. + Table.TerminalIdx.resize(GT.Terminals.size() + 1, 0); + Table.NontermIdx.resize(GT.Nonterminals.size() + 1, 0); + llvm::ArrayRef SortedEntries = Sorted; + while (!SortedEntries.empty()) { + // We iterate each symbol per time (conceptually a column in the matrix per + // time), calculate the number of the available actions, and set it to + // SymbolIdx. + const Entry &Front = SortedEntries.front(); + // llvm::MutableArrayRef SymbolIdx = + // isToken(Front.Symbol) ? Table.TerminalIdx : Table.NontermIdx; + llvm::MutableArrayRef SymbolIdx = Table.NontermIdx; + if (isToken(Front.Symbol)) + SymbolIdx = Table.TerminalIdx; + unsigned Index = + isToken(Front.Symbol) ? symbolToToken(Front.Symbol) : Front.Symbol; + SymbolIdx[Index] = &Front - &Sorted.front(); + auto Batch = SortedEntries.take_while( + [Target = Front.Symbol](const Entry &E) { return E.Symbol == Target; }); + assert(!Batch.empty()); + SymbolIdx[Index + 1] = SymbolIdx[Index] + Batch.size(); + SortedEntries = SortedEntries.drop_front(Batch.size()); + } + // Final step to fill some unpropagated holes in the SymbolIdx (d for the + // above symbol). + for (size_t I = 1; I < Table.TerminalIdx.size(); ++I) + Table.TerminalIdx[I] = + std::max(Table.TerminalIdx[I], Table.TerminalIdx[I - 1]); + for (size_t I = 1; I < Table.NontermIdx.size(); ++I) + Table.NontermIdx[I] = + std::max(Table.NontermIdx[I], Table.NontermIdx[I - 1]); + return Table; +} + +} // namespace pseudo +} // namespace syntax +} // namespace clang diff --git a/clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp b/clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp @@ -0,0 +1,49 @@ +//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TokenKinds.h" +#include "clang/Tooling/Syntax/Pseudo/Grammar.h" +#include "clang/Tooling/Syntax/Pseudo/LRGraph.h" +#include "clang/Tooling/Syntax/Pseudo/LRTable.h" + +namespace clang { +namespace syntax { +namespace pseudo { + +LRTable LRTable::buildSLR(const Grammar &G) { + Builder Build; + auto Graph = LRGraph::buildLR0(G); + for (const auto &T : Graph.edges()) { + Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst); + Build.insert(T.Src, T.Label, Act); + } + + auto FollowSets = followSets(G); + for (StateID SID = 0; SID < Graph.states().size(); ++SID) { + for (const Item &I : Graph.states()[SID].Items) { + // If we've just parsed the start symbol, we can accept the input. + if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) { + Build.insert(SID, tokenSymbol(tok::eof), Action::accept()); + continue; + } + if (!I.hasNext()) { + // If we've reached the end of a rule A := ..., then we can reduce if + // the next token is in the follow set of A". + for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) { + assert(isToken(Follow)); + Build.insert(SID, Follow, Action::reduce(I.rule())); + } + } + } + } + return std::move(Build).build(G.table()); +} + +} // namespace pseudo +} // namespace syntax +} // namespace clang diff --git a/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt b/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt --- a/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt @@ -5,6 +5,7 @@ add_clang_unittest(ClangPseudoTests GrammarTest.cpp LRGraphTest.cpp + LRTableTest.cpp ) clang_target_link_libraries(ClangPseudoTests diff --git a/clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp b/clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp --- a/clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp +++ b/clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/Pseudo/LRGraph.h" +#include "clang/Tooling/Syntax/Pseudo/LRTable.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -20,14 +21,16 @@ struct TestCase { llvm::StringRef BNF; llvm::StringRef ExpectedStates; + llvm::StringRef ExpectedTables; }; - TestCase Cases[] = {{ - R"bnf( + TestCase Cases[] = { + { + R"bnf( _ := expr expr := IDENTIFIER )bnf", - R"(States: + R"(States: State 0 _ := • expr expr := • IDENTIFIER @@ -37,15 +40,25 @@ expr := IDENTIFIER • 0 ->[expr] 1 0 ->[IDENTIFIER] 2 -)"}, - {// A grammar with a S/R conflict in SLR table: - // (id-id)-id, or id-(id-id). - R"bnf( +)", + R"(LRTable: +State 0 + 'IDENTIFIER': shift state 2 + 'expr': go to state 1 +State 1 + 'EOF': accept +State 2 + 'EOF': reduce by rule 1 'expr := IDENTIFIER' +)", + }, + {// A grammar with a S/R conflict in SLR table: + // (id-id)-id, or id-(id-id). + R"bnf( _ := expr expr := expr - expr # S/R conflict at state 4 on '-' token expr := IDENTIFIER - )bnf", - R"(States: + )bnf", + R"(States: State 0 _ := • expr expr := • expr - expr @@ -68,13 +81,33 @@ 3 ->[expr] 4 3 ->[IDENTIFIER] 2 4 ->[-] 3 -)"}}; +)", + R"(LRTable: +State 0 + 'IDENTIFIER': shift state 2 + 'expr': go to state 1 +State 1 + 'EOF': accept + '-': shift state 3 +State 2 + 'EOF': reduce by rule 1 'expr := IDENTIFIER' + '-': reduce by rule 1 'expr := IDENTIFIER' +State 3 + 'IDENTIFIER': shift state 2 + 'expr': go to state 4 +State 4 + 'EOF': reduce by rule 2 'expr := expr - expr' + '-': shift state 3 + '-': reduce by rule 2 'expr := expr - expr' +)"}, + }; for (const auto &C : Cases) { std::vector Diags; auto G = Grammar::parseBNF(C.BNF, Diags); ASSERT_THAT(Diags, testing::IsEmpty()); auto LR0 = LRGraph::buildLR0(*G); EXPECT_EQ(LR0.dumpForTests(*G), C.ExpectedStates); + EXPECT_EQ(LRTable::buildSLR(*G).dumpForTests(*G), C.ExpectedTables); } } diff --git a/clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp b/clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp @@ -0,0 +1,57 @@ +//===--- LRTableTest.cpp - ---------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/Pseudo/LRTable.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Tooling/Syntax/Pseudo/Grammar.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace clang { +namespace syntax { +namespace pseudo { +namespace { + +using testing::IsEmpty; +using testing::UnorderedElementsAre; +using Action = LRTable::Action; + +TEST(LRTable, Builder) { + LRTable::Builder Build; + GrammarTable GTable; + initTerminals(GTable.Terminals); + + // eof semi ... + // +-------+----+-------+--- + // |state0 | | s0,r0 |... + // |state1 | acc| |... + // |state2 | | r1 |... + // +-------+----+-------+--- + Build.insert(/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)); + Build.insert(/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)); + Build.insert(/* State */ 1, tokenSymbol(tok::eof), Action::accept()); + Build.insert(/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)); + + LRTable T = std::move(Build).build(GTable); + EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty()); + EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)), + UnorderedElementsAre(Action::shift(0), Action::reduce(0))); + EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)), + UnorderedElementsAre(Action::accept())); + EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty()); + EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)), + UnorderedElementsAre(Action::reduce(1))); + // Verify the behaivor for other non-available-actions terminals. + EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty()); +} + +} // namespace +} // namespace pseudo +} // namespace syntax +} // namespace clang