diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp --- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -122,8 +122,7 @@ for (auto _ : State) { pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(Stream, ParseParams{Lang->G, Lang->Table, Forest, GSS}, - StartSymbol); + pseudo::glrParse(ParseParams{Stream, Forest, GSS}, StartSymbol, *Lang); } State.SetBytesProcessed(static_cast(State.iterations()) * SourceText->size()); @@ -136,9 +135,7 @@ TokenStream Stream = lexAndPreprocess(); pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(lexAndPreprocess(), - ParseParams{Lang->G, Lang->Table, Forest, GSS}, - StartSymbol); + pseudo::glrParse(ParseParams{Stream, Forest, GSS}, StartSymbol, *Lang); } State.SetBytesProcessed(static_cast(State.iterations()) * SourceText->size()); diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -43,9 +43,8 @@ clang::pseudo::GSS GSS; const Language &Lang = getLanguageFromFlags(); auto &Root = - glrParse(ParseableStream, - clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS}, - *Lang.G.findNonterminal("translation-unit")); + glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, + *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) llvm::outs() << Root.dumpRecursive(Lang.G); } diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp --- a/clang-tools-extra/pseudo/gen/Main.cpp +++ b/clang-tools-extra/pseudo/gen/Main.cpp @@ -79,6 +79,14 @@ switch (Emit) { case EmitSymbolList: + Out.os() << R"cpp( +#ifndef NONTERMINAL +#define NONTERMINAL(X, Y) +#endif +#ifndef EXTENSION +#define EXTENSION(X, Y) +#endif + )cpp"; for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID) { 
std::string Name = G.symbolName(ID).str(); @@ -86,6 +94,16 @@ std::replace(Name.begin(), Name.end(), '-', '_'); Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID); } + for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/; + EID < G.table().AttributeValues.size(); ++EID) { + llvm::StringRef Name = G.table().AttributeValues[EID]; + assert(!Name.empty()); + Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID); + } + Out.os() << R"cpp( +#undef NONTERMINAL +#undef EXTENSION + )cpp"; break; case EmitGrammarContent: for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -30,6 +30,7 @@ #define CLANG_PSEUDO_GLR_H #include "clang-pseudo/Forest.h" +#include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "llvm/Support/Allocator.h" @@ -112,38 +113,35 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &); // Parameters for the GLR parsing. -// FIXME: refine it with the ParseLang struct. struct ParseParams { - // The grammar of the language we're going to parse. - const Grammar &G; - // The LR table which GLR uses to parse the input, should correspond to the - // Grammar G. - const LRTable &Table; + // The token stream to parse. + const TokenStream &Code; // Arena for data structure used by the GLR algorithm. ForestArena &Forest; // Storage for the output forest. GSS &GSStack; // Storage for parsing stacks. }; + // Parses the given token stream as the start symbol with the GLR algorithm, // and returns a forest node of the start symbol. // // A rule `_ := StartSymbol` must exit for the chosen start symbol. // // If the parsing fails, we model it as an opaque node in the forest. 
-const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params, - SymbolID StartSymbol); +const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol, + const Language &Lang); // Shift a token onto all OldHeads, placing the results into NewHeads. // // Exposed for testing only. void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, const ForestNode &NextTok, const ParseParams &Params, - std::vector<const GSS::Node *> &NewHeads); + const Language &Lang, std::vector<const GSS::Node *> &NewHeads); // Applies available reductions on Heads, appending resulting heads to the list. // // Exposed for testing only. void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead, - const ParseParams &Params); + const ParseParams &Params, const Language &Lang); } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -14,12 +14,30 @@ namespace clang { namespace pseudo { +class ForestNode; +class TokenStream; +class LRTable; + +// A guard restricts when a grammar rule can be used. +// +// The GLR parser will use the guard to determine whether a rule reduction will +// be conducted. For example, a guard may allow the rule +// `contextual-override := IDENTIFIER` only if the identifier's text is 'override'. +// +// Return true if the guard is satisfied. +using RuleGuard = llvm::function_ref<bool( + llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>; // Specify a language that can be parsed by the pseduoparser. struct Language { Grammar G; LRTable Table; + // Maps each "guard" extension ID to the C++ callable that implements it. + // RuleGuard is a non-owning llvm::function_ref, so every callable stored + // here must outlive this Language. + llvm::DenseMap<ExtensionID, RuleGuard> Guards; + // FIXME: add clang::LangOptions. // FIXME: add default start symbols. 
}; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -37,6 +37,12 @@ #undef NONTERMINAL }; +enum class Extension : ExtensionID { +#define EXTENSION(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef EXTENSION +}; + // Returns the Language for the cxx.bnf grammar. const Language &getLanguage(); diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-pseudo/GLR.h" +#include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/TokenKinds.h" @@ -51,16 +52,16 @@ // └---3---┘ void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, const ForestNode &NewTok, const ParseParams &Params, - std::vector<const GSS::Node *> &NewHeads) { + const Language &Lang, std::vector<const GSS::Node *> &NewHeads) { assert(NewTok.kind() == ForestNode::Terminal); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n", - Params.G.symbolName(NewTok.symbol()), + Lang.G.symbolName(NewTok.symbol()), OldHeads.size())); // We group pending shifts by their target state so we can merge them. llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts; for (const auto *H : OldHeads) - if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol())) + if (auto S = Lang.Table.getShiftState(H->State, NewTok.symbol())) Shifts.push_back({*S, H}); llvm::stable_sort(Shifts, llvm::less_first{}); @@ -144,7 +145,7 @@ // storage across calls). class GLRReduce { const ParseParams &Params; - + const Language &Lang; // Grammar, LR table and rule guards in use. // There are two interacting complications: // 1. Performing one reduce can unlock new reduces on the newly-created head. // 2a. 
The ambiguous ForestNodes must be complete (have all sequence nodes). @@ -230,7 +231,8 @@ Sequence TempSequence; public: - GLRReduce(const ParseParams &Params) : Params(Params) {} + GLRReduce(const ParseParams &Params, const Language &Lang) + : Params(Params), Lang(Lang) {} void operator()(std::vector &Heads, SymbolID Lookahead) { assert(isToken(Lookahead)); @@ -249,10 +251,21 @@ } private: + bool canReduce(ExtensionID GuardID, RuleID RID, + llvm::ArrayRef RHS) const { + if (!GuardID) + return true; + if (auto Guard = Lang.Guards.lookup(GuardID)) + return Guard(RHS, Params.Code); + LLVM_DEBUG(llvm::dbgs() + << llvm::formatv("missing guard implementation for rule {0}\n", + Lang.G.dumpRule(RID))); + return true; + } // pop walks up the parent chain(s) for a reduction from Head by to Rule. // Once we reach the end, record the bases and sequences. void pop(const GSS::Node *Head, RuleID RID, const Rule &Rule) { - LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n"); + LLVM_DEBUG(llvm::dbgs() << " Pop " << Lang.G.dumpRule(RID) << "\n"); Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID}; TempSequence.resize_for_overwrite(Rule.Size); auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) { @@ -263,7 +276,8 @@ for (const auto *B : N->parents()) llvm::dbgs() << " --> base at S" << B->State << "\n"; }); - + if (!canReduce(Rule.Guard, RID, TempSequence)) + return; // Copy the chain to stable storage so it can be enqueued. 
if (SequenceStorageCount == SequenceStorage.size()) SequenceStorage.emplace_back(); @@ -286,9 +300,9 @@ if (popAndPushTrivial()) continue; for (RuleID RID : - Params.Table.getReduceRules((*Heads)[NextPopHead]->State)) { - const auto &Rule = Params.G.lookupRule(RID); - if (Params.Table.canFollow(Rule.Target, Lookahead)) + Lang.Table.getReduceRules((*Heads)[NextPopHead]->State)) { + const auto &Rule = Lang.G.lookupRule(RID); + if (Lang.Table.canFollow(Rule.Target, Lookahead)) pop((*Heads)[NextPopHead], RID, Rule); } } @@ -306,7 +320,7 @@ assert(!Sequences.empty()); Family F = Sequences.top().first; - LLVM_DEBUG(llvm::dbgs() << " Push " << Params.G.symbolName(F.Symbol) + LLVM_DEBUG(llvm::dbgs() << " Push " << Lang.G.symbolName(F.Symbol) << " from token " << F.Start << "\n"); // Grab the sequences and bases for this family. @@ -319,7 +333,7 @@ const PushSpec &Push = Sequences.top().second; FamilySequences.emplace_back(Sequences.top().first.Rule, *Push.Seq); for (const GSS::Node *Base : Push.LastPop->parents()) { - auto NextState = Params.Table.getGoToState(Base->State, F.Symbol); + auto NextState = Lang.Table.getGoToState(Base->State, F.Symbol); assert(NextState.hasValue() && "goto must succeed after reduce!"); FamilyBases.emplace_back(*NextState, Base); } @@ -337,7 +351,7 @@ SequenceNodes.size() == 1 ? SequenceNodes.front() : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes); - LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Params.G) << "\n"); + LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Lang.G) << "\n"); // Bases for this family, deduplicate them, and group by the goTo State. sortAndUnique(FamilyBases); @@ -375,15 +389,15 @@ return false; const GSS::Node *Head = Heads->back(); llvm::Optional RID; - for (RuleID R : Params.Table.getReduceRules(Head->State)) { + for (RuleID R : Lang.Table.getReduceRules(Head->State)) { if (RID.hasValue()) return false; RID = R; } if (!RID) return true; // no reductions available, but we've processed the head! 
- const auto &Rule = Params.G.lookupRule(*RID); - if (!Params.Table.canFollow(Rule.Target, Lookahead)) + const auto &Rule = Lang.G.lookupRule(*RID); + if (!Lang.Table.canFollow(Rule.Target, Lookahead)) return true; // reduction is not available const GSS::Node *Base = Head; TempSequence.resize_for_overwrite(Rule.Size); @@ -393,9 +407,11 @@ TempSequence[Rule.Size - 1 - I] = Base->Payload; Base = Base->parents().front(); } + if (!canReduce(Rule.Guard, *RID, TempSequence)) + return true; // reduction is not available const ForestNode *Parsed = &Params.Forest.createSequence(Rule.Target, *RID, TempSequence); - auto NextState = Params.Table.getGoToState(Base->State, Rule.Target); + auto NextState = Lang.Table.getGoToState(Base->State, Rule.Target); assert(NextState.hasValue() && "goto must succeed after reduce!"); Heads->push_back(Params.GSStack.addNode(*NextState, Parsed, {Base})); return true; @@ -404,16 +420,14 @@ } // namespace -const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, - SymbolID StartSymbol) { - GLRReduce Reduce(Params); +const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol, + const Language &Lang) { + GLRReduce Reduce(Params, Lang); assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal"); - llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens); - auto &G = Params.G; - (void)G; + llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Params.Code); auto &GSS = Params.GSStack; - StateID StartState = Params.Table.getStartState(StartSymbol); + StateID StartState = Lang.Table.getStartState(StartSymbol); // Heads correspond to the parse of tokens [0, I), NextHeads to [0, I+1). 
std::vector Heads = {GSS.addNode(/*State=*/StartState, /*ForestNode=*/nullptr, @@ -433,9 +447,9 @@ for (unsigned I = 0; I < Terminals.size(); ++I) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv( "Next token {0} (id={1})\n", - G.symbolName(Terminals[I].symbol()), Terminals[I].symbol())); + Lang.G.symbolName(Terminals[I].symbol()), Terminals[I].symbol())); // Consume the token. - glrShift(Heads, Terminals[I], Params, NextHeads); + glrShift(Heads, Terminals[I], Params, Lang, NextHeads); // Form nonterminals containing the token we just consumed. SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof) : Terminals[I + 1].symbol(); @@ -447,7 +461,7 @@ } LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n")); - auto AcceptState = Params.Table.getGoToState(StartState, StartSymbol); + auto AcceptState = Lang.Table.getGoToState(StartState, StartSymbol); assert(AcceptState.hasValue() && "goto must succeed after start symbol!"); const ForestNode *Result = nullptr; for (const auto *Head : Heads) { @@ -468,9 +482,9 @@ } void glrReduce(std::vector &Heads, SymbolID Lookahead, - const ParseParams &Params) { + const ParseParams &Params, const Language &Lang) { // Create a new GLRReduce each time for tests, performance doesn't matter. 
- GLRReduce{Params}(Heads, Lookahead); + GLRReduce{Params, Lang}(Heads, Lookahead); } const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, diff --git a/clang-tools-extra/pseudo/lib/cli/CLI.cpp b/clang-tools-extra/pseudo/lib/cli/CLI.cpp --- a/clang-tools-extra/pseudo/lib/cli/CLI.cpp +++ b/clang-tools-extra/pseudo/lib/cli/CLI.cpp @@ -8,6 +8,7 @@ #include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/grammar/Grammar.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" @@ -39,7 +40,8 @@ for (const auto &Diag : Diags) llvm::errs() << Diag << "\n"; auto Table = LRTable::buildSLR(G); - return new Language{std::move(G), std::move(Table)}; + return new Language{std::move(G), std::move(Table), + llvm::DenseMap()}; }(); return *Lang; } diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt --- a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -9,5 +9,6 @@ cxx_gen LINK_LIBS + clangPseudo clangPseudoGrammar ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/Forest.h" #include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" @@ -19,6 +20,26 @@ static const char *CXXBNF = #include "CXXBNF.inc" ; + +bool guardOverride(llvm::ArrayRef RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && + RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override"; +} +bool guardFinal(llvm::ArrayRef RHS, + const TokenStream 
&Tokens) { + assert(RHS.size() == 1 && + RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final"; +} + +llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() { + return llvm::DenseMap<ExtensionID, RuleGuard>( + {{static_cast<ExtensionID>(Extension::Override), guardOverride}, + {static_cast<ExtensionID>(Extension::Final), guardFinal}}); +} + } // namespace const Language &getLanguage() { @@ -27,10 +48,8 @@ auto G = Grammar::parseBNF(CXXBNF, Diags); assert(Diags.empty()); LRTable Table = LRTable::buildSLR(G); - const Language *PL = new Language{ - std::move(G), - std::move(Table), - }; + const Language *PL = + new Language{std::move(G), std::move(Table), buildGuards()}; return *PL; }(); return CXXLanguage; diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -744,8 +744,8 @@ #! Contextual keywords -- clang lexer always lexes them as identifier tokens. #! Placeholders for literal text in the grammar that lex as other things. -contextual-override := IDENTIFIER -contextual-final := IDENTIFIER +contextual-override := IDENTIFIER [guard=Override] +contextual-final := IDENTIFIER [guard=Final] contextual-zero := NUMERIC_CONSTANT module-keyword := IDENTIFIER import-keyword := IDENTIFIER diff --git a/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp b/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp @@ -0,0 +1,9 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s +// Verify that the contextual-{final,override} rules are guarded conditionally: +// there must be no ambiguous parse for the virt-specifier. 
+class Foo { + void foo1() override; +// CHECK: virt-specifier-seq~IDENTIFIER := tok[7] + void foo2() final; +// CHECK: virt-specifier-seq~IDENTIFIER := tok[13] +}; diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -145,9 +145,8 @@ return 2; } auto &Root = - glrParse(*ParseableStream, - clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS}, - *StartSymID); + glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS}, + *StartSymID, Lang); if (PrintForest) llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true); diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -81,6 +81,14 @@ ADD_FAILURE() << "No such symbol found: " << Name; return 0; } + ExtensionID extensionID(llvm::StringRef AttrValueName) const { + for (ExtensionID EID = 0; EID < TestLang.G.table().AttributeValues.size(); + ++EID) + if (TestLang.G.table().AttributeValues[EID] == AttrValueName) + return EID; + ADD_FAILURE() << "No such attribute value found: " << AttrValueName; + return 0; + } RuleID ruleFor(llvm::StringRef NonterminalName) const { auto RuleRange = @@ -131,7 +139,7 @@ ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0); std::vector NewHeads; glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, - {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads); + {emptyTokenStream(), Arena, GSStack}, TestLang, NewHeads); EXPECT_THAT(NewHeads, UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal), @@ -164,7 +172,7 @@ std::vector Heads = {GSSNode1}; glrReduce(Heads, tokenSymbol(tok::eof), - {TestLang.G, TestLang.Table, Arena, GSStack}); + {emptyTokenStream(), Arena, GSStack}, TestLang); EXPECT_THAT(Heads, UnorderedElementsAre( GSSNode1, 
AllOf(state(2), parsedSymbolID(id("class-name")), @@ -202,7 +210,8 @@ TestLang.Table = std::move(B).build(); std::vector Heads = {GSSNode4}; - glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack}, + TestLang); EXPECT_THAT(Heads, UnorderedElementsAre( GSSNode4, @@ -254,7 +263,8 @@ TestLang.Table = std::move(B).build(); std::vector Heads = {GSSNode3, GSSNode4}; - glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack}, + TestLang); // Verify that the stack heads are joint at state 5 after reduces. EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4, @@ -309,7 +319,7 @@ std::vector Heads = {GSSNode3, GSSNode4}; glrReduce(Heads, tokenSymbol(tok::eof), - {TestLang.G, TestLang.Table, Arena, GSStack}); + {emptyTokenStream(), Arena, GSStack}, TestLang); EXPECT_THAT( Heads, UnorderedElementsAre(GSSNode3, GSSNode4, @@ -343,14 +353,16 @@ // When the lookahead is +, reduce is performed. std::vector Heads = {GSSNode1}; - glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::plus), {emptyTokenStream(), Arena, GSStack}, + TestLang); EXPECT_THAT(Heads, ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")), parents(Root)))); // When the lookahead is -, reduce is not performed. 
Heads = {GSSNode1}; - glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::minus), + {emptyTokenStream(), Arena, GSStack}, TestLang); EXPECT_THAT(Heads, ElementsAre(GSSNode1)); } @@ -376,7 +388,7 @@ const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions); const ForestNode &Parsed = - glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); + glrParse({Tokens, Arena, GSStack}, id("test"), TestLang); // Verify that there is no duplicated sequence node of `expr := IDENTIFIER` // in the forest, see the `#1` and `=#1` in the dump string. EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), @@ -413,7 +425,7 @@ TestLang.Table = LRTable::buildSLR(TestLang.G); const ForestNode &Parsed = - glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); + glrParse({Tokens, Arena, GSStack}, id("test"), TestLang); EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), "[ 0, end) test := \n" "[ 0, end) ├─test := IDENTIFIER\n" @@ -438,7 +450,7 @@ TestLang.Table = LRTable::buildSLR(TestLang.G); const ForestNode &Parsed = - glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); + glrParse({Tokens, Arena, GSStack}, id("test"), TestLang); EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), "[ 0, end) test := IDENTIFIER test\n" "[ 0, 1) ├─IDENTIFIER := tok[0]\n" @@ -446,6 +458,38 @@ "[ 1, end) └─IDENTIFIER := tok[1]\n"); } +TEST_F(GLRTest, GuardExtension) { + build(R"bnf( + _ := start + + start := IDENTIFIER [guard=TestOnly] + )bnf"); + // RuleGuard is a non-owning llvm::function_ref: keep the callable in a named + // local so it stays alive while glrParse() invokes it through TestLang.Guards. + auto TestOnlyGuard = [](llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && + RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test"; + }; + TestLang.Guards.try_emplace(extensionID("TestOnly"), TestOnlyGuard); + clang::LangOptions LOptions; + TestLang.Table = LRTable::buildSLR(TestLang.G); + + std::string Input = "test"; + const TokenStream &Succeeded = cook(lex(Input, 
LOptions), LOptions); + EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang) + .dumpRecursive(TestLang.G), + "[ 0, end) start := IDENTIFIER [guard=TestOnly]\n" + "[ 0, end) └─IDENTIFIER := tok[0]\n"); + + Input = "notest"; + const TokenStream &Failed = cook(lex(Input, LOptions), LOptions); + EXPECT_EQ(glrParse({Failed, Arena, GSStack}, id("start"), TestLang) + .dumpRecursive(TestLang.G), + "[ 0, end) start := \n"); +} + TEST(GSSTest, GC) { // ┌-A-┬-AB // ├-B-┘