diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp --- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -25,6 +25,7 @@ #include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/LangOptions.h" @@ -39,9 +40,6 @@ using llvm::cl::opt; using llvm::cl::Required; -static opt GrammarFile("grammar", - desc("Parse and check a BNF grammar file."), - Required); static opt Source("source", desc("Source file"), Required); namespace clang { @@ -49,11 +47,10 @@ namespace bench { namespace { -const std::string *GrammarText = nullptr; const std::string *SourceText = nullptr; -const Grammar *G = nullptr; +const Language *Lang = nullptr; -void setupGrammarAndSource() { +void setup() { auto ReadFile = [](llvm::StringRef FilePath) -> std::string { llvm::ErrorOr> GrammarText = llvm::MemoryBuffer::getFile(FilePath); @@ -64,22 +61,13 @@ } return GrammarText.get()->getBuffer().str(); }; - GrammarText = new std::string(ReadFile(GrammarFile)); SourceText = new std::string(ReadFile(Source)); - std::vector Diags; - G = new Grammar(Grammar::parseBNF(*GrammarText, Diags)); + Lang = &getLanguageFromFlags(); } -static void parseBNF(benchmark::State &State) { - std::vector Diags; - for (auto _ : State) - Grammar::parseBNF(*GrammarText, Diags); -} -BENCHMARK(parseBNF); - static void buildSLR(benchmark::State &State) { for (auto _ : State) - LRTable::buildSLR(*G); + LRTable::buildSLR(Lang->G); } BENCHMARK(buildSLR); @@ -129,13 +117,13 @@ BENCHMARK(preprocess); static void glrParse(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); - SymbolID StartSymbol = *G->findNonterminal("translation-unit"); + SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
TokenStream Stream = lexAndPreprocess(); for (auto _ : State) { pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol); + pseudo::glrParse(Stream, ParseParams{Lang->G, Lang->Table, Forest, GSS}, + StartSymbol); } State.SetBytesProcessed(static_cast(State.iterations()) * SourceText->size()); @@ -143,13 +131,13 @@ BENCHMARK(glrParse); static void full(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); - SymbolID StartSymbol = *G->findNonterminal("translation-unit"); + SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit"); for (auto _ : State) { TokenStream Stream = lexAndPreprocess(); pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS}, + pseudo::glrParse(Stream, + ParseParams{Lang->G, Lang->Table, Forest, GSS}, StartSymbol); } State.SetBytesProcessed(static_cast(State.iterations()) * @@ -165,7 +153,7 @@ int main(int argc, char *argv[]) { benchmark::Initialize(&argc, argv); llvm::cl::ParseCommandLineOptions(argc, argv); - clang::pseudo::bench::setupGrammarAndSource(); + clang::pseudo::bench::setup(); benchmark::RunSpecifiedBenchmarks(); return 0; } diff --git a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt --- a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt +++ b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt @@ -3,6 +3,7 @@ target_link_libraries(ClangPseudoBenchmark PRIVATE clangPseudo + clangPseudoCLI clangPseudoGrammar LLVMSupport ) diff --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt --- a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt +++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt @@ -11,5 +11,6 @@ target_link_libraries(clang-pseudo-fuzzer PRIVATE clangPseudo + clangPseudoCLI clangPseudoGrammar ) diff --git
a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -10,6 +10,7 @@ #include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/LangOptions.h" @@ -24,28 +25,10 @@ class Fuzzer { clang::LangOptions LangOpts = clang::pseudo::genericLangOpts(); - Grammar G; - LRTable T; bool Print; public: - Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) { - llvm::ErrorOr> GrammarText = - llvm::MemoryBuffer::getFile(GrammarPath); - if (std::error_code EC = GrammarText.getError()) { - llvm::errs() << "Error: can't read grammar file '" << GrammarPath - << "': " << EC.message() << "\n"; - std::exit(1); - } - std::vector Diags; - G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags); - if (!Diags.empty()) { - for (const auto &Diag : Diags) - llvm::errs() << Diag << "\n"; - std::exit(1); - } - T = LRTable::buildSLR(G); - } + Fuzzer(bool Print) : Print(Print) {} void operator()(llvm::StringRef Code) { std::string CodeStr = Code.str(); // Must be null-terminated. 
@@ -58,11 +41,13 @@ clang::pseudo::ForestArena Arena; clang::pseudo::GSS GSS; + const Language &Lang = getLanguageFromFlags(); auto &Root = - glrParse(ParseableStream, clang::pseudo::ParseParams{G, T, Arena, GSS}, - *G.findNonterminal("translation-unit")); + glrParse(ParseableStream, + clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS}, + *Lang.G.findNonterminal("translation-unit")); if (Print) - llvm::outs() << Root.dumpRecursive(G); + llvm::outs() << Root.dumpRecursive(Lang.G); } }; @@ -75,16 +60,11 @@ extern "C" { // Set up the fuzzer from command line flags: -// -grammar= (required) - path to cxx.bnf // -print - used for testing the fuzzer int LLVMFuzzerInitialize(int *Argc, char ***Argv) { - llvm::StringRef GrammarFile; bool PrintForest = false; auto ConsumeArg = [&](llvm::StringRef Arg) -> bool { - if (Arg.consume_front("-grammar=")) { - GrammarFile = Arg; - return true; - } else if (Arg == "-print") { + if (Arg == "-print") { PrintForest = true; return true; } @@ -92,11 +72,7 @@ }; *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv; - if (GrammarFile.empty()) { - fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n"); - exit(1); - } - clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest); + clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest); return 0; } diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -112,6 +112,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &); // Parameters for the GLR parsing. +// FIXME: refine it with the Language struct. struct ParseParams { // The grammar of the language we're going to parse.
const Grammar &G; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -0,0 +1,30 @@ +//===--- Language.h -------------------------------------------- -*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_LANGUAGE_H +#define CLANG_PSEUDO_LANGUAGE_H + +#include "clang-pseudo/grammar/Grammar.h" +#include "clang-pseudo/grammar/LRTable.h" + +namespace clang { +namespace pseudo { + +// Specify a language that can be parsed by the pseudoparser. +struct Language { + Grammar G; + LRTable Table; + + // FIXME: add clang::LangOptions. + // FIXME: add default start symbols. +}; + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_LANGUAGE_H diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h @@ -0,0 +1,31 @@ +//===--- ParseLang.h ------------------------------------------- -*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_PARSELANG_H +#define CLANG_PSEUDO_PARSELANG_H + +#include "clang-pseudo/grammar/Grammar.h" +#include "clang-pseudo/grammar/LRTable.h" + +namespace clang { +namespace pseudo { + +// Specify a language that can be parsed by the pseudoparser. +// Manifest generated from a bnf grammar file. +struct ParseLang { + Grammar G; + LRTable Table; + + // FIXME: add clang::LangOptions. + // FIXME: add default start symbols. +}; + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_PARSELANG_H diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h @@ -0,0 +1,35 @@ +//===--- CLI.h - Get grammar from various sources ----------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides the Grammar, LRTable etc for a language specified by the `--grammar` +// flag. It is designed to be used by pseudoparser-based CLI tools. +// +// The CLI library defines a `--grammar` CLI flag, which supports 1) using a +// grammar from a file (--grammar=/path/to/lang.bnf) or 2) using the prebuilt +// cxx language (--grammar=cxx). +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_CLI_CLI_H +#define CLANG_PSEUDO_CLI_CLI_H + +#include "clang-pseudo/Language.h" + +namespace clang { +namespace pseudo { + +// Returns the corresponding Language from the '--grammar' command-line flag. +// +// !! If the grammar flag is invalid (e.g.
nonexistent file), this function will +// exit the program immediately. +const Language &getLanguageFromFlags(); + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_CLI_CLI_H diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -23,12 +23,11 @@ #ifndef CLANG_PSEUDO_CXX_CXX_H #define CLANG_PSEUDO_CXX_CXX_H +#include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" namespace clang { namespace pseudo { -class LRTable; - namespace cxx { // Symbol represents nonterminal symbols in the C++ grammar. // It provides a simple uniform way to access a particular nonterminal. @@ -38,10 +37,8 @@ #undef NONTERMINAL }; -// Returns the C++ grammar. -const Grammar &getGrammar(); -// Returns the corresponding LRTable for the C++ grammar. -const LRTable &getLRTable(); +// Returns the Language for the cxx.bnf grammar.
+const Language &getLanguage(); } // namespace cxx diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -57,6 +57,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(cli) add_subdirectory(cxx) add_subdirectory(grammar) diff --git a/clang-tools-extra/pseudo/lib/cli/CLI.cpp b/clang-tools-extra/pseudo/lib/cli/CLI.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cli/CLI.cpp @@ -0,0 +1,48 @@ +//===--- CLI.cpp - ----------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/cli/CLI.h" +#include "clang-pseudo/cxx/CXX.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" + +static llvm::cl::opt Grammar( + "grammar", + llvm::cl::desc( + "Specify a BNF grammar file path, or a builtin language (cxx)."), + llvm::cl::init("cxx")); + +namespace clang { +namespace pseudo { + +const Language &getLanguageFromFlags() { + if (::Grammar == "cxx") + return cxx::getLanguage(); + + static Language *Lang = []() { + // Read from a bnf grammar file. 
+ llvm::ErrorOr> GrammarText = + llvm::MemoryBuffer::getFile(::Grammar); + if (std::error_code EC = GrammarText.getError()) { + llvm::errs() << "Error: can't read grammar file '" << ::Grammar + << "': " << EC.message() << "\n"; + std::exit(1); + } + std::vector Diags; + auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags); + for (const auto &Diag : Diags) + llvm::errs() << Diag << "\n"; + auto Table = LRTable::buildSLR(G); + return new Language{std::move(G), std::move(Table)}; + }(); + return *Lang; +} + +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt @@ -0,0 +1,11 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_library(clangPseudoCLI + CLI.cpp + + LINK_LIBS + clangPseudoGrammar + clangPseudoCXX + ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -7,26 +7,33 @@ //===----------------------------------------------------------------------===// #include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/Language.h" +#include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" +#include namespace clang { namespace pseudo { namespace cxx { - +namespace { static const char *CXXBNF = #include "CXXBNF.inc" ; +} // namespace -const Grammar &getGrammar() { - static std::vector Diags; - static Grammar *G = new Grammar(Grammar::parseBNF(CXXBNF, Diags)); - assert(Diags.empty()); - return *G; -} - -const LRTable &getLRTable() { - static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar())); - return *Table; +const Language &getLanguage() { + static const auto &CXXLanguage = []() -> const Language & { + std::vector Diags; + auto G = Grammar::parseBNF(CXXBNF, Diags); + assert(Diags.empty()); + 
LRTable Table = LRTable::buildSLR(G); + const Language *PL = new Language{ + std::move(G), + std::move(Table), + }; + return *PL; + }(); + return CXXLanguage; } } // namespace cxx diff --git a/clang-tools-extra/pseudo/tool/CMakeLists.txt b/clang-tools-extra/pseudo/tool/CMakeLists.txt --- a/clang-tools-extra/pseudo/tool/CMakeLists.txt +++ b/clang-tools-extra/pseudo/tool/CMakeLists.txt @@ -13,5 +13,6 @@ PRIVATE clangPseudo clangPseudoGrammar + clangPseudoCLI ) diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -9,7 +9,9 @@ #include "clang-pseudo/Bracket.h" #include "clang-pseudo/DirectiveTree.h" #include "clang-pseudo/GLR.h" +#include "clang-pseudo/Language.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRGraph.h" #include "clang-pseudo/grammar/LRTable.h" @@ -20,14 +22,11 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Signals.h" -using clang::pseudo::Grammar; using clang::pseudo::TokenStream; using llvm::cl::desc; using llvm::cl::init; using llvm::cl::opt; -static opt - Grammar("grammar", desc("Parse and check a BNF grammar file."), init("")); static opt PrintGrammar("print-grammar", desc("Print the grammar.")); static opt PrintGraph("print-graph", desc("Print the LR graph for the grammar")); @@ -93,49 +92,40 @@ pairBrackets(*ParseableStream); } - if (Grammar.getNumOccurrences()) { - std::string Text = readOrDie(Grammar); - std::vector Diags; - auto G = Grammar::parseBNF(Text, Diags); + const auto &Lang = clang::pseudo::getLanguageFromFlags(); + if (PrintGrammar) + llvm::outs() << Lang.G.dump(); + if (PrintGraph) + llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests( + Lang.G); - if (!Diags.empty()) { - llvm::errs() << llvm::join(Diags, "\n"); + if (PrintTable) + llvm::outs() 
<< Lang.Table.dumpForTests(Lang.G); + if (PrintStatistics) + llvm::outs() << Lang.Table.dumpStatistics(); + + if (ParseableStream) { + clang::pseudo::ForestArena Arena; + clang::pseudo::GSS GSS; + llvm::Optional StartSymID = + Lang.G.findNonterminal(StartSymbol); + if (!StartSymID) { + llvm::errs() << llvm::formatv( + "The start symbol {0} doesn't exist in the grammar!\n", StartSymbol); return 2; } - llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n", - Grammar); - if (PrintGrammar) - llvm::outs() << G.dump(); - if (PrintGraph) - llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G); - auto LRTable = clang::pseudo::LRTable::buildSLR(G); - if (PrintTable) - llvm::outs() << LRTable.dumpForTests(G); - if (PrintStatistics) - llvm::outs() << LRTable.dumpStatistics(); - - if (ParseableStream) { - clang::pseudo::ForestArena Arena; - clang::pseudo::GSS GSS; - llvm::Optional StartSymID = - G.findNonterminal(StartSymbol); - if (!StartSymID) { - llvm::errs() << llvm::formatv( - "The start symbol {0} doesn't exit in the grammar!\n", Grammar); - return 2; - } - auto &Root = glrParse(*ParseableStream, - clang::pseudo::ParseParams{G, LRTable, Arena, GSS}, - *StartSymID); - if (PrintForest) - llvm::outs() << Root.dumpRecursive(G, /*Abbreviated=*/true); + auto &Root = + glrParse(*ParseableStream, + clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS}, + *StartSymID); + if (PrintForest) + llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true); - if (PrintStatistics) { - llvm::outs() << "Forest bytes: " << Arena.bytes() - << " nodes: " << Arena.nodeCount() << "\n"; - llvm::outs() << "GSS bytes: " << GSS.bytes() - << " nodes: " << GSS.nodesCreated() << "\n"; - } + if (PrintStatistics) { + llvm::outs() << "Forest bytes: " << Arena.bytes() + << " nodes: " << Arena.nodeCount() << "\n"; + llvm::outs() << "GSS bytes: " << GSS.bytes() + << " nodes: " << GSS.nodesCreated() << "\n"; } } diff --git
a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -8,6 +8,7 @@ #include "clang-pseudo/GLR.h" +#include "clang-pseudo/Language.h" #include "clang-pseudo/Token.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TokenKinds.h" @@ -48,9 +49,15 @@ public: void build(llvm::StringRef GrammarBNF) { std::vector Diags; - G = Grammar::parseBNF(GrammarBNF, Diags); + TestLang.G = Grammar::parseBNF(GrammarBNF, Diags); } + TokenStream emptyTokenStream() { + TokenStream Empty; + Empty.finalize(); + return Empty; + } + void buildGrammar(std::vector Nonterminals, std::vector Rules) { Nonterminals.push_back("_"); @@ -66,19 +73,22 @@ SymbolID id(llvm::StringRef Name) const { for (unsigned I = 0; I < NumTerminals; ++I) - if (G.table().Terminals[I] == Name) + if (TestLang.G.table().Terminals[I] == Name) return tokenSymbol(static_cast(I)); - for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID) - if (G.table().Nonterminals[ID].Name == Name) + for (SymbolID ID = 0; ID < TestLang.G.table().Nonterminals.size(); ++ID) + if (TestLang.G.table().Nonterminals[ID].Name == Name) return ID; ADD_FAILURE() << "No such symbol found: " << Name; return 0; } RuleID ruleFor(llvm::StringRef NonterminalName) const { - auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange; + auto RuleRange = + TestLang.G.table().Nonterminals[id(NonterminalName)].RuleRange; if (RuleRange.End - RuleRange.Start == 1) - return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start; + return TestLang.G.table() + .Nonterminals[id(NonterminalName)] + .RuleRange.Start; ADD_FAILURE() << "Expected a single rule for " << NonterminalName << ", but it has " << RuleRange.End - RuleRange.Start << " rule!\n"; @@ -86,7 +96,7 @@ } protected: - Grammar G; + Language TestLang; ForestArena Arena; GSS GSStack;
}; @@ -112,9 +122,8 @@ /*Parents=*/{GSSNode0}); buildGrammar({}, {}); // Create a fake empty grammar. - LRTable T = - LRTable::buildForTests(G, /*Entries=*/ - { + TestLang.Table = + LRTable::buildForTests(TestLang.G, /*Entries=*/{ {1, tokenSymbol(tok::semi), Action::shift(4)}, {2, tokenSymbol(tok::semi), Action::shift(4)}, {3, tokenSymbol(tok::semi), Action::shift(5)}, @@ -123,8 +132,8 @@ ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0); std::vector NewHeads; - glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, {G, T, Arena, GSStack}, - NewHeads); + glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, + {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads); EXPECT_THAT(NewHeads, UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal), @@ -144,8 +153,8 @@ buildGrammar({"class-name", "enum-name"}, {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"}); - LRTable Table = LRTable::buildForTests( - G, + TestLang.Table = LRTable::buildForTests( + TestLang.G, { {/*State=*/0, id("class-name"), Action::goTo(2)}, {/*State=*/0, id("enum-name"), Action::goTo(3)}, @@ -161,7 +170,8 @@ GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0}); std::vector Heads = {GSSNode1}; - glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), + {TestLang.G, TestLang.Table, Arena, GSStack}); EXPECT_THAT(Heads, UnorderedElementsAre( GSSNode1, AllOf(state(2), parsedSymbolID(id("class-name")), @@ -192,8 +202,8 @@ /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1), /*Parents=*/{GSSNode2, GSSNode3}); - LRTable Table = LRTable::buildForTests( - G, + TestLang.Table = LRTable::buildForTests( + TestLang.G, { {/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)}, {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}, @@ -202,7 +212,7 @@ {/*State=*/4, ruleFor("ptr-operator")}, }); std::vector Heads = {GSSNode4}; - glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, 
Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack}); EXPECT_THAT(Heads, UnorderedElementsAre( GSSNode4, @@ -246,8 +256,8 @@ /*Parents=*/{GSSNode2}); // FIXME: figure out a way to get rid of the hard-coded reduce RuleID! - LRTable Table = LRTable::buildForTests( - G, + TestLang.Table = LRTable::buildForTests( + TestLang.G, { {/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)}, {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)}, @@ -257,7 +267,7 @@ {/*State=*/4, /* type-name := enum-name */ 1}, }); std::vector Heads = {GSSNode3, GSSNode4}; - glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack}); // Verify that the stack heads are joint at state 5 after reduces. EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4, @@ -266,7 +276,7 @@ parents({GSSNode1, GSSNode2})))) << Heads; // Verify that we create an ambiguous ForestNode of two parses of `type-name`. - EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G), + EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G), "[ 1, end) type-name := \n" "[ 1, end) ├─type-name := class-name\n" "[ 1, end) │ └─class-name := \n" @@ -304,8 +314,8 @@ /*Parents=*/{GSSNode2}); // FIXME: figure out a way to get rid of the hard-coded reduce RuleID! 
- LRTable Table = - LRTable::buildForTests(G, + TestLang.Table = + LRTable::buildForTests(TestLang.G, { {/*State=*/0, id("pointer"), Action::goTo(5)}, }, @@ -314,14 +324,15 @@ {4, /* pointer := enum-name */ 1}, }); std::vector Heads = {GSSNode3, GSSNode4}; - glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::eof), + {TestLang.G, TestLang.Table, Arena, GSStack}); EXPECT_THAT( Heads, UnorderedElementsAre(GSSNode3, GSSNode4, AllOf(state(5), parsedSymbolID(id("pointer")), parents({GSSNode0})))) << Heads; - EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G), + EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G), "[ 0, end) pointer := \n" "[ 0, end) ├─pointer := class-name *\n" "[ 0, 1) │ ├─class-name := \n" @@ -334,8 +345,8 @@ TEST_F(GLRTest, ReduceLookahead) { // A term can be followed by +, but not by -. buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"}); - LRTable Table = - LRTable::buildForTests(G, + TestLang.Table = + LRTable::buildForTests(TestLang.G, { {/*State=*/0, id("term"), Action::goTo(2)}, }, @@ -352,14 +363,14 @@ // When the lookahead is +, reduce is performed. std::vector Heads = {GSSNode1}; - glrReduce(Heads, tokenSymbol(tok::plus), {G, Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack}); EXPECT_THAT(Heads, ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")), parents(Root)))); // When the lookahead is -, reduce is not performed. 
Heads = {GSSNode1}; - glrReduce(Heads, tokenSymbol(tok::minus), {G, Table, Arena, GSStack}); + glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack}); EXPECT_THAT(Heads, ElementsAre(GSSNode1)); } @@ -380,27 +391,28 @@ left-paren := { expr := IDENTIFIER )bnf"); + TestLang.Table = LRTable::buildSLR(TestLang.G); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(G); const ForestNode &Parsed = - glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test")); + glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); // Verify that there is no duplicated sequence node of `expr := IDENTIFIER` // in the forest, see the `#1` and `=#1` in the dump string. - EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := \n" - "[ 0, end) ├─test := { expr\n" - "[ 0, 1) │ ├─{ := tok[0]\n" - "[ 1, end) │ └─expr := IDENTIFIER #1\n" - "[ 1, end) │ └─IDENTIFIER := tok[1]\n" - "[ 0, end) ├─test := { IDENTIFIER\n" - "[ 0, 1) │ ├─{ := tok[0]\n" - "[ 1, end) │ └─IDENTIFIER := tok[1]\n" - "[ 0, end) └─test := left-paren expr\n" - "[ 0, 1) ├─left-paren := {\n" - "[ 0, 1) │ └─{ := tok[0]\n" - "[ 1, end) └─expr := IDENTIFIER =#1\n" - "[ 1, end) └─IDENTIFIER := tok[1]\n"); + EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), + "[ 0, end) test := \n" + "[ 0, end) ├─test := { expr\n" + "[ 0, 1) │ ├─{ := tok[0]\n" + "[ 1, end) │ └─expr := IDENTIFIER #1\n" + "[ 1, end) │ └─IDENTIFIER := tok[1]\n" + "[ 0, end) ├─test := { IDENTIFIER\n" + "[ 0, 1) │ ├─{ := tok[0]\n" + "[ 1, end) │ └─IDENTIFIER := tok[1]\n" + "[ 0, end) └─test := left-paren expr\n" + "[ 0, 1) ├─left-paren := {\n" + "[ 0, 1) │ └─{ := tok[0]\n" + "[ 1, end) └─expr := IDENTIFIER =#1\n" + "[ 1, end) └─IDENTIFIER := tok[1]\n"); } TEST_F(GLRTest, GLRReduceOrder) { @@ -419,16 +431,17 @@ )bnf"); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions); - auto LRTable = 
LRTable::buildSLR(G); + TestLang.Table = LRTable::buildSLR(TestLang.G); const ForestNode &Parsed = - glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test")); - EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := \n" - "[ 0, end) ├─test := IDENTIFIER\n" - "[ 0, end) │ └─IDENTIFIER := tok[0]\n" - "[ 0, end) └─test := foo\n" - "[ 0, end) └─foo := IDENTIFIER\n" - "[ 0, end) └─IDENTIFIER := tok[0]\n"); + glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); + EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), + "[ 0, end) test := \n" + "[ 0, end) ├─test := IDENTIFIER\n" + "[ 0, end) │ └─IDENTIFIER := tok[0]\n" + "[ 0, end) └─test := foo\n" + "[ 0, end) └─foo := IDENTIFIER\n" + "[ 0, end) └─IDENTIFIER := tok[0]\n"); } TEST_F(GLRTest, NoExplicitAccept) { @@ -443,14 +456,15 @@ // of the nonterminal `test` when the next token is `eof`, verify that the // parser stops at the right state. const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(G); + TestLang.Table = LRTable::buildSLR(TestLang.G); const ForestNode &Parsed = - glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test")); - EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := IDENTIFIER test\n" - "[ 0, 1) ├─IDENTIFIER := tok[0]\n" - "[ 1, end) └─test := IDENTIFIER\n" - "[ 1, end) └─IDENTIFIER := tok[1]\n"); + glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test")); + EXPECT_EQ(Parsed.dumpRecursive(TestLang.G), + "[ 0, end) test := IDENTIFIER test\n" + "[ 0, 1) ├─IDENTIFIER := tok[0]\n" + "[ 1, end) └─test := IDENTIFIER\n" + "[ 1, end) └─IDENTIFIER := tok[1]\n"); } TEST(GSSTest, GC) {