[pseudo] Generate the C++ grammar and LR table at build time.

Adds the pseudo-cxx-gen tool (gen/CxxGen.cpp). It reads the BNF file given by
--grammar, parses it with Grammar::parseBNF, builds the table with
LRTable::buildSLR, and writes <output-dir>/<filename>.h and
<output-dir>/<filename>.cpp. The header declares an enum of the nonterminals
plus getGrammar()/getLRTable(); the .cpp defines both from literal tables.

gen/Cxx.cmake provides gen_cxx(grammar_file filename), which runs the tool as
a custom command. The grammar file is listed in DEPENDS so that editing the
.bnf re-runs the generator instead of leaving stale generated sources.

lib/CMakeLists.txt splits the library into clangPseudoBasic (grammar and LR
table code, also linked by the generator), clangPseudo, and clangPseudoCXX
(the generated Cxx.cpp). clangPseudoCXX links clangPseudoBasic because the
generated code constructs GrammarTable, Grammar and LRTable objects.

diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt --- a/clang-tools-extra/pseudo/CMakeLists.txt +++ b/clang-tools-extra/pseudo/CMakeLists.txt @@ -1,9 +1,12 @@ +set(CLANG_PSEUDO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + include_directories(include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) add_subdirectory(lib) add_subdirectory(tool) add_subdirectory(fuzzer) add_subdirectory(benchmarks) +add_subdirectory(gen) if(CLANG_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) diff --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_executable(pseudo-cxx-gen + CxxGen.cpp + ) + +target_link_libraries(pseudo-cxx-gen + PRIVATE + clangPseudoBasic + ) diff --git a/clang-tools-extra/pseudo/gen/Cxx.cmake b/clang-tools-extra/pseudo/gen/Cxx.cmake new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/Cxx.cmake @@ -0,0 +1,21 @@ +# Compiles the BNF grammar file, and produces a pair of files called +# ${filename}.h and ${filename}.cpp in the ${CLANG_PSEUDO_BINARY_DIR}. +function(gen_cxx grammar_file filename) + set(header_file ${CLANG_PSEUDO_BINARY_DIR}/${filename}.h) + set(cpp_file ${CLANG_PSEUDO_BINARY_DIR}/${filename}.cpp) + + add_custom_command(OUTPUT ${header_file} ${cpp_file} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-cxx-gen" + --grammar ${grammar_file} + --output-dir ${CLANG_PSEUDO_BINARY_DIR} + --filename ${filename} + COMMENT "Generating code for cxx grammar..." 
+ DEPENDS pseudo-cxx-gen ${grammar_file} + VERBATIM) + + set_source_files_properties(${header_file} PROPERTIES + GENERATED 1) + set_source_files_properties(${cpp_file} PROPERTIES + GENERATED 1) + +endfunction() diff --git a/clang-tools-extra/pseudo/gen/CxxGen.cpp b/clang-tools-extra/pseudo/gen/CxxGen.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/CxxGen.cpp @@ -0,0 +1,230 @@ +//===-- CxxGen.cpp - Compile BNF grammar and LR table ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Grammar.h" +#include "clang-pseudo/LRGraph.h" +#include "clang-pseudo/LRTable.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include + +using clang::pseudo::Grammar; +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; + +static opt + Grammar("grammar", desc("Parse and check a BNF grammar file."), init("")); +static opt + Filename("filename", desc("Output file name (without file extension)"), + init("Cxx")); +static opt OutputDir("output-dir", desc("Output directory"), + init("")); + +static std::string readOrDie(llvm::StringRef Path) { + llvm::ErrorOr> Text = + llvm::MemoryBuffer::getFile(Path); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Error: can't read grammar file '" << Path + << "': " << EC.message() << "\n"; + ::exit(1); + } + return Text.get()->getBuffer().str(); +} + +static std::string genHeaderCode(const clang::pseudo::Grammar &G, + llvm::StringRef Filename) { + std::vector NonterminalEnums; + NonterminalEnums.reserve(G.table().Nonterminals.size()); + for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size(); 
++ID) { + std::string Name = G.symbolName(ID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + NonterminalEnums.push_back(llvm::formatv(" {0} = {1}", Name, ID)); + } + std::string HeaderGuard = + llvm::formatv("GENERATED_CLANG_PSEUDO_{0}_H", Filename); + return llvm::formatv(R"cpp( +#ifndef {0} +#define {0} + +#include "clang-pseudo/Grammar.h" +#include "llvm/Support/Compiler.h" + +namespace clang { +namespace pseudo { +class LRTable; +namespace cxx { + +enum Symbol : SymbolID { +{1} +}; + +const Grammar& getGrammar(); +const LRTable& getLRTable(); + +} // namespace cxx +} // namespace pseudo +} // namespace clang + +#endif // {0})cpp", + HeaderGuard, llvm::join(NonterminalEnums, ",\n")); +} + +template +std::string genericJoin(const Container &C, llvm::StringRef Separator) { + std::vector Strings; + for (const auto &E : C) + Strings.push_back(llvm::formatv("{0}", E)); + return llvm::join(Strings, Separator); +} + +static std::string genCppCode(const clang::pseudo::Grammar &G, + llvm::StringRef Filename) { + auto ToNames = [&](llvm::ArrayRef Syms) { + std::vector Names; + for (auto SID : Syms) + Names.push_back(llvm::formatv("/*{0}*/{1}", G.symbolName(SID), SID)); + return Names; + }; + std::vector Rules; + for (const auto &R : G.table().Rules) { + Rules.push_back(llvm::formatv(" { /*{0}*/{1}, /*Seq=*/{ {2} } }", + G.symbolName(R.Target), R.Target, + llvm::join(ToNames(R.seq()), ", "))); + } + + std::vector Nonterminals; + for (const auto &NT : G.table().Nonterminals) { + Nonterminals.push_back( + llvm::formatv(" { \"{0}\", {/*Start*/{1}, /*End*/{2} } }", NT.Name, + NT.RuleRange.Start, NT.RuleRange.End)); + } + std::vector Terminals; + for (const auto &T : G.table().Terminals) { + Terminals.push_back(llvm::formatv(" \"{0}\"", T)); + } + + auto LRTable = clang::pseudo::LRTable::buildSLR(G); + + std::string LRNontermOffset = genericJoin(LRTable.NontermOffset, ", "); + std::string LRTermOffsetCode = 
genericJoin(LRTable.TerminalOffset, ", "); + std::string LRStates = genericJoin(LRTable.States, ", "); + std::vector LRActions; + for (const auto &Action : LRTable.Actions) { + switch (Action.kind()) { + case clang::pseudo::LRTable::Action::Shift: + LRActions.push_back( + llvm::formatv("Action::shift({0})", Action.getShiftState())); + break; + case clang::pseudo::LRTable::Action::Reduce: + LRActions.push_back( + llvm::formatv("Action::reduce({0})", Action.getReduceRule())); + break; + case clang::pseudo::LRTable::Action::Accept: + // FIXME: use a real RID here + LRActions.push_back(llvm::formatv("Action::accept(0)")); + break; + case clang::pseudo::LRTable::Action::GoTo: + LRActions.push_back( + llvm::formatv("Action::goTo({0})", Action.getGoToState())); + break; + default: + assert(false); + break; + } + } + std::vector LRStartStates; + for (const auto &SA : LRTable.StartStates) { + LRStartStates.push_back(llvm::formatv( + "{ /*SymbolID*/{0}, /*StartState*/{1} }", SA.first, SA.second)); + } + return llvm::formatv( + R"cpp(#include + +#include "{0}.h" +#include "clang-pseudo/Grammar.h" +#include "clang-pseudo/LRTable.h" + +namespace clang { +namespace pseudo { + +namespace cxx { + +const Grammar& getGrammar() { + static GrammarTable* Table = new GrammarTable({ + { // Rules +{1} + }, // Rules + { // Nonterminals +{2} + }, // Nonterminals + { // Terminals +{3} + } // Terminals + }); + static Grammar* G = new Grammar(std::unique_ptr(Table)); + return *G; +} + +const LRTable& getLRTable() { + using Action = LRTable::Action; + static LRTable* Table = new LRTable({ + /*NontermOffset=*/{ {4} }, + /*TermOffset=*/{ {5} }, + /*States=*/{ {6} }, + /*Actions=*/{ {7} }, + /*StartStates=*/{ {8} }, + }); + return *Table; +} + +} // namespace cxx +} // namespace pseudo +} // namespace clang +)cpp", + Filename, llvm::join(Rules, ",\n"), llvm::join(Nonterminals, ", "), + llvm::join(Terminals, ", "), LRNontermOffset, LRTermOffsetCode, LRStates, + llvm::join(LRActions, ", "), 
llvm::join(LRStartStates, ", ")); +} + +void writeFile(llvm::StringRef Filepath, llvm::StringRef Content) { + std::error_code EC; + llvm::raw_fd_ostream FD(llvm::StringRef(Filepath), EC); + if (EC) { + llvm::errs() << "Failed to open file: " << Filepath << ": " << EC.message() << "\n"; + exit(1); + } + FD << Content; +} + +int main(int argc, char *argv[]) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + if (!Grammar.getNumOccurrences()) { + llvm::errs() << "Grammar file must be provided!\n"; + return 1; + } + + std::string GrammarText = readOrDie(Grammar); + std::vector Diags; + auto G = Grammar::parseBNF(GrammarText, Diags); + + if (!Diags.empty()) { + llvm::errs() << llvm::join(Diags, "\n"); + return 1; + } + + std::string HeaderPath = llvm::formatv("{0}/{1}.h", OutputDir, Filename); + std::string CppPath = llvm::formatv("{0}/{1}.cpp", OutputDir, Filename); + writeFile(HeaderPath, genHeaderCode(*G, Filename)); + writeFile(CppPath, genCppCode(*G, Filename)); + return 0; +} diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h @@ -153,8 +153,6 @@ // It can be constructed dynamically (from compiling BNF file) or statically // (a compiled data-source). struct GrammarTable { - GrammarTable(); - struct Nonterminal { std::string Name; // Corresponding rules that construct the nonterminal, it is a [Start, End) @@ -164,6 +162,11 @@ RuleID End; } RuleRange; }; + GrammarTable(); + GrammarTable(std::vector Rules, std::vector Nonterminals, + llvm::ArrayRef Terminals) + : Rules(std::move(Rules)), Terminals(Terminals), + Nonterminals(std::move(Nonterminals)) {} // RuleID is an index into this table of rule definitions. 
// diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h @@ -165,7 +165,6 @@ // Build a specifid table for testing purposes. static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef); -private: // Conceptually the LR table is a multimap from (State, SymbolID) => Action. // Our physical representation is quite different for compactness. diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,6 +1,11 @@ set(LLVM_LINK_COMPONENTS Support) -add_clang_library(clangPseudo +include(${CMAKE_CURRENT_SOURCE_DIR}/../gen/Cxx.cmake) +set(CXX_GRAMMAR ${CMAKE_CURRENT_LIST_DIR}/cxx.bnf) +gen_cxx(${CXX_GRAMMAR} "Cxx") + +# Needed by LLVM's CMake checks because this file defines multiple targets. +set(LLVM_OPTIONAL_SOURCES DirectiveTree.cpp Forest.cpp GLR.cpp @@ -11,8 +16,35 @@ LRTable.cpp LRTableBuild.cpp Token.cpp + ) + +add_clang_library(clangPseudoBasic + Grammar.cpp + GrammarBNF.cpp + LRGraph.cpp + LRTable.cpp + LRTableBuild.cpp + + LINK_LIBS + clangBasic + ) + +add_clang_library(clangPseudo + DirectiveTree.cpp + Forest.cpp + GLR.cpp + Lex.cpp + Token.cpp LINK_LIBS clangBasic clangLex + clangPseudoBasic ) + +add_clang_library(clangPseudoCXX + ${CLANG_PSEUDO_BINARY_DIR}/Cxx.cpp + LINK_LIBS + clangBasic + clangPseudoBasic +)