diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt
--- a/clang-tools-extra/pseudo/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/CMakeLists.txt
@@ -1,5 +1,7 @@
 include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
+add_subdirectory(include)
+add_subdirectory(gen)
 add_subdirectory(lib)
 add_subdirectory(tool)
 add_subdirectory(fuzzer)
diff --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_clang_executable(pseudo-gen
+  Main.cpp
+  )
+
+target_link_libraries(pseudo-gen
+  PRIVATE
+  clangPseudoGrammar
+  )
diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -0,0 +1,89 @@
+//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a tool to compile a BNF grammar; it is used by the build system to
+// generate necessary data bits to statically construct core pieces (Grammar,
+// LRTable etc) of the LR parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Grammar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+using llvm::cl::values;
+
+namespace {
+enum EmitType {
+  EmitSymbolList,
+  EmitGrammarContent,
+};
+
+opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
+                         init(""));
+opt<EmitType>
+    Emit(desc("which information to emit:"),
+         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
+                           "Print nonterminal symbols (default)"),
+                clEnumValN(EmitGrammarContent, "emit-grammar-content",
+                           "Print the BNF grammar content as a string")));
+// Reads the whole file at Path; on failure prints an error and exits with 1.
+std::string readOrDie(llvm::StringRef Path) {
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+      llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code EC = Text.getError()) {
+    llvm::errs() << "Error: can't read grammar file '" << Path
+                 << "': " << EC.message() << "\n";
+    ::exit(1);
+  }
+  return Text.get()->getBuffer().str();
+}
+} // namespace
+
+int main(int argc, char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+  if (!Grammar.getNumOccurrences()) {
+    llvm::errs() << "Grammar file must be provided!\n";
+    return 1;
+  }
+
+  std::string GrammarText = readOrDie(Grammar);
+  std::vector<std::string> Diags;
+  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
+
+  if (!Diags.empty()) {
+    llvm::errs() << llvm::join(Diags, "\n") << "\n";
+    return 1;
+  }
+  switch (Emit) {
+  case EmitSymbolList: // One NONTERMINAL(name, id) line per nonterminal.
+    for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
+         ++ID) {
+      std::string Name = G->symbolName(ID).str();
+      // translation-unit -> translation_unit
+      std::replace(Name.begin(), Name.end(), '-', '_');
+      llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID));
+    }
+    break;
+  case EmitGrammarContent: // One C string literal per grammar line.
+    for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
+      llvm::outs() << '"';
+      llvm::outs().write_escaped((Line + "\n").str());
+      llvm::outs() << "\"\n";
+    }
+    break;
+  }
+
+  return 0;
+}
diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/CMakeLists.txt
@@ -0,0 +1,31 @@
+# The cxx.bnf grammar file
+set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf)
+
+# Generate inc files.
+# Each command lists ${cxx_bnf} in DEPENDS so that editing the grammar
+# regenerates the outputs, not only a rebuild of pseudo-gen.
+set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc)
+add_custom_command(OUTPUT ${cxx_symbols_inc}
+   COMMAND $<TARGET_FILE:pseudo-gen>
+     --grammar ${cxx_bnf}
+     --emit-symbol-list
+     > ${cxx_symbols_inc}
+   COMMENT "Generating nonterminal symbol file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf}
+   VERBATIM)
+
+set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc)
+add_custom_command(OUTPUT ${cxx_bnf_inc}
+   COMMAND $<TARGET_FILE:pseudo-gen>
+     --grammar ${cxx_bnf}
+     --emit-grammar-content
+     > ${cxx_bnf_inc}
+   COMMENT "Generating bnf string file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf}
+   VERBATIM)
+
+# add_custom_command does not create a new target; we need to define a target
+# explicitly, so that other targets can depend on it.
+add_custom_target(cxx_gen
+    DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc}
+    VERBATIM)
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -0,0 +1,51 @@
+//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines public interfaces for the C++ grammar
+// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces
+// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar
+// file at runtime.
+//
+// We do a compilation of the C++ BNF grammar at build time, and generate
+// critical data sources. The implementation of the interfaces is based on the
+// generated data sources.
+//
+// FIXME: not everything is fully compiled yet. The implementation of the
+// interfaces still parses the grammar file at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CXX_CXX_H
+#define CLANG_PSEUDO_CXX_CXX_H
+
+#include "clang-pseudo/Grammar.h"
+
+namespace clang {
+namespace pseudo {
+class LRTable;
+
+namespace cxx {
+// Symbol represents nonterminal symbols in the C++ grammar.
+// It provides a simple uniform way to access a particular nonterminal.
+enum class Symbol : SymbolID {
+#define NONTERMINAL(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef NONTERMINAL
+};
+
+// Returns the C++ grammar.
+const Grammar &getGrammar();
+// Returns the corresponding LRTable for the C++ grammar.
+const LRTable &getLRTable();
+
+} // namespace cxx
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CXX_CXX_H
diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,6 @@
+add_subdirectory(cxx)
+add_subdirectory(grammar)
+
 set(LLVM_LINK_COMPONENTS Support)
 
 add_clang_library(clangPseudo
@@ -5,15 +8,11 @@
   DirectiveTree.cpp
   Forest.cpp
   GLR.cpp
-  Grammar.cpp
-  GrammarBNF.cpp
   Lex.cpp
-  LRGraph.cpp
-  LRTable.cpp
-  LRTableBuild.cpp
   Token.cpp
 
   LINK_LIBS
   clangBasic
   clangLex
+  clangPseudoGrammar
   )
diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_clang_library(clangPseudoCXX
+  CXX.cpp
+
+  DEPENDS
+  cxx_gen
+
+  LINK_LIBS
+  clangPseudoGrammar
+  )
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -0,0 +1,34 @@
+//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+namespace cxx {
+// The grammar text, emitted at build time as adjacent string literals.
+static const char *CXXBNF =
+#include "CXXBNF.inc"
+    ;
+
+const Grammar &getGrammar() {
+  static std::vector<std::string> Diags;
+  static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release();
+  assert(Diags.empty()); // The embedded grammar must parse cleanly.
+  return *G;
+}
+
+const LRTable &getLRTable() {
+  static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
+  return *Table;
+}
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang
diff --git a/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
@@ -0,0 +1,18 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+# This library intends to keep its dependencies as minimal as possible; it is a
+# base library of the cxx generator, to avoid creating long dep paths in the
+# build graph.
+add_clang_library(clangPseudoGrammar
+  Grammar.cpp
+  GrammarBNF.cpp
+  LRGraph.cpp
+  LRTable.cpp
+  LRTableBuild.cpp
+
+  # FIXME: can we get rid of the clangBasic dependency? We need it for the
+  # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions; we
+  # could consider reimplementing these functions.
+  LINK_LIBS
+  clangBasic
+  )
diff --git a/clang-tools-extra/pseudo/lib/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
rename from clang-tools-extra/pseudo/lib/Grammar.cpp
rename to clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
diff --git a/clang-tools-extra/pseudo/lib/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
rename from clang-tools-extra/pseudo/lib/GrammarBNF.cpp
rename to clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
diff --git a/clang-tools-extra/pseudo/lib/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
rename from clang-tools-extra/pseudo/lib/LRGraph.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
diff --git a/clang-tools-extra/pseudo/lib/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
rename from clang-tools-extra/pseudo/lib/LRTable.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
diff --git a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
rename from clang-tools-extra/pseudo/lib/LRTableBuild.cpp
rename to clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp