diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt
--- a/clang-tools-extra/pseudo/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/CMakeLists.txt
@@ -1,5 +1,12 @@
+# Root of the pseudo build tree. Generated sources are written under its
+# include/ subdirectory (see gen/cxx_gen.cmake).
+set(CLANG_PSEUDO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+
 include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
+add_subdirectory(include)
+add_subdirectory(gen)
 add_subdirectory(lib)
 add_subdirectory(tool)
 add_subdirectory(fuzzer)
diff --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+# Build-time tool that turns a BNF grammar into generated .inc files.
+add_clang_executable(pseudo-gen
+  Main.cpp
+  )
+
+target_link_libraries(pseudo-gen
+  PRIVATE
+  clangPseudoGrammar
+  )
diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -0,0 +1,98 @@
+//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// pseudo-gen reads a BNF grammar file, checks that it parses, and emits
+// either its nonterminal symbol list or its text as a C++ raw string literal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Grammar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+using llvm::cl::values;
+
+namespace {
+// Selects what main() prints to stdout.
+enum EmitType {
+  EmitSymbolList,
+  EmitGrammarContent,
+};
+
+// Path of the BNF grammar file; main() fails if the flag is never given.
+opt<std::string> Grammar("grammar", desc("Parse and check a BNF grammar file."),
+                         init(""));
+// Unnamed option: the enumerators themselves are the command-line flags.
+opt<EmitType>
+    Emit(desc("which information to emit:"), init(EmitSymbolList),
+         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
+                           "Print nonterminal symbols (default)"),
+                clEnumValN(EmitGrammarContent, "emit-grammar-content",
+                           "Print the BNF grammar content as a string")));
+
+// Returns the contents of the file at Path. On a read failure, prints an error
+// to stderr and terminates the process with exit code 1.
+std::string readOrDie(llvm::StringRef Path) {
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+      llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code EC = Text.getError()) {
+    llvm::errs() << "Error: can't read grammar file '" << Path
+                 << "': " << EC.message() << "\n";
+    ::exit(1);
+  }
+  return Text.get()->getBuffer().str();
+}
+} // namespace
+
+// Parses the grammar named by --grammar and prints the requested output to
+// stdout. Returns 1 if no grammar was given or the grammar has diagnostics.
+int main(int argc, char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+  if (!Grammar.getNumOccurrences()) {
+    llvm::errs() << "Grammar file must be provided!\n";
+    return 1;
+  }
+
+  std::string GrammarText = readOrDie(Grammar);
+  std::vector<std::string> Diags;
+  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
+
+  if (!Diags.empty()) {
+    llvm::errs() << llvm::join(Diags, "\n");
+    return 1;
+  }
+  switch (Emit) {
+
+  case EmitSymbolList:
+    // One NONTERMINAL(name, id) line per nonterminal, for use with a macro.
+    for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
+         ++ID) {
+      std::string Name = G->symbolName(ID).str();
+      // translation-unit -> translation_unit
+      std::replace(Name.begin(), Name.end(), '-', '_');
+      llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID));
+    }
+    break;
+  case EmitGrammarContent:
+    // Wrap the grammar in a raw string literal so it can be #included as-is.
+    llvm::outs() << llvm::formatv("R\"bnf(\n{0})bnf\"\n", GrammarText);
+    break;
+  }
+
+  return 0;
+}
diff --git a/clang-tools-extra/pseudo/gen/cxx_gen.cmake b/clang-tools-extra/pseudo/gen/cxx_gen.cmake
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/gen/cxx_gen.cmake
@@ -0,0 +1,31 @@
+# The cxx.bnf grammar file
+set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf)
+
+# Generate inc files.
+# Each command also depends on the grammar file itself, so that editing cxx.bnf
+# regenerates the .inc files instead of leaving stale ones behind.
+set(cxx_symbols_inc ${CLANG_PSEUDO_BINARY_DIR}/include/CxxSymbols.inc)
+add_custom_command(OUTPUT ${cxx_symbols_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-symbol-list
+     > ${cxx_symbols_inc}
+   COMMENT "Generating nonterminal symbol file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf}
+   VERBATIM)
+
+set(cxx_bnf_inc ${CLANG_PSEUDO_BINARY_DIR}/include/CxxBNF.inc)
+add_custom_command(OUTPUT ${cxx_bnf_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-grammar-content
+     > ${cxx_bnf_inc}
+   COMMENT "Generating bnf string file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf}
+   VERBATIM)
+
+# add_custom_command does not create a new target, we need to define a target
+# explicitly, so that other targets can depend on it.
+add_custom_target(cxx_gen
+    DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc}
+    VERBATIM)
diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/CMakeLists.txt
@@ -0,0 +1,3 @@
+# This CMake file is intentionally empty: it exists so that CMake creates an
+# include directory in the build directory, which is the home for generated
+# source files.
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h
@@ -0,0 +1,55 @@
+//===--- cxx.h - Public interfaces for the C++ grammar -----------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines public interfaces for the C++ grammar
+// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces
+// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar
+// file at runtime.
+//
+// We do a compilation of the C++ BNF grammar at build time, and generate
+// critical data sources. The implementation of the interfaces is based on the
+// generated data sources.
+//
+// FIXME: not everything is fully compiled yet. The implementation of the
+// interfaces is still parsing the grammar file at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CXX_CXX_H
+#define CLANG_PSEUDO_CXX_CXX_H
+
+#include "clang-pseudo/Grammar.h"
+
+namespace clang {
+namespace pseudo {
+class LRTable;
+
+namespace cxx {
+// Symbol represents nonterminal symbols in the C++ grammar.
+// It provides a simple uniform way to access a particular nonterminal.
+// The enumerators come from the build-time generated CxxSymbols.inc; each is
+// named after a nonterminal of the grammar, with '-' replaced by '_'.
+enum Symbol : SymbolID {
+#define NONTERMINAL(X, Y) X = Y,
+#include "CxxSymbols.inc"
+#undef NONTERMINAL
+};
+
+// Returns the C++ grammar.
+// The grammar is parsed on the first call; later calls return the same object.
+const Grammar &getGrammar();
+// Returns the corresponding LRTable for the C++ grammar.
+// The table is built on the first call; later calls return the same object.
+const LRTable &getLRTable();
+
+} // namespace cxx
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CXX_CXX_H
diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_clang_library(clangPseudo
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
   DirectiveTree.cpp
   Forest.cpp
   GLR.cpp
@@ -11,8 +12,42 @@
   LRTable.cpp
   LRTableBuild.cpp
   Token.cpp
+  )
+
+add_clang_library(clangPseudoGrammar
+  Grammar.cpp
+  GrammarBNF.cpp
+  LRGraph.cpp
+  LRTable.cpp
+  LRTableBuild.cpp
+
+  # FIXME: can we get rid of the clangBasic dependency? We need it for the
+  # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions, we
+  # could consider reimplementing these functions.
+  LINK_LIBS
+  clangBasic
+  )
+
+add_clang_library(clangPseudo
+  DirectiveTree.cpp
+  Forest.cpp
+  GLR.cpp
+  Lex.cpp
+  Token.cpp
 
   LINK_LIBS
   clangBasic
   clangLex
+  clangPseudoGrammar
+  )
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../gen/cxx_gen.cmake)
+add_clang_library(clangPseudoCxx
+  cxx/cxx.cpp
+
+  DEPENDS
+  cxx_gen
+
+  LINK_LIBS
+  clangPseudoGrammar
   )
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.cpp b/clang-tools-extra/pseudo/lib/cxx/cxx.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.cpp
@@ -0,0 +1,44 @@
+//===--- cxx.cpp - Define public interfaces for C++ grammar ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cxx/cxx.h"
+#include "clang-pseudo/LRTable.h"
+#include <cassert>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace pseudo {
+namespace cxx {
+
+// Text of the C++ BNF grammar. CxxBNF.inc is generated at build time and
+// consists of a single raw string literal.
+static const char *CxxBNF =
+#include "CxxBNF.inc"
+    ;
+
+// Parses the embedded grammar once; later calls return the same object.
+const Grammar &getGrammar() {
+  static std::vector<std::string> Diags;
+  static std::unique_ptr<Grammar> G = Grammar::parseBNF(CxxBNF, Diags);
+  // pseudo-gen exits with an error for a grammar that has diagnostics.
+  assert(Diags.empty());
+  return *G;
+}
+
+// Builds the SLR table from getGrammar() once; later calls return the same
+// object.
+const LRTable &getLRTable() {
+  static LRTable Table = LRTable::buildSLR(getGrammar());
+  return Table;
+}
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang